From 1f3fcd85a0973fdfa0aa74ea0d31ebf13a218bba Mon Sep 17 00:00:00 2001
From: Matthieu Gautier <mgautier@kymeria.fr>
Date: Thu, 15 Mar 2018 15:27:13 +0100
Subject: [PATCH 1/4] Allow us to declare method to be deprecated.

---
 include/common.h    | 24 ++++++++++++++++++++++++
 include/meson.build |  1 +
 2 files changed, 25 insertions(+)
 create mode 100644 include/common.h
diff --git a/include/common.h b/include/common.h
new file mode 100644
index 000000000..5df556e9d
--- /dev/null
+++ b/include/common.h
@@ -0,0 +1,24 @@
+
+#ifndef _KIWIX_COMMON_H_
+#define _KIWIX_COMMON_H_
+
+#include <zim/zim.h>
+
+#ifdef __GNUC__  // GNU-compatible compilers: attribute syntax
+#define DEPRECATED __attribute__((deprecated))
+#elif defined(_MSC_VER)  // MSVC: __declspec syntax
+#define DEPRECATED __declspec(deprecated)
+#else  // unknown compiler: emit a build-time note and make DEPRECATED expand to nothing
+#pragma message("WARNING: You need to implement DEPRECATED for this compiler")
+#define DEPRECATED
+#endif
+
+
+namespace kiwix {
+
+using size_type = zim::size_type;      // alias of libzim's size type
+using offset_type = zim::offset_type;  // alias of libzim's offset type
+
+}  // namespace kiwix
+
+#endif //_KIWIX_COMMON_H_
diff --git a/include/meson.build b/include/meson.build
index a3d6b1ea1..2e376c617 100644
--- a/include/meson.build
+++ b/include/meson.build
@@ -1,4 +1,5 @@
 headers = [
+  'common.h',
   'library.h',
   'manager.h',
   'opds_dumper.h',

From 135028c16a835df640ca835a99255ce8e53d781c Mon Sep 17 00:00:00 2001
From: Matthieu Gautier <mgautier@kymeria.fr>
Date: Thu, 15 Mar 2018 15:35:59 +0100
Subject: [PATCH 2/4] Introduce better API to manipulate entries in a zim file.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The previous API suffered from several problems:
- It was difficult to handle articles redirecting to another article.
- It was not possible to get some information (e.g. the title) without
  getting the whole content.

The new API introduces the new class `Entry` that acts as a proxy to an
article in the zim file.

Methods of `Reader` now return an `Entry` and the user has to call
`Entry`'s methods to get useful information.
No redirection is followed implicitly; the user resolves it explicitly.
If an entry is not found, an exception is raised instead of returning
an invalid `Entry`.

The common pattern to get the content of an entry becomes:

```
std::string content;
try {
  auto entry = reader.getEntryFromPath(path);
  entry = entry.getFinalEntry();
  content = entry.getContent();
} catch (NoEntry& e) {
  ...
}
```

Older methods are kept (with the same behavior) but are marked as
deprecated.
---
 include/entry.h             | 191 ++++++++++++++++++++
 include/meson.build         |   1 +
 include/reader.h            | 154 +++++++++++-----
 src/android/kiwixreader.cpp |  80 +++------
 src/entry.cpp               | 138 +++++++++++++++
 src/meson.build             |   1 +
 src/reader.cpp              | 345 ++++++++++++++++++++++--------------
 src/xapianSearcher.cpp      |   9 +-
 8 files changed, 684 insertions(+), 235 deletions(-)
 create mode 100644 include/entry.h
 create mode 100644 src/entry.cpp

diff --git a/include/entry.h b/include/entry.h
new file mode 100644
index 000000000..af9067e29
--- /dev/null
+++ b/include/entry.h
@@ -0,0 +1,191 @@
+/*
+ * Copyright 2018 Matthieu Gautier <mgautier@kymeria.fr>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU  General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#ifndef KIWIX_ENTRY_H
+#define KIWIX_ENTRY_H
+
+#include <stdio.h>
+#include <zim/article.h>
+#include <exception>
+#include <string>
+#include "common.h"
+
+using namespace std;
+
+namespace kiwix
+{
+
+
+class NoEntry : public std::exception {};  // thrown when a requested entry is missing or invalid
+
+/**
+ * An Entry represents one entry of a zim file.
+ */
+class Entry
+{
+  public:
+    /**
+     * Default constructor.
+     *
+     * Construct an invalid entry.
+     */
+    Entry() = default;
+
+    /**
+     * Construct an entry making reference to a zim article.
+     *
+     * @param article The zim article this entry refers to.
+     */
+    Entry(zim::Article article);
+    virtual ~Entry() = default;
+
+    /**
+     * Get the path of the entry.
+     *
+     * The path is the "key" of an entry.
+     *
+     * @return the path of the entry.
+     */
+    std::string getPath() const;
+    
+    /**
+     * Get the title of the entry.
+     *
+     * @return the title of the entry.
+     */
+    std::string getTitle() const;
+        
+    /**
+     * Get the content of the entry.
+     *
+     * The string is a copy of the content.
+     * If you don't want to do a copy, use getBlob.
+     *
+     * @return the content of the entry.
+     */
+    std::string getContent() const;
+        
+    /**
+     * Get the blob of the entry.
+     *
+     * A blob makes reference to the content without copying it.
+     *
+     * @param offset The starting offset of the blob.
+     * @return the blob of the entry.
+     */
+    zim::Blob   getBlob(offset_type offset = 0) const;
+        
+    /**
+     * Get the blob of the entry.
+     *
+     * A blob makes reference to the content without copying it.
+     *
+     * @param offset The starting offset of the blob.
+     * @param size The size of the blob.
+     * @return the blob of the entry.
+     */
+    zim::Blob   getBlob(offset_type offset, size_type size) const;
+        
+    /**
+     * Get the info for direct access to the content of the entry.
+     *
+     * Some entries (i.e. binary ones) have their content stored plain
+     * in the zim file. Knowing the offset where the content is stored,
+     * a user can directly read the content in the zim file, bypassing
+     * kiwix-lib/libzim.
+     *
+     * @return A pair specifying where to read the content.
+     *         The string is the real file to read (may be different than the
+     *         .zim file if the zim is cut into parts).
+     *         The offset is the offset to read in the file.
+     *         Return <"",0> if it is not possible to read directly.
+     */
+    std::pair<std::string, offset_type> getDirectAccessInfo() const;
+        
+    /**
+     * Get the size of the entry.
+     *
+     * @return the size of the entry.
+     */
+    size_type   getSize() const;
+
+    /**
+     * Get the mime_type of the entry.
+     *
+     * @return the mime_type of the entry.
+     */
+    std::string getMimetype() const;
+    
+    
+    /**
+     * Tell whether the entry is a redirect entry.
+     *
+     * @return True if the entry is a redirect.
+     */
+    bool isRedirect() const;
+
+    /**
+     * Tell whether the entry is a link target entry.
+     *
+     * @return True if the entry is a link target.
+     */
+    bool isLinkTarget() const;
+
+    /**
+     * Tell whether the entry is a deleted entry.
+     *
+     * @return True if the entry is a deleted entry.
+     */
+    bool isDeleted() const;
+
+    /**
+     * Get the entry pointed by this entry.
+     *
+     * @return the entry pointed.
+     * @throw NoEntry if the entry is not a redirect or if its target is invalid.
+     */
+    Entry getRedirectEntry() const;
+
+    /**
+     * Get the final entry pointed by this entry.
+     *
+     * Follow the redirections until a "not redirecting" entry is found.
+     * If the entry is not a redirect entry, return the entry itself.
+     *
+     * @return the final entry.
+     */
+    Entry getFinalEntry() const;
+
+    /**
+     * Convert the entry to a boolean value.
+     *
+     * @return True if the entry is valid.
+     */
+    explicit operator bool() const { return good(); }
+
+  private:
+    zim::Article article;  // the zim article this entry refers to
+    mutable zim::Article final_article;  // redirect target cached by getFinalEntry(), hence mutable
+
+    bool good() const { return article.good(); }  // validity is the validity of the wrapped article
+};
+
+}  // namespace kiwix
+
+#endif // KIWIX_ENTRY_H
diff --git a/include/meson.build b/include/meson.build
index 2e376c617..1aca8b9ab 100644
--- a/include/meson.build
+++ b/include/meson.build
@@ -5,6 +5,7 @@ headers = [
   'opds_dumper.h',
   'downloader.h',
   'reader.h',
+  'entry.h',
   'searcher.h'
 ]
 
diff --git a/include/reader.h b/include/reader.h
index d522c6923..52f8c0ec2 100644
--- a/include/reader.h
+++ b/include/reader.h
@@ -29,6 +29,8 @@
 #include <map>
 #include <sstream>
 #include <string>
+#include "common.h"
+#include "entry.h"
 #include "common/pathTools.h"
 #include "common/stringTools.h"
 
@@ -38,7 +40,7 @@ namespace kiwix
 {
 
 /**
- * The Reader class is the class who allow to get an article content from a zim
+ * The Reader class is the class who allow to get an entry content from a zim
  * file.
  */
 class Reader
@@ -57,11 +59,11 @@ class Reader
   ~Reader();
 
   /**
-   * Get the number of "displayable" articles in the zim file.
+   * Get the number of "displayable" entries in the zim file.
    *
    * @return If the zim file has a /M/Counter metadata, return the number of
-   *         articles with the 'text/html' MIMEtype specified in the metadata.
-   *         Else return the number of articles in the 'A' namespace.
+   *         entries with the 'text/html' MIMEtype specified in the metadata.
+   *         Else return the number of entries in the 'A' namespace.
    */
   unsigned int getArticleCount() const;
 
@@ -69,16 +71,16 @@ class Reader
    * Get the number of media in the zim file.
    *
    * @return If the zim file has a /M/Counter metadata, return the number of
-   *         articles with the 'image/jpeg', 'image/gif' and 'image/png' in
+   *         entries with the 'image/jpeg', 'image/gif' and 'image/png' in
    *         the metadata.
-   *         Else return the number of articles in the 'I' namespace.
+   *         Else return the number of entries in the 'I' namespace.
    */
   unsigned int getMediaCount() const;
 
   /**
-   * Get the number of all articles in the zim file.
+   * Get the number of all entries in the zim file.
    *
-   * @return Return the number of all the articles, whatever their MIMEtype or
+   * @return Return the number of all the entries, whatever their MIMEtype or
    *         their namespace.
    */
   unsigned int getGlobalCount() const;
@@ -100,25 +102,54 @@ class Reader
   /**
    * Get the url of a random page.
    *
-   * @return Url of a random page. The page is picked from all articles in
+   * Deprecated : Use `getRandomPage` instead.
+   *
+   * @return Url of a random page. The page is picked from all entries in
    *         the 'A' namespace.
    *         The main page is excluded from the potential results.
    */
-  string getRandomPageUrl() const;
+  DEPRECATED string getRandomPageUrl() const;
+
+  /**
+   * Get a random page.
+   *
+   * @return A random Entry. The entry is picked from all entries in
+   *         the 'A' namespace.
+   *         The main entry is excluded from the potential results.
+   */
+  Entry getRandomPage() const;
 
   /**
    * Get the url of the first page.
    *
-   * @return Url of the first article in the 'A' namespace.
+   * Deprecated : Use `getFirstPage` instead.
+   *
+   * @return Url of the first entry in the 'A' namespace.
    */
-  string getFirstPageUrl() const;
+  DEPRECATED string getFirstPageUrl() const;
+
+  /**
+   * Get the entry of the first page.
+   *
+   * @return The first entry in the 'A' namespace.
+   */
+  Entry getFirstPage() const;
 
   /**
    * Get the url of the main page.
    *
+   * Deprecated : Use `getMainPage` instead.
+   *
    * @return Url of the main page as specified in the zim file.
    */
-  string getMainPageUrl() const;
+  DEPRECATED string getMainPageUrl() const;
+
+  /**
+   * Get the entry of the main page.
+   *
+   * @return Entry of the main page as specified in the zim file.
+   */
+  Entry getMainPage() const;
 
   /**
    * Get the content of a metadata.
@@ -207,6 +238,35 @@ class Reader
    */
   bool getFavicon(string& content, string& mimeType) const;
 
+  /**
+   * Get the entry associated with a path.
+   *
+   * @param path The path of the entry.
+   * @return The entry.
+   * @throw NoEntry If no entry corresponds to the path.
+   */
+  Entry getEntryFromPath(const std::string& path) const;
+
+  /**
+   * Get the entry associated with a url-encoded path.
+   *
+   * Equivalent to `getEntryFromPath(urlDecode(path));`
+   *
+   * @param path The url-encoded path.
+   * @return The entry.
+   * @throw NoEntry If no entry corresponds to the path.
+   */
+  Entry getEntryFromEncodedPath(const std::string& path) const;
+
+  /**
+   * Get the entry associated with a title.
+   *
+   * @param title The title.
+   * @return The entry.
+   * @throw NoEntry If no entry corresponds to the title.
+   */
+  Entry getEntryFromTitle(const std::string& title) const;
+
   /**
    * Get the url of a page specified by a title.
    *
@@ -214,34 +274,34 @@ class Reader
    * @param[out] url the url of the page.
    * @return True if the page can be found.
    */
-  bool getPageUrlFromTitle(const string& title, string& url) const;
+  DEPRECATED bool getPageUrlFromTitle(const string& title, string& url) const;
 
   /**
-   * Get the mimetype of a article specified by a url.
+   * Get the mimetype of an entry specified by a url.
    *
-   * @param[in] url the url of the article.
-   * @param[out] mimetype the mimeType of the article.
+   * @param[in] url the url of the entry.
+   * @param[out] mimetype the mimeType of the entry.
    * @return True if the mimeType has been found.
    */
-  bool getMimeTypeByUrl(const string& url, string& mimeType) const;
+  DEPRECATED bool getMimeTypeByUrl(const string& url, string& mimeType) const;
 
   /**
-   * Get the content of an article specifed by a url.
+   * Get the content of an entry specified by a url.
    *
    * Alias to `getContentByEncodedUrl`
    */
-  bool getContentByUrl(const string& url,
+  DEPRECATED bool getContentByUrl(const string& url,
                        string& content,
                        string& title,
                        unsigned int& contentLength,
                        string& contentType) const;
 
   /**
-   * Get the content of an article specified by a url encoded url.
+   * Get the content of an entry specified by a url encoded url.
    *
    * Equivalent to getContentByDecodedUrl(urlDecode(url), ...).
    */
-  bool getContentByEncodedUrl(const string& url,
+  DEPRECATED bool getContentByEncodedUrl(const string& url,
                               string& content,
                               string& title,
                               unsigned int& contentLength,
@@ -249,48 +309,48 @@ class Reader
                               string& baseUrl) const;
 
   /**
-   * Get the content of an article specified by an url encoded url.
+   * Get the content of an entry specified by an url encoded url.
    *
    * Equivalent to getContentByEncodedUrl but without baseUrl.
    */
-  bool getContentByEncodedUrl(const string& url,
+  DEPRECATED bool getContentByEncodedUrl(const string& url,
                               string& content,
                               string& title,
                               unsigned int& contentLength,
                               string& contentType) const;
 
   /**
-   * Get the content of an article specified by a url.
+   * Get the content of an entry specified by a url.
    *
-   * @param[in] url The url of the article.
-   * @param[out] content The content of the article.
-   * @param[out] title the title of the article.
-   * @param[out] contentLength The size of the article (size of content).
-   * @param[out] contentType The mimeType of the article.
-   * @param[out] baseUrl Return the true url of the article.
-   *                     If the specified article is a redirection, contains
-   *                     the url of the targeted article.
-   * @return True if the article has been found.
+   * @param[in] url The url of the entry.
+   * @param[out] content The content of the entry.
+   * @param[out] title the title of the entry.
+   * @param[out] contentLength The size of the entry (size of content).
+   * @param[out] contentType The mimeType of the entry.
+   * @param[out] baseUrl Return the true url of the entry.
+   *                     If the specified entry is a redirection, contains
+   *                     the url of the targeted entry.
+   * @return True if the entry has been found.
    */
-  bool getContentByDecodedUrl(const string& url,
+  DEPRECATED bool getContentByDecodedUrl(const string& url,
                               string& content,
                               string& title,
                               unsigned int& contentLength,
                               string& contentType,
                               string& baseUrl) const;
   /**
-   * Get the content of an article specified by a url.
+   * Get the content of an entry specified by a url.
    *
    * Equivalent to getContentByDecodedUrl but withou the baseUrl.
    */
-  bool getContentByDecodedUrl(const string& url,
+  DEPRECATED bool getContentByDecodedUrl(const string& url,
                               string& content,
                               string& title,
                               unsigned int& contentLength,
                               string& contentType) const;
 
   /**
-   * Search for articles with title starting with prefix (case sensitive).
+   * Search for entries with title starting with prefix (case sensitive).
    *
    * Suggestions are stored in an internal vector and can be retrieved using
    * `getNextSuggestion` method.
@@ -308,7 +368,7 @@ class Reader
                          const bool reset = true);
 
   /**
-   * Search for articles for the given prefix.
+   * Search for entries for the given prefix.
    *
    * If the zim file has a internal fulltext index, the suggestions will be
    * searched using it.
@@ -328,10 +388,20 @@ class Reader
   /**
    * Check if the url exists in the zim file.
    *
+   * Deprecated : Use `pathExists` instead.
+   *
    * @param url the url to check.
    * @return True if the url exits in the zim file.
    */
-  bool urlExists(const string& url) const;
+  DEPRECATED bool urlExists(const string& url) const;
+
+  /**
+   * Check if the path exists in the zim file.
+   *
+   * @param path the path to check.
+   * @return True if the path exists in the zim file.
+   */
+  bool pathExists(const string& path) const;
 
   /**
    * Check if the zim file has a embedded fulltext index.
@@ -388,7 +458,7 @@ class Reader
    * @param[out] title The url (url).
    * @return True
    */
-  bool parseUrl(const string& url, char* ns, string& title) const;
+  DEPRECATED bool parseUrl(const string& url, char* ns, string& title) const;
 
   /**
    * Return the total size of the zim file.
@@ -413,7 +483,7 @@ class Reader
    * @param[out] article The libzim article object.
    * @return True if the url is good (article.good()).
    */
-  bool getArticleObjectByDecodedUrl(const string& url,
+  DEPRECATED bool getArticleObjectByDecodedUrl(const string& url,
                                     zim::Article& article) const;
 
  protected:
diff --git a/src/android/kiwixreader.cpp b/src/android/kiwixreader.cpp
index ba3e562f4..769f25286 100644
--- a/src/android/kiwixreader.cpp
+++ b/src/android/kiwixreader.cpp
@@ -60,7 +60,7 @@ Java_org_kiwix_kiwixlib_JNIKiwixReader_getMainPage(JNIEnv* env, jobject obj)
   jstring url;
 
   try {
-    std::string cUrl = READER->getMainPageUrl();
+    std::string cUrl = READER->getMainPage().getPath();
     url = c2jni(cUrl, env);
   } catch (...) {
     std::cerr << "Unable to get ZIM main page" << std::endl;
@@ -196,8 +196,8 @@ JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwixReader_getMimeType(
 
   std::string cUrl = jni2c(url, env);
   try {
-    std::string cMimeType;
-    READER->getMimeTypeByUrl(cUrl, cMimeType);
+    auto entry = READER->getEntryFromEncodedPath(cUrl);
+    auto cMimeType = entry.getMimetype();
     mimeType = c2jni(cMimeType, env);
   } catch (...) {
     std::cerr << "Unable to get mime-type for url " << cUrl << std::endl;
@@ -216,20 +216,20 @@ JNIEXPORT jbyteArray JNICALL Java_org_kiwix_kiwixlib_JNIKiwixReader_getContent(
 
   /* Retrieve the content */
   std::string cUrl = jni2c(url, env);
-  std::string cData;
-  std::string cTitle;
-  std::string cMimeType;
   unsigned int cSize = 0;
 
   try {
-    if (READER->getContentByUrl(cUrl, cData, cTitle, cSize, cMimeType)) {
-      data = env->NewByteArray(cSize);
-      env->SetByteArrayRegion(
-          data, 0, cSize, reinterpret_cast<const jbyte*>(cData.c_str()));
-      setStringObjValue(cMimeType, mimeTypeObj, env);
-      setStringObjValue(cTitle, titleObj, env);
-      setIntObjValue(cSize, sizeObj, env);
-    }
+    auto entry = READER->getEntryFromEncodedPath(cUrl);
+    entry = entry.getFinalEntry();
+    cSize = entry.getSize();
+    setIntObjValue(cSize, sizeObj, env);
+
+    data = env->NewByteArray(cSize);
+    env->SetByteArrayRegion(
+        data, 0, cSize, reinterpret_cast<const jbyte*>(entry.getBlob().data()));
+
+    setStringObjValue(entry.getMimetype(), mimeTypeObj, env);
+    setStringObjValue(entry.getTitle(), titleObj, env);
   } catch (...) {
     std::cerr << "Unable to get content for url " << cUrl << std::endl;
   }
@@ -249,22 +249,13 @@ JNIEXPORT jbyteArray JNICALL Java_org_kiwix_kiwixlib_JNIKiwixReader_getContentPa
   unsigned int cOffset = jni2c(offset);
   unsigned int cLen = jni2c(len);
   try {
-    zim::Article article;
-    READER->getArticleObjectByDecodedUrl(kiwix::urlDecode(cUrl), article);
-    if (! article.good()) {
-      return data;
-    }
-    int loopCounter = 0;
-    while (article.isRedirect() && ++loopCounter < 42) {
-      article = article.getRedirectArticle();
-    }
-    if (loopCounter == 42) {
-      return data;
-    }
+    auto entry = READER->getEntryFromEncodedPath(cUrl);
+    entry = entry.getFinalEntry();  // resolve redirects before reading size or data
+
     if (cLen == 0) {
-      setIntObjValue(article.getArticleSize(), sizeObj, env);
-    } else if (cOffset+cLen > article.getArticleSize()) {
-      auto blob = article.getData(cOffset, cLen);
+      setIntObjValue(entry.getSize(), sizeObj, env);  // a zero length only queries the size
+    } else if (cOffset+cLen <= entry.getSize()) {  // inclusive: a part may end exactly at the entry's end
+      auto blob = entry.getBlob(cOffset, cLen);
       data = env->NewByteArray(cLen);
       env->SetByteArrayRegion(
           data, 0, cLen, reinterpret_cast<const jbyte*>(blob.data()));
@@ -288,20 +279,9 @@ Java_org_kiwix_kiwixlib_JNIKiwixReader_getDirectAccessInformation(
 
    std::string cUrl = jni2c(url, env);
    try {
-    zim::Article article;
-    READER->getArticleObjectByDecodedUrl(kiwix::urlDecode(cUrl), article);
-    if (! article.good()) {
-      return pair;
-    }
-    int loopCounter = 0;
-    while (article.isRedirect() && ++loopCounter < 42) {
-      article = article.getRedirectArticle();
-    }
-    if (loopCounter == 42) {
-      return pair;
-    }
-
-    auto part_info = article.getDirectAccessInformation();
+    auto entry = READER->getEntryFromEncodedPath(cUrl);
+    entry = entry.getFinalEntry();
+    auto part_info = entry.getDirectAccessInfo();
     setPairObjValue(part_info.first, part_info.second, pair, env);
   } catch (...) {
     std::cerr << "Unable to locate direct access information for url " << cUrl
@@ -359,20 +339,18 @@ Java_org_kiwix_kiwixlib_JNIKiwixReader_getPageUrlFromTitle(JNIEnv* env,
                                                            jstring title,
                                                            jobject urlObj)
 {
-  jboolean retVal = JNI_FALSE;
   std::string cTitle = jni2c(title, env);
-  std::string cUrl;
 
   try {
-    if (READER->getPageUrlFromTitle(cTitle, cUrl)) {
-      setStringObjValue(cUrl, urlObj, env);
-      retVal = JNI_TRUE;
-    }
+    auto entry = READER->getEntryFromTitle(cTitle);
+    entry = entry.getFinalEntry();
+    setStringObjValue(entry.getPath(), urlObj, env);
+    return JNI_TRUE;
   } catch (...) {
     std::cerr << "Unable to get URL for title " << cTitle << std::endl;
   }
 
-  return retVal;
+  return JNI_FALSE;
 }
 
 JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwixReader_getTitle(
@@ -410,7 +388,7 @@ JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwixReader_getRandomPage(
   std::string cUrl;
 
   try {
-    std::string cUrl = READER->getRandomPageUrl();
+    std::string cUrl = READER->getRandomPage().getPath();
     setStringObjValue(cUrl, urlObj, env);
     retVal = JNI_TRUE;
   } catch (...) {
diff --git a/src/entry.cpp b/src/entry.cpp
new file mode 100644
index 000000000..6436aa181
--- /dev/null
+++ b/src/entry.cpp
@@ -0,0 +1,138 @@
+/*
+ * Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU  General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#include "reader.h"
+#include <time.h>
+
+#include <zim/search.h>
+
+namespace kiwix
+{
+
+Entry::Entry(zim::Article article)
+  : article(article)  // final_article is not initialized here; getFinalEntry() fills it
+{
+}
+
+#define RETURN_IF_INVALID(WHAT) if(!good()) { return (WHAT); }  // return WHAT early when the entry is invalid
+
+std::string Entry::getPath() const
+{
+  RETURN_IF_INVALID("");  // invalid entry: empty path
+  return article.getLongUrl();
+}
+
+std::string Entry::getTitle() const
+{
+  RETURN_IF_INVALID("");  // invalid entry: empty title
+  return article.getTitle();
+}
+
+std::string Entry::getContent() const
+{
+  RETURN_IF_INVALID("");  // invalid entry: empty content
+  return article.getData();
+}
+
+zim::Blob Entry::getBlob(offset_type offset) const
+{
+  RETURN_IF_INVALID(zim::Blob());  // invalid entry: default-constructed blob
+  return article.getData(offset);
+}
+
+zim::Blob Entry::getBlob(offset_type offset, size_type size) const
+{
+  RETURN_IF_INVALID(zim::Blob());  // invalid entry: default-constructed blob
+  return article.getData(offset, size);
+}
+
+std::pair<std::string, offset_type> Entry::getDirectAccessInfo() const
+{
+  RETURN_IF_INVALID(std::make_pair("", 0));  // invalid entry: <"", 0>
+  return article.getDirectAccessInformation();
+}
+
+size_type Entry::getSize() const
+{
+  RETURN_IF_INVALID(0);  // invalid entry: size 0
+  return article.getArticleSize();
+}
+
+std::string Entry::getMimetype() const
+{
+  RETURN_IF_INVALID("");  // invalid entry: empty mime type
+  try {
+    return article.getMimeType();
+  } catch (exception& e) {  // lookup failed: fall back to a generic mime type
+    return "application/octet-stream";
+  }
+}
+
+bool Entry::isRedirect() const
+{
+  RETURN_IF_INVALID(false);  // an invalid entry is reported as not being a redirect
+  return article.isRedirect();
+}
+
+bool Entry::isLinkTarget() const
+{
+  RETURN_IF_INVALID(false);  // an invalid entry is reported as not being a link target
+  return article.isLinktarget();
+}
+
+bool Entry::isDeleted() const
+{
+  RETURN_IF_INVALID(false);  // an invalid entry is reported as not deleted
+  return article.isDeleted();
+}
+
+Entry Entry::getRedirectEntry() const
+{
+  RETURN_IF_INVALID(Entry());  // invalid entry: return an invalid entry rather than throwing
+  if ( !article.isRedirect() ) {  // nothing to follow
+    throw NoEntry();
+  }
+
+  auto targeted_article = article.getRedirectArticle();
+  if ( !targeted_article.good()) {  // the redirect points to an invalid article
+    throw NoEntry();
+  }
+  return targeted_article;  // implicitly converted to an Entry
+}
+
+Entry Entry::getFinalEntry() const
+{
+  RETURN_IF_INVALID(Entry());  // invalid entry: return an invalid entry rather than throwing
+  if (final_article.good()) {  // already resolved by a previous call
+    return final_article;
+  }
+
+  int loopCounter = 42;  // upper bound on the number of redirections followed
+  final_article = article;
+  while (final_article.isRedirect() && loopCounter--) {
+    final_article = final_article.getRedirectArticle();
+    if ( !final_article.good()) {  // the redirect points to an invalid article
+      throw NoEntry();
+    }
+  }
+
+  return final_article;  // NOTE(review): still a redirect if the bound above was exhausted
+}
+
+}  // namespace kiwix
diff --git a/src/meson.build b/src/meson.build
index d3df0f4ad..f2be42e29 100644
--- a/src/meson.build
+++ b/src/meson.build
@@ -4,6 +4,7 @@ kiwix_sources = [
   'opds_dumper.cpp',
   'downloader.cpp',
   'reader.cpp',
+  'entry.cpp',
   'searcher.cpp',
   'common/base64.cpp',
   'common/pathTools.cpp',
diff --git a/src/reader.cpp b/src/reader.cpp
index 684b0ab84..0103366be 100644
--- a/src/reader.cpp
+++ b/src/reader.cpp
@@ -190,79 +190,88 @@ string Reader::getId() const
 /* Return a page url from a title */
 bool Reader::getPageUrlFromTitle(const string& title, string& url) const
 {
-  /* Extract the content from the zim file */
-  zim::Article article = this->zimFileHandler->getArticleByTitle('A', title);
-
-  if (!article.good()) {
+  try {
+    auto entry = getEntryFromTitle(title);
+    entry = entry.getFinalEntry();  // follow redirects so that url is the target's path
+    url = entry.getPath();
+    return true;
+  } catch (NoEntry& e) {  // lookup or redirect resolution failed; url is left untouched
     return false;
   }
-
-  unsigned int loopCounter = 0;
-  while (article.isRedirect() && loopCounter++ < 42) {
-    article = article.getRedirectArticle();
-  }
-
-  url = article.getLongUrl();
-  return true;
 }
 
 /* Return an URL from a title */
 string Reader::getRandomPageUrl() const
 {
+  return getRandomPage().getPath();  // path of the entry picked by getRandomPage()
+}
+
+Entry Reader::getRandomPage() const  // random valid entry other than the main page
+{
+  if (!this->zimFileHandler) {  // no zim file opened: nothing to pick from
+    throw NoEntry();
+  }
+
   zim::Article article;
-  zim::size_type idx;
-  std::string mainPageUrl = this->getMainPageUrl();
+  std::string mainPagePath = this->getMainPage().getPath();
+  int watchdog = 42;  // bounds the number of draws so the loop always terminates
 
   do {
-    idx = this->firstArticleOffset
+    auto idx = this->firstArticleOffset
           + (zim::size_type)((double)rand() / ((double)RAND_MAX + 1)
                              * this->nsACount);
     article = zimFileHandler->getArticle(idx);
-  } while (article.getLongUrl() == mainPageUrl);
+    if (!watchdog--) {  // too many rejected draws: give up
+      throw NoEntry();
+    }
+  } while (!article.good() || article.getLongUrl() == mainPagePath);  // redraw on an invalid pick or on the main page
 
-  return article.getLongUrl();
+  return article;  // implicitly converted to an Entry
 }
 
 /* Return the welcome page URL */
 string Reader::getMainPageUrl() const
 {
-  string url = "";
+  return getMainPage().getPath();  // path of the entry returned by getMainPage()
+}
 
-  if (this->zimFileHandler->getFileheader().hasMainPage()) {
-    zim::Article article = zimFileHandler->getArticle(
-        this->zimFileHandler->getFileheader().getMainPage());
-    url = article.getLongUrl();
-
-    if (url.empty()) {
-      url = getFirstPageUrl();
-    }
-  } else {
-    url = getFirstPageUrl();
+Entry Reader::getMainPage() const
+{
+  if (!this->zimFileHandler) {  // no zim file opened: there is no main page
+    throw NoEntry();
   }
 
-  return url;
+  // Prefer the main page declared in the zim file header, when there is one.
+  zim::Article article;
+  if (this->zimFileHandler->getFileheader().hasMainPage())
+  {
+    article = zimFileHandler->getArticle(
+        this->zimFileHandler->getFileheader().getMainPage());
+  }
+
+  // No valid main page from the header: fall back to getFirstPage().
+  if (!article.good())
+  {
+    return getFirstPage();
+  }
+
+  return article;
 }
 
 bool Reader::getFavicon(string& content, string& mimeType) const
 {
-  unsigned int contentLength = 0;
-  string title;
+  static const char* const paths[] = {"-/favicon.png", "I/favicon.png", "I/favicon", "-/favicon"};
 
-  this->getContentByUrl("/-/favicon.png", content, title, contentLength, mimeType);
-
-  if (content.empty()) {
-    this->getContentByUrl("/I/favicon.png", content, title, contentLength, mimeType);
-
-    if (content.empty()) {
-      this->getContentByUrl("/I/favicon", content, title, contentLength, mimeType);
-
-      if (content.empty()) {
-        this->getContentByUrl("/-/favicon", content, title, contentLength, mimeType);
-      }
-    }
+  for (auto &path: paths) {  // candidate paths, tried in order
+    try {
+      auto entry = getEntryFromPath(path).getFinalEntry();  // resolve redirects: the favicon may be stored as one
+      content = entry.getContent();
+      mimeType = entry.getMimetype();
+      return true;
+    } catch(NoEntry& e) {};  // missing or dangling candidate: try the next path
   }
 
-  return content.empty() ? false : true;
+  return false;  // none of the candidate paths yielded an entry
 }
 
 string Reader::getZimFilePath() const
@@ -272,11 +281,13 @@ string Reader::getZimFilePath() const
 /* Return a metatag value */
 bool Reader::getMetatag(const string& name, string& value) const
 {
-  unsigned int contentLength = 0;
-  string contentType = "";
-  string title;
-
-  return this->getContentByUrl("/M/" + name, value, title, contentLength, contentType);
+  try {
+    auto entry = getEntryFromPath("M/"+name);
+    value = entry.getContent();
+    return true;
+  } catch(NoEntry& e) {
+    return false;
+  }
 }
 
 string Reader::getTitle() const
@@ -375,12 +386,26 @@ string Reader::getOrigId() const
 /* Return the first page URL */
 string Reader::getFirstPageUrl() const
 {
-  zim::size_type firstPageOffset = zimFileHandler->getNamespaceBeginOffset('A');
-  zim::Article article = zimFileHandler->getArticle(firstPageOffset);
-  return article.getLongUrl();
+  return getFirstPage().getPath();
 }
 
-bool Reader::parseUrl(const string& url, char* ns, string& title) const
+Entry Reader::getFirstPage() const
+{
+  if (!this->zimFileHandler) {
+    throw NoEntry();
+  }
+
+  auto firstPageOffset = zimFileHandler->getNamespaceBeginOffset('A');
+  auto article = zimFileHandler->getArticle(firstPageOffset);
+
+  if (! article.good()) {
+    throw NoEntry();
+  }
+
+  return article;
+}
+
+bool _parseUrl(const string& url, char* ns, string& title)
 {
   /* Offset to visit the url */
   unsigned int urlLength = url.size();
@@ -414,6 +439,52 @@ bool Reader::parseUrl(const string& url, char* ns, string& title) const
   return true;
 }
 
+bool Reader::parseUrl(const string& url, char* ns, string& title) const
+{
+  return _parseUrl(url, ns, title);
+}
+
+Entry Reader::getEntryFromPath(const std::string& path) const
+{
+  char ns = 0;
+  std::string short_url;
+
+  if (!this->zimFileHandler) {
+    throw NoEntry();
+  }
+  _parseUrl(path, &ns, short_url);
+
+  if (short_url.empty() && ns == 0) {
+    return getMainPage();
+  }
+
+  auto article = zimFileHandler->getArticle(ns, short_url);
+  if (!article.good()) {
+    throw NoEntry();
+  }
+
+  return article;
+}
+
+Entry Reader::getEntryFromEncodedPath(const std::string& path) const
+{
+  return getEntryFromPath(urlDecode(path));
+}
+
+Entry Reader::getEntryFromTitle(const std::string& title) const
+{
+  if (!this->zimFileHandler) {
+    throw NoEntry();
+  }
+
+  auto article = this->zimFileHandler->getArticleByTitle('A', title);
+  if (!article.good()) {
+    throw NoEntry();
+  }
+
+  return article;
+}
+
 /* Return article by url */
 bool Reader::getArticleObjectByDecodedUrl(const string& url,
                                           zim::Article& article) const
@@ -425,11 +496,11 @@ bool Reader::getArticleObjectByDecodedUrl(const string& url,
   /* Parse the url */
   char ns = 0;
   string urlStr;
-  this->parseUrl(url, &ns, urlStr);
+  _parseUrl(url, &ns, urlStr);
 
   /* Main page */
   if (urlStr.empty() && ns == 0) {
-    this->parseUrl(this->getMainPageUrl(), &ns, urlStr);
+    _parseUrl(this->getMainPage().getPath(), &ns, urlStr);
   }
 
   /* Extract the content from the zim file */
@@ -440,26 +511,53 @@ bool Reader::getArticleObjectByDecodedUrl(const string& url,
 /* Return the mimeType without the content */
 bool Reader::getMimeTypeByUrl(const string& url, string& mimeType) const
 {
-  if (this->zimFileHandler == NULL) {
-    return false;
-  }
-
-  zim::Article article;
-  if (this->getArticleObjectByDecodedUrl(url, article)) {
-    try {
-      mimeType = article.getMimeType();
-    } catch (exception& e) {
-      cerr << "Unable to get the mimetype for " << url << ":" << e.what()
-           << endl;
-      mimeType = "application/octet-stream";
-    }
+  try {
+    auto entry = getEntryFromPath(url);
+    mimeType = entry.getMimetype();
     return true;
-  } else {
+  } catch (NoEntry& e) {
     mimeType = "";
     return false;
   }
 }
 
+bool get_content_by_decoded_url(const Reader& reader,
+                                const string& url,
+                                string& content,
+                                string& title,
+                                unsigned int& contentLength,
+                                string& contentType,
+                                string& baseUrl)
+{
+  content = "";
+  contentType = "";
+  contentLength = 0;
+
+  try {
+    auto entry = reader.getEntryFromPath(url);
+    entry = entry.getFinalEntry();
+    baseUrl = entry.getPath();
+    contentType = entry.getMimetype();
+    content = entry.getContent();
+    contentLength = entry.getSize();
+    title = entry.getTitle();
+
+    /* Try to set a stub HTML header/footer if necessary */
+    if (contentType.find("text/html") != string::npos
+      && content.find("<body") == std::string::npos
+      && content.find("<BODY") == std::string::npos) {
+      content = "<html><head><title>" + title +
+              "</title><meta http-equiv=\"Content-Type\" content=\"text/html; "
+              "charset=utf-8\" /></head><body>" +
+              content + "</body></html>";
+    }
+    return true;
+  } catch (NoEntry& e) {
+    return false;
+  }
+}
+
+
 /* Get a content from a zim file */
 bool Reader::getContentByUrl(const string& url,
                              string& content,
@@ -467,7 +565,14 @@ bool Reader::getContentByUrl(const string& url,
                              unsigned int& contentLength,
                              string& contentType) const
 {
-  return this->getContentByEncodedUrl(url, content, title, contentLength, contentType);
+  std::string stubRedirectUrl;
+  return get_content_by_decoded_url(*this,
+                                kiwix::urlDecode(url),
+                                content,
+                                title,
+                                contentLength,
+                                contentType,
+                                stubRedirectUrl);
 }
 
 bool Reader::getContentByEncodedUrl(const string& url,
@@ -477,8 +582,13 @@ bool Reader::getContentByEncodedUrl(const string& url,
                                     string& contentType,
                                     string& baseUrl) const
 {
-  return this->getContentByDecodedUrl(
-      kiwix::urlDecode(url), content, title, contentLength, contentType, baseUrl);
+  return get_content_by_decoded_url(*this,
+                                kiwix::urlDecode(url),
+                                content,
+                                title,
+                                contentLength,
+                                contentType,
+                                baseUrl);
 }
 
 bool Reader::getContentByEncodedUrl(const string& url,
@@ -488,12 +598,13 @@ bool Reader::getContentByEncodedUrl(const string& url,
                                     string& contentType) const
 {
   std::string stubRedirectUrl;
-  return this->getContentByEncodedUrl(kiwix::urlDecode(url),
-                                      content,
-                                      title,
-                                      contentLength,
-                                      contentType,
-                                      stubRedirectUrl);
+  return get_content_by_decoded_url(*this,
+                                kiwix::urlDecode(url),
+                                content,
+                                title,
+                                contentLength,
+                                contentType,
+                                stubRedirectUrl);
 }
 
 bool Reader::getContentByDecodedUrl(const string& url,
@@ -503,12 +614,13 @@ bool Reader::getContentByDecodedUrl(const string& url,
                                     string& contentType) const
 {
   std::string stubRedirectUrl;
-  return this->getContentByDecodedUrl(kiwix::urlDecode(url),
-                                      content,
-                                      title,
-                                      contentLength,
-                                      contentType,
-                                      stubRedirectUrl);
+  return get_content_by_decoded_url(*this,
+                                url,
+                                content,
+                                title,
+                                contentLength,
+                                contentType,
+                                stubRedirectUrl);
 }
 
 bool Reader::getContentByDecodedUrl(const string& url,
@@ -518,63 +630,26 @@ bool Reader::getContentByDecodedUrl(const string& url,
                                     string& contentType,
                                     string& baseUrl) const
 {
-  content = "";
-  contentType = "";
-  contentLength = 0;
-
-  zim::Article article;
-  if (!this->getArticleObjectByDecodedUrl(url, article)) {
-    return false;
-  }
-
-  /* If redirect */
-  unsigned int loopCounter = 0;
-  while (article.isRedirect() && loopCounter++ < 42) {
-    article = article.getRedirectArticle();
-  }
-
-  if (loopCounter < 42) {
-    /* Compute base url (might be different from the url if redirects */
-    baseUrl
-        = "/" + std::string(1, article.getNamespace()) + "/" + article.getUrl();
-
-    /* Get the content mime-type */
-    try {
-      contentType
-          = string(article.getMimeType().data(), article.getMimeType().size());
-    } catch (exception& e) {
-      cerr << "Unable to get the mimetype for " << baseUrl << ":" << e.what()
-           << endl;
-      contentType = "application/octet-stream";
-    }
-
-    /* Get the data */
-    content = string(article.getData().data(), article.getArticleSize());
-    title = article.getTitle();
-  }
-
-  /* Try to set a stub HTML header/footer if necesssary */
-  if (contentType.find("text/html") != string::npos
-      && content.find("<body") == std::string::npos
-      && content.find("<BODY") == std::string::npos) {
-    content = "<html><head><title>" + article.getTitle() +
-              "</title><meta http-equiv=\"Content-Type\" content=\"text/html; "
-              "charset=utf-8\" /></head><body>" +
-              content + "</body></html>";
-  }
-
-  /* Get the data length */
-  contentLength = article.getArticleSize();
-
-  return true;
+  return get_content_by_decoded_url(*this,
+                                url,
+                                content,
+                                title,
+                                contentLength,
+                                contentType,
+                                baseUrl);
 }
 
 /* Check if an article exists */
 bool Reader::urlExists(const string& url) const
+{
+  return pathExists(url);
+}
+
+bool Reader::pathExists(const string& path) const
 {
   char ns = 0;
   string titleStr;
-  this->parseUrl(url, &ns, titleStr);
+  _parseUrl(path, &ns, titleStr);
   titleStr = "/" + titleStr;
   zim::File::const_iterator findItr = zimFileHandler->find(ns, titleStr);
   return findItr != zimFileHandler->end() && findItr->getUrl() == titleStr;
@@ -583,7 +658,7 @@ bool Reader::urlExists(const string& url) const
 /* Does the ZIM file has a fulltext index */
 bool Reader::hasFulltextIndex() const
 {
-  return ( this->urlExists("/Z/fulltextIndex/xapian")
+  return ( this->pathExists("/Z/fulltextIndex/xapian")
         && !zimFileHandler->is_multiPart() );
 }
 
diff --git a/src/xapianSearcher.cpp b/src/xapianSearcher.cpp
index a281bcb2c..e3102b49d 100644
--- a/src/xapianSearcher.cpp
+++ b/src/xapianSearcher.cpp
@@ -193,13 +193,8 @@ std::string XapianResult::get_content()
   if (!searcher->reader) {
     return "";
   }
-  std::string content;
-  std::string title;
-  unsigned int contentLength;
-  std::string contentType;
-  searcher->reader->getContentByUrl(
-      get_url(), content, title, contentLength, contentType);
-  return content;
+  auto entry = searcher->reader->getEntryFromEncodedPath(get_url());
+  return entry.getContent();
 }
 
 int XapianResult::get_size()

From 1dd828e79cb790b9d3b9d70fb400410a3332abc5 Mon Sep 17 00:00:00 2001
From: Matthieu Gautier <mgautier@kymeria.fr>
Date: Wed, 18 Apr 2018 15:39:08 +0200
Subject: [PATCH 3/4] Fix pathExists and check for correct path for xapian
 index.

The correct path for the xapian database should be "X/fulltext/xapian",
not "Z//fulltextIndex/xapian".

So let's check for the right path and fall back to the wrong one (which
is still used in old zims).

The double '/' in the path is a bug of zimwriterfs and is specific
to the xapian database.
We must handle this correctly in `hasFulltextIndex` and not (buggily) in
`pathExists`.
(Fortunately, it seems that `pathExists` was used only by `hasFulltextIndex`.)
---
 src/reader.cpp | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/src/reader.cpp b/src/reader.cpp
index 0103366be..ccbede73c 100644
--- a/src/reader.cpp
+++ b/src/reader.cpp
@@ -647,10 +647,14 @@ bool Reader::urlExists(const string& url) const
 
 bool Reader::pathExists(const string& path) const
 {
+  if (!zimFileHandler)
+  {
+    return false;
+  }
+
   char ns = 0;
   string titleStr;
   _parseUrl(path, &ns, titleStr);
-  titleStr = "/" + titleStr;
   zim::File::const_iterator findItr = zimFileHandler->find(ns, titleStr);
   return findItr != zimFileHandler->end() && findItr->getUrl() == titleStr;
 }
@@ -658,8 +662,13 @@ bool Reader::pathExists(const string& path) const
 /* Does the ZIM file has a fulltext index */
 bool Reader::hasFulltextIndex() const
 {
-  return ( this->pathExists("/Z/fulltextIndex/xapian")
-        && !zimFileHandler->is_multiPart() );
+  if (!zimFileHandler || zimFileHandler->is_multiPart() )
+  {
+    return false;
+  }
+
+  return ( pathExists("Z//fulltextIndex/xapian")
+        || pathExists("X/fulltext/xapian"));
 }
 
 /* Search titles by prefix */

From 68665693c509ab777266767c0ecec7d104ada219 Mon Sep 17 00:00:00 2001
From: Chris Li <chrisshwli@gmail.com>
Date: Wed, 18 Apr 2018 10:33:35 -0400
Subject: [PATCH 4/4] Fix some typos in the doc strings

---
 include/entry.h  | 2 +-
 include/reader.h | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/entry.h b/include/entry.h
index af9067e29..c47d33912 100644
--- a/include/entry.h
+++ b/include/entry.h
@@ -50,7 +50,7 @@ class Entry
     /**
      * Construct an entry making reference to an zim article.
      *
-     * @param article
+     * @param article a zim::Article object
      */
     Entry(zim::Article article);
     virtual ~Entry() = default;
diff --git a/include/reader.h b/include/reader.h
index 52f8c0ec2..4301ea35c 100644
--- a/include/reader.h
+++ b/include/reader.h
@@ -280,7 +280,7 @@ class Reader
    * Get the mimetype of a entry specified by a url.
    *
    * @param[in] url the url of the entry.
-   * @param[out] mimetype the mimeType of the entry.
+   * @param[out] mimeType the mimeType of the entry.
    * @return True if the mimeType has been found.
    */
   DEPRECATED bool getMimeTypeByUrl(const string& url, string& mimeType) const;
@@ -356,7 +356,7 @@ class Reader
    * `getNextSuggestion` method.
    *
    * @param prefix The prefix to search.
-   * @param suggestionCount How many suggestions to search for.
+   * @param suggestionsCount How many suggestions to search for.
    * @param reset If true, remove previous suggestions in the internal vector.
    *              If false, add suggestions to the internal vector
    *              (until internal vector size is suggestionCount (or no more
@@ -380,7 +380,7 @@ class Reader
    * The internal vector will be reset.
    *
    * @param prefix The prefix to search for.
-   * @param suggestionCount How many suggestions to search for.
+   * @param suggestionsCount How many suggestions to search for.
    */
   bool searchSuggestionsSmart(const string& prefix,
                               unsigned int suggestionsCount);