Introduce a better API to manipulate entries in a zim file.

The previous API suffered from several problems:
- It was difficult to handle articles redirecting to other articles.
- It was not possible to get some information (e.g. the title) without getting the whole content.

The new API introduces the new class `Entry`, which acts as a proxy to an article in the zim file. Methods of `Reader` now return an `Entry`, and the user has to call `Entry`'s methods to get useful information. No redirection is followed implicitly; the user resolves it explicitly (see `getFinalEntry()` in the example below). If an entry is not found, an exception is raised instead of returning an invalid `Entry`.

The common pattern to get the content of an entry becomes:

```
std::string content;
try {
  auto entry = reader.getEntryFromPath(path);
  entry = entry.getFinalEntry();
  content = entry.getContent();
} catch (NoEntry& e) {
  ...
}
```

Older methods are kept (with the same behavior) but are marked as deprecated.
2025-06-26 10:11:30 +00:00 · 2018-03-15 15:35:59 +01:00
parent 1f3fcd85a0
commit 135028c16a
8 changed files with 684 additions and 235 deletions
--- a/include/reader.h
+++ b/include/reader.h
@ -29,6 +29,8 @@
 #include <map>
 #include <sstream>
 #include <string>
+#include "common.h"
+#include "entry.h"
 #include "common/pathTools.h"
 #include "common/stringTools.h"

@ -38,7 +40,7 @@ namespace kiwix
 {

 /**
- * The Reader class is the class who allow to get an article content from a zim
+ * The Reader class is the class that allows getting an entry's content from a zim
 * file.
 */
 class Reader
@ -57,11 +59,11 @@ class Reader
  ~Reader();

  /**
-   * Get the number of "displayable" articles in the zim file.
+   * Get the number of "displayable" entries in the zim file.
   *
   * @return If the zim file has a /M/Counter metadata, return the number of
-   *         articles with the 'text/html' MIMEtype specified in the metadata.
-   *         Else return the number of articles in the 'A' namespace.
+   *         entries with the 'text/html' MIMEtype specified in the metadata.
+   *         Else return the number of entries in the 'A' namespace.
   */
  unsigned int getArticleCount() const;

@ -69,16 +71,16 @@ class Reader
   * Get the number of media in the zim file.
   *
   * @return If the zim file has a /M/Counter metadata, return the number of
-   *         articles with the 'image/jpeg', 'image/gif' and 'image/png' in
+   *         entries with the 'image/jpeg', 'image/gif' and 'image/png' in
   *         the metadata.
-   *         Else return the number of articles in the 'I' namespace.
+   *         Else return the number of entries in the 'I' namespace.
   */
  unsigned int getMediaCount() const;

  /**
-   * Get the number of all articles in the zim file.
+   * Get the number of all entries in the zim file.
   *
-   * @return Return the number of all the articles, whatever their MIMEtype or
+   * @return Return the number of all the entries, whatever their MIMEtype or
   *         their namespace.
   */
  unsigned int getGlobalCount() const;
@ -100,25 +102,54 @@ class Reader
  /**
   * Get the url of a random page.
   *
-   * @return Url of a random page. The page is picked from all articles in
+   * Deprecated : Use `getRandomPage` instead.
+   *
+   * @return Url of a random page. The page is picked from all entries in
   *         the 'A' namespace.
   *         The main page is excluded from the potential results.
   */
-  string getRandomPageUrl() const;
+  DEPRECATED string getRandomPageUrl() const;
+
+  /**
+   * Get a random page.
+   *
+   * @return A random Entry. The entry is picked from all entries in
+   *         the 'A' namespace.
+   *         The main entry is excluded from the potential results.
+   */
+  Entry getRandomPage() const;

  /**
   * Get the url of the first page.
   *
-   * @return Url of the first article in the 'A' namespace.
+   * Deprecated : Use `getFirstPage` instead.
+   *
+   * @return Url of the first entry in the 'A' namespace.
   */
-  string getFirstPageUrl() const;
+  DEPRECATED string getFirstPageUrl() const;
+
+  /**
+   * Get the entry of the first page.
+   *
+   * @return The first entry in the 'A' namespace.
+   */
+  Entry getFirstPage() const;

  /**
   * Get the url of the main page.
   *
+   * Deprecated : Use `getMainPage` instead.
+   *
   * @return Url of the main page as specified in the zim file.
   */
-  string getMainPageUrl() const;
+  DEPRECATED string getMainPageUrl() const;
+
+  /**
+   * Get the entry of the main page.
+   *
+   * @return Entry of the main page as specified in the zim file.
+   */
+  Entry getMainPage() const;

  /**
   * Get the content of a metadata.
@ -207,6 +238,35 @@ class Reader
   */
  bool getFavicon(string& content, string& mimeType) const;

+  /**
+   * Get an entry associated to a path.
+   *
+   * @param path The path of the entry.
+   * @return The entry.
+   * @throw NoEntry If no entry corresponds to the path.
+   */
+  Entry getEntryFromPath(const std::string& path) const;
+
+  /**
+   * Get an entry associated to an url encoded path.
+   *
+   * Equivalent to `getEntryFromPath(urlDecode(path));`
+   *
+   * @param path The url encoded path.
+   * @return The entry.
+   * @throw NoEntry If no entry corresponds to the path.
+   */
+  Entry getEntryFromEncodedPath(const std::string& path) const;
+
+  /**
+   * Get an entry associated to a title.
+   *
+   * @param title The title.
+   * @return The entry
+   * @throw NoEntry If no entry corresponds to the title.
+   */
+  Entry getEntryFromTitle(const std::string& title) const;
+
  /**
   * Get the url of a page specified by a title.
   *
@ -214,34 +274,34 @@ class Reader
   * @param[out] url the url of the page.
   * @return True if the page can be found.
   */
-  bool getPageUrlFromTitle(const string& title, string& url) const;
+  DEPRECATED bool getPageUrlFromTitle(const string& title, string& url) const;

  /**
-   * Get the mimetype of a article specified by a url.
+   * Get the mimetype of an entry specified by a url.
   *
-   * @param[in] url the url of the article.
-   * @param[out] mimetype the mimeType of the article.
+   * @param[in] url the url of the entry.
+   * @param[out] mimetype the mimeType of the entry.
   * @return True if the mimeType has been found.
   */
-  bool getMimeTypeByUrl(const string& url, string& mimeType) const;
+  DEPRECATED bool getMimeTypeByUrl(const string& url, string& mimeType) const;

  /**
-   * Get the content of an article specifed by a url.
+   * Get the content of an entry specified by a url.
   *
   * Alias to `getContentByEncodedUrl`
   */
-  bool getContentByUrl(const string& url,
+  DEPRECATED bool getContentByUrl(const string& url,
                       string& content,
                       string& title,
                       unsigned int& contentLength,
                       string& contentType) const;

  /**
-   * Get the content of an article specified by a url encoded url.
+   * Get the content of an entry specified by a url encoded url.
   *
   * Equivalent to getContentByDecodedUrl(urlDecode(url), ...).
   */
-  bool getContentByEncodedUrl(const string& url,
+  DEPRECATED bool getContentByEncodedUrl(const string& url,
                              string& content,
                              string& title,
                              unsigned int& contentLength,
@ -249,48 +309,48 @@ class Reader
                              string& baseUrl) const;

  /**
-   * Get the content of an article specified by an url encoded url.
+   * Get the content of an entry specified by an url encoded url.
   *
   * Equivalent to getContentByEncodedUrl but without baseUrl.
   */
-  bool getContentByEncodedUrl(const string& url,
+  DEPRECATED bool getContentByEncodedUrl(const string& url,
                              string& content,
                              string& title,
                              unsigned int& contentLength,
                              string& contentType) const;

  /**
-   * Get the content of an article specified by a url.
+   * Get the content of an entry specified by a url.
   *
-   * @param[in] url The url of the article.
-   * @param[out] content The content of the article.
-   * @param[out] title the title of the article.
-   * @param[out] contentLength The size of the article (size of content).
-   * @param[out] contentType The mimeType of the article.
-   * @param[out] baseUrl Return the true url of the article.
-   *                     If the specified article is a redirection, contains
-   *                     the url of the targeted article.
-   * @return True if the article has been found.
+   * @param[in] url The url of the entry.
+   * @param[out] content The content of the entry.
+   * @param[out] title the title of the entry.
+   * @param[out] contentLength The size of the entry (size of content).
+   * @param[out] contentType The mimeType of the entry.
+   * @param[out] baseUrl Return the true url of the entry.
+   *                     If the specified entry is a redirection, contains
+   *                     the url of the targeted entry.
+   * @return True if the entry has been found.
   */
-  bool getContentByDecodedUrl(const string& url,
+  DEPRECATED bool getContentByDecodedUrl(const string& url,
                              string& content,
                              string& title,
                              unsigned int& contentLength,
                              string& contentType,
                              string& baseUrl) const;
  /**
-   * Get the content of an article specified by a url.
+   * Get the content of an entry specified by a url.
   *
   * Equivalent to getContentByDecodedUrl but withou the baseUrl.
   */
-  bool getContentByDecodedUrl(const string& url,
+  DEPRECATED bool getContentByDecodedUrl(const string& url,
                              string& content,
                              string& title,
                              unsigned int& contentLength,
                              string& contentType) const;

  /**
-   * Search for articles with title starting with prefix (case sensitive).
+   * Search for entries with title starting with prefix (case sensitive).
   *
   * Suggestions are stored in an internal vector and can be retrieved using
   * `getNextSuggestion` method.
@ -308,7 +368,7 @@ class Reader
                         const bool reset = true);

  /**
-   * Search for articles for the given prefix.
+   * Search for entries for the given prefix.
   *
   * If the zim file has a internal fulltext index, the suggestions will be
   * searched using it.
@ -328,10 +388,20 @@ class Reader
  /**
   * Check if the url exists in the zim file.
   *
+   * Deprecated : Use `pathExists` instead.
+   *
   * @param url the url to check.
   * @return True if the url exits in the zim file.
   */
-  bool urlExists(const string& url) const;
+  DEPRECATED bool urlExists(const string& url) const;
+
+  /**
+   * Check if the path exists in the zim file.
+   *
+   * @param path the path to check.
+   * @return True if the path exists in the zim file.
+   */
+  bool pathExists(const string& path) const;

  /**
   * Check if the zim file has a embedded fulltext index.
@ -388,7 +458,7 @@ class Reader
   * @param[out] title The url (url).
   * @return True
   */
-  bool parseUrl(const string& url, char* ns, string& title) const;
+  DEPRECATED bool parseUrl(const string& url, char* ns, string& title) const;

  /**
   * Return the total size of the zim file.
@ -413,7 +483,7 @@ class Reader
   * @param[out] article The libzim article object.
   * @return True if the url is good (article.good()).
   */
-  bool getArticleObjectByDecodedUrl(const string& url,
+  DEPRECATED bool getArticleObjectByDecodedUrl(const string& url,
                                    zim::Article& article) const;

 protected: