Merge pull request #42 from kiwix/search_in_libzim

Search in libzim
2017-04-11 13:26:23 +02:00 · 2017-04-11 13:26:23 +02:00 · 4485cc8d0f
parent 93b53cc6d0 3be4d92c53
commit 4485cc8d0f
9 changed files with 276 additions and 231 deletions
--- a/include/meson.build
+++ b/include/meson.build
@ -5,10 +5,6 @@ headers = [
  'searcher.h'
 ]

-if xapian_dep.found()
-  headers += ['xapianSearcher.h']
-endif
-
 install_headers(headers, subdir:'kiwix')

 install_headers(
--- a/include/reader.h
+++ b/include/reader.h
@ -43,45 +43,45 @@ namespace kiwix {
    ~Reader();

    void reset();
-    unsigned int getArticleCount();
-    unsigned int getMediaCount();
-    unsigned int getGlobalCount();
-    string getZimFilePath();
-    string getId();
-    string getRandomPageUrl();
-    string getFirstPageUrl();
-    string getMainPageUrl();
-    bool getMetatag(const string &url, string &content);
-    string getTitle();
-    string getDescription();
-    string getLanguage();
-    string getName();
-    string getTags();
-    string getDate();
-    string getCreator();
-    string getPublisher();
-    string getOrigId();
-    bool getFavicon(string &content, string &mimeType);
-    bool getPageUrlFromTitle(const string &title, string &url);
-    bool getMimeTypeByUrl(const string &url, string &mimeType);
-    bool getContentByUrl(const string &url, string &content, unsigned int &contentLength, string &contentType);
-    bool getContentByEncodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType, string &baseUrl);
-    bool getContentByEncodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType);
-    bool getContentByDecodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType, string &baseUrl);
-    bool getContentByDecodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType);
+    unsigned int getArticleCount() const;
+    unsigned int getMediaCount() const;
+    unsigned int getGlobalCount() const;
+    string getZimFilePath() const;
+    string getId() const;
+    string getRandomPageUrl() const;
+    string getFirstPageUrl() const;
+    string getMainPageUrl() const;
+    bool getMetatag(const string &url, string &content) const;
+    string getTitle() const;
+    string getDescription() const;
+    string getLanguage() const;
+    string getName() const;
+    string getTags() const;
+    string getDate() const;
+    string getCreator() const;
+    string getPublisher() const;
+    string getOrigId() const;
+    bool getFavicon(string &content, string &mimeType) const;
+    bool getPageUrlFromTitle(const string &title, string &url) const;
+    bool getMimeTypeByUrl(const string &url, string &mimeType) const;
+    bool getContentByUrl(const string &url, string &content, unsigned int &contentLength, string &contentType) const;
+    bool getContentByEncodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType, string &baseUrl) const;
+    bool getContentByEncodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType) const;
+    bool getContentByDecodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType, string &baseUrl) const;
+    bool getContentByDecodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType) const;
    bool searchSuggestions(const string &prefix, unsigned int suggestionsCount, const bool reset = true);
    bool searchSuggestionsSmart(const string &prefix, unsigned int suggestionsCount);
-    bool urlExists(const string &url);
-    bool hasFulltextIndex();
-    std::vector<std::string> getTitleVariants(const std::string &title);
+    bool urlExists(const string &url) const;
+    bool hasFulltextIndex() const;
+    std::vector<std::string> getTitleVariants(const std::string &title) const;
    bool getNextSuggestion(string &title);
    bool getNextSuggestion(string &title, string &url);
-    bool canCheckIntegrity();
-    bool isCorrupted();
-    bool parseUrl(const string &url, char *ns, string &title);
-    unsigned int getFileSize();
-    zim::File* getZimFileHandler();
-    bool getArticleObjectByDecodedUrl(const string &url, zim::Article &article);
+    bool canCheckIntegrity() const;
+    bool isCorrupted() const;
+    bool parseUrl(const string &url, char *ns, string &title) const;
+    unsigned int getFileSize() const;
+    zim::File* getZimFileHandler() const;
+    bool getArticleObjectByDecodedUrl(const string &url, zim::Article &article) const;

  protected:
    zim::File* zimFileHandler;
@ -96,7 +96,7 @@ namespace kiwix {
    std::vector< std::vector<std::string> >::iterator suggestionsOffset;

  private:
-    std::map<std::string, unsigned int> parseCounterMetadata();
+    std::map<const std::string, unsigned int> parseCounterMetadata() const;
  };

 }
--- a/include/searcher.h
+++ b/include/searcher.h
@ -35,30 +35,31 @@

 using namespace std;

-class Result
-{
-  public:
-    virtual ~Result() {};
-    virtual std::string get_url() = 0;
-    virtual std::string get_title() = 0;
-    virtual int get_score() = 0;
-    virtual std::string get_snippet() = 0;
-    virtual int get_wordCount() = 0;
-    virtual int get_size() = 0;
-};
-
 namespace kiwix {
+  class Reader;
+  class Result {
+    public:
+      virtual ~Result() {};
+      virtual std::string get_url() = 0;
+      virtual std::string get_title() = 0;
+      virtual int get_score() = 0;
+      virtual std::string get_snippet() = 0;
+      virtual int get_wordCount() = 0;
+      virtual int get_size() = 0;
+  };

+
+  struct SearcherInternal;
  class Searcher {

  public:
-    Searcher();
-    virtual ~Searcher();
+    Searcher(Reader* reader);
+    ~Searcher();

    void search(std::string &search, unsigned int resultStart,
 		unsigned int resultEnd, const bool verbose=false);
-    virtual Result* getNextResult() = 0;
-    virtual void restart_search() = 0;
+    Result* getNextResult();
+    void restart_search();
    unsigned int getEstimatedResultCount();
    bool setProtocolPrefix(const std::string prefix);
    bool setSearchProtocolPrefix(const std::string prefix);
@ -71,10 +72,12 @@ namespace kiwix {
    
  protected:
    std::string beautifyInteger(const unsigned int number);
-    virtual void closeIndex() = 0;
-    virtual void searchInIndex(string &search, const unsigned int resultStart,
-			       const unsigned int resultEnd, const bool verbose=false) = 0;
+    void closeIndex() ;
+    void searchInIndex(string &search, const unsigned int resultStart,
+	               const unsigned int resultEnd, const bool verbose=false);

+    Reader* reader;
+    SearcherInternal* internal;
    std::string searchPattern;
    std::string protocolPrefix;
    std::string searchProtocolPrefix;
--- a/meson.build
+++ b/meson.build
@ -1,6 +1,7 @@
 project('kiwixlib', 'cpp',
  version : '0.1.0',
-  license : 'GPL')
+  license : 'GPL',
+  default_options : ['c_std=c11', 'cpp_std=c++11'])

 compiler = meson.get_compiler('cpp')
 find_library_in_compiler = meson.version().version_compare('>=0.31.0')
@ -61,9 +62,7 @@ else
  endif
 endif

-xapian_dep = dependency('xapian-core', required:false)
-
-all_deps = [thread_dep, libicu_dep, libzim_dep, xapian_dep, pugixml_dep]
+all_deps = [thread_dep, libicu_dep, libzim_dep, pugixml_dep]
 if has_ctpp2_dep
  all_deps += [ctpp2_dep]
 endif
@ -80,9 +79,6 @@ subdir('static')
 subdir('src')

 pkg_requires = ['libzim', 'icu-i18n', 'pugixml']
-if xapian_dep.found()
-    pkg_requires += ['xapian-core']
-endif

 extra_libs = []
 extra_cflags = ''
--- a/src/android/kiwix.cpp
+++ b/src/android/kiwix.cpp
@ -9,7 +9,7 @@

 #include "unicode/putil.h"
 #include "reader.h"
-#include "xapianSearcher.h"
+#include "searcher.h"
 #include "common/base64.h"

 #include <android/log.h>
@ -23,7 +23,7 @@

 /* global variables */
 kiwix::Reader *reader = NULL;
-kiwix::XapianSearcher *searcher = NULL;
+kiwix::Searcher *searcher = NULL;

 static pthread_mutex_t readerLock = PTHREAD_MUTEX_INITIALIZER;
 static pthread_mutex_t searcherLock = PTHREAD_MUTEX_INITIALIZER;
@ -445,7 +445,7 @@ JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_loadFulltextIndex(JN
  searcher = NULL;
  try {
    if (searcher != NULL) delete searcher;
-    searcher = new kiwix::XapianSearcher(cPath, NULL);
+    searcher = new kiwix::Searcher(reader);
  } catch (...) {
    searcher = NULL;
    retVal = JNI_FALSE;
@ -460,7 +460,7 @@ JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_indexedQuery
  (JNIEnv *env, jclass obj, jstring query, jint count) {
  std::string cQuery = jni2c(query, env);
  unsigned int cCount = jni2c(count);
-  Result *p_result;
+  kiwix::Result *p_result;
  std::string result;
      
  pthread_mutex_lock(&searcherLock);
--- a/src/common/networkTools.cpp
+++ b/src/common/networkTools.cpp
@ -85,7 +85,7 @@ std::map<std::string, std::string> kiwix::getNetworkInterfaces() {

    /* some systems have ifr_addr.sa_len and adjust the length that
     * way, but not mine. weird */
-#ifndef linux
+#ifndef __linux__
    len=IFNAMSIZ + ifreq->ifr_addr.sa_len;
 #else
    len=sizeof *ifreq;
--- a/src/meson.build
+++ b/src/meson.build
@ -8,16 +8,10 @@ kiwix_sources = [
  'common/regexTools.cpp',
  'common/stringTools.cpp',
  'common/networkTools.cpp',
-  'common/otherTools.cpp',
-  'xapian/htmlparse.cc',
-  'xapian/myhtmlparse.cc'
+  'common/otherTools.cpp'
 ]
 kiwix_sources += lib_resources

-if xapian_dep.found()
-  kiwix_sources += ['xapianSearcher.cpp']
-endif
-
 if get_option('android')
  subdir('android')
 endif
--- a/src/reader.cpp
+++ b/src/reader.cpp
@ -87,7 +87,7 @@ namespace kiwix {
    }
  }

-  zim::File* Reader::getZimFileHandler() {
+  zim::File* Reader::getZimFileHandler() const {
    return this->zimFileHandler;
  }

@ -96,14 +96,14 @@ namespace kiwix {
    this->currentArticleOffset = this->firstArticleOffset;
  }

-  std::map<std::string, unsigned int> Reader::parseCounterMetadata() {
-    std::map<std::string, unsigned int> counters;
-    string content, mimeType, item, counterString;
-    unsigned int contentLength, counter;
-    string counterUrl = "/M/Counter";
+  std::map<const std::string, unsigned int> Reader::parseCounterMetadata() const {
+    std::map<const std::string, unsigned int> counters;
+    string mimeType, item, counterString;
+    unsigned int counter;

-    this->getContentByUrl(counterUrl, content, contentLength, mimeType);
-    stringstream ssContent(content);
+    zim::Article article = this->zimFileHandler->getArticle('M',"Counter");
+
+    stringstream ssContent(article.getData());

    while(getline(ssContent, item,  ';')) {
      stringstream ssItem(item);
@ -119,8 +119,8 @@ namespace kiwix {
  }

  /* Get the count of articles which can be indexed/displayed */
-  unsigned int Reader::getArticleCount() {
-    std::map<std::string, unsigned int> counterMap = this->parseCounterMetadata();
+  unsigned int Reader::getArticleCount() const {
+    std::map<const std::string, unsigned int> counterMap = this->parseCounterMetadata();
    unsigned int counter = 0;

    if (counterMap.empty()) {
@ -135,8 +135,8 @@ namespace kiwix {
  }

  /* Get the count of medias content in the ZIM file */
-  unsigned int Reader::getMediaCount() {
-    std::map<std::string, unsigned int> counterMap = this->parseCounterMetadata();
+  unsigned int Reader::getMediaCount() const {
+    std::map<const std::string, unsigned int> counterMap = this->parseCounterMetadata();
    unsigned int counter = 0;

    if (counterMap.empty())
@ -161,43 +161,38 @@ namespace kiwix {
  }

  /* Get the total of all items of a ZIM file, redirects included */
-  unsigned int Reader::getGlobalCount() {
+  unsigned int Reader::getGlobalCount() const {
    return this->zimFileHandler->getCountArticles();
  }

  /* Return the UID of the ZIM file */
-  string Reader::getId() {
+  string Reader::getId() const {
    std::ostringstream s;
    s << this->zimFileHandler->getFileheader().getUuid();
    return  s.str();
  }

  /* Return a page url from a title */
-  bool Reader::getPageUrlFromTitle(const string &title, string &url) {
+  bool Reader::getPageUrlFromTitle(const string &title, string &url) const {
    /* Extract the content from the zim file */
-    std::pair<bool, zim::File::const_iterator> resultPair = zimFileHandler->findxByTitle('A', title);
+    zim::Article article = this->zimFileHandler->getArticleByTitle('A', title);

-    /* Test if the article was found */
-    if (resultPair.first == true) {
-
-      /* Get the article */
-      zim::Article article = *resultPair.second;
-
-      /* If redirect */
-      unsigned int loopCounter = 0;
-      while (article.isRedirect() && loopCounter++<42) {
-	article = article.getRedirectArticle();
-      }
-
-      url = article.getLongUrl();
-      return true;
+    if ( ! article.good() )
+    {
+        return false;
    }

-    return false;
+    unsigned int loopCounter = 0;
+    while (article.isRedirect() && loopCounter++<42) {
+	article = article.getRedirectArticle();
+    }
+
+    url = article.getLongUrl();
+    return true;
  }

  /* Return an URL from a title*/
-  string Reader::getRandomPageUrl() {
+  string Reader::getRandomPageUrl() const {
    zim::Article article;
    zim::size_type idx;
    std::string mainPageUrl = this->getMainPageUrl();
@ -208,11 +203,11 @@ namespace kiwix {
      article = zimFileHandler->getArticle(idx);
    } while (article.getLongUrl() == mainPageUrl);

-    return article.getLongUrl().c_str();
+    return article.getLongUrl();
  }

  /* Return the welcome page URL */
-  string Reader::getMainPageUrl() {
+  string Reader::getMainPageUrl() const {
    string url = "";

    if (this->zimFileHandler->getFileheader().hasMainPage()) {
@ -229,7 +224,7 @@ namespace kiwix {
    return url;
  }

-  bool Reader::getFavicon(string &content, string &mimeType) {
+  bool Reader::getFavicon(string &content, string &mimeType) const {
    unsigned int contentLength = 0;

    this->getContentByUrl( "/-/favicon.png", content,
@ -254,12 +249,12 @@ namespace kiwix {
    return content.empty() ? false : true;
  }

-  string Reader::getZimFilePath() {
+  string Reader::getZimFilePath() const {
    return this->zimFilePath;
  }

  /* Return a metatag value */
-  bool Reader::getMetatag(const string &name, string &value) {
+  bool Reader::getMetatag(const string &name, string &value) const {
    unsigned int contentLength = 0;
    string contentType = "";

@ -267,7 +262,7 @@ namespace kiwix {
 				  contentLength, contentType);
  }

-  string Reader::getTitle() {
+  string Reader::getTitle() const {
    string value;
    this->getMetatag("Title", value);
    if (value.empty()) {
@ -279,19 +274,19 @@ namespace kiwix {
    return value;
  }

-  string Reader::getName() {
+  string Reader::getName() const {
    string value;
    this->getMetatag("Name", value);
    return value;
  }

-  string Reader::getTags() {
+  string Reader::getTags() const {
    string value;
    this->getMetatag("Tags", value);
    return value;
  }

-  string Reader::getDescription() {
+  string Reader::getDescription() const{
    string value;
    this->getMetatag("Description", value);

@ -303,31 +298,31 @@ namespace kiwix {
    return value;
  }

-  string Reader::getLanguage() {
+  string Reader::getLanguage() const {
    string value;
    this->getMetatag("Language", value);
    return value;
  }

-  string Reader::getDate() {
+  string Reader::getDate() const {
    string value;
    this->getMetatag("Date", value);
    return value;
  }

-  string Reader::getCreator() {
+  string Reader::getCreator() const {
    string value;
    this->getMetatag("Creator", value);
    return value;
  }

-  string Reader::getPublisher() {
+  string Reader::getPublisher() const {
    string value;
    this->getMetatag("Publisher", value);
    return value;
  }

-  string Reader::getOrigId() {
+  string Reader::getOrigId() const {
    string value;
    this->getMetatag("startfileuid", value);
    if(value.empty())
@ -355,17 +350,13 @@ namespace kiwix {
  }

  /* Return the first page URL */
-  string Reader::getFirstPageUrl() {
-    string url;
-
+  string Reader::getFirstPageUrl() const {
    zim::size_type firstPageOffset = zimFileHandler->getNamespaceBeginOffset('A');
    zim::Article article = zimFileHandler->getArticle(firstPageOffset);
-    url = article.getLongUrl();
-
-    return url;
+    return article.getLongUrl();
  }

-  bool Reader::parseUrl(const string &url, char *ns, string &title) {
+  bool Reader::parseUrl(const string &url, char *ns, string &title) const {
    /* Offset to visit the url */
    unsigned int urlLength = url.size();
    unsigned int offset = 0;
@ -395,130 +386,113 @@ namespace kiwix {
  }

  /* Return article by url */
-  bool Reader::getArticleObjectByDecodedUrl(const string &url, zim::Article &article) {
-    bool retVal = false;
-    
-    if (this->zimFileHandler != NULL) {
-      
-      /* Parse the url */
-      char ns = 0;
-      string titleStr;
-      this->parseUrl(url, &ns, titleStr);
-      
-      /* Main page */
-      if (titleStr.empty() && ns == 0) {
-	this->parseUrl(this->getMainPageUrl(), &ns, titleStr);
-      }
-      
-      /* Extract the content from the zim file */
-      std::pair<bool, zim::File::const_iterator> resultPair = zimFileHandler->findx(ns, titleStr);
-      
-      /* Test if the article was found */
-      if (resultPair.first == true) {
-	article = zimFileHandler->getArticle(resultPair.second.getIndex());
-	retVal = true;
-      }
-
+  bool Reader::getArticleObjectByDecodedUrl(const string &url, zim::Article &article) const {
+    if (this->zimFileHandler == NULL) {
+        return false;
    }
-    
-    return retVal;
+
+    /* Parse the url */
+    char ns = 0;
+    string urlStr;
+    this->parseUrl(url, &ns, urlStr);
+      
+    /* Main page */
+    if (urlStr.empty() && ns == 0) {
+        this->parseUrl(this->getMainPageUrl(), &ns, urlStr);
+    }
+
+    /* Extract the content from the zim file */
+    article = zimFileHandler->getArticle(ns, urlStr);
+    return article.good();
  }

  /* Return the mimeType without the content */
-  bool Reader::getMimeTypeByUrl(const string &url, string &mimeType) {
-    bool retVal = false;
-
-    if (this->zimFileHandler != NULL) {
-
-      zim::Article article;
-      if (this->getArticleObjectByDecodedUrl(url, article)) {
-	  try {
-	    mimeType = string(article.getMimeType().data(), article.getMimeType().size());
-	  } catch (exception &e) {
-	    cerr << "Unable to get the mimetype for "<< url << ":" << e.what() << endl;
-	    mimeType = "application/octet-stream";
-	  }	
-	  retVal = true;
-      } else {
-	mimeType = "";
-      }
-     
+  bool Reader::getMimeTypeByUrl(const string &url, string &mimeType) const {
+    if (this->zimFileHandler == NULL) {
+        return false;
    }

-    return retVal;
+    zim::Article article;
+    if (this->getArticleObjectByDecodedUrl(url, article)) {
+        try {
+            mimeType = article.getMimeType();
+        } catch (exception &e) {
+            cerr << "Unable to get the mimetype for " << url << ":" << e.what() << endl;
+            mimeType = "application/octet-stream";
+        }
+        return true;
+    } else {
+        mimeType = "";
+        return false;
+    }
  }

  /* Get a content from a zim file */
-  bool Reader::getContentByUrl(const string &url, string &content, unsigned int &contentLength, string &contentType) {
+  bool Reader::getContentByUrl(const string &url, string &content, unsigned int &contentLength, string &contentType) const {
    return this->getContentByEncodedUrl(url, content, contentLength, contentType);
  }

-  bool Reader::getContentByEncodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType, string &baseUrl) {
+  bool Reader::getContentByEncodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType, string &baseUrl) const {
    return this->getContentByDecodedUrl(kiwix::urlDecode(url), content, contentLength, contentType, baseUrl);
  }

-  bool Reader::getContentByEncodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType) {
+  bool Reader::getContentByEncodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType) const {
    std::string stubRedirectUrl;
    return this->getContentByEncodedUrl(kiwix::urlDecode(url), content, contentLength, contentType, stubRedirectUrl); 
  }

-  bool Reader::getContentByDecodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType) {
+  bool Reader::getContentByDecodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType) const {
    std::string stubRedirectUrl;
    return this->getContentByDecodedUrl(kiwix::urlDecode(url), content, contentLength, contentType, stubRedirectUrl);
  }

-  bool Reader::getContentByDecodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType, string &baseUrl) {
-    bool retVal = false;
+  bool Reader::getContentByDecodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType, string &baseUrl) const {
    content="";
    contentType="";
    contentLength = 0;
-    if (this->zimFileHandler != NULL) {

-      zim::Article article;
-      if (this->getArticleObjectByDecodedUrl(url, article)) {
-	
-	/* If redirect */
-	unsigned int loopCounter = 0;
-	while (article.isRedirect() && loopCounter++<42) {
-	  article = article.getRedirectArticle();
-	}
-
-	if (loopCounter < 42) {
-	  /* Compute base url (might be different from the url if redirects */
-	  baseUrl = "/" + std::string(1, article.getNamespace()) + "/" + article.getUrl();
-	  
-	  /* Get the content mime-type */
-	  try {
-	    contentType = string(article.getMimeType().data(), article.getMimeType().size());
-	  } catch (exception &e) {
-	    cerr << "Unable to get the mimetype for "<< baseUrl<< ":" << e.what() << endl;
-	    contentType = "application/octet-stream";
-	  }
-
-	  /* Get the data */
-	  content = string(article.getData().data(), article.getArticleSize());
-	}
-
-	/* Try to set a stub HTML header/footer if necesssary */
-	if (contentType.find("text/html") != string::npos && 
-	    content.find("<body") == std::string::npos &&
-	    content.find("<BODY") == std::string::npos) {
-	  content = "<html><head><title>" + article.getTitle() + "</title><meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" /></head><body>" + content + "</body></html>";
-	}
-
-	/* Get the data length */
-	contentLength = article.getArticleSize();
-
-	/* Set return value */
-	retVal = true;
-      }
+    zim::Article article;
+    if ( ! this->getArticleObjectByDecodedUrl(url, article)) {
+        return false;
    }

-    return retVal;
+    /* If redirect */
+    unsigned int loopCounter = 0;
+    while (article.isRedirect() && loopCounter++<42) {
+        article = article.getRedirectArticle();
+    }
+
+    if (loopCounter < 42) {
+        /* Compute base url (might be different from the url if redirects) */
+        baseUrl = "/" + std::string(1, article.getNamespace()) + "/" + article.getUrl();
+
+        /* Get the content mime-type */
+        try {
+            contentType = string(article.getMimeType().data(), article.getMimeType().size());
+        } catch (exception &e) {
+            cerr << "Unable to get the mimetype for "<< baseUrl<< ":" << e.what() << endl;
+            contentType = "application/octet-stream";
+        }
+
+        /* Get the data */
+        content = string(article.getData().data(), article.getArticleSize());
+    }
+
+    /* Try to set a stub HTML header/footer if necessary */
+    if (contentType.find("text/html") != string::npos &&
+        content.find("<body") == std::string::npos &&
+        content.find("<BODY") == std::string::npos) {
+        content = "<html><head><title>" + article.getTitle() + "</title><meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" /></head><body>" + content + "</body></html>";
+    }
+
+    /* Get the data length */
+    contentLength = article.getArticleSize();
+
+    return true;
  }

  /* Check if an article exists */
-  bool Reader::urlExists(const string &url) {
+  bool Reader::urlExists(const string &url) const {
    char ns = 0;
    string titleStr;
    this->parseUrl(url, &ns, titleStr);
@ -528,7 +502,7 @@ namespace kiwix {
  }

  /* Does the ZIM file has a fulltext index */
-  bool Reader::hasFulltextIndex() {
+  bool Reader::hasFulltextIndex() const {
    return this->urlExists("/Z/fulltextIndex/xapian");
  }

@ -604,7 +578,7 @@ namespace kiwix {
    return retVal;
  }

-  std::vector<std::string> Reader::getTitleVariants(const std::string &title) {
+  std::vector<std::string> Reader::getTitleVariants(const std::string &title) const {
    std::vector<std::string> variants;
    variants.push_back(title);
    variants.push_back(kiwix::ucFirst(title));
@ -660,12 +634,12 @@ namespace kiwix {
  }

  /* Check if the file has as checksum */
-  bool Reader::canCheckIntegrity() {
+  bool Reader::canCheckIntegrity() const {
    return this->zimFileHandler->getChecksum() != "";
  }

  /* Return true if corrupted, false otherwise */
-  bool Reader::isCorrupted() {
+  bool Reader::isCorrupted() const {
    try {
      if (this->zimFileHandler->verify() == true)
 	return false;
@ -678,7 +652,7 @@ namespace kiwix {
  }

  /* Return the file size, works also for splitted files */
-  unsigned int Reader::getFileSize() {
+  unsigned int Reader::getFileSize() const {
    zim::File *file = this->getZimFileHandler();
    zim::offset_type size = 0;

--- a/src/searcher.cpp
+++ b/src/searcher.cpp
@ -18,8 +18,11 @@
 */

 #include "searcher.h"
+#include "reader.h"
 #include "kiwixlib-resources.h"

+#include <zim/search.h>
+
 #ifdef ENABLE_CTPP2
 #include <ctpp2/CDT.hpp>
 #include <ctpp2/CTPP2FileLogger.hpp>
@ -32,8 +35,39 @@ using namespace CTPP;

 namespace kiwix {

+  class _Result : public Result {
+    public:
+      _Result(Searcher* searcher, zim::Search::iterator& iterator);
+      virtual ~_Result() {};
+
+      virtual std::string get_url();
+      virtual std::string get_title();
+      virtual int get_score();
+      virtual std::string get_snippet();
+      virtual int get_wordCount();
+      virtual int get_size();
+
+    private:
+      Searcher* searcher;
+      zim::Search::iterator iterator;
+  };
+
+  struct SearcherInternal {
+    const zim::Search *_search;
+    zim::Search::iterator current_iterator;
+
+    SearcherInternal() :  _search(NULL) {}
+    ~SearcherInternal() {
+        if ( _search != NULL )
+            delete _search;
+    }
+
+  };
+
  /* Constructor */
-  Searcher::Searcher() :
+  Searcher::Searcher(Reader* reader) :
+    reader(reader),
+    internal(new SearcherInternal()),
    searchPattern(""),
    protocolPrefix("zim://"),
    searchProtocolPrefix("search://?"),
@ -47,7 +81,9 @@ namespace kiwix {
  }
  
  /* Destructor */
-  Searcher::~Searcher() {}
+  Searcher::~Searcher() {
+      delete internal;
+  }
  
  /* Search strings in the database */
  void Searcher::search(std::string &search, unsigned int resultStart,
@ -80,12 +116,28 @@ namespace kiwix {
      this->resultStart = resultStart;
      this->resultEnd = resultEnd;
      string unaccentedSearch = removeAccents(search);
-      searchInIndex(unaccentedSearch, resultStart, resultEnd, verbose);
+      internal->_search = this->reader->getZimFileHandler()->search(unaccentedSearch, resultStart, resultEnd);
+      internal->current_iterator = internal->_search->begin();
+      this->estimatedResultCount = internal->_search->get_matches_estimated();
    }

    return;
  }

+  void Searcher::restart_search() {
+    internal->current_iterator = internal->_search->begin();
+  }
+
+  Result* Searcher::getNextResult() {
+    if (internal->current_iterator != internal->_search->end()) {
+      Result* result = new _Result(this, internal->current_iterator);
+      internal->current_iterator++;
+      return result;
+    }
+    return NULL;
+  }
+
+
  /* Reset the results */
  void Searcher::reset() {
    this->estimatedResultCount = 0;
@ -112,6 +164,36 @@ namespace kiwix {
    this->contentHumanReadableId = contentHumanReadableId;
  }

+  _Result::_Result(Searcher* searcher, zim::Search::iterator& iterator):
+    searcher(searcher),
+    iterator(iterator)
+  {
+  }
+
+  std::string _Result::get_url() {
+    return iterator.get_url();
+  }
+
+  std::string _Result::get_title() {
+    return iterator.get_title();
+  }
+
+  int _Result::get_score() {
+    return iterator.get_score();
+  }
+
+  std::string _Result::get_snippet() {
+    return iterator.get_snippet();
+  }
+
+  int _Result::get_size() {
+      return iterator.get_size();
+  }
+
+  int _Result::get_wordCount() {
+      return iterator.get_wordCount();
+  }
+
 #ifdef ENABLE_CTPP2
  
  string Searcher::getHtml() {