From 94670847ef31bfe101f3b325a60664e8928da5ef Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Fri, 7 Apr 2017 00:32:41 +0200 Subject: [PATCH 1/6] Use const when possible in the reader. Most read operation do not modify the content. So let's use const as far as possible. --- include/reader.h | 72 +++++++++++++++++++++---------------------- src/reader.cpp | 80 +++++++++++++++++++++++------------------------- 2 files changed, 75 insertions(+), 77 deletions(-) diff --git a/include/reader.h b/include/reader.h index f9bf9db0a..ccc2ef4c3 100644 --- a/include/reader.h +++ b/include/reader.h @@ -43,45 +43,45 @@ namespace kiwix { ~Reader(); void reset(); - unsigned int getArticleCount(); - unsigned int getMediaCount(); - unsigned int getGlobalCount(); - string getZimFilePath(); - string getId(); - string getRandomPageUrl(); - string getFirstPageUrl(); - string getMainPageUrl(); - bool getMetatag(const string &url, string &content); - string getTitle(); - string getDescription(); - string getLanguage(); - string getName(); - string getTags(); - string getDate(); - string getCreator(); - string getPublisher(); - string getOrigId(); - bool getFavicon(string &content, string &mimeType); - bool getPageUrlFromTitle(const string &title, string &url); - bool getMimeTypeByUrl(const string &url, string &mimeType); - bool getContentByUrl(const string &url, string &content, unsigned int &contentLength, string &contentType); - bool getContentByEncodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType, string &baseUrl); - bool getContentByEncodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType); - bool getContentByDecodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType, string &baseUrl); - bool getContentByDecodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType); + unsigned int getArticleCount() const; + unsigned int 
getMediaCount() const; + unsigned int getGlobalCount() const; + string getZimFilePath() const; + string getId() const; + string getRandomPageUrl() const; + string getFirstPageUrl() const; + string getMainPageUrl() const; + bool getMetatag(const string &url, string &content) const; + string getTitle() const; + string getDescription() const; + string getLanguage() const; + string getName() const; + string getTags() const; + string getDate() const; + string getCreator() const; + string getPublisher() const; + string getOrigId() const; + bool getFavicon(string &content, string &mimeType) const; + bool getPageUrlFromTitle(const string &title, string &url) const; + bool getMimeTypeByUrl(const string &url, string &mimeType) const; + bool getContentByUrl(const string &url, string &content, unsigned int &contentLength, string &contentType) const; + bool getContentByEncodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType, string &baseUrl) const; + bool getContentByEncodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType) const; + bool getContentByDecodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType, string &baseUrl) const; + bool getContentByDecodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType) const; bool searchSuggestions(const string &prefix, unsigned int suggestionsCount, const bool reset = true); bool searchSuggestionsSmart(const string &prefix, unsigned int suggestionsCount); - bool urlExists(const string &url); - bool hasFulltextIndex(); - std::vector getTitleVariants(const std::string &title); + bool urlExists(const string &url) const; + bool hasFulltextIndex() const; + std::vector getTitleVariants(const std::string &title) const; bool getNextSuggestion(string &title); bool getNextSuggestion(string &title, string &url); - bool canCheckIntegrity(); - bool isCorrupted(); - bool parseUrl(const string &url, 
char *ns, string &title); - unsigned int getFileSize(); - zim::File* getZimFileHandler(); - bool getArticleObjectByDecodedUrl(const string &url, zim::Article &article); + bool canCheckIntegrity() const; + bool isCorrupted() const; + bool parseUrl(const string &url, char *ns, string &title) const; + unsigned int getFileSize() const; + zim::File* getZimFileHandler() const; + bool getArticleObjectByDecodedUrl(const string &url, zim::Article &article) const; protected: zim::File* zimFileHandler; @@ -96,7 +96,7 @@ namespace kiwix { std::vector< std::vector >::iterator suggestionsOffset; private: - std::map parseCounterMetadata(); + std::map parseCounterMetadata() const; }; } diff --git a/src/reader.cpp b/src/reader.cpp index 85bbe722d..8a58d60cd 100644 --- a/src/reader.cpp +++ b/src/reader.cpp @@ -87,7 +87,7 @@ namespace kiwix { } } - zim::File* Reader::getZimFileHandler() { + zim::File* Reader::getZimFileHandler() const { return this->zimFileHandler; } @@ -96,8 +96,8 @@ namespace kiwix { this->currentArticleOffset = this->firstArticleOffset; } - std::map Reader::parseCounterMetadata() { - std::map counters; + std::map Reader::parseCounterMetadata() const { + std::map counters; string content, mimeType, item, counterString; unsigned int contentLength, counter; string counterUrl = "/M/Counter"; @@ -119,8 +119,8 @@ namespace kiwix { } /* Get the count of articles which can be indexed/displayed */ - unsigned int Reader::getArticleCount() { - std::map counterMap = this->parseCounterMetadata(); + unsigned int Reader::getArticleCount() const { + std::map counterMap = this->parseCounterMetadata(); unsigned int counter = 0; if (counterMap.empty()) { @@ -135,8 +135,8 @@ namespace kiwix { } /* Get the count of medias content in the ZIM file */ - unsigned int Reader::getMediaCount() { - std::map counterMap = this->parseCounterMetadata(); + unsigned int Reader::getMediaCount() const { + std::map counterMap = this->parseCounterMetadata(); unsigned int counter = 0; if 
(counterMap.empty()) @@ -161,19 +161,19 @@ namespace kiwix { } /* Get the total of all items of a ZIM file, redirects included */ - unsigned int Reader::getGlobalCount() { + unsigned int Reader::getGlobalCount() const { return this->zimFileHandler->getCountArticles(); } /* Return the UID of the ZIM file */ - string Reader::getId() { + string Reader::getId() const { std::ostringstream s; s << this->zimFileHandler->getFileheader().getUuid(); return s.str(); } /* Return a page url from a title */ - bool Reader::getPageUrlFromTitle(const string &title, string &url) { + bool Reader::getPageUrlFromTitle(const string &title, string &url) const { /* Extract the content from the zim file */ std::pair resultPair = zimFileHandler->findxByTitle('A', title); @@ -197,7 +197,7 @@ namespace kiwix { } /* Return an URL from a title*/ - string Reader::getRandomPageUrl() { + string Reader::getRandomPageUrl() const { zim::Article article; zim::size_type idx; std::string mainPageUrl = this->getMainPageUrl(); @@ -212,7 +212,7 @@ namespace kiwix { } /* Return the welcome page URL */ - string Reader::getMainPageUrl() { + string Reader::getMainPageUrl() const { string url = ""; if (this->zimFileHandler->getFileheader().hasMainPage()) { @@ -229,7 +229,7 @@ namespace kiwix { return url; } - bool Reader::getFavicon(string &content, string &mimeType) { + bool Reader::getFavicon(string &content, string &mimeType) const { unsigned int contentLength = 0; this->getContentByUrl( "/-/favicon.png", content, @@ -254,12 +254,12 @@ namespace kiwix { return content.empty() ? 
false : true; } - string Reader::getZimFilePath() { + string Reader::getZimFilePath() const { return this->zimFilePath; } /* Return a metatag value */ - bool Reader::getMetatag(const string &name, string &value) { + bool Reader::getMetatag(const string &name, string &value) const { unsigned int contentLength = 0; string contentType = ""; @@ -267,7 +267,7 @@ namespace kiwix { contentLength, contentType); } - string Reader::getTitle() { + string Reader::getTitle() const { string value; this->getMetatag("Title", value); if (value.empty()) { @@ -279,19 +279,19 @@ namespace kiwix { return value; } - string Reader::getName() { + string Reader::getName() const { string value; this->getMetatag("Name", value); return value; } - string Reader::getTags() { + string Reader::getTags() const { string value; this->getMetatag("Tags", value); return value; } - string Reader::getDescription() { + string Reader::getDescription() const{ string value; this->getMetatag("Description", value); @@ -303,31 +303,31 @@ namespace kiwix { return value; } - string Reader::getLanguage() { + string Reader::getLanguage() const { string value; this->getMetatag("Language", value); return value; } - string Reader::getDate() { + string Reader::getDate() const { string value; this->getMetatag("Date", value); return value; } - string Reader::getCreator() { + string Reader::getCreator() const { string value; this->getMetatag("Creator", value); return value; } - string Reader::getPublisher() { + string Reader::getPublisher() const { string value; this->getMetatag("Publisher", value); return value; } - string Reader::getOrigId() { + string Reader::getOrigId() const { string value; this->getMetatag("startfileuid", value); if(value.empty()) @@ -355,9 +355,7 @@ namespace kiwix { } /* Return the first page URL */ - string Reader::getFirstPageUrl() { - string url; - + string Reader::getFirstPageUrl() const { zim::size_type firstPageOffset = zimFileHandler->getNamespaceBeginOffset('A'); zim::Article article = 
zimFileHandler->getArticle(firstPageOffset); url = article.getLongUrl(); @@ -365,7 +363,7 @@ namespace kiwix { return url; } - bool Reader::parseUrl(const string &url, char *ns, string &title) { + bool Reader::parseUrl(const string &url, char *ns, string &title) const { /* Offset to visit the url */ unsigned int urlLength = url.size(); unsigned int offset = 0; @@ -395,7 +393,7 @@ namespace kiwix { } /* Return article by url */ - bool Reader::getArticleObjectByDecodedUrl(const string &url, zim::Article &article) { + bool Reader::getArticleObjectByDecodedUrl(const string &url, zim::Article &article) const { bool retVal = false; if (this->zimFileHandler != NULL) { @@ -425,7 +423,7 @@ namespace kiwix { } /* Return the mimeType without the content */ - bool Reader::getMimeTypeByUrl(const string &url, string &mimeType) { + bool Reader::getMimeTypeByUrl(const string &url, string &mimeType) const { bool retVal = false; if (this->zimFileHandler != NULL) { @@ -449,25 +447,25 @@ namespace kiwix { } /* Get a content from a zim file */ - bool Reader::getContentByUrl(const string &url, string &content, unsigned int &contentLength, string &contentType) { + bool Reader::getContentByUrl(const string &url, string &content, unsigned int &contentLength, string &contentType) const { return this->getContentByEncodedUrl(url, content, contentLength, contentType); } - bool Reader::getContentByEncodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType, string &baseUrl) { + bool Reader::getContentByEncodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType, string &baseUrl) const { return this->getContentByDecodedUrl(kiwix::urlDecode(url), content, contentLength, contentType, baseUrl); } - bool Reader::getContentByEncodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType) { + bool Reader::getContentByEncodedUrl(const string &url, string &content, unsigned int &contentLength, string 
&contentType) const { std::string stubRedirectUrl; return this->getContentByEncodedUrl(kiwix::urlDecode(url), content, contentLength, contentType, stubRedirectUrl); } - bool Reader::getContentByDecodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType) { + bool Reader::getContentByDecodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType) const { std::string stubRedirectUrl; return this->getContentByDecodedUrl(kiwix::urlDecode(url), content, contentLength, contentType, stubRedirectUrl); } - bool Reader::getContentByDecodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType, string &baseUrl) { + bool Reader::getContentByDecodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType, string &baseUrl) const { bool retVal = false; content=""; contentType=""; @@ -518,7 +516,7 @@ namespace kiwix { } /* Check if an article exists */ - bool Reader::urlExists(const string &url) { + bool Reader::urlExists(const string &url) const { char ns = 0; string titleStr; this->parseUrl(url, &ns, titleStr); @@ -528,7 +526,7 @@ namespace kiwix { } /* Does the ZIM file has a fulltext index */ - bool Reader::hasFulltextIndex() { + bool Reader::hasFulltextIndex() const { return this->urlExists("/Z/fulltextIndex/xapian"); } @@ -604,7 +602,7 @@ namespace kiwix { return retVal; } - std::vector Reader::getTitleVariants(const std::string &title) { + std::vector Reader::getTitleVariants(const std::string &title) const { std::vector variants; variants.push_back(title); variants.push_back(kiwix::ucFirst(title)); @@ -660,12 +658,12 @@ namespace kiwix { } /* Check if the file has as checksum */ - bool Reader::canCheckIntegrity() { + bool Reader::canCheckIntegrity() const { return this->zimFileHandler->getChecksum() != ""; } /* Return true if corrupted, false otherwise */ - bool Reader::isCorrupted() { + bool Reader::isCorrupted() const { try { if 
(this->zimFileHandler->verify() == true) return false; @@ -678,7 +676,7 @@ namespace kiwix { } /* Return the file size, works also for splitted files */ - unsigned int Reader::getFileSize() { + unsigned int Reader::getFileSize() const { zim::File *file = this->getZimFileHandler(); zim::offset_type size = 0; From 37f29da63ee90b40ee2a21f62c60f11588c29822 Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Fri, 7 Apr 2017 00:39:02 +0200 Subject: [PATCH 2/6] Beautify a bit the code. No real change. Just do less code or use higher level API. --- src/reader.cpp | 194 ++++++++++++++++++++++--------------------------- 1 file changed, 85 insertions(+), 109 deletions(-) diff --git a/src/reader.cpp b/src/reader.cpp index 8a58d60cd..39fd8d901 100644 --- a/src/reader.cpp +++ b/src/reader.cpp @@ -98,12 +98,12 @@ namespace kiwix { std::map Reader::parseCounterMetadata() const { std::map counters; - string content, mimeType, item, counterString; - unsigned int contentLength, counter; - string counterUrl = "/M/Counter"; + string mimeType, item, counterString; + unsigned int counter; - this->getContentByUrl(counterUrl, content, contentLength, mimeType); - stringstream ssContent(content); + zim::Article article = this->zimFileHandler->getArticle('M',"Counter"); + + stringstream ssContent(article.getData()); while(getline(ssContent, item, ';')) { stringstream ssItem(item); @@ -175,25 +175,20 @@ namespace kiwix { /* Return a page url from a title */ bool Reader::getPageUrlFromTitle(const string &title, string &url) const { /* Extract the content from the zim file */ - std::pair resultPair = zimFileHandler->findxByTitle('A', title); + zim::Article article = this->zimFileHandler->getArticleByTitle('A', title); - /* Test if the article was found */ - if (resultPair.first == true) { - - /* Get the article */ - zim::Article article = *resultPair.second; - - /* If redirect */ - unsigned int loopCounter = 0; - while (article.isRedirect() && loopCounter++<42) { - article = 
article.getRedirectArticle(); - } - - url = article.getLongUrl(); - return true; + if ( ! article.good() ) + { + return false; } - return false; + unsigned int loopCounter = 0; + while (article.isRedirect() && loopCounter++<42) { + article = article.getRedirectArticle(); + } + + url = article.getLongUrl(); + return true; } /* Return an URL from a title*/ @@ -208,7 +203,7 @@ namespace kiwix { article = zimFileHandler->getArticle(idx); } while (article.getLongUrl() == mainPageUrl); - return article.getLongUrl().c_str(); + return article.getLongUrl(); } /* Return the welcome page URL */ @@ -358,9 +353,7 @@ namespace kiwix { string Reader::getFirstPageUrl() const { zim::size_type firstPageOffset = zimFileHandler->getNamespaceBeginOffset('A'); zim::Article article = zimFileHandler->getArticle(firstPageOffset); - url = article.getLongUrl(); - - return url; + return article.getLongUrl(); } bool Reader::parseUrl(const string &url, char *ns, string &title) const { @@ -394,56 +387,44 @@ namespace kiwix { /* Return article by url */ bool Reader::getArticleObjectByDecodedUrl(const string &url, zim::Article &article) const { - bool retVal = false; - - if (this->zimFileHandler != NULL) { - - /* Parse the url */ - char ns = 0; - string titleStr; - this->parseUrl(url, &ns, titleStr); - - /* Main page */ - if (titleStr.empty() && ns == 0) { - this->parseUrl(this->getMainPageUrl(), &ns, titleStr); - } - - /* Extract the content from the zim file */ - std::pair resultPair = zimFileHandler->findx(ns, titleStr); - - /* Test if the article was found */ - if (resultPair.first == true) { - article = zimFileHandler->getArticle(resultPair.second.getIndex()); - retVal = true; - } - + if (this->zimFileHandler == NULL) { + return false; } - - return retVal; + + /* Parse the url */ + char ns = 0; + string urlStr; + this->parseUrl(url, &ns, urlStr); + + /* Main page */ + if (urlStr.empty() && ns == 0) { + this->parseUrl(this->getMainPageUrl(), &ns, urlStr); + } + + /* Extract the content from 
the zim file */ + article = zimFileHandler->getArticle(ns, urlStr); + return article.good(); } /* Return the mimeType without the content */ bool Reader::getMimeTypeByUrl(const string &url, string &mimeType) const { - bool retVal = false; - - if (this->zimFileHandler != NULL) { - - zim::Article article; - if (this->getArticleObjectByDecodedUrl(url, article)) { - try { - mimeType = string(article.getMimeType().data(), article.getMimeType().size()); - } catch (exception &e) { - cerr << "Unable to get the mimetype for "<< url << ":" << e.what() << endl; - mimeType = "application/octet-stream"; - } - retVal = true; - } else { - mimeType = ""; - } - + if (this->zimFileHandler == NULL) { + return false; } - return retVal; + zim::Article article; + if (this->getArticleObjectByDecodedUrl(url, article)) { + try { + mimeType = article.getMimeType(); + } catch (exception &e) { + cerr << "Unable to get the mimetype for " << url << ":" << e.what() << endl; + mimeType = "application/octet-stream"; + } + return true; + } else { + mimeType = ""; + return false; + } } /* Get a content from a zim file */ @@ -466,53 +447,48 @@ namespace kiwix { } bool Reader::getContentByDecodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType, string &baseUrl) const { - bool retVal = false; content=""; contentType=""; contentLength = 0; - if (this->zimFileHandler != NULL) { - zim::Article article; - if (this->getArticleObjectByDecodedUrl(url, article)) { - - /* If redirect */ - unsigned int loopCounter = 0; - while (article.isRedirect() && loopCounter++<42) { - article = article.getRedirectArticle(); - } - - if (loopCounter < 42) { - /* Compute base url (might be different from the url if redirects */ - baseUrl = "/" + std::string(1, article.getNamespace()) + "/" + article.getUrl(); - - /* Get the content mime-type */ - try { - contentType = string(article.getMimeType().data(), article.getMimeType().size()); - } catch (exception &e) { - cerr << "Unable to get 
the mimetype for "<< baseUrl<< ":" << e.what() << endl; - contentType = "application/octet-stream"; - } - - /* Get the data */ - content = string(article.getData().data(), article.getArticleSize()); - } - - /* Try to set a stub HTML header/footer if necesssary */ - if (contentType.find("text/html") != string::npos && - content.find("" + content + ""; - } - - /* Get the data length */ - contentLength = article.getArticleSize(); - - /* Set return value */ - retVal = true; - } + zim::Article article; + if ( ! this->getArticleObjectByDecodedUrl(url, article)) { + return false; } - return retVal; + /* If redirect */ + unsigned int loopCounter = 0; + while (article.isRedirect() && loopCounter++<42) { + article = article.getRedirectArticle(); + } + + if (loopCounter < 42) { + /* Compute base url (might be different from the url if redirects */ + baseUrl = "/" + std::string(1, article.getNamespace()) + "/" + article.getUrl(); + + /* Get the content mime-type */ + try { + contentType = string(article.getMimeType().data(), article.getMimeType().size()); + } catch (exception &e) { + cerr << "Unable to get the mimetype for "<< baseUrl<< ":" << e.what() << endl; + contentType = "application/octet-stream"; + } + + /* Get the data */ + content = string(article.getData().data(), article.getArticleSize()); + } + + /* Try to set a stub HTML header/footer if necesssary */ + if (contentType.find("text/html") != string::npos && + content.find("" + content + ""; + } + + /* Get the data length */ + contentLength = article.getArticleSize(); + + return true; } /* Check if an article exists */ From 5ca419bee79c9310fa68b96a9bdee6ef1a5860cc Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Fri, 7 Apr 2017 00:49:12 +0200 Subject: [PATCH 3/6] =?UTF-8?q?Use=20the=20new=20search=20API=C2=A0in=20zi?= =?UTF-8?q?mlib.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We do not use xapian anymore. This is all handled by zimlib. 
--- include/meson.build | 4 --- include/searcher.h | 41 +++++++++++---------- meson.build | 7 +--- src/meson.build | 8 +---- src/searcher.cpp | 88 +++++++++++++++++++++++++++++++++++++++++++-- 5 files changed, 109 insertions(+), 39 deletions(-) diff --git a/include/meson.build b/include/meson.build index 608423170..0e890d707 100644 --- a/include/meson.build +++ b/include/meson.build @@ -5,10 +5,6 @@ headers = [ 'searcher.h' ] -if xapian_dep.found() - headers += ['xapianSearcher.h'] -endif - install_headers(headers, subdir:'kiwix') install_headers( diff --git a/include/searcher.h b/include/searcher.h index f8999501a..2a1dd3960 100644 --- a/include/searcher.h +++ b/include/searcher.h @@ -35,30 +35,31 @@ using namespace std; -class Result -{ - public: - virtual ~Result() {}; - virtual std::string get_url() = 0; - virtual std::string get_title() = 0; - virtual int get_score() = 0; - virtual std::string get_snippet() = 0; - virtual int get_wordCount() = 0; - virtual int get_size() = 0; -}; - namespace kiwix { + class Reader; + class Result { + public: + virtual ~Result() {}; + virtual std::string get_url() = 0; + virtual std::string get_title() = 0; + virtual int get_score() = 0; + virtual std::string get_snippet() = 0; + virtual int get_wordCount() = 0; + virtual int get_size() = 0; + }; + + struct SearcherInternal; class Searcher { public: - Searcher(); - virtual ~Searcher(); + Searcher(Reader* reader); + ~Searcher(); void search(std::string &search, unsigned int resultStart, unsigned int resultEnd, const bool verbose=false); - virtual Result* getNextResult() = 0; - virtual void restart_search() = 0; + Result* getNextResult(); + void restart_search(); unsigned int getEstimatedResultCount(); bool setProtocolPrefix(const std::string prefix); bool setSearchProtocolPrefix(const std::string prefix); @@ -71,10 +72,12 @@ namespace kiwix { protected: std::string beautifyInteger(const unsigned int number); - virtual void closeIndex() = 0; - virtual void searchInIndex(string 
&search, const unsigned int resultStart, - const unsigned int resultEnd, const bool verbose=false) = 0; + void closeIndex() ; + void searchInIndex(string &search, const unsigned int resultStart, + const unsigned int resultEnd, const bool verbose=false); + Reader* reader; + SearcherInternal* internal; std::string searchPattern; std::string protocolPrefix; std::string searchProtocolPrefix; diff --git a/meson.build b/meson.build index 75f256eef..19fce8c4f 100644 --- a/meson.build +++ b/meson.build @@ -61,9 +61,7 @@ else endif endif -xapian_dep = dependency('xapian-core', required:false) - -all_deps = [thread_dep, libicu_dep, libzim_dep, xapian_dep, pugixml_dep] +all_deps = [thread_dep, libicu_dep, libzim_dep, pugixml_dep] if has_ctpp2_dep all_deps += [ctpp2_dep] endif @@ -80,9 +78,6 @@ subdir('static') subdir('src') pkg_requires = ['libzim', 'icu-i18n', 'pugixml'] -if xapian_dep.found() - pkg_requires += ['xapian-core'] -endif extra_libs = [] extra_cflags = '' diff --git a/src/meson.build b/src/meson.build index 13a77332d..5cbac1f57 100644 --- a/src/meson.build +++ b/src/meson.build @@ -8,16 +8,10 @@ kiwix_sources = [ 'common/regexTools.cpp', 'common/stringTools.cpp', 'common/networkTools.cpp', - 'common/otherTools.cpp', - 'xapian/htmlparse.cc', - 'xapian/myhtmlparse.cc' + 'common/otherTools.cpp' ] kiwix_sources += lib_resources -if xapian_dep.found() - kiwix_sources += ['xapianSearcher.cpp'] -endif - if get_option('android') subdir('android') endif diff --git a/src/searcher.cpp b/src/searcher.cpp index f458c6129..380ecda4c 100644 --- a/src/searcher.cpp +++ b/src/searcher.cpp @@ -18,8 +18,11 @@ */ #include "searcher.h" +#include "reader.h" #include "kiwixlib-resources.h" +#include + #ifdef ENABLE_CTPP2 #include #include @@ -32,8 +35,39 @@ using namespace CTPP; namespace kiwix { + class _Result : public Result { + public: + _Result(Searcher* searcher, zim::Search::iterator& iterator); + virtual ~_Result() {}; + + virtual std::string get_url(); + virtual std::string 
get_title(); + virtual int get_score(); + virtual std::string get_snippet(); + virtual int get_wordCount(); + virtual int get_size(); + + private: + Searcher* searcher; + zim::Search::iterator iterator; + }; + + struct SearcherInternal { + const zim::Search *_search; + zim::Search::iterator current_iterator; + + SearcherInternal() : _search(NULL) {} + ~SearcherInternal() { + if ( _search != NULL ) + delete _search; + } + + }; + /* Constructor */ - Searcher::Searcher() : + Searcher::Searcher(Reader* reader) : + reader(reader), + internal(new SearcherInternal()), searchPattern(""), protocolPrefix("zim://"), searchProtocolPrefix("search://?"), @@ -47,7 +81,9 @@ namespace kiwix { } /* Destructor */ - Searcher::~Searcher() {} + Searcher::~Searcher() { + delete internal; + } /* Search strings in the database */ void Searcher::search(std::string &search, unsigned int resultStart, @@ -80,12 +116,28 @@ namespace kiwix { this->resultStart = resultStart; this->resultEnd = resultEnd; string unaccentedSearch = removeAccents(search); - searchInIndex(unaccentedSearch, resultStart, resultEnd, verbose); + internal->_search = this->reader->getZimFileHandler()->search(unaccentedSearch, resultStart, resultEnd); + internal->current_iterator = internal->_search->begin(); + this->estimatedResultCount = internal->_search->get_matches_estimated(); } return; } + void Searcher::restart_search() { + internal->current_iterator = internal->_search->begin(); + } + + Result* Searcher::getNextResult() { + if (internal->current_iterator != internal->_search->end()) { + Result* result = new _Result(this, internal->current_iterator); + internal->current_iterator++; + return result; + } + return NULL; + } + + /* Reset the results */ void Searcher::reset() { this->estimatedResultCount = 0; @@ -112,6 +164,36 @@ namespace kiwix { this->contentHumanReadableId = contentHumanReadableId; } + _Result::_Result(Searcher* searcher, zim::Search::iterator& iterator): + searcher(searcher), + iterator(iterator) + { 
+ } + + std::string _Result::get_url() { + return iterator.get_url(); + } + + std::string _Result::get_title() { + return iterator.get_title(); + } + + int _Result::get_score() { + return iterator.get_score(); + } + + std::string _Result::get_snippet() { + return iterator.get_snippet(); + } + + int _Result::get_size() { + return iterator.get_size(); + } + + int _Result::get_wordCount() { + return iterator.get_wordCount(); + } + #ifdef ENABLE_CTPP2 string Searcher::getHtml() { From 9abdc6ce02b11cb9d534484f231641e8a56706d7 Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Fri, 7 Apr 2017 12:24:36 +0200 Subject: [PATCH 4/6] Move to c++11. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Zimlib moved to C++11, so we need a C++11 compiler. --- meson.build | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/meson.build b/meson.build index 19fce8c4f..f89ce8e60 100644 --- a/meson.build +++ b/meson.build @@ -1,6 +1,7 @@ project('kiwixlib', 'cpp', version : '0.1.0', - license : 'GPL') + license : 'GPL', + default_options : ['c_std=c11', 'cpp_std=c++11']) compiler = meson.get_compiler('cpp') find_library_in_compiler = meson.version().version_compare('>=0.31.0') From 44a77f58467c536c12525f906c317202fd6712ed Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Fri, 7 Apr 2017 12:25:05 +0200 Subject: [PATCH 5/6] Update android jni wrapper to new API.
--- src/android/kiwix.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/android/kiwix.cpp b/src/android/kiwix.cpp index 94a6ab79d..f13c7d9d3 100644 --- a/src/android/kiwix.cpp +++ b/src/android/kiwix.cpp @@ -9,7 +9,7 @@ #include "unicode/putil.h" #include "reader.h" -#include "xapianSearcher.h" +#include "searcher.h" #include "common/base64.h" #include @@ -23,7 +23,7 @@ /* global variables */ kiwix::Reader *reader = NULL; -kiwix::XapianSearcher *searcher = NULL; +kiwix::Searcher *searcher = NULL; static pthread_mutex_t readerLock = PTHREAD_MUTEX_INITIALIZER; static pthread_mutex_t searcherLock = PTHREAD_MUTEX_INITIALIZER; @@ -445,7 +445,7 @@ JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_loadFulltextIndex(JN searcher = NULL; try { if (searcher != NULL) delete searcher; - searcher = new kiwix::XapianSearcher(cPath, NULL); + searcher = new kiwix::Searcher(reader); } catch (...) { searcher = NULL; retVal = JNI_FALSE; @@ -460,7 +460,7 @@ JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_indexedQuery (JNIEnv *env, jclass obj, jstring query, jint count) { std::string cQuery = jni2c(query, env); unsigned int cCount = jni2c(count); - Result *p_result; + kiwix::Result *p_result; std::string result; pthread_mutex_lock(&searcherLock); From 3be4d92c537c64518250373df13214a3f5976552 Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Fri, 7 Apr 2017 15:31:03 +0200 Subject: [PATCH 6/6] Correctly check if we are compiling for linux or not. In C++11 `linux` is not a reserved word, so compilers do not define it. The correct way to check whether we are compiling for Linux is to test for `__linux__`.
--- src/common/networkTools.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/common/networkTools.cpp b/src/common/networkTools.cpp index 268a49bed..92c4211bc 100644 --- a/src/common/networkTools.cpp +++ b/src/common/networkTools.cpp @@ -85,7 +85,7 @@ std::map kiwix::getNetworkInterfaces() { /* some systems have ifr_addr.sa_len and adjust the length that * way, but not mine. weird */ -#ifndef linux +#ifndef __linux__ len=IFNAMSIZ + ifreq->ifr_addr.sa_len; #else len=sizeof *ifreq;