From 4a51dd9e009f8c2e41bf6bffd7bdbb74c349d422 Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Tue, 4 Jul 2017 16:57:22 +0200 Subject: [PATCH 1/5] Fix memory leak. If a `searcher` is already created we must delete it. If we set the pointer to NULL before, we will never delete it. --- src/android/kiwix.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/android/kiwix.cpp b/src/android/kiwix.cpp index fa8daafb7..2de373164 100644 --- a/src/android/kiwix.cpp +++ b/src/android/kiwix.cpp @@ -486,7 +486,6 @@ JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_loadFulltextIndex( std::string cPath = jni2c(path, env); pthread_mutex_lock(&searcherLock); - searcher = NULL; try { if (searcher != NULL) { delete searcher; From 8d39b0b3433255fc9ef7ef4336e26b5b979b6f5a Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Wed, 5 Jul 2017 11:28:08 +0200 Subject: [PATCH 2/5] Search result objects now have a get_content method. This was not necessary when searching in only one zim file as `url` was enough to get the article (and so the content). If we want to search in several zim files at the same time, we need a way to get the content directly. 
--- include/searcher.h | 1 + include/xapianSearcher.h | 1 + src/searcher.cpp | 8 ++++++++ src/xapianSearcher.cpp | 22 +++++++++++++++++----- 4 files changed, 27 insertions(+), 5 deletions(-) diff --git a/include/searcher.h b/include/searcher.h index e5549778f..af28e521f 100644 --- a/include/searcher.h +++ b/include/searcher.h @@ -46,6 +46,7 @@ class Result virtual std::string get_title() = 0; virtual int get_score() = 0; virtual std::string get_snippet() = 0; + virtual std::string get_content() = 0; virtual int get_wordCount() = 0; virtual int get_size() = 0; }; diff --git a/include/xapianSearcher.h b/include/xapianSearcher.h index 907ca733e..dcbe5647a 100644 --- a/include/xapianSearcher.h +++ b/include/xapianSearcher.h @@ -43,6 +43,7 @@ class XapianResult : public Result virtual std::string get_title(); virtual int get_score(); virtual std::string get_snippet(); + virtual std::string get_content(); virtual int get_wordCount(); virtual int get_size(); diff --git a/src/searcher.cpp b/src/searcher.cpp index 4bfeab355..d83adc7fb 100644 --- a/src/searcher.cpp +++ b/src/searcher.cpp @@ -45,6 +45,7 @@ class _Result : public Result virtual std::string get_title(); virtual int get_score(); virtual std::string get_snippet(); + virtual std::string get_content(); virtual int get_wordCount(); virtual int get_size(); @@ -241,6 +242,13 @@ std::string _Result::get_snippet() { return iterator.get_snippet(); } +std::string _Result::get_content() +{ + if (iterator->good()) { + return iterator->getData(); + } + return ""; +} int _Result::get_size() { return iterator.get_size(); diff --git a/src/xapianSearcher.cpp b/src/xapianSearcher.cpp index aa0223d99..206888155 100644 --- a/src/xapianSearcher.cpp +++ b/src/xapianSearcher.cpp @@ -177,11 +177,10 @@ std::string XapianResult::get_snippet() We parse it and use the html dump to avoid remove html tags in the content and be able to nicely cut the text at random place. 
*/ MyHtmlParser htmlParser; - std::string content; - unsigned int contentLength; - std::string contentType; - searcher->reader->getContentByUrl( - get_url(), content, contentLength, contentType); + std::string content = get_content(); + if (content.empty()) { + return content; + } try { htmlParser.parse_html(content, "UTF-8", true); } catch (...) { @@ -189,6 +188,19 @@ std::string XapianResult::get_snippet() return searcher->results.snippet(htmlParser.dump, 500); } +std::string XapianResult::get_content() +{ + if (!searcher->reader) { + return ""; + } + std::string content; + unsigned int contentLength; + std::string contentType; + searcher->reader->getContentByUrl( + get_url(), content, contentLength, contentType); + return content; +} + int XapianResult::get_size() { if (searcher->valuesmap.empty()) { From 3991e648edac4b74eeebb1fe093c03ce83a4c525 Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Wed, 12 Jul 2017 18:36:57 +0200 Subject: [PATCH 3/5] Be able to get the reader index from a search result. 
--- include/searcher.h | 1 + include/xapianSearcher.h | 1 + src/searcher.cpp | 5 +++++ 3 files changed, 7 insertions(+) diff --git a/include/searcher.h b/include/searcher.h index af28e521f..3801be91b 100644 --- a/include/searcher.h +++ b/include/searcher.h @@ -49,6 +49,7 @@ class Result virtual std::string get_content() = 0; virtual int get_wordCount() = 0; virtual int get_size() = 0; + virtual int get_readerIndex() = 0; }; struct SearcherInternal; diff --git a/include/xapianSearcher.h b/include/xapianSearcher.h index dcbe5647a..8c0cb3a71 100644 --- a/include/xapianSearcher.h +++ b/include/xapianSearcher.h @@ -46,6 +46,7 @@ class XapianResult : public Result virtual std::string get_content(); virtual int get_wordCount(); virtual int get_size(); + virtual int get_readerIndex() { return 0; }; private: XapianSearcher* searcher; diff --git a/src/searcher.cpp b/src/searcher.cpp index d83adc7fb..a60a605cd 100644 --- a/src/searcher.cpp +++ b/src/searcher.cpp @@ -48,6 +48,7 @@ class _Result : public Result virtual std::string get_content(); virtual int get_wordCount(); virtual int get_size(); + virtual int get_readerIndex(); private: Searcher* searcher; @@ -257,6 +258,10 @@ int _Result::get_wordCount() { return iterator.get_wordCount(); } +int _Result::get_readerIndex() +{ + return iterator.get_fileIndex(); +} #ifdef ENABLE_CTPP2 string Searcher::getHtml() From 9cc329dbd27067be48f25784380ca9c1e5590f5a Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Wed, 5 Jul 2017 11:35:36 +0200 Subject: [PATCH 4/5] Support multi-zims search in kiwix-lib. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All the code was already in zimlib. It is mainly an update of the code using zimlib. No JNI change for now to not break the API. 
--- include/searcher.h | 5 ++++- src/android/kiwix.cpp | 10 ++++++++- src/searcher.cpp | 47 +++++++++++++++++++++++++++++++++++++------ 3 files changed, 54 insertions(+), 8 deletions(-) diff --git a/include/searcher.h b/include/searcher.h index 3801be91b..c9db988dc 100644 --- a/include/searcher.h +++ b/include/searcher.h @@ -56,9 +56,11 @@ struct SearcherInternal; class Searcher { public: + Searcher(); Searcher(const string& xapianDirectoryPath, Reader* reader); ~Searcher(); + void add_reader(Reader* reader, const std::string& humanReaderName); void search(std::string& search, unsigned int resultStart, unsigned int resultEnd, @@ -84,7 +86,8 @@ class Searcher const unsigned int resultEnd, const bool verbose = false); - Reader* reader; + std::vector readers; + std::vector humanReaderNames; SearcherInternal* internal; std::string searchPattern; std::string protocolPrefix; diff --git a/src/android/kiwix.cpp b/src/android/kiwix.cpp index 2de373164..119b37e2b 100644 --- a/src/android/kiwix.cpp +++ b/src/android/kiwix.cpp @@ -490,7 +490,15 @@ JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_loadFulltextIndex( if (searcher != NULL) { delete searcher; } - searcher = new kiwix::Searcher(cPath, reader); + if (!reader || !reader->hasFulltextIndex()) { + // Use old API (no embedded full text index). + searcher = new kiwix::Searcher(cPath, reader); + } else { + // Use the new API. We don't care about the human readable name as + // we don't use it (in android). + searcher = new kiwix::Searcher(); + searcher->add_reader(reader, ""); + } } catch (...) 
{ searcher = NULL; retVal = JNI_FALSE; diff --git a/src/searcher.cpp b/src/searcher.cpp index a60a605cd..4966b7654 100644 --- a/src/searcher.cpp +++ b/src/searcher.cpp @@ -74,8 +74,7 @@ struct SearcherInternal { /* Constructor */ Searcher::Searcher(const string& xapianDirectoryPath, Reader* reader) - : reader(reader), - internal(new SearcherInternal()), + : internal(new SearcherInternal()), searchPattern(""), protocolPrefix("zim://"), searchProtocolPrefix("search://?"), @@ -91,11 +90,32 @@ Searcher::Searcher(const string& xapianDirectoryPath, Reader* reader) } } +Searcher::Searcher() + : internal(new SearcherInternal()), + searchPattern(""), + protocolPrefix("zim://"), + searchProtocolPrefix("search://?"), + resultCountPerPage(0), + estimatedResultCount(0), + resultStart(0), + resultEnd(0) +{ + template_ct2 = RESOURCE::results_ct2; + loadICUExternalTables(); +} + /* Destructor */ Searcher::~Searcher() { delete internal; } + +void Searcher::add_reader(Reader* reader, const std::string& humanReadableName) +{ + this->readers.push_back(reader); + this->humanReaderNames.push_back(humanReadableName); +} + /* Search strings in the database */ void Searcher::search(std::string& search, unsigned int resultStart, @@ -135,8 +155,15 @@ void Searcher::search(std::string& search, this->estimatedResultCount = internal->_xapianSearcher->results.get_matches_estimated(); } else { - internal->_search = this->reader->getZimFileHandler()->search( - unaccentedSearch, resultStart, resultEnd); + std::vector zims; + for (auto current = this->readers.begin(); current != this->readers.end(); + current++) { + zims.push_back((*current)->getZimFileHandler()); + } + zim::Search* search = new zim::Search(zims); + search->set_query(unaccentedSearch); + search->set_range(resultStart, resultEnd); + internal->_search = search; internal->current_iterator = internal->_search->begin(); this->estimatedResultCount = internal->_search->get_matches_estimated(); } @@ -192,8 +219,16 @@ void 
Searcher::suggestions(std::string& search, const bool verbose) * We do not support that. */ this->estimatedResultCount = 0; } else { - internal->_search = this->reader->getZimFileHandler()->suggestions( - unaccentedSearch, resultStart, resultEnd); + std::vector zims; + for (auto current = this->readers.begin(); current != this->readers.end(); + current++) { + zims.push_back((*current)->getZimFileHandler()); + } + zim::Search* search = new zim::Search(zims); + search->set_query(unaccentedSearch); + search->set_range(resultStart, resultEnd); + search->set_suggestion_mode(true); + internal->_search = search; internal->current_iterator = internal->_search->begin(); this->estimatedResultCount = internal->_search->get_matches_estimated(); } From bc5f4f5de46b2946cc48c90e499e0bfebdb11bfc Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Wed, 12 Jul 2017 18:41:30 +0200 Subject: [PATCH 5/5] Use right contentId to generate the article url in search template. As we do multisearch, we must use the associated contentID of the result to generate the url. --- src/searcher.cpp | 1 + static/results.tmpl | 78 ++++++++++++++++++++++++++++++++------------- 2 files changed, 56 insertions(+), 23 deletions(-) diff --git a/src/searcher.cpp b/src/searcher.cpp index 4966b7654..aae7a5267 100644 --- a/src/searcher.cpp +++ b/src/searcher.cpp @@ -314,6 +314,7 @@ string Searcher::getHtml() result["title"] = p_result->get_title(); result["url"] = p_result->get_url(); result["snippet"] = p_result->get_snippet(); + result["contentId"] = humanReaderNames[p_result->get_readerIndex()]; if (p_result->get_size() >= 0) { result["size"] = kiwix::beautifyInteger(p_result->get_size()); diff --git a/static/results.tmpl b/static/results.tmpl index 1d95b8012..88f2f9499 100644 --- a/static/results.tmpl +++ b/static/results.tmpl @@ -92,36 +92,68 @@ Search: <TMPL_var searchPattern> - - + +
- Results - of for No result were found for + + Results + + - + of + + for + + + + No result were found for +
-
    +
      -
    • - ... -
      words
      -
    • +
    • + + + + + + ... + + + +
      words
      +
      +
    • -
    +
- - + + +