From c44b2acb56c385bf73f10108616684991a2e88e9 Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Wed, 24 May 2017 16:07:46 +0200 Subject: [PATCH] Re-add xapian searcher in kiwix-lib. libzim only knows how to read a full text index embedded in a zim file. This is nice, as we want to embed the full text index in the zim file and not have a separate full text index. However, we still have some zim files with a separate index that we have to read. So we have to support searching in a separate index for a while. --- include/meson.build | 4 ++++ include/searcher.h | 2 +- include/xapianSearcher.h | 5 +++-- meson.build | 7 ++++++- src/android/kiwix.cpp | 2 +- src/meson.build | 8 +++++++- src/searcher.cpp | 36 +++++++++++++++++++++++++++++------- src/xapianSearcher.cpp | 20 ++------------------ 8 files changed, 53 insertions(+), 31 deletions(-) diff --git a/include/meson.build b/include/meson.build index 0e890d707..608423170 100644 --- a/include/meson.build +++ b/include/meson.build @@ -5,6 +5,10 @@ headers = [ 'searcher.h' ] +if xapian_dep.found() + headers += ['xapianSearcher.h'] +endif + install_headers(headers, subdir:'kiwix') install_headers( diff --git a/include/searcher.h b/include/searcher.h index 2a1dd3960..3a53e8604 100644 --- a/include/searcher.h +++ b/include/searcher.h @@ -53,7 +53,7 @@ namespace kiwix { class Searcher { public: - Searcher(Reader* reader); + Searcher(const string &xapianDirectoryPath, Reader* reader); ~Searcher(); void search(std::string &search, unsigned int resultStart, diff --git a/include/xapianSearcher.h b/include/xapianSearcher.h index e11c03e68..8604ae9c8 100644 --- a/include/xapianSearcher.h +++ b/include/xapianSearcher.h @@ -57,7 +57,7 @@ namespace kiwix { } }; - class XapianSearcher : public Searcher { + class XapianSearcher { friend class XapianResult; public: XapianSearcher(const string &xapianDirectoryPath, Reader* reader); @@ -67,6 +67,8 @@ namespace kiwix { virtual Result* getNextResult(); void restart_search(); + Xapian::MSet results; + protected: 
void closeIndex(); void openIndex(const string &xapianDirectoryPath); @@ -79,7 +81,6 @@ namespace kiwix { Xapian::QueryParser queryParser; Xapian::Stem stemmer; Xapian::SimpleStopper stopper; - Xapian::MSet results; Xapian::MSetIterator current_result; std::map valuesmap; }; diff --git a/meson.build b/meson.build index f1da51e32..23dd1c23e 100644 --- a/meson.build +++ b/meson.build @@ -62,7 +62,9 @@ else endif endif -all_deps = [thread_dep, libicu_dep, libzim_dep, pugixml_dep] +xapian_dep = dependency('xapian-core', required:false) + +all_deps = [thread_dep, libicu_dep, libzim_dep, xapian_dep, pugixml_dep] if has_ctpp2_dep all_deps += [ctpp2_dep] endif @@ -79,6 +81,9 @@ subdir('static') subdir('src') pkg_requires = ['libzim', 'icu-i18n', 'pugixml'] +if xapian_dep.found() + pkg_requires += ['xapian-core'] +endif extra_libs = [] extra_cflags = '' diff --git a/src/android/kiwix.cpp b/src/android/kiwix.cpp index f13c7d9d3..279b0b21f 100644 --- a/src/android/kiwix.cpp +++ b/src/android/kiwix.cpp @@ -445,7 +445,7 @@ JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_loadFulltextIndex(JN searcher = NULL; try { if (searcher != NULL) delete searcher; - searcher = new kiwix::Searcher(reader); + searcher = new kiwix::Searcher(cPath, reader); } catch (...) 
{ searcher = NULL; retVal = JNI_FALSE; diff --git a/src/meson.build b/src/meson.build index c6e4e3f1c..5c0915416 100644 --- a/src/meson.build +++ b/src/meson.build @@ -8,10 +8,16 @@ kiwix_sources = [ 'common/regexTools.cpp', 'common/stringTools.cpp', 'common/networkTools.cpp', - 'common/otherTools.cpp' + 'common/otherTools.cpp', + 'xapian/htmlparse.cc', + 'xapian/myhtmlparse.cc' ] kiwix_sources += lib_resources +if xapian_dep.found() + kiwix_sources += ['xapianSearcher.cpp'] +endif + if get_option('android') subdir('android') install_dir = 'kiwix-lib/jniLibs/' + host_machine.cpu_family() diff --git a/src/searcher.cpp b/src/searcher.cpp index 380ecda4c..074620840 100644 --- a/src/searcher.cpp +++ b/src/searcher.cpp @@ -18,6 +18,7 @@ */ #include "searcher.h" +#include "xapianSearcher.h" #include "reader.h" #include "kiwixlib-resources.h" @@ -54,18 +55,25 @@ namespace kiwix { struct SearcherInternal { const zim::Search *_search; + XapianSearcher *_xapianSearcher; zim::Search::iterator current_iterator; - SearcherInternal() : _search(NULL) {} + + SearcherInternal() : + _search(NULL), + _xapianSearcher(NULL) + {} ~SearcherInternal() { if ( _search != NULL ) delete _search; + if ( _xapianSearcher != NULL ) + delete _xapianSearcher; } }; /* Constructor */ - Searcher::Searcher(Reader* reader) : + Searcher::Searcher(const string &xapianDirectoryPath, Reader* reader) : reader(reader), internal(new SearcherInternal()), searchPattern(""), @@ -78,6 +86,9 @@ namespace kiwix { { template_ct2 = RESOURCE::results_ct2; loadICUExternalTables(); + if ( !reader || !reader->hasFulltextIndex() ) { + internal->_xapianSearcher = new XapianSearcher(xapianDirectoryPath, reader); + } } /* Destructor */ @@ -116,20 +127,31 @@ namespace kiwix { this->resultStart = resultStart; this->resultEnd = resultEnd; string unaccentedSearch = removeAccents(search); - internal->_search = this->reader->getZimFileHandler()->search(unaccentedSearch, resultStart, resultEnd); - internal->current_iterator = 
internal->_search->begin(); - this->estimatedResultCount = internal->_search->get_matches_estimated(); + if ( internal->_xapianSearcher ) { + internal->_xapianSearcher->searchInIndex(unaccentedSearch, resultStart, resultEnd, verbose); + this->estimatedResultCount = internal->_xapianSearcher->results.get_matches_estimated(); + } else { + internal->_search = this->reader->getZimFileHandler()->search(unaccentedSearch, resultStart, resultEnd); + internal->current_iterator = internal->_search->begin(); + this->estimatedResultCount = internal->_search->get_matches_estimated(); + } } return; } void Searcher::restart_search() { - internal->current_iterator = internal->_search->begin(); + if ( internal->_xapianSearcher ) { + internal->_xapianSearcher->restart_search(); + } else { + internal->current_iterator = internal->_search->begin(); + } } Result* Searcher::getNextResult() { - if (internal->current_iterator != internal->_search->end()) { + if ( internal->_xapianSearcher ) { + return internal->_xapianSearcher->getNextResult(); + } else if (internal->current_iterator != internal->_search->end()) { Result* result = new _Result(this, internal->current_iterator); internal->current_iterator++; return result; diff --git a/src/xapianSearcher.cpp b/src/xapianSearcher.cpp index 7e0fab28f..1b9a6298f 100644 --- a/src/xapianSearcher.cpp +++ b/src/xapianSearcher.cpp @@ -46,27 +46,14 @@ std::map read_valuesmap(const std::string &s) { /* Constructor */ XapianSearcher::XapianSearcher(const string &xapianDirectoryPath, Reader* reader) - : Searcher(), - reader(reader) + : reader(reader) { this->openIndex(xapianDirectoryPath); } /* Open Xapian readable database */ void XapianSearcher::openIndex(const string &directoryPath) { - try - { - zim::File zimFile = zim::File(directoryPath); - zim::Article xapianArticle = zimFile.getArticle('Z', "/fulltextIndex/xapian"); - if (!xapianArticle.good()) - throw NoXapianIndexInZim(); - zim::offset_type dbOffset = xapianArticle.getOffset(); - int 
databasefd = open(directoryPath.c_str(), O_RDONLY); - lseek(databasefd, dbOffset, SEEK_SET); - this->readableDatabase = Xapian::Database(databasefd); - } catch (...) { - this->readableDatabase = Xapian::Database(directoryPath); - } + this->readableDatabase = Xapian::Database(directoryPath); this->valuesmap = read_valuesmap(this->readableDatabase.get_metadata("valuesmap")); this->language = this->readableDatabase.get_metadata("language"); this->stopwords = this->readableDatabase.get_metadata("stopwords"); @@ -121,9 +108,6 @@ std::map read_valuesmap(const std::string &s) { /* Get the results */ this->results = enquire.get_mset(resultStart, resultEnd - resultStart); this->current_result = this->results.begin(); - - /* Update the global resultCount value*/ - this->estimatedResultCount = this->results.get_matches_estimated(); } /* Get next result */