Merge pull request #54 from kiwix/searchinxapian

Re-add xapian searcher in kiwix-lib.
This commit is contained in:
Matthieu Gautier 2017-05-24 18:28:39 +02:00 committed by GitHub
commit 7132775d67
8 changed files with 53 additions and 31 deletions

View File

@@ -5,6 +5,10 @@ headers = [
'searcher.h' 'searcher.h'
] ]
if xapian_dep.found()
headers += ['xapianSearcher.h']
endif
install_headers(headers, subdir:'kiwix') install_headers(headers, subdir:'kiwix')
install_headers( install_headers(

View File

@@ -53,7 +53,7 @@ namespace kiwix {
class Searcher { class Searcher {
public: public:
Searcher(Reader* reader); Searcher(const string &xapianDirectoryPath, Reader* reader);
~Searcher(); ~Searcher();
void search(std::string &search, unsigned int resultStart, void search(std::string &search, unsigned int resultStart,

View File

@@ -57,7 +57,7 @@ namespace kiwix {
} }
}; };
class XapianSearcher : public Searcher { class XapianSearcher {
friend class XapianResult; friend class XapianResult;
public: public:
XapianSearcher(const string &xapianDirectoryPath, Reader* reader); XapianSearcher(const string &xapianDirectoryPath, Reader* reader);
@@ -67,6 +67,8 @@ namespace kiwix {
virtual Result* getNextResult(); virtual Result* getNextResult();
void restart_search(); void restart_search();
Xapian::MSet results;
protected: protected:
void closeIndex(); void closeIndex();
void openIndex(const string &xapianDirectoryPath); void openIndex(const string &xapianDirectoryPath);
@@ -79,7 +81,6 @@ namespace kiwix {
Xapian::QueryParser queryParser; Xapian::QueryParser queryParser;
Xapian::Stem stemmer; Xapian::Stem stemmer;
Xapian::SimpleStopper stopper; Xapian::SimpleStopper stopper;
Xapian::MSet results;
Xapian::MSetIterator current_result; Xapian::MSetIterator current_result;
std::map<std::string, int> valuesmap; std::map<std::string, int> valuesmap;
}; };

View File

@@ -62,7 +62,9 @@ else
endif endif
endif endif
all_deps = [thread_dep, libicu_dep, libzim_dep, pugixml_dep] xapian_dep = dependency('xapian-core', required:false)
all_deps = [thread_dep, libicu_dep, libzim_dep, xapian_dep, pugixml_dep]
if has_ctpp2_dep if has_ctpp2_dep
all_deps += [ctpp2_dep] all_deps += [ctpp2_dep]
endif endif
@@ -79,6 +81,9 @@ subdir('static')
subdir('src') subdir('src')
pkg_requires = ['libzim', 'icu-i18n', 'pugixml'] pkg_requires = ['libzim', 'icu-i18n', 'pugixml']
if xapian_dep.found()
pkg_requires += ['xapian-core']
endif
extra_libs = [] extra_libs = []
extra_cflags = '' extra_cflags = ''

View File

@@ -445,7 +445,7 @@ JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_loadFulltextIndex(JN
searcher = NULL; searcher = NULL;
try { try {
if (searcher != NULL) delete searcher; if (searcher != NULL) delete searcher;
searcher = new kiwix::Searcher(reader); searcher = new kiwix::Searcher(cPath, reader);
} catch (...) { } catch (...) {
searcher = NULL; searcher = NULL;
retVal = JNI_FALSE; retVal = JNI_FALSE;

View File

@@ -8,10 +8,16 @@ kiwix_sources = [
'common/regexTools.cpp', 'common/regexTools.cpp',
'common/stringTools.cpp', 'common/stringTools.cpp',
'common/networkTools.cpp', 'common/networkTools.cpp',
'common/otherTools.cpp' 'common/otherTools.cpp',
'xapian/htmlparse.cc',
'xapian/myhtmlparse.cc'
] ]
kiwix_sources += lib_resources kiwix_sources += lib_resources
if xapian_dep.found()
kiwix_sources += ['xapianSearcher.cpp']
endif
if get_option('android') if get_option('android')
subdir('android') subdir('android')
install_dir = 'kiwix-lib/jniLibs/' + host_machine.cpu_family() install_dir = 'kiwix-lib/jniLibs/' + host_machine.cpu_family()

View File

@@ -18,6 +18,7 @@
*/ */
#include "searcher.h" #include "searcher.h"
#include "xapianSearcher.h"
#include "reader.h" #include "reader.h"
#include "kiwixlib-resources.h" #include "kiwixlib-resources.h"
@@ -54,18 +55,25 @@ namespace kiwix {
struct SearcherInternal { struct SearcherInternal {
const zim::Search *_search; const zim::Search *_search;
XapianSearcher *_xapianSearcher;
zim::Search::iterator current_iterator; zim::Search::iterator current_iterator;
SearcherInternal() : _search(NULL) {}
SearcherInternal() :
_search(NULL),
_xapianSearcher(NULL)
{}
~SearcherInternal() { ~SearcherInternal() {
if ( _search != NULL ) if ( _search != NULL )
delete _search; delete _search;
if ( _xapianSearcher != NULL )
delete _xapianSearcher;
} }
}; };
/* Constructor */ /* Constructor */
Searcher::Searcher(Reader* reader) : Searcher::Searcher(const string &xapianDirectoryPath, Reader* reader) :
reader(reader), reader(reader),
internal(new SearcherInternal()), internal(new SearcherInternal()),
searchPattern(""), searchPattern(""),
@@ -78,6 +86,9 @@ namespace kiwix {
{ {
template_ct2 = RESOURCE::results_ct2; template_ct2 = RESOURCE::results_ct2;
loadICUExternalTables(); loadICUExternalTables();
if ( !reader || !reader->hasFulltextIndex() ) {
internal->_xapianSearcher = new XapianSearcher(xapianDirectoryPath, reader);
}
} }
/* Destructor */ /* Destructor */
@@ -116,20 +127,31 @@ namespace kiwix {
this->resultStart = resultStart; this->resultStart = resultStart;
this->resultEnd = resultEnd; this->resultEnd = resultEnd;
string unaccentedSearch = removeAccents(search); string unaccentedSearch = removeAccents(search);
internal->_search = this->reader->getZimFileHandler()->search(unaccentedSearch, resultStart, resultEnd); if ( internal->_xapianSearcher ) {
internal->current_iterator = internal->_search->begin(); internal->_xapianSearcher->searchInIndex(unaccentedSearch, resultStart, resultEnd, verbose);
this->estimatedResultCount = internal->_search->get_matches_estimated(); this->estimatedResultCount = internal->_xapianSearcher->results.get_matches_estimated();
} else {
internal->_search = this->reader->getZimFileHandler()->search(unaccentedSearch, resultStart, resultEnd);
internal->current_iterator = internal->_search->begin();
this->estimatedResultCount = internal->_search->get_matches_estimated();
}
} }
return; return;
} }
void Searcher::restart_search() { void Searcher::restart_search() {
internal->current_iterator = internal->_search->begin(); if ( internal->_xapianSearcher ) {
internal->_xapianSearcher->restart_search();
} else {
internal->current_iterator = internal->_search->begin();
}
} }
Result* Searcher::getNextResult() { Result* Searcher::getNextResult() {
if (internal->current_iterator != internal->_search->end()) { if ( internal->_xapianSearcher ) {
return internal->_xapianSearcher->getNextResult();
} else if (internal->current_iterator != internal->_search->end()) {
Result* result = new _Result(this, internal->current_iterator); Result* result = new _Result(this, internal->current_iterator);
internal->current_iterator++; internal->current_iterator++;
return result; return result;

View File

@@ -46,27 +46,14 @@ std::map<std::string, int> read_valuesmap(const std::string &s) {
/* Constructor */ /* Constructor */
XapianSearcher::XapianSearcher(const string &xapianDirectoryPath, Reader* reader) XapianSearcher::XapianSearcher(const string &xapianDirectoryPath, Reader* reader)
: Searcher(), : reader(reader)
reader(reader)
{ {
this->openIndex(xapianDirectoryPath); this->openIndex(xapianDirectoryPath);
} }
/* Open Xapian readable database */ /* Open Xapian readable database */
void XapianSearcher::openIndex(const string &directoryPath) { void XapianSearcher::openIndex(const string &directoryPath) {
try this->readableDatabase = Xapian::Database(directoryPath);
{
zim::File zimFile = zim::File(directoryPath);
zim::Article xapianArticle = zimFile.getArticle('Z', "/fulltextIndex/xapian");
if (!xapianArticle.good())
throw NoXapianIndexInZim();
zim::offset_type dbOffset = xapianArticle.getOffset();
int databasefd = open(directoryPath.c_str(), O_RDONLY);
lseek(databasefd, dbOffset, SEEK_SET);
this->readableDatabase = Xapian::Database(databasefd);
} catch (...) {
this->readableDatabase = Xapian::Database(directoryPath);
}
this->valuesmap = read_valuesmap(this->readableDatabase.get_metadata("valuesmap")); this->valuesmap = read_valuesmap(this->readableDatabase.get_metadata("valuesmap"));
this->language = this->readableDatabase.get_metadata("language"); this->language = this->readableDatabase.get_metadata("language");
this->stopwords = this->readableDatabase.get_metadata("stopwords"); this->stopwords = this->readableDatabase.get_metadata("stopwords");
@@ -121,9 +108,6 @@ std::map<std::string, int> read_valuesmap(const std::string &s) {
/* Get the results */ /* Get the results */
this->results = enquire.get_mset(resultStart, resultEnd - resultStart); this->results = enquire.get_mset(resultStart, resultEnd - resultStart);
this->current_result = this->results.begin(); this->current_result = this->results.begin();
/* Update the global resultCount value*/
this->estimatedResultCount = this->results.get_matches_estimated();
} }
/* Get next result */ /* Get next result */