Merge pull request #54 from kiwix/searchinxapian

Re-add xapian searcher in kiwix-lib.
This commit is contained in:
Matthieu Gautier 2017-05-24 18:28:39 +02:00 committed by GitHub
commit 7132775d67
8 changed files with 53 additions and 31 deletions

View File

@ -5,6 +5,10 @@ headers = [
'searcher.h'
]
if xapian_dep.found()
headers += ['xapianSearcher.h']
endif
install_headers(headers, subdir:'kiwix')
install_headers(

View File

@ -53,7 +53,7 @@ namespace kiwix {
class Searcher {
public:
Searcher(Reader* reader);
Searcher(const string &xapianDirectoryPath, Reader* reader);
~Searcher();
void search(std::string &search, unsigned int resultStart,

View File

@ -57,7 +57,7 @@ namespace kiwix {
}
};
class XapianSearcher : public Searcher {
class XapianSearcher {
friend class XapianResult;
public:
XapianSearcher(const string &xapianDirectoryPath, Reader* reader);
@ -67,6 +67,8 @@ namespace kiwix {
virtual Result* getNextResult();
void restart_search();
Xapian::MSet results;
protected:
void closeIndex();
void openIndex(const string &xapianDirectoryPath);
@ -79,7 +81,6 @@ namespace kiwix {
Xapian::QueryParser queryParser;
Xapian::Stem stemmer;
Xapian::SimpleStopper stopper;
Xapian::MSet results;
Xapian::MSetIterator current_result;
std::map<std::string, int> valuesmap;
};

View File

@ -62,7 +62,9 @@ else
endif
endif
all_deps = [thread_dep, libicu_dep, libzim_dep, pugixml_dep]
xapian_dep = dependency('xapian-core', required:false)
all_deps = [thread_dep, libicu_dep, libzim_dep, xapian_dep, pugixml_dep]
if has_ctpp2_dep
all_deps += [ctpp2_dep]
endif
@ -79,6 +81,9 @@ subdir('static')
subdir('src')
pkg_requires = ['libzim', 'icu-i18n', 'pugixml']
if xapian_dep.found()
pkg_requires += ['xapian-core']
endif
extra_libs = []
extra_cflags = ''

View File

@ -445,7 +445,7 @@ JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_loadFulltextIndex(JN
searcher = NULL;
try {
if (searcher != NULL) delete searcher;
searcher = new kiwix::Searcher(reader);
searcher = new kiwix::Searcher(cPath, reader);
} catch (...) {
searcher = NULL;
retVal = JNI_FALSE;

View File

@ -8,10 +8,16 @@ kiwix_sources = [
'common/regexTools.cpp',
'common/stringTools.cpp',
'common/networkTools.cpp',
'common/otherTools.cpp'
'common/otherTools.cpp',
'xapian/htmlparse.cc',
'xapian/myhtmlparse.cc'
]
kiwix_sources += lib_resources
if xapian_dep.found()
kiwix_sources += ['xapianSearcher.cpp']
endif
if get_option('android')
subdir('android')
install_dir = 'kiwix-lib/jniLibs/' + host_machine.cpu_family()

View File

@ -18,6 +18,7 @@
*/
#include "searcher.h"
#include "xapianSearcher.h"
#include "reader.h"
#include "kiwixlib-resources.h"
@ -54,18 +55,25 @@ namespace kiwix {
struct SearcherInternal {
const zim::Search *_search;
XapianSearcher *_xapianSearcher;
zim::Search::iterator current_iterator;
SearcherInternal() : _search(NULL) {}
SearcherInternal() :
_search(NULL),
_xapianSearcher(NULL)
{}
~SearcherInternal() {
if ( _search != NULL )
delete _search;
if ( _xapianSearcher != NULL )
delete _xapianSearcher;
}
};
/* Constructor */
Searcher::Searcher(Reader* reader) :
Searcher::Searcher(const string &xapianDirectoryPath, Reader* reader) :
reader(reader),
internal(new SearcherInternal()),
searchPattern(""),
@ -78,6 +86,9 @@ namespace kiwix {
{
template_ct2 = RESOURCE::results_ct2;
loadICUExternalTables();
if ( !reader || !reader->hasFulltextIndex() ) {
internal->_xapianSearcher = new XapianSearcher(xapianDirectoryPath, reader);
}
}
/* Destructor */
@ -116,20 +127,31 @@ namespace kiwix {
this->resultStart = resultStart;
this->resultEnd = resultEnd;
string unaccentedSearch = removeAccents(search);
internal->_search = this->reader->getZimFileHandler()->search(unaccentedSearch, resultStart, resultEnd);
internal->current_iterator = internal->_search->begin();
this->estimatedResultCount = internal->_search->get_matches_estimated();
if ( internal->_xapianSearcher ) {
internal->_xapianSearcher->searchInIndex(unaccentedSearch, resultStart, resultEnd, verbose);
this->estimatedResultCount = internal->_xapianSearcher->results.get_matches_estimated();
} else {
internal->_search = this->reader->getZimFileHandler()->search(unaccentedSearch, resultStart, resultEnd);
internal->current_iterator = internal->_search->begin();
this->estimatedResultCount = internal->_search->get_matches_estimated();
}
}
return;
}
void Searcher::restart_search() {
internal->current_iterator = internal->_search->begin();
if ( internal->_xapianSearcher ) {
internal->_xapianSearcher->restart_search();
} else {
internal->current_iterator = internal->_search->begin();
}
}
Result* Searcher::getNextResult() {
if (internal->current_iterator != internal->_search->end()) {
if ( internal->_xapianSearcher ) {
return internal->_xapianSearcher->getNextResult();
} else if (internal->current_iterator != internal->_search->end()) {
Result* result = new _Result(this, internal->current_iterator);
internal->current_iterator++;
return result;

View File

@ -46,27 +46,14 @@ std::map<std::string, int> read_valuesmap(const std::string &s) {
/* Constructor */
XapianSearcher::XapianSearcher(const string &xapianDirectoryPath, Reader* reader)
: Searcher(),
reader(reader)
: reader(reader)
{
this->openIndex(xapianDirectoryPath);
}
/* Open Xapian readable database */
void XapianSearcher::openIndex(const string &directoryPath) {
try
{
zim::File zimFile = zim::File(directoryPath);
zim::Article xapianArticle = zimFile.getArticle('Z', "/fulltextIndex/xapian");
if (!xapianArticle.good())
throw NoXapianIndexInZim();
zim::offset_type dbOffset = xapianArticle.getOffset();
int databasefd = open(directoryPath.c_str(), O_RDONLY);
lseek(databasefd, dbOffset, SEEK_SET);
this->readableDatabase = Xapian::Database(databasefd);
} catch (...) {
this->readableDatabase = Xapian::Database(directoryPath);
}
this->readableDatabase = Xapian::Database(directoryPath);
this->valuesmap = read_valuesmap(this->readableDatabase.get_metadata("valuesmap"));
this->language = this->readableDatabase.get_metadata("language");
this->stopwords = this->readableDatabase.get_metadata("stopwords");
@ -121,9 +108,6 @@ std::map<std::string, int> read_valuesmap(const std::string &s) {
/* Get the results */
this->results = enquire.get_mset(resultStart, resultEnd - resultStart);
this->current_result = this->results.begin();
/* Update the global resultCount value*/
this->estimatedResultCount = this->results.get_matches_estimated();
}
/* Get next result */