mirror of https://github.com/kiwix/libkiwix.git
Merge pull request #54 from kiwix/searchinxapian
Re-add xapian searcher in kiwix-lib.
This commit is contained in:
commit
7132775d67
|
@ -5,6 +5,10 @@ headers = [
|
|||
'searcher.h'
|
||||
]
|
||||
|
||||
if xapian_dep.found()
|
||||
headers += ['xapianSearcher.h']
|
||||
endif
|
||||
|
||||
install_headers(headers, subdir:'kiwix')
|
||||
|
||||
install_headers(
|
||||
|
|
|
@ -53,7 +53,7 @@ namespace kiwix {
|
|||
class Searcher {
|
||||
|
||||
public:
|
||||
Searcher(Reader* reader);
|
||||
Searcher(const string &xapianDirectoryPath, Reader* reader);
|
||||
~Searcher();
|
||||
|
||||
void search(std::string &search, unsigned int resultStart,
|
||||
|
|
|
@ -57,7 +57,7 @@ namespace kiwix {
|
|||
}
|
||||
};
|
||||
|
||||
class XapianSearcher : public Searcher {
|
||||
class XapianSearcher {
|
||||
friend class XapianResult;
|
||||
public:
|
||||
XapianSearcher(const string &xapianDirectoryPath, Reader* reader);
|
||||
|
@ -67,6 +67,8 @@ namespace kiwix {
|
|||
virtual Result* getNextResult();
|
||||
void restart_search();
|
||||
|
||||
Xapian::MSet results;
|
||||
|
||||
protected:
|
||||
void closeIndex();
|
||||
void openIndex(const string &xapianDirectoryPath);
|
||||
|
@ -79,7 +81,6 @@ namespace kiwix {
|
|||
Xapian::QueryParser queryParser;
|
||||
Xapian::Stem stemmer;
|
||||
Xapian::SimpleStopper stopper;
|
||||
Xapian::MSet results;
|
||||
Xapian::MSetIterator current_result;
|
||||
std::map<std::string, int> valuesmap;
|
||||
};
|
||||
|
|
|
@ -62,7 +62,9 @@ else
|
|||
endif
|
||||
endif
|
||||
|
||||
all_deps = [thread_dep, libicu_dep, libzim_dep, pugixml_dep]
|
||||
xapian_dep = dependency('xapian-core', required:false)
|
||||
|
||||
all_deps = [thread_dep, libicu_dep, libzim_dep, xapian_dep, pugixml_dep]
|
||||
if has_ctpp2_dep
|
||||
all_deps += [ctpp2_dep]
|
||||
endif
|
||||
|
@ -79,6 +81,9 @@ subdir('static')
|
|||
subdir('src')
|
||||
|
||||
pkg_requires = ['libzim', 'icu-i18n', 'pugixml']
|
||||
if xapian_dep.found()
|
||||
pkg_requires += ['xapian-core']
|
||||
endif
|
||||
|
||||
extra_libs = []
|
||||
extra_cflags = ''
|
||||
|
|
|
@ -445,7 +445,7 @@ JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_loadFulltextIndex(JN
|
|||
searcher = NULL;
|
||||
try {
|
||||
if (searcher != NULL) delete searcher;
|
||||
searcher = new kiwix::Searcher(reader);
|
||||
searcher = new kiwix::Searcher(cPath, reader);
|
||||
} catch (...) {
|
||||
searcher = NULL;
|
||||
retVal = JNI_FALSE;
|
||||
|
|
|
@ -8,10 +8,16 @@ kiwix_sources = [
|
|||
'common/regexTools.cpp',
|
||||
'common/stringTools.cpp',
|
||||
'common/networkTools.cpp',
|
||||
'common/otherTools.cpp'
|
||||
'common/otherTools.cpp',
|
||||
'xapian/htmlparse.cc',
|
||||
'xapian/myhtmlparse.cc'
|
||||
]
|
||||
kiwix_sources += lib_resources
|
||||
|
||||
if xapian_dep.found()
|
||||
kiwix_sources += ['xapianSearcher.cpp']
|
||||
endif
|
||||
|
||||
if get_option('android')
|
||||
subdir('android')
|
||||
install_dir = 'kiwix-lib/jniLibs/' + host_machine.cpu_family()
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
*/
|
||||
|
||||
#include "searcher.h"
|
||||
#include "xapianSearcher.h"
|
||||
#include "reader.h"
|
||||
#include "kiwixlib-resources.h"
|
||||
|
||||
|
@ -54,18 +55,25 @@ namespace kiwix {
|
|||
|
||||
struct SearcherInternal {
|
||||
const zim::Search *_search;
|
||||
XapianSearcher *_xapianSearcher;
|
||||
zim::Search::iterator current_iterator;
|
||||
|
||||
SearcherInternal() : _search(NULL) {}
|
||||
|
||||
SearcherInternal() :
|
||||
_search(NULL),
|
||||
_xapianSearcher(NULL)
|
||||
{}
|
||||
~SearcherInternal() {
|
||||
if ( _search != NULL )
|
||||
delete _search;
|
||||
if ( _xapianSearcher != NULL )
|
||||
delete _xapianSearcher;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
/* Constructor */
|
||||
Searcher::Searcher(Reader* reader) :
|
||||
Searcher::Searcher(const string &xapianDirectoryPath, Reader* reader) :
|
||||
reader(reader),
|
||||
internal(new SearcherInternal()),
|
||||
searchPattern(""),
|
||||
|
@ -78,6 +86,9 @@ namespace kiwix {
|
|||
{
|
||||
template_ct2 = RESOURCE::results_ct2;
|
||||
loadICUExternalTables();
|
||||
if ( !reader || !reader->hasFulltextIndex() ) {
|
||||
internal->_xapianSearcher = new XapianSearcher(xapianDirectoryPath, reader);
|
||||
}
|
||||
}
|
||||
|
||||
/* Destructor */
|
||||
|
@ -116,20 +127,31 @@ namespace kiwix {
|
|||
this->resultStart = resultStart;
|
||||
this->resultEnd = resultEnd;
|
||||
string unaccentedSearch = removeAccents(search);
|
||||
if ( internal->_xapianSearcher ) {
|
||||
internal->_xapianSearcher->searchInIndex(unaccentedSearch, resultStart, resultEnd, verbose);
|
||||
this->estimatedResultCount = internal->_xapianSearcher->results.get_matches_estimated();
|
||||
} else {
|
||||
internal->_search = this->reader->getZimFileHandler()->search(unaccentedSearch, resultStart, resultEnd);
|
||||
internal->current_iterator = internal->_search->begin();
|
||||
this->estimatedResultCount = internal->_search->get_matches_estimated();
|
||||
}
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
void Searcher::restart_search() {
|
||||
if ( internal->_xapianSearcher ) {
|
||||
internal->_xapianSearcher->restart_search();
|
||||
} else {
|
||||
internal->current_iterator = internal->_search->begin();
|
||||
}
|
||||
}
|
||||
|
||||
Result* Searcher::getNextResult() {
|
||||
if (internal->current_iterator != internal->_search->end()) {
|
||||
if ( internal->_xapianSearcher ) {
|
||||
return internal->_xapianSearcher->getNextResult();
|
||||
} else if (internal->current_iterator != internal->_search->end()) {
|
||||
Result* result = new _Result(this, internal->current_iterator);
|
||||
internal->current_iterator++;
|
||||
return result;
|
||||
|
|
|
@ -46,27 +46,14 @@ std::map<std::string, int> read_valuesmap(const std::string &s) {
|
|||
|
||||
/* Constructor */
|
||||
XapianSearcher::XapianSearcher(const string &xapianDirectoryPath, Reader* reader)
|
||||
: Searcher(),
|
||||
reader(reader)
|
||||
: reader(reader)
|
||||
{
|
||||
this->openIndex(xapianDirectoryPath);
|
||||
}
|
||||
|
||||
/* Open Xapian readable database */
|
||||
void XapianSearcher::openIndex(const string &directoryPath) {
|
||||
try
|
||||
{
|
||||
zim::File zimFile = zim::File(directoryPath);
|
||||
zim::Article xapianArticle = zimFile.getArticle('Z', "/fulltextIndex/xapian");
|
||||
if (!xapianArticle.good())
|
||||
throw NoXapianIndexInZim();
|
||||
zim::offset_type dbOffset = xapianArticle.getOffset();
|
||||
int databasefd = open(directoryPath.c_str(), O_RDONLY);
|
||||
lseek(databasefd, dbOffset, SEEK_SET);
|
||||
this->readableDatabase = Xapian::Database(databasefd);
|
||||
} catch (...) {
|
||||
this->readableDatabase = Xapian::Database(directoryPath);
|
||||
}
|
||||
this->valuesmap = read_valuesmap(this->readableDatabase.get_metadata("valuesmap"));
|
||||
this->language = this->readableDatabase.get_metadata("language");
|
||||
this->stopwords = this->readableDatabase.get_metadata("stopwords");
|
||||
|
@ -121,9 +108,6 @@ std::map<std::string, int> read_valuesmap(const std::string &s) {
|
|||
/* Get the results */
|
||||
this->results = enquire.get_mset(resultStart, resultEnd - resultStart);
|
||||
this->current_result = this->results.begin();
|
||||
|
||||
/* Update the global resultCount value*/
|
||||
this->estimatedResultCount = this->results.get_matches_estimated();
|
||||
}
|
||||
|
||||
/* Get next result */
|
||||
|
|
Loading…
Reference in New Issue