mirror of https://github.com/kiwix/libkiwix.git
Merge pull request #54 from kiwix/searchinxapian
Re-add xapian searcher in kiwix-lib.
This commit is contained in:
commit
7132775d67
|
@ -5,6 +5,10 @@ headers = [
|
||||||
'searcher.h'
|
'searcher.h'
|
||||||
]
|
]
|
||||||
|
|
||||||
|
if xapian_dep.found()
|
||||||
|
headers += ['xapianSearcher.h']
|
||||||
|
endif
|
||||||
|
|
||||||
install_headers(headers, subdir:'kiwix')
|
install_headers(headers, subdir:'kiwix')
|
||||||
|
|
||||||
install_headers(
|
install_headers(
|
||||||
|
|
|
@ -53,7 +53,7 @@ namespace kiwix {
|
||||||
class Searcher {
|
class Searcher {
|
||||||
|
|
||||||
public:
|
public:
|
||||||
Searcher(Reader* reader);
|
Searcher(const string &xapianDirectoryPath, Reader* reader);
|
||||||
~Searcher();
|
~Searcher();
|
||||||
|
|
||||||
void search(std::string &search, unsigned int resultStart,
|
void search(std::string &search, unsigned int resultStart,
|
||||||
|
|
|
@ -57,7 +57,7 @@ namespace kiwix {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
class XapianSearcher : public Searcher {
|
class XapianSearcher {
|
||||||
friend class XapianResult;
|
friend class XapianResult;
|
||||||
public:
|
public:
|
||||||
XapianSearcher(const string &xapianDirectoryPath, Reader* reader);
|
XapianSearcher(const string &xapianDirectoryPath, Reader* reader);
|
||||||
|
@ -67,6 +67,8 @@ namespace kiwix {
|
||||||
virtual Result* getNextResult();
|
virtual Result* getNextResult();
|
||||||
void restart_search();
|
void restart_search();
|
||||||
|
|
||||||
|
Xapian::MSet results;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
void closeIndex();
|
void closeIndex();
|
||||||
void openIndex(const string &xapianDirectoryPath);
|
void openIndex(const string &xapianDirectoryPath);
|
||||||
|
@ -79,7 +81,6 @@ namespace kiwix {
|
||||||
Xapian::QueryParser queryParser;
|
Xapian::QueryParser queryParser;
|
||||||
Xapian::Stem stemmer;
|
Xapian::Stem stemmer;
|
||||||
Xapian::SimpleStopper stopper;
|
Xapian::SimpleStopper stopper;
|
||||||
Xapian::MSet results;
|
|
||||||
Xapian::MSetIterator current_result;
|
Xapian::MSetIterator current_result;
|
||||||
std::map<std::string, int> valuesmap;
|
std::map<std::string, int> valuesmap;
|
||||||
};
|
};
|
||||||
|
|
|
@ -62,7 +62,9 @@ else
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
all_deps = [thread_dep, libicu_dep, libzim_dep, pugixml_dep]
|
xapian_dep = dependency('xapian-core', required:false)
|
||||||
|
|
||||||
|
all_deps = [thread_dep, libicu_dep, libzim_dep, xapian_dep, pugixml_dep]
|
||||||
if has_ctpp2_dep
|
if has_ctpp2_dep
|
||||||
all_deps += [ctpp2_dep]
|
all_deps += [ctpp2_dep]
|
||||||
endif
|
endif
|
||||||
|
@ -79,6 +81,9 @@ subdir('static')
|
||||||
subdir('src')
|
subdir('src')
|
||||||
|
|
||||||
pkg_requires = ['libzim', 'icu-i18n', 'pugixml']
|
pkg_requires = ['libzim', 'icu-i18n', 'pugixml']
|
||||||
|
if xapian_dep.found()
|
||||||
|
pkg_requires += ['xapian-core']
|
||||||
|
endif
|
||||||
|
|
||||||
extra_libs = []
|
extra_libs = []
|
||||||
extra_cflags = ''
|
extra_cflags = ''
|
||||||
|
|
|
@ -445,7 +445,7 @@ JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_loadFulltextIndex(JN
|
||||||
searcher = NULL;
|
searcher = NULL;
|
||||||
try {
|
try {
|
||||||
if (searcher != NULL) delete searcher;
|
if (searcher != NULL) delete searcher;
|
||||||
searcher = new kiwix::Searcher(reader);
|
searcher = new kiwix::Searcher(cPath, reader);
|
||||||
} catch (...) {
|
} catch (...) {
|
||||||
searcher = NULL;
|
searcher = NULL;
|
||||||
retVal = JNI_FALSE;
|
retVal = JNI_FALSE;
|
||||||
|
|
|
@ -8,10 +8,16 @@ kiwix_sources = [
|
||||||
'common/regexTools.cpp',
|
'common/regexTools.cpp',
|
||||||
'common/stringTools.cpp',
|
'common/stringTools.cpp',
|
||||||
'common/networkTools.cpp',
|
'common/networkTools.cpp',
|
||||||
'common/otherTools.cpp'
|
'common/otherTools.cpp',
|
||||||
|
'xapian/htmlparse.cc',
|
||||||
|
'xapian/myhtmlparse.cc'
|
||||||
]
|
]
|
||||||
kiwix_sources += lib_resources
|
kiwix_sources += lib_resources
|
||||||
|
|
||||||
|
if xapian_dep.found()
|
||||||
|
kiwix_sources += ['xapianSearcher.cpp']
|
||||||
|
endif
|
||||||
|
|
||||||
if get_option('android')
|
if get_option('android')
|
||||||
subdir('android')
|
subdir('android')
|
||||||
install_dir = 'kiwix-lib/jniLibs/' + host_machine.cpu_family()
|
install_dir = 'kiwix-lib/jniLibs/' + host_machine.cpu_family()
|
||||||
|
|
|
@ -18,6 +18,7 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "searcher.h"
|
#include "searcher.h"
|
||||||
|
#include "xapianSearcher.h"
|
||||||
#include "reader.h"
|
#include "reader.h"
|
||||||
#include "kiwixlib-resources.h"
|
#include "kiwixlib-resources.h"
|
||||||
|
|
||||||
|
@ -54,18 +55,25 @@ namespace kiwix {
|
||||||
|
|
||||||
struct SearcherInternal {
|
struct SearcherInternal {
|
||||||
const zim::Search *_search;
|
const zim::Search *_search;
|
||||||
|
XapianSearcher *_xapianSearcher;
|
||||||
zim::Search::iterator current_iterator;
|
zim::Search::iterator current_iterator;
|
||||||
|
|
||||||
SearcherInternal() : _search(NULL) {}
|
|
||||||
|
SearcherInternal() :
|
||||||
|
_search(NULL),
|
||||||
|
_xapianSearcher(NULL)
|
||||||
|
{}
|
||||||
~SearcherInternal() {
|
~SearcherInternal() {
|
||||||
if ( _search != NULL )
|
if ( _search != NULL )
|
||||||
delete _search;
|
delete _search;
|
||||||
|
if ( _xapianSearcher != NULL )
|
||||||
|
delete _xapianSearcher;
|
||||||
}
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Constructor */
|
/* Constructor */
|
||||||
Searcher::Searcher(Reader* reader) :
|
Searcher::Searcher(const string &xapianDirectoryPath, Reader* reader) :
|
||||||
reader(reader),
|
reader(reader),
|
||||||
internal(new SearcherInternal()),
|
internal(new SearcherInternal()),
|
||||||
searchPattern(""),
|
searchPattern(""),
|
||||||
|
@ -78,6 +86,9 @@ namespace kiwix {
|
||||||
{
|
{
|
||||||
template_ct2 = RESOURCE::results_ct2;
|
template_ct2 = RESOURCE::results_ct2;
|
||||||
loadICUExternalTables();
|
loadICUExternalTables();
|
||||||
|
if ( !reader || !reader->hasFulltextIndex() ) {
|
||||||
|
internal->_xapianSearcher = new XapianSearcher(xapianDirectoryPath, reader);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Destructor */
|
/* Destructor */
|
||||||
|
@ -116,20 +127,31 @@ namespace kiwix {
|
||||||
this->resultStart = resultStart;
|
this->resultStart = resultStart;
|
||||||
this->resultEnd = resultEnd;
|
this->resultEnd = resultEnd;
|
||||||
string unaccentedSearch = removeAccents(search);
|
string unaccentedSearch = removeAccents(search);
|
||||||
internal->_search = this->reader->getZimFileHandler()->search(unaccentedSearch, resultStart, resultEnd);
|
if ( internal->_xapianSearcher ) {
|
||||||
internal->current_iterator = internal->_search->begin();
|
internal->_xapianSearcher->searchInIndex(unaccentedSearch, resultStart, resultEnd, verbose);
|
||||||
this->estimatedResultCount = internal->_search->get_matches_estimated();
|
this->estimatedResultCount = internal->_xapianSearcher->results.get_matches_estimated();
|
||||||
|
} else {
|
||||||
|
internal->_search = this->reader->getZimFileHandler()->search(unaccentedSearch, resultStart, resultEnd);
|
||||||
|
internal->current_iterator = internal->_search->begin();
|
||||||
|
this->estimatedResultCount = internal->_search->get_matches_estimated();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Searcher::restart_search() {
|
void Searcher::restart_search() {
|
||||||
internal->current_iterator = internal->_search->begin();
|
if ( internal->_xapianSearcher ) {
|
||||||
|
internal->_xapianSearcher->restart_search();
|
||||||
|
} else {
|
||||||
|
internal->current_iterator = internal->_search->begin();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Result* Searcher::getNextResult() {
|
Result* Searcher::getNextResult() {
|
||||||
if (internal->current_iterator != internal->_search->end()) {
|
if ( internal->_xapianSearcher ) {
|
||||||
|
return internal->_xapianSearcher->getNextResult();
|
||||||
|
} else if (internal->current_iterator != internal->_search->end()) {
|
||||||
Result* result = new _Result(this, internal->current_iterator);
|
Result* result = new _Result(this, internal->current_iterator);
|
||||||
internal->current_iterator++;
|
internal->current_iterator++;
|
||||||
return result;
|
return result;
|
||||||
|
|
|
@ -46,27 +46,14 @@ std::map<std::string, int> read_valuesmap(const std::string &s) {
|
||||||
|
|
||||||
/* Constructor */
|
/* Constructor */
|
||||||
XapianSearcher::XapianSearcher(const string &xapianDirectoryPath, Reader* reader)
|
XapianSearcher::XapianSearcher(const string &xapianDirectoryPath, Reader* reader)
|
||||||
: Searcher(),
|
: reader(reader)
|
||||||
reader(reader)
|
|
||||||
{
|
{
|
||||||
this->openIndex(xapianDirectoryPath);
|
this->openIndex(xapianDirectoryPath);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Open Xapian readable database */
|
/* Open Xapian readable database */
|
||||||
void XapianSearcher::openIndex(const string &directoryPath) {
|
void XapianSearcher::openIndex(const string &directoryPath) {
|
||||||
try
|
this->readableDatabase = Xapian::Database(directoryPath);
|
||||||
{
|
|
||||||
zim::File zimFile = zim::File(directoryPath);
|
|
||||||
zim::Article xapianArticle = zimFile.getArticle('Z', "/fulltextIndex/xapian");
|
|
||||||
if (!xapianArticle.good())
|
|
||||||
throw NoXapianIndexInZim();
|
|
||||||
zim::offset_type dbOffset = xapianArticle.getOffset();
|
|
||||||
int databasefd = open(directoryPath.c_str(), O_RDONLY);
|
|
||||||
lseek(databasefd, dbOffset, SEEK_SET);
|
|
||||||
this->readableDatabase = Xapian::Database(databasefd);
|
|
||||||
} catch (...) {
|
|
||||||
this->readableDatabase = Xapian::Database(directoryPath);
|
|
||||||
}
|
|
||||||
this->valuesmap = read_valuesmap(this->readableDatabase.get_metadata("valuesmap"));
|
this->valuesmap = read_valuesmap(this->readableDatabase.get_metadata("valuesmap"));
|
||||||
this->language = this->readableDatabase.get_metadata("language");
|
this->language = this->readableDatabase.get_metadata("language");
|
||||||
this->stopwords = this->readableDatabase.get_metadata("stopwords");
|
this->stopwords = this->readableDatabase.get_metadata("stopwords");
|
||||||
|
@ -121,9 +108,6 @@ std::map<std::string, int> read_valuesmap(const std::string &s) {
|
||||||
/* Get the results */
|
/* Get the results */
|
||||||
this->results = enquire.get_mset(resultStart, resultEnd - resultStart);
|
this->results = enquire.get_mset(resultStart, resultEnd - resultStart);
|
||||||
this->current_result = this->results.begin();
|
this->current_result = this->results.begin();
|
||||||
|
|
||||||
/* Update the global resultCount value*/
|
|
||||||
this->estimatedResultCount = this->results.get_matches_estimated();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Get next result */
|
/* Get next result */
|
||||||
|
|
Loading…
Reference in New Issue