Use the language stored in the database to configure the queryparser.

To search the Xapian database properly, we need a stemmer that uses the
same language as the one used during indexing.
This commit is contained in:
Matthieu Gautier 2017-03-27 18:33:42 +02:00
parent 46fab22a73
commit 998db0eb2b
2 changed files with 28 additions and 3 deletions

View File

@ -70,9 +70,12 @@ namespace kiwix {
protected: protected:
void closeIndex(); void closeIndex();
void openIndex(const string &xapianDirectoryPath); void openIndex(const string &xapianDirectoryPath);
void setup_queryParser();
Reader* reader; Reader* reader;
Xapian::Database readableDatabase; Xapian::Database readableDatabase;
std::string language;
Xapian::QueryParser queryParser;
Xapian::Stem stemmer; Xapian::Stem stemmer;
Xapian::MSet results; Xapian::MSet results;
Xapian::MSetIterator current_result; Xapian::MSetIterator current_result;

View File

@ -25,6 +25,7 @@
#include <zim/error.h> #include <zim/error.h>
#include <sys/types.h> #include <sys/types.h>
#include <unistd.h> #include <unistd.h>
#include <unicode/locid.h>
#include <vector> #include <vector>
@ -46,8 +47,8 @@ std::map<std::string, int> read_valuesmap(const std::string &s) {
/* Constructor */ /* Constructor */
XapianSearcher::XapianSearcher(const string &xapianDirectoryPath, Reader* reader) XapianSearcher::XapianSearcher(const string &xapianDirectoryPath, Reader* reader)
: Searcher(), : Searcher(),
reader(reader), reader(reader)
stemmer(Xapian::Stem("english")) { {
this->openIndex(xapianDirectoryPath); this->openIndex(xapianDirectoryPath);
} }
@ -67,18 +68,39 @@ std::map<std::string, int> read_valuesmap(const std::string &s) {
this->readableDatabase = Xapian::Database(directoryPath); this->readableDatabase = Xapian::Database(directoryPath);
} }
this->valuesmap = read_valuesmap(this->readableDatabase.get_metadata("valuesmap")); this->valuesmap = read_valuesmap(this->readableDatabase.get_metadata("valuesmap"));
this->language = this->readableDatabase.get_metadata("language");
setup_queryParser();
} }
/* Close Xapian writable database */ /* Close Xapian writable database */
void XapianSearcher::closeIndex() { void XapianSearcher::closeIndex() {
return; return;
} }
void XapianSearcher::setup_queryParser()
{
queryParser.set_database(readableDatabase);
if ( ! language.empty() )
{
/* Build ICU Local object to retrieve ISO-639 language code (from
ISO-639-3) */
icu::Locale languageLocale(language.c_str());
/* Configuring language base steemming */
try {
stemmer = Xapian::Stem(languageLocale.getLanguage());
queryParser.set_stemmer(stemmer);
queryParser.set_stemming_strategy(Xapian::QueryParser::STEM_ALL);
} catch (...) {
std::cout << "No steemming for language '" << languageLocale.getLanguage() << "'" << std::endl;
}
}
}
/* Search strings in the database */ /* Search strings in the database */
void XapianSearcher::searchInIndex(string &search, const unsigned int resultStart, void XapianSearcher::searchInIndex(string &search, const unsigned int resultStart,
const unsigned int resultEnd, const bool verbose) { const unsigned int resultEnd, const bool verbose) {
/* Create the query */ /* Create the query */
Xapian::QueryParser queryParser;
Xapian::Query query = queryParser.parse_query(search); Xapian::Query query = queryParser.parse_query(search);
/* Create the enquire object */ /* Create the enquire object */