From 998db0eb2b446da46fb58951fdacf07544a37142 Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Mon, 27 Mar 2017 18:33:42 +0200 Subject: [PATCH] Use the language stored in the database to configure the queryparser. To properly search in the xapian database, we need a stemmer using the same language as the one used during indexing. --- include/xapianSearcher.h | 3 +++ src/xapianSearcher.cpp | 28 +++++++++++++++++++++++++--- 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/include/xapianSearcher.h b/include/xapianSearcher.h index 8b27eb229..df5df276a 100644 --- a/include/xapianSearcher.h +++ b/include/xapianSearcher.h @@ -70,9 +70,12 @@ namespace kiwix { protected: void closeIndex(); void openIndex(const string &xapianDirectoryPath); + void setup_queryParser(); Reader* reader; Xapian::Database readableDatabase; + std::string language; + Xapian::QueryParser queryParser; Xapian::Stem stemmer; Xapian::MSet results; Xapian::MSetIterator current_result; diff --git a/src/xapianSearcher.cpp b/src/xapianSearcher.cpp index 22fd06e17..f636336d0 100644 --- a/src/xapianSearcher.cpp +++ b/src/xapianSearcher.cpp @@ -25,6 +25,7 @@ #include #include #include +#include #include @@ -46,8 +47,8 @@ std::map read_valuesmap(const std::string &s) { /* Constructor */ XapianSearcher::XapianSearcher(const string &xapianDirectoryPath, Reader* reader) : Searcher(), - reader(reader), - stemmer(Xapian::Stem("english")) { + reader(reader) + { this->openIndex(xapianDirectoryPath); } @@ -67,18 +68,39 @@ std::map read_valuesmap(const std::string &s) { this->readableDatabase = Xapian::Database(directoryPath); } this->valuesmap = read_valuesmap(this->readableDatabase.get_metadata("valuesmap")); + this->language = this->readableDatabase.get_metadata("language"); + setup_queryParser(); } /* Close Xapian writable database */ void XapianSearcher::closeIndex() { return; } + + void XapianSearcher::setup_queryParser() + { + queryParser.set_database(readableDatabase); + if ( ! 
language.empty() ) + { + /* Build an ICU Locale object to retrieve ISO-639 language code (from + ISO-639-3) */ + icu::Locale languageLocale(language.c_str()); + + /* Configure language-based stemming */ + try { + stemmer = Xapian::Stem(languageLocale.getLanguage()); + queryParser.set_stemmer(stemmer); + queryParser.set_stemming_strategy(Xapian::QueryParser::STEM_ALL); + } catch (...) { + std::cout << "No steemming for language '" << languageLocale.getLanguage() << "'" << std::endl; + } + } + } /* Search strings in the database */ void XapianSearcher::searchInIndex(string &search, const unsigned int resultStart, const unsigned int resultEnd, const bool verbose) { /* Create the query */ - Xapian::QueryParser queryParser; Xapian::Query query = queryParser.parse_query(search); /* Create the enquire object */