Use the stop words stored in the database to configure the queryparser.

To search the Xapian database properly, we need to use the same
stop words as the ones used during indexing.
This commit is contained in:
Matthieu Gautier 2017-03-28 09:59:39 +02:00
parent 998db0eb2b
commit b8d950c1a0
4 changed files with 13 additions and 19 deletions

4
include/indexer.h Executable file → Normal file
View File

@ -78,10 +78,6 @@ namespace kiwix {
virtual void flush() = 0;
virtual void indexingPostlude(const string indexPath) = 0;
/* Stop words */
std::vector<std::string> stopWords;
void readStopWords(const string languageCode);
/* Others */
unsigned int countWords(const string &text);

View File

@ -75,8 +75,10 @@ namespace kiwix {
Reader* reader;
Xapian::Database readableDatabase;
std::string language;
std::string stopwords;
Xapian::QueryParser queryParser;
Xapian::Stem stemmer;
Xapian::SimpleStopper stopper;
Xapian::MSet results;
Xapian::MSetIterator current_result;
std::map<std::string, int> valuesmap;

View File

@ -62,22 +62,7 @@ namespace kiwix {
/* Destructor: the body is empty, so no explicit cleanup is performed here. */
Indexer::~Indexer() {
}
/*
 * Load the stop word list for a language.
 *
 * The text returned by getResource("stopwords/" + languageCode) is split
 * on '\n'; every resulting line is stored, in order, in this->stopWords.
 * Any previously stored stop words are discarded first.
 *
 * When verboseFlag is set, the language code and the number of stop words
 * read are printed to standard output.
 */
void Indexer::readStopWords(const string languageCode) {
  std::istringstream resourceStream(getResource("stopwords/" + languageCode));

  /* Start from an empty list so repeated calls do not accumulate entries */
  this->stopWords.clear();
  for (std::string line; getline(resourceStream, line, '\n'); ) {
    this->stopWords.push_back(line);
  }

  if (!this->verboseFlag) {
    return;
  }

  std::cout << "Read stop words, lang code:" << languageCode
            << ", count:" << this->stopWords.size() << std::endl;
}
#pragma mark - Extractor

View File

@ -69,6 +69,7 @@ std::map<std::string, int> read_valuesmap(const std::string &s) {
}
this->valuesmap = read_valuesmap(this->readableDatabase.get_metadata("valuesmap"));
this->language = this->readableDatabase.get_metadata("language");
this->stopwords = this->readableDatabase.get_metadata("stopwords");
setup_queryParser();
}
@ -95,6 +96,16 @@ std::map<std::string, int> read_valuesmap(const std::string &s) {
std::cout << "No steemming for language '" << languageLocale.getLanguage() << "'" << std::endl;
}
}
if ( ! stopwords.empty() )
{
std::string stopWord;
std::istringstream file(this->stopwords);
while (std::getline(file, stopWord, '\n')) {
this->stopper.add(stopWord);
}
queryParser.set_stopper(&(this->stopper));
}
}
/* Search strings in the database */