diff --git a/include/indexer.h b/include/indexer.h old mode 100755 new mode 100644 index a4f160638..9435819b6 --- a/include/indexer.h +++ b/include/indexer.h @@ -78,10 +78,6 @@ namespace kiwix { virtual void flush() = 0; virtual void indexingPostlude(const string indexPath) = 0; - /* Stop words */ - std::vector stopWords; - void readStopWords(const string languageCode); - /* Others */ unsigned int countWords(const string &text); diff --git a/include/xapianSearcher.h b/include/xapianSearcher.h index df5df276a..e11c03e68 100644 --- a/include/xapianSearcher.h +++ b/include/xapianSearcher.h @@ -75,8 +75,10 @@ namespace kiwix { Reader* reader; Xapian::Database readableDatabase; std::string language; + std::string stopwords; Xapian::QueryParser queryParser; Xapian::Stem stemmer; + Xapian::SimpleStopper stopper; Xapian::MSet results; Xapian::MSetIterator current_result; std::map valuesmap; diff --git a/src/indexer.cpp b/src/indexer.cpp index f265aa720..7e230bc9a 100755 --- a/src/indexer.cpp +++ b/src/indexer.cpp @@ -62,22 +62,7 @@ namespace kiwix { /* Destructor */ Indexer::~Indexer() { } - - /* Read the stopwords */ - void Indexer::readStopWords(const string languageCode) { - std::string stopWord; - std::istringstream file(getResource("stopwords/" + languageCode)); - - this->stopWords.clear(); - - while (getline(file, stopWord, '\n')) { - this->stopWords.push_back(stopWord); - } - if (this->verboseFlag) { - std::cout << "Read stop words, lang code:" << languageCode << ", count:" << this->stopWords.size() << std::endl; - } - } #pragma mark - Extractor diff --git a/src/xapianSearcher.cpp b/src/xapianSearcher.cpp index f636336d0..7e0fab28f 100644 --- a/src/xapianSearcher.cpp +++ b/src/xapianSearcher.cpp @@ -69,6 +69,7 @@ std::map read_valuesmap(const std::string &s) { } this->valuesmap = read_valuesmap(this->readableDatabase.get_metadata("valuesmap")); this->language = this->readableDatabase.get_metadata("language"); + this->stopwords = this->readableDatabase.get_metadata("stopwords"); setup_queryParser(); } @@ -95,6 +96,16 @@ std::map read_valuesmap(const std::string &s) { std::cout << "No steemming for language '" << languageLocale.getLanguage() << "'" << std::endl; } } + + if ( ! stopwords.empty() ) + { + std::string stopWord; + std::istringstream file(this->stopwords); + while (std::getline(file, stopWord, '\n')) { + this->stopper.add(stopWord); + } + queryParser.set_stopper(&(this->stopper)); + } } /* Search strings in the database */