mirror of https://github.com/kiwix/libkiwix.git
Use the stop words stored in the database to configure the queryparser.
To search the Xapian database properly, we need to use the same stop words as the ones used during indexing.
This commit is contained in:
parent
998db0eb2b
commit
b8d950c1a0
|
@ -78,10 +78,6 @@ namespace kiwix {
|
|||
virtual void flush() = 0;
|
||||
virtual void indexingPostlude(const string indexPath) = 0;
|
||||
|
||||
/* Stop words */
|
||||
std::vector<std::string> stopWords;
|
||||
void readStopWords(const string languageCode);
|
||||
|
||||
/* Others */
|
||||
unsigned int countWords(const string &text);
|
||||
|
||||
|
|
|
@ -75,8 +75,10 @@ namespace kiwix {
|
|||
Reader* reader;
|
||||
Xapian::Database readableDatabase;
|
||||
std::string language;
|
||||
std::string stopwords;
|
||||
Xapian::QueryParser queryParser;
|
||||
Xapian::Stem stemmer;
|
||||
Xapian::SimpleStopper stopper;
|
||||
Xapian::MSet results;
|
||||
Xapian::MSetIterator current_result;
|
||||
std::map<std::string, int> valuesmap;
|
||||
|
|
|
@ -62,22 +62,7 @@ namespace kiwix {
|
|||
/* Destructor */
|
||||
Indexer::~Indexer() {
|
||||
}
|
||||
|
||||
/* Read the stopwords */
|
||||
void Indexer::readStopWords(const string languageCode) {
|
||||
std::string stopWord;
|
||||
std::istringstream file(getResource("stopwords/" + languageCode));
|
||||
|
||||
this->stopWords.clear();
|
||||
|
||||
while (getline(file, stopWord, '\n')) {
|
||||
this->stopWords.push_back(stopWord);
|
||||
}
|
||||
|
||||
if (this->verboseFlag) {
|
||||
std::cout << "Read stop words, lang code:" << languageCode << ", count:" << this->stopWords.size() << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
#pragma mark - Extractor
|
||||
|
||||
|
|
|
@ -69,6 +69,7 @@ std::map<std::string, int> read_valuesmap(const std::string &s) {
|
|||
}
|
||||
this->valuesmap = read_valuesmap(this->readableDatabase.get_metadata("valuesmap"));
|
||||
this->language = this->readableDatabase.get_metadata("language");
|
||||
this->stopwords = this->readableDatabase.get_metadata("stopwords");
|
||||
setup_queryParser();
|
||||
}
|
||||
|
||||
|
@ -95,6 +96,16 @@ std::map<std::string, int> read_valuesmap(const std::string &s) {
|
|||
std::cout << "No steemming for language '" << languageLocale.getLanguage() << "'" << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
if ( ! stopwords.empty() )
|
||||
{
|
||||
std::string stopWord;
|
||||
std::istringstream file(this->stopwords);
|
||||
while (std::getline(file, stopWord, '\n')) {
|
||||
this->stopper.add(stopWord);
|
||||
}
|
||||
queryParser.set_stopper(&(this->stopper));
|
||||
}
|
||||
}
|
||||
|
||||
/* Search strings in the database */
|
||||
|
|
Loading…
Reference in New Issue