Better suggestion search

This commit is contained in:
Emmanuel Engelhart 2015-08-16 17:18:29 +02:00
parent 7903458e38
commit e829d7428d
4 changed files with 67 additions and 33 deletions

View File

@ -503,8 +503,6 @@ namespace kiwix {
bool Reader::searchSuggestions(const string &prefix, unsigned int suggestionsCount, const bool reset) { bool Reader::searchSuggestions(const string &prefix, unsigned int suggestionsCount, const bool reset) {
bool retVal = false; bool retVal = false;
zim::File::const_iterator articleItr; zim::File::const_iterator articleItr;
std::vector<std::string>::iterator suggestionItr;
int result;
/* Reset the suggestions otherwise check if the suggestions number is less than the suggestionsCount */ /* Reset the suggestions otherwise check if the suggestions number is less than the suggestionsCount */
if (reset) { if (reset) {
@ -515,37 +513,51 @@ namespace kiwix {
} }
} }
if (prefix.size()) { /* Return if no prefix */
for (articleItr = zimFileHandler->findByTitle('A', prefix); if (prefix.size() == 0) {
articleItr != zimFileHandler->end() && return false;
articleItr->getTitle().compare(0, prefix.size(), prefix) == 0 && }
this->suggestions.size() < suggestionsCount ;
++articleItr) {
if (this->suggestions.size() == 0) { for (articleItr = zimFileHandler->findByTitle('A', prefix);
this->suggestions.push_back(articleItr->getTitle()); articleItr != zimFileHandler->end() &&
} else if (this->suggestions.size() < suggestionsCount) { articleItr->getTitle().compare(0, prefix.size(), prefix) == 0 &&
for (suggestionItr = this->suggestions.begin() ; this->suggestions.size() < suggestionsCount ;
suggestionItr != this->suggestions.end(); ++articleItr) {
++suggestionItr) {
result = articleItr->getTitle().compare(*suggestionItr); /* Extract the interesting part of article title & url */
if (result < 0) { std::string normalizedArticleTitle = kiwix::normalize(articleItr->getTitle());
this->suggestions.insert(suggestionItr, articleItr->getTitle()); std::string articleFinalUrl = "/A/"+articleItr->getUrl();
break; if (articleItr->isRedirect()) {
} else if (result == 0) { zim::Article article = *articleItr;
break; unsigned int loopCounter = 0;
} while (article.isRedirect() && loopCounter++<42) {
} article = article.getRedirectArticle();
}
if (suggestionItr == this->suggestions.end()) { articleFinalUrl = "/A/"+article.getUrl();
this->suggestions.push_back(articleItr->getTitle());
}
}
/* Suggestions were found */
retVal = true;
} }
/* Go through all already found suggestions and skip if this
article is already in the suggestions list (with an other
title) */
bool insert = true;
std::vector<std::vector<std::string>>::iterator suggestionItr;
for (suggestionItr = this->suggestions.begin(); suggestionItr != this->suggestions.end(); suggestionItr++) {
int result = normalizedArticleTitle.compare((*suggestionItr)[2]);
if (result == 0 && articleFinalUrl.compare((*suggestionItr)[1]) == 0) {
insert = false;
break;
} else if (result < 0) {
break;
}
}
/* Insert if possible */
if (insert) {
this->suggestions.insert(suggestionItr, std::vector<std::string>{articleItr->getTitle(), articleFinalUrl, normalizedArticleTitle});
}
/* Suggestions were found */
retVal = true;
} }
/* Set the cursor to the beginning */ /* Set the cursor to the beginning */
@ -582,7 +594,22 @@ namespace kiwix {
bool Reader::getNextSuggestion(string &title) { bool Reader::getNextSuggestion(string &title) {
if (this->suggestionsOffset != this->suggestions.end()) { if (this->suggestionsOffset != this->suggestions.end()) {
/* title */ /* title */
title = *(this->suggestionsOffset); title = (*(this->suggestionsOffset))[0];
/* increment the cursor for the next call */
this->suggestionsOffset++;
return true;
}
return false;
}
bool Reader::getNextSuggestion(string &title, string &url) {
if (this->suggestionsOffset != this->suggestions.end()) {
/* title */
title = (*(this->suggestionsOffset))[0];
url = (*(this->suggestionsOffset))[1];
/* increment the cursor for the next call */ /* increment the cursor for the next call */
this->suggestionsOffset++; this->suggestionsOffset++;

View File

@ -71,6 +71,7 @@ namespace kiwix {
bool searchSuggestionsSmart(const string &prefix, unsigned int suggestionsCount); bool searchSuggestionsSmart(const string &prefix, unsigned int suggestionsCount);
std::vector<std::string> getTitleVariants(const std::string &title); std::vector<std::string> getTitleVariants(const std::string &title);
bool getNextSuggestion(string &title); bool getNextSuggestion(string &title);
bool getNextSuggestion(string &title, string &url);
bool canCheckIntegrity(); bool canCheckIntegrity();
bool isCorrupted(); bool isCorrupted();
bool parseUrl(const string &url, char *ns, string &title); bool parseUrl(const string &url, char *ns, string &title);
@ -85,8 +86,8 @@ namespace kiwix {
zim::size_type nsACount; zim::size_type nsACount;
zim::size_type nsICount; zim::size_type nsICount;
std::vector<std::string> suggestions; std::vector<std::vector<std::string>> suggestions;
std::vector<std::string>::iterator suggestionsOffset; std::vector<std::vector<std::string>>::iterator suggestionsOffset;
private: private:
std::map<std::string, unsigned int> parseCounterMetadata(); std::map<std::string, unsigned int> parseCounterMetadata();

View File

@ -266,3 +266,7 @@ std::string kiwix::toTitle (const std::string &word) {
return result; return result;
} }
/* Build the normalized form of a word: the word is first passed
   through kiwix::lcAll(), and that result is then passed through
   kiwix::removeAccents(). The input string itself is left untouched. */
std::string kiwix::normalize (const std::string &word) {
  std::string lowered = kiwix::lcAll(word);
  return kiwix::removeAccents(lowered);
}

View File

@ -65,6 +65,8 @@ namespace kiwix {
std::string ucFirst(const std::string &word); std::string ucFirst(const std::string &word);
std::string lcFirst(const std::string &word); std::string lcFirst(const std::string &word);
std::string toTitle(const std::string &word); std::string toTitle(const std::string &word);
std::string normalize(const std::string &word);
} }
#endif #endif