Suggestions: return the target URL and skip duplicate entries

- searchSuggestions() stores {title, final URL, normalized title} for each
  suggestion, follows redirects (at most 42 hops) to obtain the final URL and
  skips an article whose normalized title and final URL are already listed.
- New getNextSuggestion(title, url) overload returning the URL as well.
- New kiwix::normalize() helper: removeAccents(lcAll(word)).
- An empty prefix still rewinds the suggestion cursor before returning false,
  as the code did before this patch.

diff --git a/src/common/kiwix/reader.cpp b/src/common/kiwix/reader.cpp
index 79c84feaf..ca9ed869c 100644
--- a/src/common/kiwix/reader.cpp
+++ b/src/common/kiwix/reader.cpp
@@ -503,8 +503,6 @@ namespace kiwix {
   bool Reader::searchSuggestions(const string &prefix, unsigned int suggestionsCount, const bool reset) {
     bool retVal = false;
     zim::File::const_iterator articleItr;
-    std::vector<std::string>::iterator suggestionItr;
-    int result;
 
     /* Reset the suggestions otherwise check if the suggestions number is less than the suggestionsCount */
     if (reset) {
@@ -515,37 +513,52 @@ namespace kiwix {
       }
     }
 
-    if (prefix.size()) {
-      for (articleItr = zimFileHandler->findByTitle('A', prefix);
-           articleItr != zimFileHandler->end() &&
-             articleItr->getTitle().compare(0, prefix.size(), prefix) == 0 &&
-             this->suggestions.size() < suggestionsCount ;
-           ++articleItr) {
+    /* Nothing to search without a prefix, but still rewind the cursor */
+    if (prefix.empty()) {
+      this->suggestionsOffset = this->suggestions.begin();
+      return false;
+    }
 
-        if (this->suggestions.size() == 0) {
-          this->suggestions.push_back(articleItr->getTitle());
-        } else if (this->suggestions.size() < suggestionsCount) {
-          for (suggestionItr = this->suggestions.begin() ;
-               suggestionItr != this->suggestions.end();
-               ++suggestionItr) {
+    for (articleItr = zimFileHandler->findByTitle('A', prefix);
+         articleItr != zimFileHandler->end() &&
+           articleItr->getTitle().compare(0, prefix.size(), prefix) == 0 &&
+           this->suggestions.size() < suggestionsCount ;
+         ++articleItr) {
 
-            result = articleItr->getTitle().compare(*suggestionItr);
-            if (result < 0) {
-              this->suggestions.insert(suggestionItr, articleItr->getTitle());
-              break;
-            } else if (result == 0) {
-              break;
-            }
-          }
-
-          if (suggestionItr == this->suggestions.end()) {
-            this->suggestions.push_back(articleItr->getTitle());
-          }
-        }
-
-        /* Suggestions where found */
-        retVal = true;
+      /* Normalized title (ordering/duplicate key) and final url once redirects are followed */
+      std::string normalizedArticleTitle = kiwix::normalize(articleItr->getTitle());
+      std::string articleFinalUrl = "/A/"+articleItr->getUrl();
+      if (articleItr->isRedirect()) {
+        zim::Article article = *articleItr;
+        unsigned int loopCounter = 0;
+        while (article.isRedirect() && loopCounter++ < 42) { /* follow at most 42 redirect hops */
+          article = article.getRedirectArticle();
+        }
+        articleFinalUrl = "/A/"+article.getUrl();
       }
+
+      /* Walk the suggestions, which are kept ordered by normalized title,
+         and skip this article if an entry with the same normalized title
+         and the same final url is already listed (with another title) */
+      bool insert = true;
+      std::vector<std::vector<std::string>>::iterator suggestionItr;
+      for (suggestionItr = this->suggestions.begin(); suggestionItr != this->suggestions.end(); ++suggestionItr) {
+        int result = normalizedArticleTitle.compare((*suggestionItr)[2]);
+        if (result == 0 && articleFinalUrl.compare((*suggestionItr)[1]) == 0) {
+          insert = false;
+          break;
+        } else if (result < 0) {
+          break;
+        }
+      }
+
+      /* Insert if possible */
+      if (insert) {
+        this->suggestions.insert(suggestionItr, std::vector<std::string>{articleItr->getTitle(), articleFinalUrl, normalizedArticleTitle});
+      }
+
+      /* Suggestions were found */
+      retVal = true;
     }
 
     /* Set the cursor to the begining */
@@ -582,7 +595,22 @@ namespace kiwix {
   bool Reader::getNextSuggestion(string &title) {
     if (this->suggestionsOffset != this->suggestions.end()) {
       /* title */
-      title = *(this->suggestionsOffset);
+      title = (*(this->suggestionsOffset))[0];
+
+      /* increment the cursor for the next call */
+      this->suggestionsOffset++;
+
+      return true;
+    }
+
+    return false;
+  }
+
+  bool Reader::getNextSuggestion(string &title, string &url) {
+    if (this->suggestionsOffset != this->suggestions.end()) {
+      /* title & url */
+      title = (*(this->suggestionsOffset))[0];
+      url = (*(this->suggestionsOffset))[1];
 
       /* increment the cursor for the next call */
       this->suggestionsOffset++;
diff --git a/src/common/kiwix/reader.h b/src/common/kiwix/reader.h
index 8dd77f84a..f21173edd 100644
--- a/src/common/kiwix/reader.h
+++ b/src/common/kiwix/reader.h
@@ -71,6 +71,7 @@ namespace kiwix {
     bool searchSuggestionsSmart(const string &prefix, unsigned int suggestionsCount);
     std::vector<std::string> getTitleVariants(const std::string &title);
     bool getNextSuggestion(string &title);
+    bool getNextSuggestion(string &title, string &url);
     bool canCheckIntegrity();
     bool isCorrupted();
     bool parseUrl(const string &url, char *ns, string &title);
@@ -85,8 +86,8 @@ namespace kiwix {
     zim::size_type nsACount;
     zim::size_type nsICount;
 
-    std::vector<std::string> suggestions;
-    std::vector<std::string>::iterator suggestionsOffset;
+    std::vector<std::vector<std::string>> suggestions; /* each entry: {title, final url, normalized title} */
+    std::vector<std::vector<std::string>>::iterator suggestionsOffset;
 
   private:
     std::map<std::string, unsigned int> parseCounterMetadata();
diff --git a/src/common/stringTools.cpp b/src/common/stringTools.cpp
index 0b0ceeeba..a0f938e4d 100644
--- a/src/common/stringTools.cpp
+++ b/src/common/stringTools.cpp
@@ -266,3 +266,7 @@ std::string kiwix::toTitle (const std::string &word) {
 
   return result;
 }
+
+std::string kiwix::normalize (const std::string &word) {
+  return kiwix::removeAccents(kiwix::lcAll(word));
+}
diff --git a/src/common/stringTools.h b/src/common/stringTools.h
index 2a2367b29..250f9143d 100644
--- a/src/common/stringTools.h
+++ b/src/common/stringTools.h
@@ -65,6 +65,8 @@ namespace kiwix {
   std::string ucFirst(const std::string &word);
   std::string lcFirst(const std::string &word);
   std::string toTitle(const std::string &word);
+
+  std::string normalize(const std::string &word);
 }
 
 #endif