Merge pull request #374 from kiwix/new_api_multithread_suggestion

Add new thread safe suggestion API.
This commit is contained in:
Matthieu Gautier 2020-07-02 14:12:12 +02:00 committed by GitHub
commit f0b037f37f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 104 additions and 28 deletions

View File

@ -43,6 +43,8 @@ namespace kiwix
* The Reader class is the class who allow to get an entry content from a zim * The Reader class is the class who allow to get an entry content from a zim
* file. * file.
*/ */
using SuggestionsList_t = std::vector<std::vector<std::string>>;
class Reader class Reader
{ {
public: public:
@ -419,6 +421,10 @@ class Reader
* *
* Suggestions are stored in an internal vector and can be retrieved using * Suggestions are stored in an internal vector and can be retrieved using
* `getNextSuggestion` method. * `getNextSuggestion` method.
* This method is not thread safe and is deprecated. Use :
* bool searchSuggestions(const string& prefix,
* unsigned int suggestionsCount,
* SuggestionsList_t& results);
* *
* @param prefix The prefix to search. * @param prefix The prefix to search.
* @param suggestionsCount How many suggestions to search for. * @param suggestionsCount How many suggestions to search for.
@ -426,12 +432,49 @@ class Reader
* If false, add suggestions to the internal vector * If false, add suggestions to the internal vector
* (until internal vector size is suggestionCount (or no more * (until internal vector size is suggestionCount (or no more
* suggestion)) * suggestion))
* @return True if some suggestions where added to the internal vector. * @return True if some suggestions have been added to the internal vector.
*/ */
bool searchSuggestions(const string& prefix, DEPRECATED bool searchSuggestions(const string& prefix,
unsigned int suggestionsCount, unsigned int suggestionsCount,
const bool reset = true); const bool reset = true);
/**
* Search for entries with title starting with prefix (case sensitive).
*
* Suggestions are added to the `results` vector. The vector is not cleared
* first: entries already present count toward `suggestionsCount`.
* Unlike the deprecated overload, this method does not use the internal
* suggestions vector.
*
* @param prefix The prefix to search. An empty prefix yields no suggestion.
* @param suggestionsCount How many suggestions to search for.
* @param results The vector where to store the suggestions.
* @return True if some suggestions have been added to the vector.
*/
bool searchSuggestions(const string& prefix,
unsigned int suggestionsCount,
SuggestionsList_t& results);
/**
* Search for entries for the given prefix.
*
* If the zim file has an internal fulltext index, the suggestions will be
* searched using it.
* Else the suggestions will be searched using `searchSuggestions` while trying
* to be smart about case sensitivity (using `getTitleVariants`).
*
* In any case, suggestions are stored in an internal vector and can be
* retrieved using `getNextSuggestion` method.
* The internal vector will be reset.
* This method is not thread safe and is deprecated. Use :
* bool searchSuggestionsSmart(const string& prefix,
* unsigned int suggestionsCount,
* SuggestionsList_t& results);
*
* @param prefix The prefix to search for.
* @param suggestionsCount How many suggestions to search for.
* @return True if some suggestions have been found.
*/
DEPRECATED bool searchSuggestionsSmart(const string& prefix,
unsigned int suggestionsCount);
/** /**
* Search for entries for the given prefix. * Search for entries for the given prefix.
* *
@ -446,9 +489,13 @@ class Reader
* *
* @param prefix The prefix to search for. * @param prefix The prefix to search for.
* @param suggestionsCount How many suggestions to search for. * @param suggestionsCount How many suggestions to search for.
* @param results The vector where to store the suggestions
* @return True if some suggestions have been added to the results.
*/ */
bool searchSuggestionsSmart(const string& prefix, bool searchSuggestionsSmart(const string& prefix,
unsigned int suggestionsCount); unsigned int suggestionsCount,
SuggestionsList_t& results);
/** /**
* Check if the url exists in the zim file. * Check if the url exists in the zim file.
@ -490,7 +537,7 @@ class Reader
* @param[out] title the title of the suggestion. * @param[out] title the title of the suggestion.
* @return True if title has been set. * @return True if title has been set.
*/ */
bool getNextSuggestion(string& title); DEPRECATED bool getNextSuggestion(string& title);
/** /**
* Get the next suggestion title and url. * Get the next suggestion title and url.
@ -499,7 +546,7 @@ class Reader
* @param[out] url the url of the suggestion. * @param[out] url the url of the suggestion.
* @return True if title and url have been set. * @return True if title and url have been set.
*/ */
bool getNextSuggestion(string& title, string& url); DEPRECATED bool getNextSuggestion(string& title, string& url);
/** /**
* Get if we can check zim file integrity (has a checksum). * Get if we can check zim file integrity (has a checksum).
@ -559,8 +606,8 @@ class Reader
zim::size_type nsICount; zim::size_type nsICount;
std::string zimFilePath; std::string zimFilePath;
std::vector<std::vector<std::string>> suggestions; SuggestionsList_t suggestions;
std::vector<std::vector<std::string>>::iterator suggestionsOffset; SuggestionsList_t::iterator suggestionsOffset;
private: private:
std::map<const std::string, unsigned int> parseCounterMetadata() const; std::map<const std::string, unsigned int> parseCounterMetadata() const;

View File

@ -709,12 +709,11 @@ bool Reader::hasFulltextIndex() const
} }
/* Search titles by prefix */ /* Search titles by prefix */
bool Reader::searchSuggestions(const string& prefix, bool Reader::searchSuggestions(const string& prefix,
unsigned int suggestionsCount, unsigned int suggestionsCount,
const bool reset) const bool reset)
{ {
bool retVal = false;
/* Reset the suggestions otherwise check if the suggestions number is less /* Reset the suggestions otherwise check if the suggestions number is less
* than the suggestionsCount */ * than the suggestionsCount */
if (reset) { if (reset) {
@ -726,6 +725,21 @@ bool Reader::searchSuggestions(const string& prefix,
} }
} }
auto ret = searchSuggestions(prefix, suggestionsCount, this->suggestions);
/* Set the cursor to the beginning */
this->suggestionsOffset = this->suggestions.begin();
return ret;
}
bool Reader::searchSuggestions(const string& prefix,
unsigned int suggestionsCount,
SuggestionsList_t& results)
{
bool retVal = false;
/* Return if no prefix */ /* Return if no prefix */
if (prefix.size() == 0) { if (prefix.size() == 0) {
return false; return false;
@ -734,7 +748,7 @@ bool Reader::searchSuggestions(const string& prefix,
for (auto articleItr = zimFileHandler->findByTitle('A', prefix); for (auto articleItr = zimFileHandler->findByTitle('A', prefix);
articleItr != zimFileHandler->end() articleItr != zimFileHandler->end()
&& articleItr->getTitle().compare(0, prefix.size(), prefix) == 0 && articleItr->getTitle().compare(0, prefix.size(), prefix) == 0
&& this->suggestions.size() < suggestionsCount; && results.size() < suggestionsCount;
++articleItr) { ++articleItr) {
/* Extract the interesting part of article title & url */ /* Extract the interesting part of article title & url */
std::string normalizedArticleTitle std::string normalizedArticleTitle
@ -754,8 +768,8 @@ bool Reader::searchSuggestions(const string& prefix,
title) */ title) */
bool insert = true; bool insert = true;
std::vector<std::vector<std::string>>::iterator suggestionItr; std::vector<std::vector<std::string>>::iterator suggestionItr;
for (suggestionItr = this->suggestions.begin(); for (suggestionItr = results.begin();
suggestionItr != this->suggestions.end(); suggestionItr != results.end();
suggestionItr++) { suggestionItr++) {
int result = normalizedArticleTitle.compare((*suggestionItr)[2]); int result = normalizedArticleTitle.compare((*suggestionItr)[2]);
if (result == 0 && articleFinalUrl.compare((*suggestionItr)[1]) == 0) { if (result == 0 && articleFinalUrl.compare((*suggestionItr)[1]) == 0) {
@ -772,16 +786,13 @@ bool Reader::searchSuggestions(const string& prefix,
suggestion.push_back(articleItr->getTitle()); suggestion.push_back(articleItr->getTitle());
suggestion.push_back(articleFinalUrl); suggestion.push_back(articleFinalUrl);
suggestion.push_back(normalizedArticleTitle); suggestion.push_back(normalizedArticleTitle);
this->suggestions.insert(suggestionItr, suggestion); results.insert(suggestionItr, suggestion);
} }
/* Suggestions were found */ /* Suggestions were found */
retVal = true; retVal = true;
} }
/* Set the cursor to the begining */
this->suggestionsOffset = this->suggestions.begin();
return retVal; return retVal;
} }
@ -796,15 +807,28 @@ std::vector<std::string> Reader::getTitleVariants(
return variants; return variants;
} }
/* Try also a few variations of the prefix to have better results */
bool Reader::searchSuggestionsSmart(const string& prefix, bool Reader::searchSuggestionsSmart(const string& prefix,
unsigned int suggestionsCount) unsigned int suggestionsCount)
{
this->suggestions.clear();
this->suggestionsOffset = this->suggestions.begin();
auto ret = searchSuggestionsSmart(prefix, suggestionsCount, this->suggestions);
this->suggestionsOffset = this->suggestions.begin();
return ret;
}
/* Try also a few variations of the prefix to have better results */
bool Reader::searchSuggestionsSmart(const string& prefix,
unsigned int suggestionsCount,
SuggestionsList_t& results)
{ {
std::vector<std::string> variants = this->getTitleVariants(prefix); std::vector<std::string> variants = this->getTitleVariants(prefix);
bool retVal = false; bool retVal = false;
this->suggestions.clear();
this->suggestionsOffset = this->suggestions.begin();
/* Try to search in the title using fulltext search database */ /* Try to search in the title using fulltext search database */
const auto suggestionSearch const auto suggestionSearch
= this->getZimFileHandler()->suggestions(prefix, 0, suggestionsCount); = this->getZimFileHandler()->suggestions(prefix, 0, suggestionsCount);
@ -820,15 +844,14 @@ bool Reader::searchSuggestionsSmart(const string& prefix,
suggestion.push_back(current->getTitle()); suggestion.push_back(current->getTitle());
suggestion.push_back("/A/" + current->getUrl()); suggestion.push_back("/A/" + current->getUrl());
suggestion.push_back(kiwix::normalize(current->getTitle())); suggestion.push_back(kiwix::normalize(current->getTitle()));
this->suggestions.push_back(suggestion); results.push_back(suggestion);
} }
this->suggestionsOffset = this->suggestions.begin();
retVal = true; retVal = true;
} else { } else {
for (std::vector<std::string>::iterator variantsItr = variants.begin(); for (std::vector<std::string>::iterator variantsItr = variants.begin();
variantsItr != variants.end(); variantsItr != variants.end();
variantsItr++) { variantsItr++) {
retVal = this->searchSuggestions(*variantsItr, suggestionsCount, false) retVal = this->searchSuggestions(*variantsItr, suggestionsCount, results)
|| retVal; || retVal;
} }
} }

View File

@ -543,7 +543,6 @@ Response InternalServer::handle_suggest(const RequestContext& request)
std::string mimeType; std::string mimeType;
unsigned int maxSuggestionCount = 10; unsigned int maxSuggestionCount = 10;
unsigned int suggestionCount = 0; unsigned int suggestionCount = 0;
std::string suggestion;
std::string bookName; std::string bookName;
std::string bookId; std::string bookId;
@ -567,11 +566,12 @@ Response InternalServer::handle_suggest(const RequestContext& request)
bool first = true; bool first = true;
if (reader != nullptr) { if (reader != nullptr) {
/* Get the suggestions */ /* Get the suggestions */
reader->searchSuggestionsSmart(term, maxSuggestionCount); SuggestionsList_t suggestions;
while (reader->getNextSuggestion(suggestion)) { reader->searchSuggestionsSmart(term, maxSuggestionCount, suggestions);
for(auto& suggestion:suggestions) {
MustacheData result; MustacheData result;
result.set("label", suggestion); result.set("label", suggestion[0]);
result.set("value", suggestion); result.set("value", suggestion[0]);
result.set("first", first); result.set("first", first);
first = false; first = false;
results.push_back(result); results.push_back(result);

View File

@ -355,9 +355,12 @@ Java_org_kiwix_kiwixlib_JNIKiwixReader_searchSuggestions(JNIEnv* env,
unsigned int cCount = jni2c(count, env); unsigned int cCount = jni2c(count, env);
try { try {
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
if (READER->searchSuggestionsSmart(cPrefix, cCount)) { if (READER->searchSuggestionsSmart(cPrefix, cCount)) {
retVal = JNI_TRUE; retVal = JNI_TRUE;
} }
#pragma GCC diagnostic pop
} catch (std::exception& e) { } catch (std::exception& e) {
LOG("Unable to get search results for pattern: %s", cPrefix.c_str()); LOG("Unable to get search results for pattern: %s", cPrefix.c_str());
LOG(e.what()); LOG(e.what());
@ -377,11 +380,14 @@ Java_org_kiwix_kiwixlib_JNIKiwixReader_getNextSuggestion(JNIEnv* env,
std::string cUrl; std::string cUrl;
try { try {
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
if (READER->getNextSuggestion(cTitle, cUrl)) { if (READER->getNextSuggestion(cTitle, cUrl)) {
setStringObjValue(cTitle, titleObj, env); setStringObjValue(cTitle, titleObj, env);
setStringObjValue(cUrl, urlObj, env); setStringObjValue(cUrl, urlObj, env);
retVal = JNI_TRUE; retVal = JNI_TRUE;
} }
#pragma GCC diagnostic pop
} catch (std::exception& e) { } catch (std::exception& e) {
LOG("Unable to get next suggestion"); LOG("Unable to get next suggestion");
LOG(e.what()); LOG(e.what());