mirror of https://github.com/kiwix/libkiwix.git
Merge pull request #374 from kiwix/new_api_multithread_suggestion
Add new thread safe suggestion API.
This commit is contained in:
commit
f0b037f37f
|
@ -43,6 +43,8 @@ namespace kiwix
|
||||||
* The Reader class is the class who allow to get an entry content from a zim
|
* The Reader class is the class who allow to get an entry content from a zim
|
||||||
* file.
|
* file.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
using SuggestionsList_t = std::vector<std::vector<std::string>>;
|
||||||
class Reader
|
class Reader
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
@ -419,6 +421,10 @@ class Reader
|
||||||
*
|
*
|
||||||
* Suggestions are stored in an internal vector and can be retrieved using
|
* Suggestions are stored in an internal vector and can be retrieved using
|
||||||
* `getNextSuggestion` method.
|
* `getNextSuggestion` method.
|
||||||
|
* This method is not thread safe and is deprecated. Use :
|
||||||
|
* bool searchSuggestions(const string& prefix,
|
||||||
|
* unsigned int suggestionsCount,
|
||||||
|
* SuggestionsList_t& results);
|
||||||
*
|
*
|
||||||
* @param prefix The prefix to search.
|
* @param prefix The prefix to search.
|
||||||
* @param suggestionsCount How many suggestions to search for.
|
* @param suggestionsCount How many suggestions to search for.
|
||||||
|
@ -426,12 +432,49 @@ class Reader
|
||||||
* If false, add suggestions to the internal vector
|
* If false, add suggestions to the internal vector
|
||||||
* (until internal vector size is suggestionCount (or no more
|
* (until internal vector size is suggestionCount (or no more
|
||||||
* suggestion))
|
* suggestion))
|
||||||
* @return True if some suggestions where added to the internal vector.
|
* @return True if some suggestions have been added to the internal vector.
|
||||||
*/
|
*/
|
||||||
bool searchSuggestions(const string& prefix,
|
DEPRECATED bool searchSuggestions(const string& prefix,
|
||||||
unsigned int suggestionsCount,
|
unsigned int suggestionsCount,
|
||||||
const bool reset = true);
|
const bool reset = true);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Search for entries with title starting with prefix (case sensitive).
|
||||||
|
*
|
||||||
|
* Suggestions are added to the `results` vector.
|
||||||
|
*
|
||||||
|
* @param prefix The prefix to search.
|
||||||
|
* @param suggestionsCount How many suggestions to search for.
|
||||||
|
* @param results The vector where to store the suggestions.
|
||||||
|
* @return True if some suggestions have been added to the vector.
|
||||||
|
*/
|
||||||
|
|
||||||
|
bool searchSuggestions(const string& prefix,
|
||||||
|
unsigned int suggestionsCount,
|
||||||
|
SuggestionsList_t& resuls);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Search for entries for the given prefix.
|
||||||
|
*
|
||||||
|
* If the zim file has an internal fulltext index, the suggestions will be
|
||||||
|
* searched using it.
|
||||||
|
* Else the suggestions will be searched using `searchSuggestions` while trying
|
||||||
|
* to be smart about case sensitivity (using `getTitleVariants`).
|
||||||
|
*
|
||||||
|
* In any case, suggestions are stored in an internal vector and can be
|
||||||
|
* retrieved using `getNextSuggestion` method.
|
||||||
|
* The internal vector will be reset.
|
||||||
|
* This method is not thread safe and is deprecated. Use :
|
||||||
|
* bool searchSuggestionsSmart(const string& prefix,
|
||||||
|
* unsigned int suggestionsCount,
|
||||||
|
* SuggestionsList_t& results);
|
||||||
|
*
|
||||||
|
* @param prefix The prefix to search for.
|
||||||
|
* @param suggestionsCount How many suggestions to search for.
|
||||||
|
*/
|
||||||
|
DEPRECATED bool searchSuggestionsSmart(const string& prefix,
|
||||||
|
unsigned int suggestionsCount);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Search for entries for the given prefix.
|
* Search for entries for the given prefix.
|
||||||
*
|
*
|
||||||
|
@ -446,9 +489,13 @@ class Reader
|
||||||
*
|
*
|
||||||
* @param prefix The prefix to search for.
|
* @param prefix The prefix to search for.
|
||||||
* @param suggestionsCount How many suggestions to search for.
|
* @param suggestionsCount How many suggestions to search for.
|
||||||
|
* @param results The vector where to store the suggestions
|
||||||
|
* @return True if some suggestions have been added to the results.
|
||||||
*/
|
*/
|
||||||
bool searchSuggestionsSmart(const string& prefix,
|
bool searchSuggestionsSmart(const string& prefix,
|
||||||
unsigned int suggestionsCount);
|
unsigned int suggestionsCount,
|
||||||
|
SuggestionsList_t& results);
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Check if the url exists in the zim file.
|
* Check if the url exists in the zim file.
|
||||||
|
@ -490,7 +537,7 @@ class Reader
|
||||||
* @param[out] title the title of the suggestion.
|
* @param[out] title the title of the suggestion.
|
||||||
* @return True if title has been set.
|
* @return True if title has been set.
|
||||||
*/
|
*/
|
||||||
bool getNextSuggestion(string& title);
|
DEPRECATED bool getNextSuggestion(string& title);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get the next suggestion title and url.
|
* Get the next suggestion title and url.
|
||||||
|
@ -499,7 +546,7 @@ class Reader
|
||||||
* @param[out] url the url of the suggestion.
|
* @param[out] url the url of the suggestion.
|
||||||
* @return True if title and url have been set.
|
* @return True if title and url have been set.
|
||||||
*/
|
*/
|
||||||
bool getNextSuggestion(string& title, string& url);
|
DEPRECATED bool getNextSuggestion(string& title, string& url);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get if we can check zim file integrity (has a checksum).
|
* Get if we can check zim file integrity (has a checksum).
|
||||||
|
@ -559,8 +606,8 @@ class Reader
|
||||||
zim::size_type nsICount;
|
zim::size_type nsICount;
|
||||||
std::string zimFilePath;
|
std::string zimFilePath;
|
||||||
|
|
||||||
std::vector<std::vector<std::string>> suggestions;
|
SuggestionsList_t suggestions;
|
||||||
std::vector<std::vector<std::string>>::iterator suggestionsOffset;
|
SuggestionsList_t::iterator suggestionsOffset;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::map<const std::string, unsigned int> parseCounterMetadata() const;
|
std::map<const std::string, unsigned int> parseCounterMetadata() const;
|
||||||
|
|
|
@ -709,12 +709,11 @@ bool Reader::hasFulltextIndex() const
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Search titles by prefix */
|
/* Search titles by prefix */
|
||||||
|
|
||||||
bool Reader::searchSuggestions(const string& prefix,
|
bool Reader::searchSuggestions(const string& prefix,
|
||||||
unsigned int suggestionsCount,
|
unsigned int suggestionsCount,
|
||||||
const bool reset)
|
const bool reset)
|
||||||
{
|
{
|
||||||
bool retVal = false;
|
|
||||||
|
|
||||||
/* Reset the suggestions otherwise check if the suggestions number is less
|
/* Reset the suggestions otherwise check if the suggestions number is less
|
||||||
* than the suggestionsCount */
|
* than the suggestionsCount */
|
||||||
if (reset) {
|
if (reset) {
|
||||||
|
@ -726,6 +725,21 @@ bool Reader::searchSuggestions(const string& prefix,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
auto ret = searchSuggestions(prefix, suggestionsCount, this->suggestions);
|
||||||
|
|
||||||
|
/* Set the cursor to the beginning */
|
||||||
|
this->suggestionsOffset = this->suggestions.begin();
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
bool Reader::searchSuggestions(const string& prefix,
|
||||||
|
unsigned int suggestionsCount,
|
||||||
|
SuggestionsList_t& results)
|
||||||
|
{
|
||||||
|
bool retVal = false;
|
||||||
|
|
||||||
/* Return if no prefix */
|
/* Return if no prefix */
|
||||||
if (prefix.size() == 0) {
|
if (prefix.size() == 0) {
|
||||||
return false;
|
return false;
|
||||||
|
@ -734,7 +748,7 @@ bool Reader::searchSuggestions(const string& prefix,
|
||||||
for (auto articleItr = zimFileHandler->findByTitle('A', prefix);
|
for (auto articleItr = zimFileHandler->findByTitle('A', prefix);
|
||||||
articleItr != zimFileHandler->end()
|
articleItr != zimFileHandler->end()
|
||||||
&& articleItr->getTitle().compare(0, prefix.size(), prefix) == 0
|
&& articleItr->getTitle().compare(0, prefix.size(), prefix) == 0
|
||||||
&& this->suggestions.size() < suggestionsCount;
|
&& results.size() < suggestionsCount;
|
||||||
++articleItr) {
|
++articleItr) {
|
||||||
/* Extract the interesting part of article title & url */
|
/* Extract the interesting part of article title & url */
|
||||||
std::string normalizedArticleTitle
|
std::string normalizedArticleTitle
|
||||||
|
@ -754,8 +768,8 @@ bool Reader::searchSuggestions(const string& prefix,
|
||||||
title) */
|
title) */
|
||||||
bool insert = true;
|
bool insert = true;
|
||||||
std::vector<std::vector<std::string>>::iterator suggestionItr;
|
std::vector<std::vector<std::string>>::iterator suggestionItr;
|
||||||
for (suggestionItr = this->suggestions.begin();
|
for (suggestionItr = results.begin();
|
||||||
suggestionItr != this->suggestions.end();
|
suggestionItr != results.end();
|
||||||
suggestionItr++) {
|
suggestionItr++) {
|
||||||
int result = normalizedArticleTitle.compare((*suggestionItr)[2]);
|
int result = normalizedArticleTitle.compare((*suggestionItr)[2]);
|
||||||
if (result == 0 && articleFinalUrl.compare((*suggestionItr)[1]) == 0) {
|
if (result == 0 && articleFinalUrl.compare((*suggestionItr)[1]) == 0) {
|
||||||
|
@ -772,16 +786,13 @@ bool Reader::searchSuggestions(const string& prefix,
|
||||||
suggestion.push_back(articleItr->getTitle());
|
suggestion.push_back(articleItr->getTitle());
|
||||||
suggestion.push_back(articleFinalUrl);
|
suggestion.push_back(articleFinalUrl);
|
||||||
suggestion.push_back(normalizedArticleTitle);
|
suggestion.push_back(normalizedArticleTitle);
|
||||||
this->suggestions.insert(suggestionItr, suggestion);
|
results.insert(suggestionItr, suggestion);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Suggestions where found */
|
/* Suggestions where found */
|
||||||
retVal = true;
|
retVal = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Set the cursor to the begining */
|
|
||||||
this->suggestionsOffset = this->suggestions.begin();
|
|
||||||
|
|
||||||
return retVal;
|
return retVal;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -796,15 +807,28 @@ std::vector<std::string> Reader::getTitleVariants(
|
||||||
return variants;
|
return variants;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Try also a few variations of the prefix to have better results */
|
|
||||||
bool Reader::searchSuggestionsSmart(const string& prefix,
|
bool Reader::searchSuggestionsSmart(const string& prefix,
|
||||||
unsigned int suggestionsCount)
|
unsigned int suggestionsCount)
|
||||||
|
{
|
||||||
|
this->suggestions.clear();
|
||||||
|
this->suggestionsOffset = this->suggestions.begin();
|
||||||
|
|
||||||
|
auto ret = searchSuggestionsSmart(prefix, suggestionsCount, this->suggestions);
|
||||||
|
|
||||||
|
this->suggestionsOffset = this->suggestions.begin();
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Try also a few variations of the prefix to have better results */
|
||||||
|
bool Reader::searchSuggestionsSmart(const string& prefix,
|
||||||
|
unsigned int suggestionsCount,
|
||||||
|
SuggestionsList_t& results)
|
||||||
{
|
{
|
||||||
std::vector<std::string> variants = this->getTitleVariants(prefix);
|
std::vector<std::string> variants = this->getTitleVariants(prefix);
|
||||||
bool retVal = false;
|
bool retVal = false;
|
||||||
|
|
||||||
this->suggestions.clear();
|
|
||||||
this->suggestionsOffset = this->suggestions.begin();
|
|
||||||
/* Try to search in the title using fulltext search database */
|
/* Try to search in the title using fulltext search database */
|
||||||
const auto suggestionSearch
|
const auto suggestionSearch
|
||||||
= this->getZimFileHandler()->suggestions(prefix, 0, suggestionsCount);
|
= this->getZimFileHandler()->suggestions(prefix, 0, suggestionsCount);
|
||||||
|
@ -820,15 +844,14 @@ bool Reader::searchSuggestionsSmart(const string& prefix,
|
||||||
suggestion.push_back(current->getTitle());
|
suggestion.push_back(current->getTitle());
|
||||||
suggestion.push_back("/A/" + current->getUrl());
|
suggestion.push_back("/A/" + current->getUrl());
|
||||||
suggestion.push_back(kiwix::normalize(current->getTitle()));
|
suggestion.push_back(kiwix::normalize(current->getTitle()));
|
||||||
this->suggestions.push_back(suggestion);
|
results.push_back(suggestion);
|
||||||
}
|
}
|
||||||
this->suggestionsOffset = this->suggestions.begin();
|
|
||||||
retVal = true;
|
retVal = true;
|
||||||
} else {
|
} else {
|
||||||
for (std::vector<std::string>::iterator variantsItr = variants.begin();
|
for (std::vector<std::string>::iterator variantsItr = variants.begin();
|
||||||
variantsItr != variants.end();
|
variantsItr != variants.end();
|
||||||
variantsItr++) {
|
variantsItr++) {
|
||||||
retVal = this->searchSuggestions(*variantsItr, suggestionsCount, false)
|
retVal = this->searchSuggestions(*variantsItr, suggestionsCount, results)
|
||||||
|| retVal;
|
|| retVal;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -543,7 +543,6 @@ Response InternalServer::handle_suggest(const RequestContext& request)
|
||||||
std::string mimeType;
|
std::string mimeType;
|
||||||
unsigned int maxSuggestionCount = 10;
|
unsigned int maxSuggestionCount = 10;
|
||||||
unsigned int suggestionCount = 0;
|
unsigned int suggestionCount = 0;
|
||||||
std::string suggestion;
|
|
||||||
|
|
||||||
std::string bookName;
|
std::string bookName;
|
||||||
std::string bookId;
|
std::string bookId;
|
||||||
|
@ -567,11 +566,12 @@ Response InternalServer::handle_suggest(const RequestContext& request)
|
||||||
bool first = true;
|
bool first = true;
|
||||||
if (reader != nullptr) {
|
if (reader != nullptr) {
|
||||||
/* Get the suggestions */
|
/* Get the suggestions */
|
||||||
reader->searchSuggestionsSmart(term, maxSuggestionCount);
|
SuggestionsList_t suggestions;
|
||||||
while (reader->getNextSuggestion(suggestion)) {
|
reader->searchSuggestionsSmart(term, maxSuggestionCount, suggestions);
|
||||||
|
for(auto& suggestion:suggestions) {
|
||||||
MustacheData result;
|
MustacheData result;
|
||||||
result.set("label", suggestion);
|
result.set("label", suggestion[0]);
|
||||||
result.set("value", suggestion);
|
result.set("value", suggestion[0]);
|
||||||
result.set("first", first);
|
result.set("first", first);
|
||||||
first = false;
|
first = false;
|
||||||
results.push_back(result);
|
results.push_back(result);
|
||||||
|
|
|
@ -355,9 +355,12 @@ Java_org_kiwix_kiwixlib_JNIKiwixReader_searchSuggestions(JNIEnv* env,
|
||||||
unsigned int cCount = jni2c(count, env);
|
unsigned int cCount = jni2c(count, env);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
#pragma GCC diagnostic push
|
||||||
|
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
|
||||||
if (READER->searchSuggestionsSmart(cPrefix, cCount)) {
|
if (READER->searchSuggestionsSmart(cPrefix, cCount)) {
|
||||||
retVal = JNI_TRUE;
|
retVal = JNI_TRUE;
|
||||||
}
|
}
|
||||||
|
#pragma GCC diagnostic pop
|
||||||
} catch (std::exception& e) {
|
} catch (std::exception& e) {
|
||||||
LOG("Unable to get search results for pattern: %s", cPrefix.c_str());
|
LOG("Unable to get search results for pattern: %s", cPrefix.c_str());
|
||||||
LOG(e.what());
|
LOG(e.what());
|
||||||
|
@ -377,11 +380,14 @@ Java_org_kiwix_kiwixlib_JNIKiwixReader_getNextSuggestion(JNIEnv* env,
|
||||||
std::string cUrl;
|
std::string cUrl;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
#pragma GCC diagnostic push
|
||||||
|
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
|
||||||
if (READER->getNextSuggestion(cTitle, cUrl)) {
|
if (READER->getNextSuggestion(cTitle, cUrl)) {
|
||||||
setStringObjValue(cTitle, titleObj, env);
|
setStringObjValue(cTitle, titleObj, env);
|
||||||
setStringObjValue(cUrl, urlObj, env);
|
setStringObjValue(cUrl, urlObj, env);
|
||||||
retVal = JNI_TRUE;
|
retVal = JNI_TRUE;
|
||||||
}
|
}
|
||||||
|
#pragma GCC diagnostic pop
|
||||||
} catch (std::exception& e) {
|
} catch (std::exception& e) {
|
||||||
LOG("Unable to get next suggestion");
|
LOG("Unable to get next suggestion");
|
||||||
LOG(e.what());
|
LOG(e.what());
|
||||||
|
|
Loading…
Reference in New Issue