Merge pull request #567 from kiwix/suggestion_api_fix

This commit is contained in:
Kelson 2021-08-14 19:21:29 +02:00 committed by GitHub
commit e9eaadde9e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 123 additions and 49 deletions

View File

@ -52,6 +52,7 @@ class Result
}; };
struct SearcherInternal; struct SearcherInternal;
struct SuggestionInternal;
/** /**
* The Searcher class is responsible for doing different kinds of search using the * The Searcher class is responsible for doing different kinds of search using the
* fulltext index. * fulltext index.
@ -160,6 +161,7 @@ class Searcher
std::vector<Reader*> readers; std::vector<Reader*> readers;
std::unique_ptr<SearcherInternal> internal; std::unique_ptr<SearcherInternal> internal;
std::unique_ptr<SuggestionInternal> suggestionInternal;
std::string searchPattern; std::string searchPattern;
unsigned int estimatedResultCount; unsigned int estimatedResultCount;
unsigned int resultStart; unsigned int resultStart;

View File

@ -21,6 +21,7 @@
#include <time.h> #include <time.h>
#include <zim/search.h> #include <zim/search.h>
#include <zim/suggestion.h>
#include <zim/item.h> #include <zim/item.h>
#include <zim/error.h> #include <zim/error.h>
@ -377,35 +378,36 @@ bool Reader::searchSuggestionsSmart(const string& prefix,
SuggestionsList_t& results) SuggestionsList_t& results)
{ {
std::vector<std::string> variants = this->getTitleVariants(prefix); std::vector<std::string> variants = this->getTitleVariants(prefix);
bool retVal = false;
/* Try to search in the title using fulltext search database */ auto suggestionSearcher = zim::SuggestionSearcher(*zimArchive);
auto suggestionSearcher = zim::Searcher(*zimArchive); if (zimArchive->hasTitleIndex()) {
zim::Query suggestionQuery; auto suggestionSearch = suggestionSearcher.suggest(prefix);
suggestionQuery.setQuery(prefix, true);
auto suggestionSearch = suggestionSearcher.search(suggestionQuery);
if (suggestionSearch.getEstimatedMatches()) {
const auto suggestions = suggestionSearch.getResults(0, suggestionsCount); const auto suggestions = suggestionSearch.getResults(0, suggestionsCount);
for (auto current = suggestions.begin(); for (auto current : suggestions) {
current != suggestions.end();
current++) {
SuggestionItem suggestion(current.getTitle(), kiwix::normalize(current.getTitle()), SuggestionItem suggestion(current.getTitle(), kiwix::normalize(current.getTitle()),
current.getPath(), current.getSnippet()); current.getPath(), current.getSnippet());
results.push_back(suggestion); results.push_back(suggestion);
} }
retVal = true;
} else { } else {
// Check some of the variants of the prefix
for (std::vector<std::string>::iterator variantsItr = variants.begin(); for (std::vector<std::string>::iterator variantsItr = variants.begin();
variantsItr != variants.end(); variantsItr != variants.end();
variantsItr++) { variantsItr++) {
retVal = this->searchSuggestions(*variantsItr, suggestionsCount, results) auto suggestionSearch = suggestionSearcher.suggest(*variantsItr);
|| retVal; for (auto current : suggestionSearch.getResults(0, suggestionsCount)) {
if (results.size() >= suggestionsCount) {
break;
}
SuggestionItem suggestion(current.getTitle(), kiwix::normalize(current.getTitle()),
current.getPath(), current.getSnippet());
results.push_back(suggestion);
}
} }
} }
return retVal; return results.size() > 0;
} }
/* Get next suggestion */ /* Get next suggestion */

View File

@ -23,6 +23,7 @@
#include "reader.h" #include "reader.h"
#include <zim/search.h> #include <zim/search.h>
#include <zim/suggestion.h>
#include <mustache.hpp> #include <mustache.hpp>
#include <cmath> #include <cmath>
@ -37,6 +38,7 @@ class _Result : public Result
{ {
public: public:
_Result(zim::SearchResultSet::iterator iterator); _Result(zim::SearchResultSet::iterator iterator);
_Result(SuggestionItem suggestionItem);
virtual ~_Result(){}; virtual ~_Result(){};
virtual std::string get_url(); virtual std::string get_url();
@ -50,6 +52,8 @@ class _Result : public Result
private: private:
zim::SearchResultSet::iterator iterator; zim::SearchResultSet::iterator iterator;
SuggestionItem suggestionItem;
bool isSuggestion;
}; };
struct SearcherInternal : zim::SearchResultSet { struct SearcherInternal : zim::SearchResultSet {
@ -62,6 +66,14 @@ struct SearcherInternal : zim::SearchResultSet {
zim::SearchResultSet::iterator current_iterator; zim::SearchResultSet::iterator current_iterator;
}; };
/**
 * Private state for a suggestion query: a copy of the result set plus a
 * cursor into it. Searcher::getNextResult() reads the entry under
 * currentIterator, then advances it until it equals end().
 */
struct SuggestionInternal : zim::SuggestionResultSet {
  /**
   * @param srs Result set to copy. The caller may pass a temporary.
   */
  explicit SuggestionInternal(const zim::SuggestionResultSet& srs)
    : zim::SuggestionResultSet(srs),
      // Take the cursor from the copy owned by this object (the base class is
      // already constructed here) rather than from `srs`: `srs` may be a
      // temporary that dies right after construction, and the cursor is later
      // compared against this object's own end().
      currentIterator(begin()) {}

  /** Position of the next suggestion to hand out. */
  zim::SuggestionResultSet::iterator currentIterator;
};
/* Constructor */ /* Constructor */
Searcher::Searcher() Searcher::Searcher()
: searchPattern(""), : searchPattern(""),
@ -119,9 +131,9 @@ void Searcher::search(const std::string& search,
} }
} }
zim::Searcher searcher(archives); zim::Searcher searcher(archives);
searcher.setVerbose(verbose);
zim::Query query; zim::Query query;
query.setQuery(unaccentedSearch, false); query.setQuery(unaccentedSearch);
query.setVerbose(verbose);
zim::Search search = searcher.search(query); zim::Search search = searcher.search(query);
internal.reset(new SearcherInternal(search.getResults(resultStart, maxResultCount))); internal.reset(new SearcherInternal(search.getResults(resultStart, maxResultCount)));
this->estimatedResultCount = search.getEstimatedMatches(); this->estimatedResultCount = search.getEstimatedMatches();
@ -160,9 +172,9 @@ void Searcher::geo_search(float latitude, float longitude, float distance,
archives.push_back(*(*current)->getZimArchive()); archives.push_back(*(*current)->getZimArchive());
} }
zim::Searcher searcher(archives); zim::Searcher searcher(archives);
searcher.setVerbose(verbose);
zim::Query query; zim::Query query;
query.setVerbose(verbose); query.setQuery("");
query.setQuery("", false);
query.setGeorange(latitude, longitude, distance); query.setGeorange(latitude, longitude, distance);
zim::Search search = searcher.search(query); zim::Search search = searcher.search(query);
internal.reset(new SearcherInternal(search.getResults(resultStart, maxResultCount))); internal.reset(new SearcherInternal(search.getResults(resultStart, maxResultCount)));
@ -179,11 +191,21 @@ void Searcher::restart_search()
Result* Searcher::getNextResult() Result* Searcher::getNextResult()
{ {
if (internal.get() && if (internal.get() && internal->current_iterator != internal->end()) {
internal->current_iterator != internal->end()) {
Result* result = new _Result(internal->current_iterator); Result* result = new _Result(internal->current_iterator);
internal->current_iterator++; internal->current_iterator++;
return result; return result;
} else if (suggestionInternal.get() &&
suggestionInternal->currentIterator != suggestionInternal->end()) {
SuggestionItem item(
suggestionInternal->currentIterator->getTitle(),
normalize(suggestionInternal->currentIterator->getTitle()),
suggestionInternal->currentIterator->getPath(),
suggestionInternal->currentIterator->getSnippet()
);
Result* result = new _Result(item);
suggestionInternal->currentIterator++;
return result;
} }
return NULL; return NULL;
} }
@ -209,17 +231,12 @@ void Searcher::suggestions(std::string& searchPattern, const bool verbose)
this->maxResultCount = 10; this->maxResultCount = 10;
string unaccentedSearch = removeAccents(searchPattern); string unaccentedSearch = removeAccents(searchPattern);
std::vector<zim::Archive> archives; // Multizim suggestion is not supported as of now! taking only one archive
for (auto current = this->readers.begin(); current != this->readers.end(); zim::Archive archive = *(*this->readers.begin())->getZimArchive();
current++) { zim::SuggestionSearcher searcher(archive);
archives.push_back(*(*current)->getZimArchive()); searcher.setVerbose(verbose);
} zim::SuggestionSearch search = searcher.suggest(searchPattern);
zim::Searcher searcher(archives); suggestionInternal.reset(new SuggestionInternal(search.getResults(resultStart, maxResultCount)));
zim::Query query;
query.setVerbose(verbose);
query.setQuery(unaccentedSearch, true);
zim::Search search = searcher.search(query);
internal.reset(new SearcherInternal(search.getResults(resultStart, maxResultCount)));
this->estimatedResultCount = search.getEstimatedMatches(); this->estimatedResultCount = search.getEstimatedMatches();
} }
@ -235,40 +252,69 @@ zim::SearchResultSet Searcher::getSearchResultSet()
} }
_Result::_Result(zim::SearchResultSet::iterator iterator) _Result::_Result(zim::SearchResultSet::iterator iterator)
: iterator(iterator) : iterator(iterator),
{ suggestionItem("", "", ""),
} isSuggestion(false)
{}
/* Build a result backed by a suggestion item instead of a fulltext search
 * iterator. The search iterator member is default-constructed, the stored
 * SuggestionItem is rebuilt from the title, normalized title, path and
 * snippet of `item`, and isSuggestion is set to true. */
_Result::_Result(SuggestionItem item)
: iterator(),
suggestionItem(item.getTitle(), item.getNormalizedTitle(), item.getPath(), item.getSnippet()),
isSuggestion(true)
{}
std::string _Result::get_url() std::string _Result::get_url()
{ {
if (isSuggestion) {
return suggestionItem.getPath();
}
return iterator.getPath(); return iterator.getPath();
} }
std::string _Result::get_title() std::string _Result::get_title()
{ {
if (isSuggestion) {
return suggestionItem.getTitle();
}
return iterator.getTitle(); return iterator.getTitle();
} }
int _Result::get_score() int _Result::get_score()
{ {
if (isSuggestion) {
return 0;
}
return iterator.getScore(); return iterator.getScore();
} }
std::string _Result::get_snippet() std::string _Result::get_snippet()
{ {
if (isSuggestion) {
return suggestionItem.getSnippet();
}
return iterator.getSnippet(); return iterator.getSnippet();
} }
std::string _Result::get_content() std::string _Result::get_content()
{ {
if (isSuggestion) return "";
return iterator->getItem(true).getData(); return iterator->getItem(true).getData();
} }
int _Result::get_size() int _Result::get_size()
{ {
if (isSuggestion) {
return 0;
}
return iterator.getSize(); return iterator.getSize();
} }
int _Result::get_wordCount() int _Result::get_wordCount()
{ {
if (isSuggestion) {
return 0;
}
return iterator.getWordCount(); return iterator.getWordCount();
} }
std::string _Result::get_zimId() std::string _Result::get_zimId()
{ {
if (isSuggestion) {
return "";
}
std::ostringstream s; std::ostringstream s;
s << iterator.getZimId(); s << iterator.getZimId();
return s.str(); return s.str();

View File

@ -58,6 +58,7 @@ extern "C" {
#include <zim/uuid.h> #include <zim/uuid.h>
#include <zim/error.h> #include <zim/error.h>
#include <zim/search.h> #include <zim/search.h>
#include <zim/suggestion.h>
#include <zim/entry.h> #include <zim/entry.h>
#include <zim/item.h> #include <zim/item.h>
@ -347,14 +348,12 @@ SuggestionsList_t getSuggestions(const zim::Archive* const archive,
const std::string& queryString, int suggestionCount) const std::string& queryString, int suggestionCount)
{ {
SuggestionsList_t suggestions; SuggestionsList_t suggestions;
auto searcher = zim::SuggestionSearcher(*archive);
if (archive->hasTitleIndex()) { if (archive->hasTitleIndex()) {
auto searcher = zim::Searcher(*archive); auto search = searcher.suggest(queryString);
zim::Query suggestionQuery; auto srs = search.getResults(0, suggestionCount);
suggestionQuery.setQuery(queryString, true);
auto suggestionSearch = searcher.search(suggestionQuery);
auto suggestionResult = suggestionSearch.getResults(0, suggestionCount);
for (auto it = suggestionResult.begin(); it != suggestionResult.end(); it++) { for (auto it : srs) {
SuggestionItem suggestion(it.getTitle(), kiwix::normalize(it.getTitle()), SuggestionItem suggestion(it.getTitle(), kiwix::normalize(it.getTitle()),
it.getPath(), it.getSnippet()); it.getPath(), it.getSnippet());
suggestions.push_back(suggestion); suggestions.push_back(suggestion);
@ -364,9 +363,11 @@ SuggestionsList_t getSuggestions(const zim::Archive* const archive,
std::vector<std::string> variants = getTitleVariants(queryString); std::vector<std::string> variants = getTitleVariants(queryString);
int currCount = 0; int currCount = 0;
for (auto it = variants.begin(); it != variants.end() && currCount < suggestionCount; it++) { for (auto it = variants.begin(); it != variants.end() && currCount < suggestionCount; it++) {
for (auto& entry: archive->findByTitle(*it)) { auto search = searcher.suggest(queryString);
SuggestionItem suggestion(entry.getTitle(), kiwix::normalize(entry.getTitle()), auto srs = search.getResults(0, suggestionCount);
entry.getPath()); for (auto it : srs) {
SuggestionItem suggestion(it.getTitle(), kiwix::normalize(it.getTitle()),
it.getPath());
suggestions.push_back(suggestion); suggestions.push_back(suggestion);
currCount++; currCount++;
} }
@ -610,8 +611,7 @@ std::unique_ptr<Response> InternalServer::handle_search(const RequestContext& re
cout << "Performing geo query `" << distance << "&(" << latitude << ";" << longitude << ")'" << endl; cout << "Performing geo query `" << distance << "&(" << latitude << ";" << longitude << ")'" << endl;
} }
query.setVerbose(m_verbose.load()); query.setQuery("");
query.setQuery("", false);
query.setGeorange(latitude, longitude, distance); query.setGeorange(latitude, longitude, distance);
} else { } else {
// Execute Ft search // Execute Ft search
@ -620,8 +620,7 @@ std::unique_ptr<Response> InternalServer::handle_search(const RequestContext& re
} }
std::string queryString = removeAccents(patternString); std::string queryString = removeAccents(patternString);
query.setQuery(queryString, false); query.setQuery(queryString);
query.setVerbose(m_verbose.load());
} }
zim::Search search = searcher->search(query); zim::Search search = searcher->search(query);

View File

@ -22,12 +22,37 @@ TEST(Searcher, search) {
ASSERT_EQ(result->get_title(), "Wikibooks"); ASSERT_EQ(result->get_title(), "Wikibooks");
} }
// Checks that Searcher::suggestions() exposes title suggestions through
// getNextResult(), and that fields which only exist for fulltext results
// (score, content, size, word count, zim id) come back as 0 or "".
TEST(Searcher, suggestion) {
  Reader reader("./test/zimfile.zim");

  Searcher searcher;
  searcher.add_reader(&reader);
  ASSERT_EQ(searcher.get_reader(0)->getTitle(), reader.getTitle());

  // suggestions() takes a non-const reference, so the pattern must be an lvalue.
  std::string query = "ray";
  searcher.suggestions(query, true);
  searcher.restart_search();

  // getNextResult() returns NULL when no result is left; check before
  // dereferencing so a missing suggestion fails the test instead of crashing.
  // NOTE(review): getNextResult() allocates each result with `new`; nothing
  // here releases it -- confirm the intended ownership before adding a delete.
  auto result = searcher.getNextResult();
  ASSERT_TRUE(result != nullptr);
  ASSERT_EQ(result->get_title(), "Charles, Ray");
  ASSERT_EQ(result->get_url(), "A/Charles,_Ray");
  ASSERT_EQ(result->get_snippet(), "Charles, <b>Ray</b>");
  ASSERT_EQ(result->get_score(), 0);
  ASSERT_EQ(result->get_content(), "");
  ASSERT_EQ(result->get_size(), 0);
  ASSERT_EQ(result->get_wordCount(), 0);
  ASSERT_EQ(result->get_zimId(), "");

  result = searcher.getNextResult();
  ASSERT_TRUE(result != nullptr);
  ASSERT_EQ(result->get_title(), "Ray (film)");
}
TEST(Searcher, incrementalRange) { TEST(Searcher, incrementalRange) {
// Attempt to get 50 results in steps of 5 // Attempt to get 50 results in steps of 5
zim::Archive archive("./test/zimfile.zim"); zim::Archive archive("./test/zimfile.zim");
zim::Searcher ftsearcher(archive); zim::Searcher ftsearcher(archive);
zim::Query query; zim::Query query;
query.setQuery("ray", false); query.setQuery("ray");
auto search = ftsearcher.search(query); auto search = ftsearcher.search(query);
int suggCount = 0; int suggCount = 0;