mirror of https://github.com/kiwix/libkiwix.git
Merge pull request #567 from kiwix/suggestion_api_fix
This commit is contained in:
commit
e9eaadde9e
|
@ -52,6 +52,7 @@ class Result
|
||||||
};
|
};
|
||||||
|
|
||||||
struct SearcherInternal;
|
struct SearcherInternal;
|
||||||
|
struct SuggestionInternal;
|
||||||
/**
|
/**
|
||||||
* The Searcher class is responsible for performing different kinds of searches using the
|
* The Searcher class is responsible for performing different kinds of searches using the
|
||||||
* fulltext index.
|
* fulltext index.
|
||||||
|
@ -160,6 +161,7 @@ class Searcher
|
||||||
|
|
||||||
std::vector<Reader*> readers;
|
std::vector<Reader*> readers;
|
||||||
std::unique_ptr<SearcherInternal> internal;
|
std::unique_ptr<SearcherInternal> internal;
|
||||||
|
std::unique_ptr<SuggestionInternal> suggestionInternal;
|
||||||
std::string searchPattern;
|
std::string searchPattern;
|
||||||
unsigned int estimatedResultCount;
|
unsigned int estimatedResultCount;
|
||||||
unsigned int resultStart;
|
unsigned int resultStart;
|
||||||
|
|
|
@ -21,6 +21,7 @@
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
|
|
||||||
#include <zim/search.h>
|
#include <zim/search.h>
|
||||||
|
#include <zim/suggestion.h>
|
||||||
#include <zim/item.h>
|
#include <zim/item.h>
|
||||||
#include <zim/error.h>
|
#include <zim/error.h>
|
||||||
|
|
||||||
|
@ -377,35 +378,36 @@ bool Reader::searchSuggestionsSmart(const string& prefix,
|
||||||
SuggestionsList_t& results)
|
SuggestionsList_t& results)
|
||||||
{
|
{
|
||||||
std::vector<std::string> variants = this->getTitleVariants(prefix);
|
std::vector<std::string> variants = this->getTitleVariants(prefix);
|
||||||
bool retVal = false;
|
|
||||||
|
|
||||||
/* Try to search in the title using fulltext search database */
|
auto suggestionSearcher = zim::SuggestionSearcher(*zimArchive);
|
||||||
|
|
||||||
auto suggestionSearcher = zim::Searcher(*zimArchive);
|
if (zimArchive->hasTitleIndex()) {
|
||||||
zim::Query suggestionQuery;
|
auto suggestionSearch = suggestionSearcher.suggest(prefix);
|
||||||
suggestionQuery.setQuery(prefix, true);
|
|
||||||
auto suggestionSearch = suggestionSearcher.search(suggestionQuery);
|
|
||||||
|
|
||||||
if (suggestionSearch.getEstimatedMatches()) {
|
|
||||||
const auto suggestions = suggestionSearch.getResults(0, suggestionsCount);
|
const auto suggestions = suggestionSearch.getResults(0, suggestionsCount);
|
||||||
for (auto current = suggestions.begin();
|
for (auto current : suggestions) {
|
||||||
current != suggestions.end();
|
|
||||||
current++) {
|
|
||||||
SuggestionItem suggestion(current.getTitle(), kiwix::normalize(current.getTitle()),
|
SuggestionItem suggestion(current.getTitle(), kiwix::normalize(current.getTitle()),
|
||||||
current.getPath(), current.getSnippet());
|
current.getPath(), current.getSnippet());
|
||||||
results.push_back(suggestion);
|
results.push_back(suggestion);
|
||||||
}
|
}
|
||||||
retVal = true;
|
|
||||||
} else {
|
} else {
|
||||||
|
// Check some of the variants of the prefix
|
||||||
for (std::vector<std::string>::iterator variantsItr = variants.begin();
|
for (std::vector<std::string>::iterator variantsItr = variants.begin();
|
||||||
variantsItr != variants.end();
|
variantsItr != variants.end();
|
||||||
variantsItr++) {
|
variantsItr++) {
|
||||||
retVal = this->searchSuggestions(*variantsItr, suggestionsCount, results)
|
auto suggestionSearch = suggestionSearcher.suggest(*variantsItr);
|
||||||
|| retVal;
|
for (auto current : suggestionSearch.getResults(0, suggestionsCount)) {
|
||||||
|
if (results.size() >= suggestionsCount) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
SuggestionItem suggestion(current.getTitle(), kiwix::normalize(current.getTitle()),
|
||||||
|
current.getPath(), current.getSnippet());
|
||||||
|
results.push_back(suggestion);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return retVal;
|
return results.size() > 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Get next suggestion */
|
/* Get next suggestion */
|
||||||
|
|
|
@ -23,6 +23,7 @@
|
||||||
#include "reader.h"
|
#include "reader.h"
|
||||||
|
|
||||||
#include <zim/search.h>
|
#include <zim/search.h>
|
||||||
|
#include <zim/suggestion.h>
|
||||||
|
|
||||||
#include <mustache.hpp>
|
#include <mustache.hpp>
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
|
@ -37,6 +38,7 @@ class _Result : public Result
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
_Result(zim::SearchResultSet::iterator iterator);
|
_Result(zim::SearchResultSet::iterator iterator);
|
||||||
|
_Result(SuggestionItem suggestionItem);
|
||||||
virtual ~_Result(){};
|
virtual ~_Result(){};
|
||||||
|
|
||||||
virtual std::string get_url();
|
virtual std::string get_url();
|
||||||
|
@ -50,6 +52,8 @@ class _Result : public Result
|
||||||
|
|
||||||
private:
|
private:
|
||||||
zim::SearchResultSet::iterator iterator;
|
zim::SearchResultSet::iterator iterator;
|
||||||
|
SuggestionItem suggestionItem;
|
||||||
|
bool isSuggestion;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct SearcherInternal : zim::SearchResultSet {
|
struct SearcherInternal : zim::SearchResultSet {
|
||||||
|
@ -62,6 +66,14 @@ struct SearcherInternal : zim::SearchResultSet {
|
||||||
zim::SearchResultSet::iterator current_iterator;
|
zim::SearchResultSet::iterator current_iterator;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct SuggestionInternal : zim::SuggestionResultSet {
|
||||||
|
explicit SuggestionInternal(const zim::SuggestionResultSet& srs)
|
||||||
|
: zim::SuggestionResultSet(srs),
|
||||||
|
currentIterator(srs.begin()) {}
|
||||||
|
|
||||||
|
zim::SuggestionResultSet::iterator currentIterator;
|
||||||
|
};
|
||||||
|
|
||||||
/* Constructor */
|
/* Constructor */
|
||||||
Searcher::Searcher()
|
Searcher::Searcher()
|
||||||
: searchPattern(""),
|
: searchPattern(""),
|
||||||
|
@ -119,9 +131,9 @@ void Searcher::search(const std::string& search,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
zim::Searcher searcher(archives);
|
zim::Searcher searcher(archives);
|
||||||
|
searcher.setVerbose(verbose);
|
||||||
zim::Query query;
|
zim::Query query;
|
||||||
query.setQuery(unaccentedSearch, false);
|
query.setQuery(unaccentedSearch);
|
||||||
query.setVerbose(verbose);
|
|
||||||
zim::Search search = searcher.search(query);
|
zim::Search search = searcher.search(query);
|
||||||
internal.reset(new SearcherInternal(search.getResults(resultStart, maxResultCount)));
|
internal.reset(new SearcherInternal(search.getResults(resultStart, maxResultCount)));
|
||||||
this->estimatedResultCount = search.getEstimatedMatches();
|
this->estimatedResultCount = search.getEstimatedMatches();
|
||||||
|
@ -160,9 +172,9 @@ void Searcher::geo_search(float latitude, float longitude, float distance,
|
||||||
archives.push_back(*(*current)->getZimArchive());
|
archives.push_back(*(*current)->getZimArchive());
|
||||||
}
|
}
|
||||||
zim::Searcher searcher(archives);
|
zim::Searcher searcher(archives);
|
||||||
|
searcher.setVerbose(verbose);
|
||||||
zim::Query query;
|
zim::Query query;
|
||||||
query.setVerbose(verbose);
|
query.setQuery("");
|
||||||
query.setQuery("", false);
|
|
||||||
query.setGeorange(latitude, longitude, distance);
|
query.setGeorange(latitude, longitude, distance);
|
||||||
zim::Search search = searcher.search(query);
|
zim::Search search = searcher.search(query);
|
||||||
internal.reset(new SearcherInternal(search.getResults(resultStart, maxResultCount)));
|
internal.reset(new SearcherInternal(search.getResults(resultStart, maxResultCount)));
|
||||||
|
@ -179,11 +191,21 @@ void Searcher::restart_search()
|
||||||
|
|
||||||
Result* Searcher::getNextResult()
|
Result* Searcher::getNextResult()
|
||||||
{
|
{
|
||||||
if (internal.get() &&
|
if (internal.get() && internal->current_iterator != internal->end()) {
|
||||||
internal->current_iterator != internal->end()) {
|
|
||||||
Result* result = new _Result(internal->current_iterator);
|
Result* result = new _Result(internal->current_iterator);
|
||||||
internal->current_iterator++;
|
internal->current_iterator++;
|
||||||
return result;
|
return result;
|
||||||
|
} else if (suggestionInternal.get() &&
|
||||||
|
suggestionInternal->currentIterator != suggestionInternal->end()) {
|
||||||
|
SuggestionItem item(
|
||||||
|
suggestionInternal->currentIterator->getTitle(),
|
||||||
|
normalize(suggestionInternal->currentIterator->getTitle()),
|
||||||
|
suggestionInternal->currentIterator->getPath(),
|
||||||
|
suggestionInternal->currentIterator->getSnippet()
|
||||||
|
);
|
||||||
|
Result* result = new _Result(item);
|
||||||
|
suggestionInternal->currentIterator++;
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
@ -209,17 +231,12 @@ void Searcher::suggestions(std::string& searchPattern, const bool verbose)
|
||||||
this->maxResultCount = 10;
|
this->maxResultCount = 10;
|
||||||
string unaccentedSearch = removeAccents(searchPattern);
|
string unaccentedSearch = removeAccents(searchPattern);
|
||||||
|
|
||||||
std::vector<zim::Archive> archives;
|
// Multizim suggestion is not supported as of now! taking only one archive
|
||||||
for (auto current = this->readers.begin(); current != this->readers.end();
|
zim::Archive archive = *(*this->readers.begin())->getZimArchive();
|
||||||
current++) {
|
zim::SuggestionSearcher searcher(archive);
|
||||||
archives.push_back(*(*current)->getZimArchive());
|
searcher.setVerbose(verbose);
|
||||||
}
|
zim::SuggestionSearch search = searcher.suggest(searchPattern);
|
||||||
zim::Searcher searcher(archives);
|
suggestionInternal.reset(new SuggestionInternal(search.getResults(resultStart, maxResultCount)));
|
||||||
zim::Query query;
|
|
||||||
query.setVerbose(verbose);
|
|
||||||
query.setQuery(unaccentedSearch, true);
|
|
||||||
zim::Search search = searcher.search(query);
|
|
||||||
internal.reset(new SearcherInternal(search.getResults(resultStart, maxResultCount)));
|
|
||||||
this->estimatedResultCount = search.getEstimatedMatches();
|
this->estimatedResultCount = search.getEstimatedMatches();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -235,40 +252,69 @@ zim::SearchResultSet Searcher::getSearchResultSet()
|
||||||
}
|
}
|
||||||
|
|
||||||
_Result::_Result(zim::SearchResultSet::iterator iterator)
|
_Result::_Result(zim::SearchResultSet::iterator iterator)
|
||||||
: iterator(iterator)
|
: iterator(iterator),
|
||||||
{
|
suggestionItem("", "", ""),
|
||||||
}
|
isSuggestion(false)
|
||||||
|
{}
|
||||||
|
|
||||||
|
_Result::_Result(SuggestionItem item)
|
||||||
|
: iterator(),
|
||||||
|
suggestionItem(item.getTitle(), item.getNormalizedTitle(), item.getPath(), item.getSnippet()),
|
||||||
|
isSuggestion(true)
|
||||||
|
{}
|
||||||
|
|
||||||
std::string _Result::get_url()
|
std::string _Result::get_url()
|
||||||
{
|
{
|
||||||
|
if (isSuggestion) {
|
||||||
|
return suggestionItem.getPath();
|
||||||
|
}
|
||||||
return iterator.getPath();
|
return iterator.getPath();
|
||||||
}
|
}
|
||||||
std::string _Result::get_title()
|
std::string _Result::get_title()
|
||||||
{
|
{
|
||||||
|
if (isSuggestion) {
|
||||||
|
return suggestionItem.getTitle();
|
||||||
|
}
|
||||||
return iterator.getTitle();
|
return iterator.getTitle();
|
||||||
}
|
}
|
||||||
int _Result::get_score()
|
int _Result::get_score()
|
||||||
{
|
{
|
||||||
|
if (isSuggestion) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
return iterator.getScore();
|
return iterator.getScore();
|
||||||
}
|
}
|
||||||
std::string _Result::get_snippet()
|
std::string _Result::get_snippet()
|
||||||
{
|
{
|
||||||
|
if (isSuggestion) {
|
||||||
|
return suggestionItem.getSnippet();
|
||||||
|
}
|
||||||
return iterator.getSnippet();
|
return iterator.getSnippet();
|
||||||
}
|
}
|
||||||
std::string _Result::get_content()
|
std::string _Result::get_content()
|
||||||
{
|
{
|
||||||
|
if (isSuggestion) return "";
|
||||||
return iterator->getItem(true).getData();
|
return iterator->getItem(true).getData();
|
||||||
}
|
}
|
||||||
int _Result::get_size()
|
int _Result::get_size()
|
||||||
{
|
{
|
||||||
|
if (isSuggestion) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
return iterator.getSize();
|
return iterator.getSize();
|
||||||
}
|
}
|
||||||
int _Result::get_wordCount()
|
int _Result::get_wordCount()
|
||||||
{
|
{
|
||||||
|
if (isSuggestion) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
return iterator.getWordCount();
|
return iterator.getWordCount();
|
||||||
}
|
}
|
||||||
std::string _Result::get_zimId()
|
std::string _Result::get_zimId()
|
||||||
{
|
{
|
||||||
|
if (isSuggestion) {
|
||||||
|
return "";
|
||||||
|
}
|
||||||
std::ostringstream s;
|
std::ostringstream s;
|
||||||
s << iterator.getZimId();
|
s << iterator.getZimId();
|
||||||
return s.str();
|
return s.str();
|
||||||
|
|
|
@ -58,6 +58,7 @@ extern "C" {
|
||||||
#include <zim/uuid.h>
|
#include <zim/uuid.h>
|
||||||
#include <zim/error.h>
|
#include <zim/error.h>
|
||||||
#include <zim/search.h>
|
#include <zim/search.h>
|
||||||
|
#include <zim/suggestion.h>
|
||||||
#include <zim/entry.h>
|
#include <zim/entry.h>
|
||||||
#include <zim/item.h>
|
#include <zim/item.h>
|
||||||
|
|
||||||
|
@ -347,14 +348,12 @@ SuggestionsList_t getSuggestions(const zim::Archive* const archive,
|
||||||
const std::string& queryString, int suggestionCount)
|
const std::string& queryString, int suggestionCount)
|
||||||
{
|
{
|
||||||
SuggestionsList_t suggestions;
|
SuggestionsList_t suggestions;
|
||||||
|
auto searcher = zim::SuggestionSearcher(*archive);
|
||||||
if (archive->hasTitleIndex()) {
|
if (archive->hasTitleIndex()) {
|
||||||
auto searcher = zim::Searcher(*archive);
|
auto search = searcher.suggest(queryString);
|
||||||
zim::Query suggestionQuery;
|
auto srs = search.getResults(0, suggestionCount);
|
||||||
suggestionQuery.setQuery(queryString, true);
|
|
||||||
auto suggestionSearch = searcher.search(suggestionQuery);
|
|
||||||
auto suggestionResult = suggestionSearch.getResults(0, suggestionCount);
|
|
||||||
|
|
||||||
for (auto it = suggestionResult.begin(); it != suggestionResult.end(); it++) {
|
for (auto it : srs) {
|
||||||
SuggestionItem suggestion(it.getTitle(), kiwix::normalize(it.getTitle()),
|
SuggestionItem suggestion(it.getTitle(), kiwix::normalize(it.getTitle()),
|
||||||
it.getPath(), it.getSnippet());
|
it.getPath(), it.getSnippet());
|
||||||
suggestions.push_back(suggestion);
|
suggestions.push_back(suggestion);
|
||||||
|
@ -364,9 +363,11 @@ SuggestionsList_t getSuggestions(const zim::Archive* const archive,
|
||||||
std::vector<std::string> variants = getTitleVariants(queryString);
|
std::vector<std::string> variants = getTitleVariants(queryString);
|
||||||
int currCount = 0;
|
int currCount = 0;
|
||||||
for (auto it = variants.begin(); it != variants.end() && currCount < suggestionCount; it++) {
|
for (auto it = variants.begin(); it != variants.end() && currCount < suggestionCount; it++) {
|
||||||
for (auto& entry: archive->findByTitle(*it)) {
|
auto search = searcher.suggest(queryString);
|
||||||
SuggestionItem suggestion(entry.getTitle(), kiwix::normalize(entry.getTitle()),
|
auto srs = search.getResults(0, suggestionCount);
|
||||||
entry.getPath());
|
for (auto it : srs) {
|
||||||
|
SuggestionItem suggestion(it.getTitle(), kiwix::normalize(it.getTitle()),
|
||||||
|
it.getPath());
|
||||||
suggestions.push_back(suggestion);
|
suggestions.push_back(suggestion);
|
||||||
currCount++;
|
currCount++;
|
||||||
}
|
}
|
||||||
|
@ -610,8 +611,7 @@ std::unique_ptr<Response> InternalServer::handle_search(const RequestContext& re
|
||||||
cout << "Performing geo query `" << distance << "&(" << latitude << ";" << longitude << ")'" << endl;
|
cout << "Performing geo query `" << distance << "&(" << latitude << ";" << longitude << ")'" << endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
query.setVerbose(m_verbose.load());
|
query.setQuery("");
|
||||||
query.setQuery("", false);
|
|
||||||
query.setGeorange(latitude, longitude, distance);
|
query.setGeorange(latitude, longitude, distance);
|
||||||
} else {
|
} else {
|
||||||
// Execute Ft search
|
// Execute Ft search
|
||||||
|
@ -620,8 +620,7 @@ std::unique_ptr<Response> InternalServer::handle_search(const RequestContext& re
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string queryString = removeAccents(patternString);
|
std::string queryString = removeAccents(patternString);
|
||||||
query.setQuery(queryString, false);
|
query.setQuery(queryString);
|
||||||
query.setVerbose(m_verbose.load());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
zim::Search search = searcher->search(query);
|
zim::Search search = searcher->search(query);
|
||||||
|
|
|
@ -22,12 +22,37 @@ TEST(Searcher, search) {
|
||||||
ASSERT_EQ(result->get_title(), "Wikibooks");
|
ASSERT_EQ(result->get_title(), "Wikibooks");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST(Searcher, suggestion) {
|
||||||
|
Reader reader("./test/zimfile.zim");
|
||||||
|
|
||||||
|
Searcher searcher;
|
||||||
|
searcher.add_reader(&reader);
|
||||||
|
ASSERT_EQ(searcher.get_reader(0)->getTitle(), reader.getTitle());
|
||||||
|
|
||||||
|
std::string query = "ray";
|
||||||
|
searcher.suggestions(query, true);
|
||||||
|
searcher.restart_search();
|
||||||
|
|
||||||
|
auto result = searcher.getNextResult();
|
||||||
|
ASSERT_EQ(result->get_title(), "Charles, Ray");
|
||||||
|
ASSERT_EQ(result->get_url(), "A/Charles,_Ray");
|
||||||
|
ASSERT_EQ(result->get_snippet(), "Charles, <b>Ray</b>");
|
||||||
|
ASSERT_EQ(result->get_score(), 0);
|
||||||
|
ASSERT_EQ(result->get_content(), "");
|
||||||
|
ASSERT_EQ(result->get_size(), 0);
|
||||||
|
ASSERT_EQ(result->get_wordCount(), 0);
|
||||||
|
ASSERT_EQ(result->get_zimId(), "");
|
||||||
|
|
||||||
|
result = searcher.getNextResult();
|
||||||
|
ASSERT_EQ(result->get_title(), "Ray (film)");
|
||||||
|
}
|
||||||
|
|
||||||
TEST(Searcher, incrementalRange) {
|
TEST(Searcher, incrementalRange) {
|
||||||
// Attempt to get 50 results in steps of 5
|
// Attempt to get 50 results in steps of 5
|
||||||
zim::Archive archive("./test/zimfile.zim");
|
zim::Archive archive("./test/zimfile.zim");
|
||||||
zim::Searcher ftsearcher(archive);
|
zim::Searcher ftsearcher(archive);
|
||||||
zim::Query query;
|
zim::Query query;
|
||||||
query.setQuery("ray", false);
|
query.setQuery("ray");
|
||||||
auto search = ftsearcher.search(query);
|
auto search = ftsearcher.search(query);
|
||||||
|
|
||||||
int suggCount = 0;
|
int suggCount = 0;
|
||||||
|
|
Loading…
Reference in New Issue