Update libkiwix with new libzim api

This commit is contained in:
Maneesh P M 2021-08-11 22:43:20 +05:30
parent ba05999cba
commit 8a4080baba
5 changed files with 123 additions and 49 deletions

View File

@ -52,6 +52,7 @@ class Result
};
struct SearcherInternal;
struct SuggestionInternal;
/**
* The Searcher class is responsible for doing different kinds of search using the
* fulltext index.
@ -160,6 +161,7 @@ class Searcher
std::vector<Reader*> readers;
std::unique_ptr<SearcherInternal> internal;
std::unique_ptr<SuggestionInternal> suggestionInternal;
std::string searchPattern;
unsigned int estimatedResultCount;
unsigned int resultStart;

View File

@ -21,6 +21,7 @@
#include <time.h>
#include <zim/search.h>
#include <zim/suggestion.h>
#include <zim/item.h>
#include <zim/error.h>
@ -377,35 +378,36 @@ bool Reader::searchSuggestionsSmart(const string& prefix,
SuggestionsList_t& results)
{
std::vector<std::string> variants = this->getTitleVariants(prefix);
bool retVal = false;
/* Try to search in the title using fulltext search database */
auto suggestionSearcher = zim::SuggestionSearcher(*zimArchive);
auto suggestionSearcher = zim::Searcher(*zimArchive);
zim::Query suggestionQuery;
suggestionQuery.setQuery(prefix, true);
auto suggestionSearch = suggestionSearcher.search(suggestionQuery);
if (suggestionSearch.getEstimatedMatches()) {
if (zimArchive->hasTitleIndex()) {
auto suggestionSearch = suggestionSearcher.suggest(prefix);
const auto suggestions = suggestionSearch.getResults(0, suggestionsCount);
for (auto current = suggestions.begin();
current != suggestions.end();
current++) {
for (auto current : suggestions) {
SuggestionItem suggestion(current.getTitle(), kiwix::normalize(current.getTitle()),
current.getPath(), current.getSnippet());
results.push_back(suggestion);
}
retVal = true;
} else {
// Check some of the variants of the prefix
for (std::vector<std::string>::iterator variantsItr = variants.begin();
variantsItr != variants.end();
variantsItr++) {
retVal = this->searchSuggestions(*variantsItr, suggestionsCount, results)
|| retVal;
auto suggestionSearch = suggestionSearcher.suggest(*variantsItr);
for (auto current : suggestionSearch.getResults(0, suggestionsCount)) {
if (results.size() >= suggestionsCount) {
break;
}
SuggestionItem suggestion(current.getTitle(), kiwix::normalize(current.getTitle()),
current.getPath(), current.getSnippet());
results.push_back(suggestion);
}
}
}
return retVal;
return results.size() > 0;
}
/* Get next suggestion */

View File

@ -23,6 +23,7 @@
#include "reader.h"
#include <zim/search.h>
#include <zim/suggestion.h>
#include <mustache.hpp>
#include <cmath>
@ -37,6 +38,7 @@ class _Result : public Result
{
public:
_Result(zim::SearchResultSet::iterator iterator);
_Result(SuggestionItem suggestionItem);
virtual ~_Result(){};
virtual std::string get_url();
@ -50,6 +52,8 @@ class _Result : public Result
private:
zim::SearchResultSet::iterator iterator;
SuggestionItem suggestionItem;
bool isSuggestion;
};
struct SearcherInternal : zim::SearchResultSet {
@ -62,6 +66,14 @@ struct SearcherInternal : zim::SearchResultSet {
zim::SearchResultSet::iterator current_iterator;
};
struct SuggestionInternal : zim::SuggestionResultSet {
explicit SuggestionInternal(const zim::SuggestionResultSet& srs)
: zim::SuggestionResultSet(srs),
currentIterator(srs.begin()) {}
zim::SuggestionResultSet::iterator currentIterator;
};
/* Constructor */
Searcher::Searcher()
: searchPattern(""),
@ -119,9 +131,9 @@ void Searcher::search(const std::string& search,
}
}
zim::Searcher searcher(archives);
searcher.setVerbose(verbose);
zim::Query query;
query.setQuery(unaccentedSearch, false);
query.setVerbose(verbose);
query.setQuery(unaccentedSearch);
zim::Search search = searcher.search(query);
internal.reset(new SearcherInternal(search.getResults(resultStart, maxResultCount)));
this->estimatedResultCount = search.getEstimatedMatches();
@ -160,9 +172,9 @@ void Searcher::geo_search(float latitude, float longitude, float distance,
archives.push_back(*(*current)->getZimArchive());
}
zim::Searcher searcher(archives);
searcher.setVerbose(verbose);
zim::Query query;
query.setVerbose(verbose);
query.setQuery("", false);
query.setQuery("");
query.setGeorange(latitude, longitude, distance);
zim::Search search = searcher.search(query);
internal.reset(new SearcherInternal(search.getResults(resultStart, maxResultCount)));
@ -179,11 +191,21 @@ void Searcher::restart_search()
Result* Searcher::getNextResult()
{
if (internal.get() &&
internal->current_iterator != internal->end()) {
if (internal.get() && internal->current_iterator != internal->end()) {
Result* result = new _Result(internal->current_iterator);
internal->current_iterator++;
return result;
} else if (suggestionInternal.get() &&
suggestionInternal->currentIterator != suggestionInternal->end()) {
SuggestionItem item(
suggestionInternal->currentIterator->getTitle(),
normalize(suggestionInternal->currentIterator->getTitle()),
suggestionInternal->currentIterator->getPath(),
suggestionInternal->currentIterator->getSnippet()
);
Result* result = new _Result(item);
suggestionInternal->currentIterator++;
return result;
}
return NULL;
}
@ -209,17 +231,12 @@ void Searcher::suggestions(std::string& searchPattern, const bool verbose)
this->maxResultCount = 10;
string unaccentedSearch = removeAccents(searchPattern);
std::vector<zim::Archive> archives;
for (auto current = this->readers.begin(); current != this->readers.end();
current++) {
archives.push_back(*(*current)->getZimArchive());
}
zim::Searcher searcher(archives);
zim::Query query;
query.setVerbose(verbose);
query.setQuery(unaccentedSearch, true);
zim::Search search = searcher.search(query);
internal.reset(new SearcherInternal(search.getResults(resultStart, maxResultCount)));
// Multizim suggestion is not supported as of now! taking only one archive
zim::Archive archive = *(*this->readers.begin())->getZimArchive();
zim::SuggestionSearcher searcher(archive);
searcher.setVerbose(verbose);
zim::SuggestionSearch search = searcher.suggest(searchPattern);
suggestionInternal.reset(new SuggestionInternal(search.getResults(resultStart, maxResultCount)));
this->estimatedResultCount = search.getEstimatedMatches();
}
@ -235,40 +252,69 @@ zim::SearchResultSet Searcher::getSearchResultSet()
}
_Result::_Result(zim::SearchResultSet::iterator iterator)
: iterator(iterator)
{
}
: iterator(iterator),
suggestionItem("", "", ""),
isSuggestion(false)
{}
/* Builds a result backed by a suggestion instead of a fulltext search
 * iterator: the iterator is default-constructed, the suggestion data is
 * rebuilt from the getters of `item`, and the result is flagged as a
 * suggestion so every accessor reads from `suggestionItem`. */
_Result::_Result(SuggestionItem item)
  : iterator(),
    suggestionItem(item.getTitle(),
                   item.getNormalizedTitle(),
                   item.getPath(),
                   item.getSnippet()),
    isSuggestion(true)
{
}
/* Path of the result: read from the suggestion item for suggestion results,
 * from the search iterator otherwise. */
std::string _Result::get_url()
{
  return isSuggestion ? suggestionItem.getPath() : iterator.getPath();
}
/* Title of the result: read from the suggestion item for suggestion results,
 * from the search iterator otherwise. */
std::string _Result::get_title()
{
  return isSuggestion ? suggestionItem.getTitle() : iterator.getTitle();
}
/* Score of the result. Suggestion results always report 0; search results
 * report the score given by the search iterator. */
int _Result::get_score()
{
  return isSuggestion ? 0 : iterator.getScore();
}
/* Snippet of the result: read from the suggestion item for suggestion
 * results, from the search iterator otherwise. */
std::string _Result::get_snippet()
{
  return isSuggestion ? suggestionItem.getSnippet() : iterator.getSnippet();
}
/* Content of the result. Search results return the data of the item the
 * iterator points to; suggestion results return an empty string. */
std::string _Result::get_content()
{
  if (!isSuggestion) {
    return iterator->getItem(true).getData();
  } else {
    return "";
  }
}
/* Size of the result. Suggestion results always report 0; search results
 * report the size given by the search iterator. */
int _Result::get_size()
{
  return isSuggestion ? 0 : iterator.getSize();
}
/* Word count of the result. Suggestion results always report 0; search
 * results report the word count given by the search iterator. */
int _Result::get_wordCount()
{
  return isSuggestion ? 0 : iterator.getWordCount();
}
std::string _Result::get_zimId()
{
if (isSuggestion) {
return "";
}
std::ostringstream s;
s << iterator.getZimId();
return s.str();

View File

@ -58,6 +58,7 @@ extern "C" {
#include <zim/uuid.h>
#include <zim/error.h>
#include <zim/search.h>
#include <zim/suggestion.h>
#include <zim/entry.h>
#include <zim/item.h>
@ -347,14 +348,12 @@ SuggestionsList_t getSuggestions(const zim::Archive* const archive,
const std::string& queryString, int suggestionCount)
{
SuggestionsList_t suggestions;
auto searcher = zim::SuggestionSearcher(*archive);
if (archive->hasTitleIndex()) {
auto searcher = zim::Searcher(*archive);
zim::Query suggestionQuery;
suggestionQuery.setQuery(queryString, true);
auto suggestionSearch = searcher.search(suggestionQuery);
auto suggestionResult = suggestionSearch.getResults(0, suggestionCount);
auto search = searcher.suggest(queryString);
auto srs = search.getResults(0, suggestionCount);
for (auto it = suggestionResult.begin(); it != suggestionResult.end(); it++) {
for (auto it : srs) {
SuggestionItem suggestion(it.getTitle(), kiwix::normalize(it.getTitle()),
it.getPath(), it.getSnippet());
suggestions.push_back(suggestion);
@ -364,9 +363,11 @@ SuggestionsList_t getSuggestions(const zim::Archive* const archive,
std::vector<std::string> variants = getTitleVariants(queryString);
int currCount = 0;
for (auto it = variants.begin(); it != variants.end() && currCount < suggestionCount; it++) {
for (auto& entry: archive->findByTitle(*it)) {
SuggestionItem suggestion(entry.getTitle(), kiwix::normalize(entry.getTitle()),
entry.getPath());
auto search = searcher.suggest(queryString);
auto srs = search.getResults(0, suggestionCount);
for (auto it : srs) {
SuggestionItem suggestion(it.getTitle(), kiwix::normalize(it.getTitle()),
it.getPath());
suggestions.push_back(suggestion);
currCount++;
}
@ -610,8 +611,7 @@ std::unique_ptr<Response> InternalServer::handle_search(const RequestContext& re
cout << "Performing geo query `" << distance << "&(" << latitude << ";" << longitude << ")'" << endl;
}
query.setVerbose(m_verbose.load());
query.setQuery("", false);
query.setQuery("");
query.setGeorange(latitude, longitude, distance);
} else {
// Execute Ft search
@ -620,8 +620,7 @@ std::unique_ptr<Response> InternalServer::handle_search(const RequestContext& re
}
std::string queryString = removeAccents(patternString);
query.setQuery(queryString, false);
query.setVerbose(m_verbose.load());
query.setQuery(queryString);
}
zim::Search search = searcher->search(query);

View File

@ -22,12 +22,37 @@ TEST(Searcher, search) {
ASSERT_EQ(result->get_title(), "Wikibooks");
}
/* Runs a suggestion query through Searcher and checks the first two results.
 * For a suggestion result, title, url and snippet come from the suggestion
 * itself, while score, content, size, word count and zim id are expected to
 * be 0 or empty. */
TEST(Searcher, suggestion) {
  Reader reader("./test/zimfile.zim");

  Searcher searcher;
  searcher.add_reader(&reader);
  ASSERT_EQ(searcher.get_reader(0)->getTitle(), reader.getTitle());

  std::string query = "ray";
  searcher.suggestions(query, true);
  searcher.restart_search();

  auto result = searcher.getNextResult();
  // getNextResult() returns NULL when nothing is left to iterate over; stop
  // the test here with a failure instead of dereferencing a null pointer.
  ASSERT_TRUE(result != NULL);
  ASSERT_EQ(result->get_title(), "Charles, Ray");
  ASSERT_EQ(result->get_url(), "A/Charles,_Ray");
  ASSERT_EQ(result->get_snippet(), "Charles, <b>Ray</b>");
  ASSERT_EQ(result->get_score(), 0);
  ASSERT_EQ(result->get_content(), "");
  ASSERT_EQ(result->get_size(), 0);
  ASSERT_EQ(result->get_wordCount(), 0);
  ASSERT_EQ(result->get_zimId(), "");

  result = searcher.getNextResult();
  // Same guard for the second result before reading its title.
  ASSERT_TRUE(result != NULL);
  ASSERT_EQ(result->get_title(), "Ray (film)");
}
TEST(Searcher, incrementalRange) {
// Attempt to get 50 results in steps of 5
zim::Archive archive("./test/zimfile.zim");
zim::Searcher ftsearcher(archive);
zim::Query query;
query.setQuery("ray", false);
query.setQuery("ray");
auto search = ftsearcher.search(query);
int suggCount = 0;