From 8a4080baba02f3947d20052b9bf0d50cb194710d Mon Sep 17 00:00:00 2001 From: Maneesh P M Date: Wed, 11 Aug 2021 22:43:20 +0530 Subject: [PATCH] Update libkiwix with new libzim api --- include/searcher.h | 2 + src/reader.cpp | 32 +++++++------ src/searcher.cpp | 86 +++++++++++++++++++++++++++-------- src/server/internalServer.cpp | 25 +++++----- test/searcher.cpp | 27 ++++++++++- 5 files changed, 123 insertions(+), 49 deletions(-) diff --git a/include/searcher.h b/include/searcher.h index b4aa4fbf5..e677cc98e 100644 --- a/include/searcher.h +++ b/include/searcher.h @@ -52,6 +52,7 @@ class Result }; struct SearcherInternal; +struct SuggestionInternal; /** * The Searcher class is reponsible to do different kind of search using the * fulltext index. @@ -160,6 +161,7 @@ class Searcher std::vector readers; std::unique_ptr internal; + std::unique_ptr suggestionInternal; std::string searchPattern; unsigned int estimatedResultCount; unsigned int resultStart; diff --git a/src/reader.cpp b/src/reader.cpp index 2bbe58189..6af7de6f7 100644 --- a/src/reader.cpp +++ b/src/reader.cpp @@ -21,6 +21,7 @@ #include #include +#include #include #include @@ -377,35 +378,36 @@ bool Reader::searchSuggestionsSmart(const string& prefix, SuggestionsList_t& results) { std::vector variants = this->getTitleVariants(prefix); - bool retVal = false; - /* Try to search in the title using fulltext search database */ + auto suggestionSearcher = zim::SuggestionSearcher(*zimArchive); - auto suggestionSearcher = zim::Searcher(*zimArchive); - zim::Query suggestionQuery; - suggestionQuery.setQuery(prefix, true); - auto suggestionSearch = suggestionSearcher.search(suggestionQuery); - - if (suggestionSearch.getEstimatedMatches()) { + if (zimArchive->hasTitleIndex()) { + auto suggestionSearch = suggestionSearcher.suggest(prefix); const auto suggestions = suggestionSearch.getResults(0, suggestionsCount); - for (auto current = suggestions.begin(); - current != suggestions.end(); - current++) { + for (auto current : suggestions) { SuggestionItem suggestion(current.getTitle(), kiwix::normalize(current.getTitle()), current.getPath(), current.getSnippet()); results.push_back(suggestion); } - retVal = true; } else { + // Check some of the variants of the prefix for (std::vector::iterator variantsItr = variants.begin(); variantsItr != variants.end(); variantsItr++) { - retVal = this->searchSuggestions(*variantsItr, suggestionsCount, results) - || retVal; + auto suggestionSearch = suggestionSearcher.suggest(*variantsItr); + for (auto current : suggestionSearch.getResults(0, suggestionsCount)) { + if (results.size() >= suggestionsCount) { + break; + } + + SuggestionItem suggestion(current.getTitle(), kiwix::normalize(current.getTitle()), + current.getPath(), current.getSnippet()); + results.push_back(suggestion); + } } } - return retVal; + return results.size() > 0; } /* Get next suggestion */ diff --git a/src/searcher.cpp b/src/searcher.cpp index 1d0b492c6..8fa77c391 100644 --- a/src/searcher.cpp +++ b/src/searcher.cpp @@ -23,6 +23,7 @@ #include "reader.h" #include +#include #include #include @@ -37,6 +38,7 @@ class _Result : public Result { public: _Result(zim::SearchResultSet::iterator iterator); + _Result(SuggestionItem suggestionItem); virtual ~_Result(){}; virtual std::string get_url(); @@ -50,6 +52,8 @@ class _Result : public Result private: zim::SearchResultSet::iterator iterator; + SuggestionItem suggestionItem; + bool isSuggestion; }; struct SearcherInternal : zim::SearchResultSet { @@ -62,6 +66,14 @@ struct SearcherInternal : zim::SearchResultSet { zim::SearchResultSet::iterator current_iterator; }; +struct SuggestionInternal : zim::SuggestionResultSet { + explicit SuggestionInternal(const zim::SuggestionResultSet& srs) + : zim::SuggestionResultSet(srs), + currentIterator(srs.begin()) {} + + zim::SuggestionResultSet::iterator currentIterator; +}; + /* Constructor */ Searcher::Searcher() : searchPattern(""), @@ -119,9 +131,9 @@ void Searcher::search(const std::string& search, } } zim::Searcher searcher(archives); + searcher.setVerbose(verbose); zim::Query query; - query.setQuery(unaccentedSearch, false); - query.setVerbose(verbose); + query.setQuery(unaccentedSearch); zim::Search search = searcher.search(query); internal.reset(new SearcherInternal(search.getResults(resultStart, maxResultCount))); this->estimatedResultCount = search.getEstimatedMatches(); @@ -160,9 +172,9 @@ void Searcher::geo_search(float latitude, float longitude, float distance, archives.push_back(*(*current)->getZimArchive()); } zim::Searcher searcher(archives); + searcher.setVerbose(verbose); zim::Query query; - query.setVerbose(verbose); - query.setQuery("", false); + query.setQuery(""); query.setGeorange(latitude, longitude, distance); zim::Search search = searcher.search(query); internal.reset(new SearcherInternal(search.getResults(resultStart, maxResultCount))); @@ -179,11 +191,21 @@ void Searcher::restart_search() Result* Searcher::getNextResult() { - if (internal.get() && - internal->current_iterator != internal->end()) { + if (internal.get() && internal->current_iterator != internal->end()) { Result* result = new _Result(internal->current_iterator); internal->current_iterator++; return result; + } else if (suggestionInternal.get() && + suggestionInternal->currentIterator != suggestionInternal->end()) { + SuggestionItem item( + suggestionInternal->currentIterator->getTitle(), + normalize(suggestionInternal->currentIterator->getTitle()), + suggestionInternal->currentIterator->getPath(), + suggestionInternal->currentIterator->getSnippet() + ); + Result* result = new _Result(item); + suggestionInternal->currentIterator++; + return result; } return NULL; } @@ -209,17 +231,12 @@ void Searcher::suggestions(std::string& searchPattern, const bool verbose) this->maxResultCount = 10; string unaccentedSearch = removeAccents(searchPattern); - std::vector archives; - for (auto current = this->readers.begin(); current != this->readers.end(); - current++) { - archives.push_back(*(*current)->getZimArchive()); - } - zim::Searcher searcher(archives); - zim::Query query; - query.setVerbose(verbose); - query.setQuery(unaccentedSearch, true); - zim::Search search = searcher.search(query); - internal.reset(new SearcherInternal(search.getResults(resultStart, maxResultCount))); + // Multizim suggestion is not supported as of now! taking only one archive + zim::Archive archive = *(*this->readers.begin())->getZimArchive(); + zim::SuggestionSearcher searcher(archive); + searcher.setVerbose(verbose); + zim::SuggestionSearch search = searcher.suggest(searchPattern); + suggestionInternal.reset(new SuggestionInternal(search.getResults(resultStart, maxResultCount))); this->estimatedResultCount = search.getEstimatedMatches(); } @@ -235,40 +252,69 @@ zim::SearchResultSet Searcher::getSearchResultSet() } _Result::_Result(zim::SearchResultSet::iterator iterator) - : iterator(iterator) -{ -} + : iterator(iterator), + suggestionItem("", "", ""), + isSuggestion(false) +{} + +_Result::_Result(SuggestionItem item) + : iterator(), + suggestionItem(item.getTitle(), item.getNormalizedTitle(), item.getPath(), item.getSnippet()), + isSuggestion(true) +{} std::string _Result::get_url() { + if (isSuggestion) { + return suggestionItem.getPath(); + } return iterator.getPath(); } std::string _Result::get_title() { + if (isSuggestion) { + return suggestionItem.getTitle(); + } return iterator.getTitle(); } int _Result::get_score() { + if (isSuggestion) { + return 0; + } return iterator.getScore(); } std::string _Result::get_snippet() { + if (isSuggestion) { + return suggestionItem.getSnippet(); + } return iterator.getSnippet(); } std::string _Result::get_content() { + if (isSuggestion) return ""; return iterator->getItem(true).getData(); } int _Result::get_size() { + if (isSuggestion) { + return 0; + } return iterator.getSize(); } int _Result::get_wordCount() { + if (isSuggestion) { + return 0; + } return iterator.getWordCount(); } std::string _Result::get_zimId() { + if (isSuggestion) { + return ""; + } std::ostringstream s; s << iterator.getZimId(); return s.str(); diff --git a/src/server/internalServer.cpp b/src/server/internalServer.cpp index cd708a1f9..6cf8b01e3 100644 --- a/src/server/internalServer.cpp +++ b/src/server/internalServer.cpp @@ -58,6 +58,7 @@ extern "C" { #include #include #include +#include #include #include @@ -347,14 +348,12 @@ SuggestionsList_t getSuggestions(const zim::Archive* const archive, const std::string& queryString, int suggestionCount) { SuggestionsList_t suggestions; + auto searcher = zim::SuggestionSearcher(*archive); if (archive->hasTitleIndex()) { - auto searcher = zim::Searcher(*archive); - zim::Query suggestionQuery; - suggestionQuery.setQuery(queryString, true); - auto suggestionSearch = searcher.search(suggestionQuery); - auto suggestionResult = suggestionSearch.getResults(0, suggestionCount); + auto search = searcher.suggest(queryString); + auto srs = search.getResults(0, suggestionCount); - for (auto it = suggestionResult.begin(); it != suggestionResult.end(); it++) { + for (auto it : srs) { SuggestionItem suggestion(it.getTitle(), kiwix::normalize(it.getTitle()), it.getPath(), it.getSnippet()); suggestions.push_back(suggestion); @@ -364,9 +363,11 @@ SuggestionsList_t getSuggestions(const zim::Archive* const archive, std::vector variants = getTitleVariants(queryString); int currCount = 0; for (auto it = variants.begin(); it != variants.end() && currCount < suggestionCount; it++) { - for (auto& entry: archive->findByTitle(*it)) { - SuggestionItem suggestion(entry.getTitle(), kiwix::normalize(entry.getTitle()), - entry.getPath()); + auto search = searcher.suggest(queryString); + auto srs = search.getResults(0, suggestionCount); + for (auto it : srs) { + SuggestionItem suggestion(it.getTitle(), kiwix::normalize(it.getTitle()), + it.getPath()); suggestions.push_back(suggestion); currCount++; } @@ -610,8 +611,7 @@ std::unique_ptr InternalServer::handle_search(const RequestContext& re cout << "Performing geo query `" << distance << "&(" << latitude << ";" << longitude << ")'" << endl; } - query.setVerbose(m_verbose.load()); - query.setQuery("", false); + query.setQuery(""); query.setGeorange(latitude, longitude, distance); } else { // Execute Ft search @@ -620,8 +620,7 @@ std::unique_ptr InternalServer::handle_search(const RequestContext& re } std::string queryString = removeAccents(patternString); - query.setQuery(queryString, false); - query.setVerbose(m_verbose.load()); + query.setQuery(queryString); } zim::Search search = searcher->search(query); diff --git a/test/searcher.cpp b/test/searcher.cpp index d89523b62..a3569d959 100644 --- a/test/searcher.cpp +++ b/test/searcher.cpp @@ -22,12 +22,37 @@ TEST(Searcher, search) { ASSERT_EQ(result->get_title(), "Wikibooks"); } +TEST(Searcher, suggestion) { + Reader reader("./test/zimfile.zim"); + + Searcher searcher; + searcher.add_reader(&reader); + ASSERT_EQ(searcher.get_reader(0)->getTitle(), reader.getTitle()); + + std::string query = "ray"; + searcher.suggestions(query, true); + searcher.restart_search(); + + auto result = searcher.getNextResult(); + ASSERT_EQ(result->get_title(), "Charles, Ray"); + ASSERT_EQ(result->get_url(), "A/Charles,_Ray"); + ASSERT_EQ(result->get_snippet(), "Charles, Ray"); + ASSERT_EQ(result->get_score(), 0); + ASSERT_EQ(result->get_content(), ""); + ASSERT_EQ(result->get_size(), 0); + ASSERT_EQ(result->get_wordCount(), 0); + ASSERT_EQ(result->get_zimId(), ""); + + result = searcher.getNextResult(); + ASSERT_EQ(result->get_title(), "Ray (film)"); +} + TEST(Searcher, incrementalRange) { // Attempt to get 50 results in steps of 5 zim::Archive archive("./test/zimfile.zim"); zim::Searcher ftsearcher(archive); zim::Query query; - query.setQuery("ray", false); + query.setQuery("ray"); auto search = ftsearcher.search(query); int suggCount = 0;