Retrieve Searcher and Search from LRU Cache

We use the new cache template to implement two kinds of cache.
1: The Searcher cache is more general in terms of its usage. A Searcher
   can be used for multiple searches without much change to itself. We
   try to retrieve the searcher and perform searches using it whenever
   possible, and if not we put a searcher into the cache. Users can
   specify a custom cache length by setting the environment
   variable SEARCHER_CACHE_SIZE. Its default value is 10% of all the
   books available (with a minimum of 1).
2: The search cache is much more restricted in terms of usage. Its main
   purpose is to avoid re-searching on the searcher during page changes
   to generate SearchResultSet of various ranges. Users can specify a
   custom cache length using the environment variable SEARCH_CACHE_SIZE,
   with a default value of 2.
This commit is contained in:
Maneesh P M 2021-10-12 13:45:18 +05:30 committed by Matthieu Gautier
parent a51f8d66a7
commit 7cb4c1361f
2 changed files with 38 additions and 10 deletions

View File

@ -58,8 +58,6 @@ extern "C" {
#include <zim/uuid.h>
#include <zim/error.h>
#include <zim/search.h>
#include <zim/suggestion.h>
#include <zim/entry.h>
#include <zim/item.h>
@ -80,6 +78,7 @@ extern "C" {
#define MAX_SEARCH_LEN 140
#define KIWIX_MIN_CONTENT_SIZE_TO_DEFLATE 100
#define DEFAULT_CACHE_SIZE 2
namespace kiwix {
@ -96,6 +95,18 @@ inline std::string normalizeRootUrl(std::string rootUrl)
return rootUrl.empty() ? rootUrl : "/" + rootUrl;
}
// Reads a cache length from the environment variable `name`.
//
// Returns `defaultVal` when the variable is not set, or when
// extractFromString throws while converting its value; otherwise returns
// the converted value.
unsigned int getCacheLength(const char* name, unsigned int defaultVal) {
  const char* const rawValue = std::getenv(name);
  if (rawValue == nullptr) {
    return defaultVal;
  }

  try {
    return extractFromString<unsigned int>(rawValue);
  } catch (...) {
    // Any conversion failure deliberately falls back to the default.
    return defaultVal;
  }
}
} // unnamed namespace
static IdNameMapper defaultNameMapper;
@ -134,7 +145,9 @@ InternalServer::InternalServer(Library* library,
m_ipConnectionLimit(ipConnectionLimit),
mp_daemon(nullptr),
mp_library(library),
mp_nameMapper(nameMapper ? nameMapper : &defaultNameMapper)
mp_nameMapper(nameMapper ? nameMapper : &defaultNameMapper),
searcherCache(getCacheLength("SEARCHER_CACHE_SIZE", std::max((unsigned int) (mp_library->getBookCount(true, true)*0.1), 1U))),
searchCache(getCacheLength("SEARCH_CACHE_SIZE", DEFAULT_CACHE_SIZE))
{}
bool InternalServer::start() {
@ -488,11 +501,11 @@ std::unique_ptr<Response> InternalServer::handle_search(const RequestContext& re
} catch(const std::out_of_range&) {}
catch(const std::invalid_argument&) {}
std::string bookName;
std::string bookName, bookId;
std::shared_ptr<zim::Archive> archive;
try {
bookName = request.get_argument("content");
const std::string bookId = mp_nameMapper->getIdForName(bookName);
bookId = mp_nameMapper->getIdForName(bookName);
archive = mp_library->getArchiveById(bookId);
} catch (const std::out_of_range&) {}
@ -509,7 +522,7 @@ std::unique_ptr<Response> InternalServer::handle_search(const RequestContext& re
std::shared_ptr<zim::Searcher> searcher;
if (archive) {
searcher = std::make_shared<zim::Searcher>(*archive);
searcher = searcherCache.getOrPut(bookId, [=](){ return std::make_shared<zim::Searcher>(*archive);});
} else {
for (auto& bookId: mp_library->filter(kiwix::Filter().local(true).valid(true))) {
auto currentArchive = mp_library->getArchiveById(bookId);
@ -540,6 +553,7 @@ std::unique_ptr<Response> InternalServer::handle_search(const RequestContext& re
}
/* Get the results */
std::string queryString;
try {
zim::Query query;
if (patternString.empty()) {
@ -549,6 +563,7 @@ std::unique_ptr<Response> InternalServer::handle_search(const RequestContext& re
}
query.setQuery("");
queryString = "GEO:" + to_string(latitude) + to_string(longitude) + to_string(distance);
query.setGeorange(latitude, longitude, distance);
} else {
// Execute Ft search
@ -556,13 +571,16 @@ std::unique_ptr<Response> InternalServer::handle_search(const RequestContext& re
cout << "Performing query `" << patternString << "'" << endl;
}
std::string queryString = removeAccents(patternString);
queryString = "FT:" + removeAccents(patternString);
query.setQuery(queryString);
}
queryString = bookId + queryString;
zim::Search search = searcher->search(query);
SearchRenderer renderer(search.getResults(start, pageLength), mp_nameMapper, mp_library, start,
search.getEstimatedMatches());
std::shared_ptr<zim::Search> search;
search = searchCache.getOrPut(queryString, [=](){ return make_shared<zim::Search>(searcher->search(query));});
SearchRenderer renderer(search->getResults(start, pageLength), mp_nameMapper, mp_library, start,
search->getEstimatedMatches());
renderer.setSearchPattern(patternString);
renderer.setSearchContent(bookName);
renderer.setProtocolPrefix(m_root + "/");

View File

@ -28,6 +28,9 @@ extern "C" {
#include "library.h"
#include "name_mapper.h"
#include <zim/search.h>
#include <zim/suggestion.h>
#include <mustache.hpp>
#include <atomic>
@ -36,9 +39,13 @@ extern "C" {
#include "server/request_context.h"
#include "server/response.h"
#include "tools/concurrent_cache.h"
namespace kiwix {
// Shorthand for the mustache template data type.
using MustacheData = kainjow::mustache::data;

// Cache of shared zim::Searcher instances, looked up by a string key.
using SearcherCache = ConcurrentCache<string, std::shared_ptr<zim::Searcher>>;

// Cache of shared zim::Search instances, looked up by a string key.
using SearchCache = ConcurrentCache<string, std::shared_ptr<zim::Search>>;
class Entry;
class OPDSDumper;
@ -115,6 +122,9 @@ class InternalServer {
Library* mp_library;
NameMapper* mp_nameMapper;
SearcherCache searcherCache;
SearchCache searchCache;
std::string m_server_id;
std::string m_library_id;