mirror of https://github.com/kiwix/libkiwix.git
Retrieve Searcher and Search from LRU Cache
We use the new cache template to implement two kind of cache. 1: The Searcher cache is more general in terms of its usage. A Searcher can be used for multiple searches without much change to itself. We try to retrieve the searcher and perform searches using it whenever possible, and if not we put a searcher into the cache. User can specify a custom cache length by manipulating the environment variable SEARCHER_CACHE_SIZE. It's default value is 10% of all the books available. 2: The search cache is much more restricted in terms of usage. It's main purpose is to avoid re-searching on the searcher during page changes to generate SearchResultSet of various ranges. User can specify a custom cache length using the environment variable SEARCH_CACHE_SIZE with a default value of 2;
This commit is contained in:
parent
a51f8d66a7
commit
7cb4c1361f
|
@ -58,8 +58,6 @@ extern "C" {
|
|||
|
||||
#include <zim/uuid.h>
|
||||
#include <zim/error.h>
|
||||
#include <zim/search.h>
|
||||
#include <zim/suggestion.h>
|
||||
#include <zim/entry.h>
|
||||
#include <zim/item.h>
|
||||
|
||||
|
@ -80,6 +78,7 @@ extern "C" {
|
|||
|
||||
#define MAX_SEARCH_LEN 140
|
||||
#define KIWIX_MIN_CONTENT_SIZE_TO_DEFLATE 100
|
||||
#define DEFAULT_CACHE_SIZE 2
|
||||
|
||||
namespace kiwix {
|
||||
|
||||
|
@ -96,6 +95,18 @@ inline std::string normalizeRootUrl(std::string rootUrl)
|
|||
return rootUrl.empty() ? rootUrl : "/" + rootUrl;
|
||||
}
|
||||
|
||||
// Returns the value of env var `name` if found, otherwise returns defaultVal
|
||||
unsigned int getCacheLength(const char* name, unsigned int defaultVal) {
|
||||
try {
|
||||
const char* envString = std::getenv(name);
|
||||
if (envString == nullptr) {
|
||||
throw std::runtime_error("Environment variable not set");
|
||||
}
|
||||
return extractFromString<unsigned int>(envString);
|
||||
} catch (...) {}
|
||||
|
||||
return defaultVal;
|
||||
}
|
||||
} // unnamed namespace
|
||||
|
||||
static IdNameMapper defaultNameMapper;
|
||||
|
@ -134,7 +145,9 @@ InternalServer::InternalServer(Library* library,
|
|||
m_ipConnectionLimit(ipConnectionLimit),
|
||||
mp_daemon(nullptr),
|
||||
mp_library(library),
|
||||
mp_nameMapper(nameMapper ? nameMapper : &defaultNameMapper)
|
||||
mp_nameMapper(nameMapper ? nameMapper : &defaultNameMapper),
|
||||
searcherCache(getCacheLength("SEARCHER_CACHE_SIZE", std::max((unsigned int) (mp_library->getBookCount(true, true)*0.1), 1U))),
|
||||
searchCache(getCacheLength("SEARCH_CACHE_SIZE", DEFAULT_CACHE_SIZE))
|
||||
{}
|
||||
|
||||
bool InternalServer::start() {
|
||||
|
@ -488,11 +501,11 @@ std::unique_ptr<Response> InternalServer::handle_search(const RequestContext& re
|
|||
} catch(const std::out_of_range&) {}
|
||||
catch(const std::invalid_argument&) {}
|
||||
|
||||
std::string bookName;
|
||||
std::string bookName, bookId;
|
||||
std::shared_ptr<zim::Archive> archive;
|
||||
try {
|
||||
bookName = request.get_argument("content");
|
||||
const std::string bookId = mp_nameMapper->getIdForName(bookName);
|
||||
bookId = mp_nameMapper->getIdForName(bookName);
|
||||
archive = mp_library->getArchiveById(bookId);
|
||||
} catch (const std::out_of_range&) {}
|
||||
|
||||
|
@ -509,7 +522,7 @@ std::unique_ptr<Response> InternalServer::handle_search(const RequestContext& re
|
|||
|
||||
std::shared_ptr<zim::Searcher> searcher;
|
||||
if (archive) {
|
||||
searcher = std::make_shared<zim::Searcher>(*archive);
|
||||
searcher = searcherCache.getOrPut(bookId, [=](){ return std::make_shared<zim::Searcher>(*archive);});
|
||||
} else {
|
||||
for (auto& bookId: mp_library->filter(kiwix::Filter().local(true).valid(true))) {
|
||||
auto currentArchive = mp_library->getArchiveById(bookId);
|
||||
|
@ -540,6 +553,7 @@ std::unique_ptr<Response> InternalServer::handle_search(const RequestContext& re
|
|||
}
|
||||
|
||||
/* Get the results */
|
||||
std::string queryString;
|
||||
try {
|
||||
zim::Query query;
|
||||
if (patternString.empty()) {
|
||||
|
@ -549,6 +563,7 @@ std::unique_ptr<Response> InternalServer::handle_search(const RequestContext& re
|
|||
}
|
||||
|
||||
query.setQuery("");
|
||||
queryString = "GEO:" + to_string(latitude) + to_string(longitude) + to_string(distance);
|
||||
query.setGeorange(latitude, longitude, distance);
|
||||
} else {
|
||||
// Execute Ft search
|
||||
|
@ -556,13 +571,16 @@ std::unique_ptr<Response> InternalServer::handle_search(const RequestContext& re
|
|||
cout << "Performing query `" << patternString << "'" << endl;
|
||||
}
|
||||
|
||||
std::string queryString = removeAccents(patternString);
|
||||
queryString = "FT:" + removeAccents(patternString);
|
||||
query.setQuery(queryString);
|
||||
}
|
||||
queryString = bookId + queryString;
|
||||
|
||||
zim::Search search = searcher->search(query);
|
||||
SearchRenderer renderer(search.getResults(start, pageLength), mp_nameMapper, mp_library, start,
|
||||
search.getEstimatedMatches());
|
||||
std::shared_ptr<zim::Search> search;
|
||||
search = searchCache.getOrPut(queryString, [=](){ return make_shared<zim::Search>(searcher->search(query));});
|
||||
|
||||
SearchRenderer renderer(search->getResults(start, pageLength), mp_nameMapper, mp_library, start,
|
||||
search->getEstimatedMatches());
|
||||
renderer.setSearchPattern(patternString);
|
||||
renderer.setSearchContent(bookName);
|
||||
renderer.setProtocolPrefix(m_root + "/");
|
||||
|
|
|
@ -28,6 +28,9 @@ extern "C" {
|
|||
#include "library.h"
|
||||
#include "name_mapper.h"
|
||||
|
||||
#include <zim/search.h>
|
||||
#include <zim/suggestion.h>
|
||||
|
||||
#include <mustache.hpp>
|
||||
|
||||
#include <atomic>
|
||||
|
@ -36,9 +39,13 @@ extern "C" {
|
|||
#include "server/request_context.h"
|
||||
#include "server/response.h"
|
||||
|
||||
#include "tools/concurrent_cache.h"
|
||||
|
||||
namespace kiwix {
|
||||
|
||||
typedef kainjow::mustache::data MustacheData;
|
||||
typedef ConcurrentCache<string, std::shared_ptr<zim::Searcher>> SearcherCache;
|
||||
typedef ConcurrentCache<string, std::shared_ptr<zim::Search>> SearchCache;
|
||||
|
||||
class Entry;
|
||||
class OPDSDumper;
|
||||
|
@ -115,6 +122,9 @@ class InternalServer {
|
|||
Library* mp_library;
|
||||
NameMapper* mp_nameMapper;
|
||||
|
||||
SearcherCache searcherCache;
|
||||
SearchCache searchCache;
|
||||
|
||||
std::string m_server_id;
|
||||
std::string m_library_id;
|
||||
|
||||
|
|
Loading…
Reference in New Issue