mirror of https://github.com/kiwix/libkiwix.git
Retrieve Searcher and Search from LRU Cache
We use the new cache template to implement two kinds of cache. 1: The Searcher cache is more general in terms of its usage. A Searcher can be used for multiple searches without much change to itself. We try to retrieve the searcher from the cache and perform searches using it whenever possible; if it is not cached yet, we put a new searcher into the cache. Users can specify a custom cache length by setting the environment variable SEARCHER_CACHE_SIZE. Its default value is 10% of all the books available (with a minimum of 1). 2: The Search cache is much more restricted in terms of usage. Its main purpose is to avoid re-searching on the searcher during page changes to generate SearchResultSet of various ranges. Users can specify a custom cache length using the environment variable SEARCH_CACHE_SIZE, with a default value of 2.
This commit is contained in:
parent
a51f8d66a7
commit
7cb4c1361f
|
@ -58,8 +58,6 @@ extern "C" {
|
||||||
|
|
||||||
#include <zim/uuid.h>
|
#include <zim/uuid.h>
|
||||||
#include <zim/error.h>
|
#include <zim/error.h>
|
||||||
#include <zim/search.h>
|
|
||||||
#include <zim/suggestion.h>
|
|
||||||
#include <zim/entry.h>
|
#include <zim/entry.h>
|
||||||
#include <zim/item.h>
|
#include <zim/item.h>
|
||||||
|
|
||||||
|
@ -80,6 +78,7 @@ extern "C" {
|
||||||
|
|
||||||
#define MAX_SEARCH_LEN 140
|
#define MAX_SEARCH_LEN 140
|
||||||
#define KIWIX_MIN_CONTENT_SIZE_TO_DEFLATE 100
|
#define KIWIX_MIN_CONTENT_SIZE_TO_DEFLATE 100
|
||||||
|
#define DEFAULT_CACHE_SIZE 2
|
||||||
|
|
||||||
namespace kiwix {
|
namespace kiwix {
|
||||||
|
|
||||||
|
@ -96,6 +95,18 @@ inline std::string normalizeRootUrl(std::string rootUrl)
|
||||||
return rootUrl.empty() ? rootUrl : "/" + rootUrl;
|
return rootUrl.empty() ? rootUrl : "/" + rootUrl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns the value of env var `name` if found, otherwise returns defaultVal
|
||||||
|
unsigned int getCacheLength(const char* name, unsigned int defaultVal) {
|
||||||
|
try {
|
||||||
|
const char* envString = std::getenv(name);
|
||||||
|
if (envString == nullptr) {
|
||||||
|
throw std::runtime_error("Environment variable not set");
|
||||||
|
}
|
||||||
|
return extractFromString<unsigned int>(envString);
|
||||||
|
} catch (...) {}
|
||||||
|
|
||||||
|
return defaultVal;
|
||||||
|
}
|
||||||
} // unnamed namespace
|
} // unnamed namespace
|
||||||
|
|
||||||
static IdNameMapper defaultNameMapper;
|
static IdNameMapper defaultNameMapper;
|
||||||
|
@ -134,7 +145,9 @@ InternalServer::InternalServer(Library* library,
|
||||||
m_ipConnectionLimit(ipConnectionLimit),
|
m_ipConnectionLimit(ipConnectionLimit),
|
||||||
mp_daemon(nullptr),
|
mp_daemon(nullptr),
|
||||||
mp_library(library),
|
mp_library(library),
|
||||||
mp_nameMapper(nameMapper ? nameMapper : &defaultNameMapper)
|
mp_nameMapper(nameMapper ? nameMapper : &defaultNameMapper),
|
||||||
|
searcherCache(getCacheLength("SEARCHER_CACHE_SIZE", std::max((unsigned int) (mp_library->getBookCount(true, true)*0.1), 1U))),
|
||||||
|
searchCache(getCacheLength("SEARCH_CACHE_SIZE", DEFAULT_CACHE_SIZE))
|
||||||
{}
|
{}
|
||||||
|
|
||||||
bool InternalServer::start() {
|
bool InternalServer::start() {
|
||||||
|
@ -488,11 +501,11 @@ std::unique_ptr<Response> InternalServer::handle_search(const RequestContext& re
|
||||||
} catch(const std::out_of_range&) {}
|
} catch(const std::out_of_range&) {}
|
||||||
catch(const std::invalid_argument&) {}
|
catch(const std::invalid_argument&) {}
|
||||||
|
|
||||||
std::string bookName;
|
std::string bookName, bookId;
|
||||||
std::shared_ptr<zim::Archive> archive;
|
std::shared_ptr<zim::Archive> archive;
|
||||||
try {
|
try {
|
||||||
bookName = request.get_argument("content");
|
bookName = request.get_argument("content");
|
||||||
const std::string bookId = mp_nameMapper->getIdForName(bookName);
|
bookId = mp_nameMapper->getIdForName(bookName);
|
||||||
archive = mp_library->getArchiveById(bookId);
|
archive = mp_library->getArchiveById(bookId);
|
||||||
} catch (const std::out_of_range&) {}
|
} catch (const std::out_of_range&) {}
|
||||||
|
|
||||||
|
@ -509,7 +522,7 @@ std::unique_ptr<Response> InternalServer::handle_search(const RequestContext& re
|
||||||
|
|
||||||
std::shared_ptr<zim::Searcher> searcher;
|
std::shared_ptr<zim::Searcher> searcher;
|
||||||
if (archive) {
|
if (archive) {
|
||||||
searcher = std::make_shared<zim::Searcher>(*archive);
|
searcher = searcherCache.getOrPut(bookId, [=](){ return std::make_shared<zim::Searcher>(*archive);});
|
||||||
} else {
|
} else {
|
||||||
for (auto& bookId: mp_library->filter(kiwix::Filter().local(true).valid(true))) {
|
for (auto& bookId: mp_library->filter(kiwix::Filter().local(true).valid(true))) {
|
||||||
auto currentArchive = mp_library->getArchiveById(bookId);
|
auto currentArchive = mp_library->getArchiveById(bookId);
|
||||||
|
@ -540,6 +553,7 @@ std::unique_ptr<Response> InternalServer::handle_search(const RequestContext& re
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Get the results */
|
/* Get the results */
|
||||||
|
std::string queryString;
|
||||||
try {
|
try {
|
||||||
zim::Query query;
|
zim::Query query;
|
||||||
if (patternString.empty()) {
|
if (patternString.empty()) {
|
||||||
|
@ -549,6 +563,7 @@ std::unique_ptr<Response> InternalServer::handle_search(const RequestContext& re
|
||||||
}
|
}
|
||||||
|
|
||||||
query.setQuery("");
|
query.setQuery("");
|
||||||
|
queryString = "GEO:" + to_string(latitude) + to_string(longitude) + to_string(distance);
|
||||||
query.setGeorange(latitude, longitude, distance);
|
query.setGeorange(latitude, longitude, distance);
|
||||||
} else {
|
} else {
|
||||||
// Execute Ft search
|
// Execute Ft search
|
||||||
|
@ -556,13 +571,16 @@ std::unique_ptr<Response> InternalServer::handle_search(const RequestContext& re
|
||||||
cout << "Performing query `" << patternString << "'" << endl;
|
cout << "Performing query `" << patternString << "'" << endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string queryString = removeAccents(patternString);
|
queryString = "FT:" + removeAccents(patternString);
|
||||||
query.setQuery(queryString);
|
query.setQuery(queryString);
|
||||||
}
|
}
|
||||||
|
queryString = bookId + queryString;
|
||||||
|
|
||||||
zim::Search search = searcher->search(query);
|
std::shared_ptr<zim::Search> search;
|
||||||
SearchRenderer renderer(search.getResults(start, pageLength), mp_nameMapper, mp_library, start,
|
search = searchCache.getOrPut(queryString, [=](){ return make_shared<zim::Search>(searcher->search(query));});
|
||||||
search.getEstimatedMatches());
|
|
||||||
|
SearchRenderer renderer(search->getResults(start, pageLength), mp_nameMapper, mp_library, start,
|
||||||
|
search->getEstimatedMatches());
|
||||||
renderer.setSearchPattern(patternString);
|
renderer.setSearchPattern(patternString);
|
||||||
renderer.setSearchContent(bookName);
|
renderer.setSearchContent(bookName);
|
||||||
renderer.setProtocolPrefix(m_root + "/");
|
renderer.setProtocolPrefix(m_root + "/");
|
||||||
|
|
|
@ -28,6 +28,9 @@ extern "C" {
|
||||||
#include "library.h"
|
#include "library.h"
|
||||||
#include "name_mapper.h"
|
#include "name_mapper.h"
|
||||||
|
|
||||||
|
#include <zim/search.h>
|
||||||
|
#include <zim/suggestion.h>
|
||||||
|
|
||||||
#include <mustache.hpp>
|
#include <mustache.hpp>
|
||||||
|
|
||||||
#include <atomic>
|
#include <atomic>
|
||||||
|
@ -36,9 +39,13 @@ extern "C" {
|
||||||
#include "server/request_context.h"
|
#include "server/request_context.h"
|
||||||
#include "server/response.h"
|
#include "server/response.h"
|
||||||
|
|
||||||
|
#include "tools/concurrent_cache.h"
|
||||||
|
|
||||||
namespace kiwix {
|
namespace kiwix {
|
||||||
|
|
||||||
typedef kainjow::mustache::data MustacheData;
|
typedef kainjow::mustache::data MustacheData;
|
||||||
|
typedef ConcurrentCache<string, std::shared_ptr<zim::Searcher>> SearcherCache;
|
||||||
|
typedef ConcurrentCache<string, std::shared_ptr<zim::Search>> SearchCache;
|
||||||
|
|
||||||
class Entry;
|
class Entry;
|
||||||
class OPDSDumper;
|
class OPDSDumper;
|
||||||
|
@ -115,6 +122,9 @@ class InternalServer {
|
||||||
Library* mp_library;
|
Library* mp_library;
|
||||||
NameMapper* mp_nameMapper;
|
NameMapper* mp_nameMapper;
|
||||||
|
|
||||||
|
SearcherCache searcherCache;
|
||||||
|
SearchCache searchCache;
|
||||||
|
|
||||||
std::string m_server_id;
|
std::string m_server_id;
|
||||||
std::string m_library_id;
|
std::string m_library_id;
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue