From 7c688a4acc452de7afffbddc1c71ceea57c6007e Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Mon, 21 Mar 2022 18:01:07 +0100 Subject: [PATCH 01/29] Move `getCacheLength` to a generic helper function `getEnvVar` --- src/server/internalServer.cpp | 18 +++--------------- src/tools/otherTools.h | 17 +++++++++++++++++ 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/src/server/internalServer.cpp b/src/server/internalServer.cpp index 0290d79a7..1ed43db94 100644 --- a/src/server/internalServer.cpp +++ b/src/server/internalServer.cpp @@ -95,18 +95,6 @@ inline std::string normalizeRootUrl(std::string rootUrl) return rootUrl.empty() ? rootUrl : "/" + rootUrl; } -// Returns the value of env var `name` if found, otherwise returns defaultVal -unsigned int getCacheLength(const char* name, unsigned int defaultVal) { - try { - const char* envString = std::getenv(name); - if (envString == nullptr) { - throw std::runtime_error("Environment variable not set"); - } - return extractFromString(envString); - } catch (...) {} - - return defaultVal; -} } // unnamed namespace SearchInfo::SearchInfo(const std::string& pattern) @@ -194,9 +182,9 @@ InternalServer::InternalServer(Library* library, mp_daemon(nullptr), mp_library(library), mp_nameMapper(nameMapper ? nameMapper : &defaultNameMapper), - searcherCache(getCacheLength("SEARCHER_CACHE_SIZE", std::max((unsigned int) (mp_library->getBookCount(true, true)*0.1), 1U))), - searchCache(getCacheLength("SEARCH_CACHE_SIZE", DEFAULT_CACHE_SIZE)), - suggestionSearcherCache(getCacheLength("SUGGESTION_SEARCHER_CACHE_SIZE", std::max((unsigned int) (mp_library->getBookCount(true, true)*0.1), 1U))) + searcherCache(getEnvVar("SEARCHER_CACHE_SIZE", std::max((unsigned int) (mp_library->getBookCount(true, true)*0.1), 1U))), + searchCache(getEnvVar("SEARCH_CACHE_SIZE", DEFAULT_CACHE_SIZE)), + suggestionSearcherCache(getEnvVar("SUGGESTION_SEARCHER_CACHE_SIZE", std::max((unsigned int) (mp_library->getBookCount(true, true)*0.1), 1U))) {} bool InternalServer::start() { diff --git a/src/tools/otherTools.h b/src/tools/otherTools.h index b1297798e..c0920d7bf 100644 --- a/src/tools/otherTools.h +++ b/src/tools/otherTools.h @@ -23,9 +23,12 @@ #include #include #include +#include #include #include +#include "stringTools.h" + namespace pugi { class xml_node; } @@ -53,6 +56,20 @@ namespace kiwix kainjow::mustache::data onlyAsNonEmptyMustacheValue(const std::string& s); std::string render_template(const std::string& template_str, kainjow::mustache::data data); + + template + T getEnvVar(const char* name, const T& defaultValue) + { + try { + const char* envString = std::getenv(name); + if (envString == nullptr) { + throw std::runtime_error("Environment variable not set"); + } + return extractFromString(envString); + } catch (...) {} + + return defaultValue; + } } #endif From 28fb76bbc239ff5233677550410d59c87f634c5e Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Tue, 26 Apr 2022 17:44:07 +0200 Subject: [PATCH 02/29] Remove m_readers in `Library::impl` It is a deprecated interface and it is a simple wrapper on Archive. --- include/library.h | 2 +- src/library.cpp | 25 ++++++++----------------- 2 files changed, 9 insertions(+), 18 deletions(-) diff --git a/include/library.h b/include/library.h index 1763de7d5..4a2bc787e 100644 --- a/include/library.h +++ b/include/library.h @@ -338,7 +338,7 @@ private: // functions std::vector getBookPropValueSet(BookStrPropMemFn p) const; BookIdCollection filterViaBookDB(const Filter& filter) const; void updateBookDB(const Book& book); - void dropReader(const std::string& bookId); + void dropCache(const std::string& bookId); private: //data std::unique_ptr mp_impl; diff --git a/src/library.cpp b/src/library.cpp index f77059404..f9e7affa4 100644 --- a/src/library.cpp +++ b/src/library.cpp @@ -70,7 +70,6 @@ struct Library::Impl Library::Revision m_revision; std::map m_books; - std::map> m_readers; std::map> m_archives; std::vector m_bookmarks; Xapian::WritableDatabase m_bookDB; @@ -139,7 +138,7 @@ bool Library::addBook(const Book& book) try { auto& oldbook = mp_impl->m_books.at(book.getId()); if ( ! booksReferToTheSameArchive(oldbook, book) ) { - dropReader(book.getId()); + dropCache(book.getId()); } oldbook.update(book); // XXX: This may have no effect if oldbook is readonly // XXX: Then m_bookDB will become out-of-sync with @@ -173,9 +172,8 @@ bool Library::removeBookmark(const std::string& zimId, const std::string& url) } -void Library::dropReader(const std::string& id) +void Library::dropCache(const std::string& id) { - mp_impl->m_readers.erase(id); mp_impl->m_archives.erase(id); } @@ -183,7 +181,7 @@ bool Library::removeBookById(const std::string& id) { std::lock_guard lock(m_mutex); mp_impl->m_bookDB.delete_document("Q" + id); - dropReader(id); + dropCache(id); return mp_impl->m_books.erase(id) == 1; } @@ -242,19 +240,12 @@ const Book& Library::getBookByPath(const std::string& path) const std::shared_ptr Library::getReaderById(const std::string& id) { - try { - std::lock_guard lock(m_mutex); - return mp_impl->m_readers.at(id); - } catch (std::out_of_range& e) {} - - const auto archive = getArchiveById(id); - if ( !archive ) + auto archive = getArchiveById(id); + if(archive) { + return std::make_shared(archive); + } else { return nullptr; - - const shared_ptr reader(new Reader(archive, true)); - std::lock_guard lock(m_mutex); - mp_impl->m_readers[id] = reader; - return reader; + } } std::shared_ptr Library::getArchiveById(const std::string& id) From 582e3ec46dcc77075d896b1ce6a0bc2f8f15c2f4 Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Tue, 26 Apr 2022 17:49:10 +0200 Subject: [PATCH 03/29] Use a concurrent cache to store Archive cache. --- src/library.cpp | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/src/library.cpp b/src/library.cpp index f9e7affa4..7c60fa8aa 100644 --- a/src/library.cpp +++ b/src/library.cpp @@ -27,6 +27,8 @@ #include "tools/regexTools.h" #include "tools/pathTools.h" #include "tools/stringTools.h" +#include "tools/otherTools.h" +#include "tools/concurrent_cache.h" #include #include @@ -64,13 +66,12 @@ struct Library::Impl { Library::Revision lastUpdatedRevision = 0; - // May also keep the Archive and Reader pointers here and get - // rid of the m_readers and m_archives data members in Library }; Library::Revision m_revision; std::map m_books; - std::map> m_archives; + using ArchiveCache = ConcurrentCache>; + std::unique_ptr mp_archiveCache; std::vector m_bookmarks; Xapian::WritableDatabase m_bookDB; @@ -84,7 +85,8 @@ struct Library::Impl }; Library::Impl::Impl() - : m_bookDB("", Xapian::DB_BACKEND_INMEMORY) + : mp_archiveCache(new ArchiveCache(std::max(getEnvVar("ARCHIVE_CACHE_SIZE", 1), 1))), + m_bookDB("", Xapian::DB_BACKEND_INMEMORY) { } @@ -174,7 +176,7 @@ bool Library::removeBookmark(const std::string& zimId, const std::string& url) void Library::dropCache(const std::string& id) { - mp_impl->m_archives.erase(id); + mp_impl->mp_archiveCache->drop(id); } bool Library::removeBookById(const std::string& id) @@ -250,18 +252,19 @@ std::shared_ptr Library::getReaderById(const std::string& id) std::shared_ptr Library::getArchiveById(const std::string& id) { - std::lock_guard lock(m_mutex); try { - return mp_impl->m_archives.at(id); - } catch (std::out_of_range& e) {} - - auto book = getBookById(id); - if (!book.isPathValid()) + return mp_impl->mp_archiveCache->getOrPut(id, + [&](){ + auto book = getBookById(id); + if (!book.isPathValid()) { + throw std::invalid_argument(""); + } + return std::make_shared(book.getPath()); + }); + } catch (std::invalid_argument&) { return nullptr; + } - auto sptr = make_shared(book.getPath()); - mp_impl->m_archives[id] = sptr; - return sptr; } unsigned int Library::getBookCount(const bool localBooks, From 740581c55ca9fd68c24e188328d3e79ff9b44a03 Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Tue, 26 Apr 2022 17:55:56 +0200 Subject: [PATCH 04/29] Link the cache size to the book count. Unless explicitly set via user env variable. --- src/library.cpp | 11 +++++++++++ src/tools/concurrent_cache.h | 6 ++++++ src/tools/lrucache.h | 8 +++++++- 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/src/library.cpp b/src/library.cpp index 7c60fa8aa..b28533a4a 100644 --- a/src/library.cpp +++ b/src/library.cpp @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -151,6 +152,10 @@ bool Library::addBook(const Book& book) auto& newEntry = mp_impl->m_books[book.getId()]; static_cast(newEntry) = book; newEntry.lastUpdatedRevision = mp_impl->m_revision; + size_t new_cache_size = std::ceil(mp_impl->getBookCount(true, true)*0.1); + if (getEnvVar("ARCHIVE_CACHE_SIZE", -1) <= 0) { + mp_impl->mp_archiveCache->setMaxSize(new_cache_size); + } return true; } } @@ -184,6 +189,12 @@ bool Library::removeBookById(const std::string& id) std::lock_guard lock(m_mutex); mp_impl->m_bookDB.delete_document("Q" + id); dropCache(id); + // We do not change the cache size here + // Most of the time, the book is remove in case of library refresh, it is + // often associated with addBook calls (which will properly set the cache size) + // Having a too big cache is not a problem here (or it would have been before) + // (And setMaxSize doesn't actually reduce the cache size, extra cached items + // will be removed in put or getOrPut). return mp_impl->m_books.erase(id) == 1; } diff --git a/src/tools/concurrent_cache.h b/src/tools/concurrent_cache.h index 1b5175b2e..4c5c324aa 100644 --- a/src/tools/concurrent_cache.h +++ b/src/tools/concurrent_cache.h @@ -1,3 +1,4 @@ + /* * Copyright (C) 2021 Matthieu Gautier * Copyright (C) 2020 Veloman Yunkan @@ -84,6 +85,11 @@ public: // types return impl_.drop(key); } + size_t setMaxSize(size_t new_size) { + std::unique_lock l(lock_); + return impl_.setMaxSize(new_size); + } + private: // data Impl impl_; std::mutex lock_; diff --git a/src/tools/lrucache.h b/src/tools/lrucache.h index bd90c3128..08fa1b4d8 100644 --- a/src/tools/lrucache.h +++ b/src/tools/lrucache.h @@ -138,12 +138,18 @@ public: // functions return _cache_items_map.size(); } + size_t setMaxSize(size_t new_size) { + size_t previous = _max_size; + _max_size = new_size; + return previous; + } + private: // functions void putMissing(const key_t& key, const value_t& value) { assert(_cache_items_map.find(key) == _cache_items_map.end()); _cache_items_list.push_front(key_value_pair_t(key, value)); _cache_items_map[key] = _cache_items_list.begin(); - if (_cache_items_map.size() > _max_size) { + while (_cache_items_map.size() > _max_size) { _cache_items_map.erase(_cache_items_list.back().first); _cache_items_list.pop_back(); } From f5af0633ececb27cdb4511e946645ff26a82236f Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Wed, 27 Apr 2022 15:18:09 +0200 Subject: [PATCH 05/29] Move the searcher cache into the Library --- include/library.h | 2 ++ src/library.cpp | 23 ++++++++++++++++++++++- src/server/internalServer.cpp | 15 ++++++--------- src/server/internalServer.h | 2 -- 4 files changed, 30 insertions(+), 12 deletions(-) diff --git a/include/library.h b/include/library.h index 4a2bc787e..58e07c874 100644 --- a/include/library.h +++ b/include/library.h @@ -26,6 +26,7 @@ #include #include #include +#include #include "book.h" #include "bookmark.h" @@ -207,6 +208,7 @@ class Library DEPRECATED std::shared_ptr getReaderById(const std::string& id); std::shared_ptr getArchiveById(const std::string& id); + std::shared_ptr getSearcherById(const std::string& id); /** * Remove a book from the library. diff --git a/src/library.cpp b/src/library.cpp index b28533a4a..4369f58a7 100644 --- a/src/library.cpp +++ b/src/library.cpp @@ -66,13 +66,14 @@ struct Library::Impl struct Entry : Book { Library::Revision lastUpdatedRevision = 0; - }; Library::Revision m_revision; std::map m_books; using ArchiveCache = ConcurrentCache>; std::unique_ptr mp_archiveCache; + using SearcherCache = ConcurrentCache>; + std::unique_ptr mp_searcherCache; std::vector m_bookmarks; Xapian::WritableDatabase m_bookDB; @@ -87,6 +88,7 @@ struct Library::Impl Library::Impl::Impl() : mp_archiveCache(new ArchiveCache(std::max(getEnvVar("ARCHIVE_CACHE_SIZE", 1), 1))), + mp_searcherCache(new SearcherCache(std::max(getEnvVar("SEARCHER_CACHE_SIZE", 1), 1))), m_bookDB("", Xapian::DB_BACKEND_INMEMORY) { } @@ -156,6 +158,9 @@ bool Library::addBook(const Book& book) if (getEnvVar("ARCHIVE_CACHE_SIZE", -1) <= 0) { mp_impl->mp_archiveCache->setMaxSize(new_cache_size); } + if (getEnvVar("SEARCHER_CACHE_SIZE", -1) <= 0) { + mp_impl->mp_searcherCache->setMaxSize(new_cache_size); + } return true; } } @@ -182,6 +187,7 @@ bool Library::removeBookmark(const std::string& zimId, const std::string& url) void Library::dropCache(const std::string& id) { mp_impl->mp_archiveCache->drop(id); + mp_impl->mp_searcherCache->drop(id); } bool Library::removeBookById(const std::string& id) @@ -275,7 +281,22 @@ std::shared_ptr Library::getArchiveById(const std::string& id) } catch (std::invalid_argument&) { return nullptr; } +} +std::shared_ptr Library::getSearcherById(const std::string& id) +{ + try { + return mp_impl->mp_searcherCache->getOrPut(id, + [&](){ + auto archive = getArchiveById(id); + if(!archive) { + throw std::invalid_argument(""); + } + return std::make_shared(*archive); + }); + } catch (std::invalid_argument&) { + return nullptr; + } } unsigned int Library::getBookCount(const bool localBooks, diff --git a/src/server/internalServer.cpp b/src/server/internalServer.cpp index 1ed43db94..8bc0fd8a2 100644 --- a/src/server/internalServer.cpp +++ b/src/server/internalServer.cpp @@ -182,7 +182,6 @@ InternalServer::InternalServer(Library* library, mp_daemon(nullptr), mp_library(library), mp_nameMapper(nameMapper ? nameMapper : &defaultNameMapper), - searcherCache(getEnvVar("SEARCHER_CACHE_SIZE", std::max((unsigned int) (mp_library->getBookCount(true, true)*0.1), 1U))), searchCache(getEnvVar("SEARCH_CACHE_SIZE", DEFAULT_CACHE_SIZE)), suggestionSearcherCache(getEnvVar("SUGGESTION_SEARCHER_CACHE_SIZE", std::max((unsigned int) (mp_library->getBookCount(true, true)*0.1), 1U))) {} @@ -582,11 +581,9 @@ std::unique_ptr InternalServer::handle_search(const RequestContext& re auto searchInfo = SearchInfo(request); std::string bookId; - std::shared_ptr archive; if (!searchInfo.bookName.empty()) { try { bookId = mp_nameMapper->getIdForName(searchInfo.bookName); - archive = mp_library->getArchiveById(bookId); } catch (const std::out_of_range&) { throw std::invalid_argument("The requested book doesn't exist."); } @@ -600,8 +597,8 @@ std::unique_ptr InternalServer::handle_search(const RequestContext& re search = searchCache.getOrPut(searchInfo, [=](){ std::shared_ptr searcher; - if (archive) { - searcher = searcherCache.getOrPut(bookId, [=](){ return std::make_shared(*archive);}); + if(!bookId.empty()) { + searcher = mp_library->getSearcherById(bookId); } else { for (auto& bookId: mp_library->filter(kiwix::Filter().local(true).valid(true))) { auto currentArchive = mp_library->getArchiveById(bookId); @@ -622,11 +619,11 @@ std::unique_ptr InternalServer::handle_search(const RequestContext& re // (in case of zim file not containing a index) const auto cssUrl = renderUrl(m_root, RESOURCE::templates::url_of_search_results_css); HTTPErrorHtmlResponse response(*this, request, MHD_HTTP_NOT_FOUND, - "fulltext-search-unavailable", - "404-page-heading", + "fulltext-search-unavailable", + "404-page-heading", cssUrl); response += nonParameterizedMessage("no-search-results"); - response += TaskbarInfo(searchInfo.bookName, archive.get()); + response += TaskbarInfo(searchInfo.bookName, bookId.empty()?nullptr:mp_library->getArchiveById(bookId).get()); return response; } @@ -656,7 +653,7 @@ std::unique_ptr InternalServer::handle_search(const RequestContext& re renderer.setSearchProtocolPrefix(m_root + "/search"); renderer.setPageLength(pageLength); auto response = ContentResponse::build(*this, renderer.getHtml(), "text/html; charset=utf-8"); - response->set_taskbar(searchInfo.bookName, archive.get()); + response->set_taskbar(searchInfo.bookName, bookId.empty()?nullptr:mp_library->getArchiveById(bookId).get()); return std::move(response); } catch (const std::invalid_argument& e) { return HTTP400HtmlResponse(*this, request) diff --git a/src/server/internalServer.h b/src/server/internalServer.h index 69e8166c4..2d6da476b 100644 --- a/src/server/internalServer.h +++ b/src/server/internalServer.h @@ -88,7 +88,6 @@ class SearchInfo { typedef kainjow::mustache::data MustacheData; -typedef ConcurrentCache> SearcherCache; typedef ConcurrentCache> SearchCache; typedef ConcurrentCache> SuggestionSearcherCache; @@ -167,7 +166,6 @@ class InternalServer { Library* mp_library; NameMapper* mp_nameMapper; - SearcherCache searcherCache; SearchCache searchCache; SuggestionSearcherCache suggestionSearcherCache; From fd0edbba80510256d6573a246b5d76ab6317c80f Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Tue, 22 Mar 2022 11:42:29 +0100 Subject: [PATCH 06/29] Use a set of id as key for a the searcher Cache. It will allow use to cache seacher for multiple zim files. --- src/library.cpp | 26 ++++++++++++++++++++++++-- src/tools/concurrent_cache.h | 2 +- src/tools/lrucache.h | 8 ++++++++ 3 files changed, 33 insertions(+), 3 deletions(-) diff --git a/src/library.cpp b/src/library.cpp index 4369f58a7..d57b4d450 100644 --- a/src/library.cpp +++ b/src/library.cpp @@ -59,6 +59,27 @@ bool booksReferToTheSameArchive(const Book& book1, const Book& book2) && book1.getPath() == book2.getPath(); } +template +class MultiKeyCache: public ConcurrentCache, Value> +{ + public: + explicit MultiKeyCache(size_t maxEntries) + : ConcurrentCache, Value>(maxEntries) + {} + + bool drop(const Key& key) + { + std::unique_lock l(this->lock_); + bool removed = false; + for(auto& cache_key: this->impl_.keys()) { + if(cache_key.find(key)!=cache_key.end()) { + removed |= this->impl_.drop(cache_key); + } + } + return removed; + } +}; + } // unnamed namespace struct Library::Impl @@ -72,7 +93,7 @@ struct Library::Impl std::map m_books; using ArchiveCache = ConcurrentCache>; std::unique_ptr mp_archiveCache; - using SearcherCache = ConcurrentCache>; + using SearcherCache = MultiKeyCache>; std::unique_ptr mp_searcherCache; std::vector m_bookmarks; Xapian::WritableDatabase m_bookDB; @@ -285,8 +306,9 @@ std::shared_ptr Library::getArchiveById(const std::string& id) std::shared_ptr Library::getSearcherById(const std::string& id) { + std::set ids {id}; try { - return mp_impl->mp_searcherCache->getOrPut(id, + return mp_impl->mp_searcherCache->getOrPut(ids, [&](){ auto archive = getArchiveById(id); if(!archive) { diff --git a/src/tools/concurrent_cache.h b/src/tools/concurrent_cache.h index 4c5c324aa..3ee8ce60e 100644 --- a/src/tools/concurrent_cache.h +++ b/src/tools/concurrent_cache.h @@ -90,7 +90,7 @@ public: // types return impl_.setMaxSize(new_size); } -private: // data +protected: // data Impl impl_; std::mutex lock_; }; diff --git a/src/tools/lrucache.h b/src/tools/lrucache.h index 08fa1b4d8..06ee9c992 100644 --- a/src/tools/lrucache.h +++ b/src/tools/lrucache.h @@ -144,6 +144,14 @@ public: // functions return previous; } + std::set keys() const { + std::set keys; + for(auto& item:_cache_items_map) { + keys.insert(item.first); + } + return keys; + } + private: // functions void putMissing(const key_t& key, const value_t& value) { assert(_cache_items_map.find(key) == _cache_items_map.end()); From 854623618cedaf4ab7e5ce60b092eb6ebb552a7b Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Tue, 22 Mar 2022 11:50:22 +0100 Subject: [PATCH 07/29] Use the newly introduced searcherCache for multizim searcher. --- include/library.h | 6 +++++- src/library.cpp | 16 ++++++++++------ src/server/internalServer.cpp | 16 +++++----------- 3 files changed, 20 insertions(+), 18 deletions(-) diff --git a/include/library.h b/include/library.h index 58e07c874..0d5d19f0f 100644 --- a/include/library.h +++ b/include/library.h @@ -153,6 +153,7 @@ class Library typedef uint64_t Revision; typedef std::vector BookIdCollection; typedef std::map AttributeCounts; + typedef std::set BookIdSet; public: Library(); @@ -208,7 +209,10 @@ class Library DEPRECATED std::shared_ptr getReaderById(const std::string& id); std::shared_ptr getArchiveById(const std::string& id); - std::shared_ptr getSearcherById(const std::string& id); + std::shared_ptr getSearcherById(const std::string& id) { + return getSearcherByIds(BookIdSet{id}); + } + std::shared_ptr getSearcherByIds(const BookIdSet& ids); /** * Remove a book from the library. diff --git a/src/library.cpp b/src/library.cpp index d57b4d450..7c5a63562 100644 --- a/src/library.cpp +++ b/src/library.cpp @@ -304,17 +304,21 @@ std::shared_ptr Library::getArchiveById(const std::string& id) } } -std::shared_ptr Library::getSearcherById(const std::string& id) +std::shared_ptr Library::getSearcherByIds(const BookIdSet& ids) { - std::set ids {id}; + assert(!ids.empty()); try { return mp_impl->mp_searcherCache->getOrPut(ids, [&](){ - auto archive = getArchiveById(id); - if(!archive) { - throw std::invalid_argument(""); + std::vector archives; + for(auto& id:ids) { + auto archive = getArchiveById(id); + if(!archive) { + throw std::invalid_argument(""); + } + archives.push_back(*archive); } - return std::make_shared(*archive); + return std::make_shared(archives); }); } catch (std::invalid_argument&) { return nullptr; diff --git a/src/server/internalServer.cpp b/src/server/internalServer.cpp index 8bc0fd8a2..7fad45d4e 100644 --- a/src/server/internalServer.cpp +++ b/src/server/internalServer.cpp @@ -596,21 +596,15 @@ std::unique_ptr InternalServer::handle_search(const RequestContext& re try { search = searchCache.getOrPut(searchInfo, [=](){ - std::shared_ptr searcher; + Library::BookIdSet bookIds; if(!bookId.empty()) { - searcher = mp_library->getSearcherById(bookId); + bookIds.insert(bookId); } else { for (auto& bookId: mp_library->filter(kiwix::Filter().local(true).valid(true))) { - auto currentArchive = mp_library->getArchiveById(bookId); - if (currentArchive) { - if (! searcher) { - searcher = std::make_shared(*currentArchive); - } else { - searcher->addArchive(*currentArchive); - } - } - } + bookIds.insert(bookId); + } } + auto searcher = mp_library->getSearcherByIds(bookIds); return make_shared(searcher->search(searchInfo.getZimQuery(m_verbose.load()))); } ); From 98c54b22797a362d031f56e16c1f3713fef1f98c Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Tue, 22 Mar 2022 11:55:59 +0100 Subject: [PATCH 08/29] Handle multiple arguments in RequestContext. --- src/server/request_context.cpp | 20 ++++++++++++++------ src/server/request_context.h | 9 +++++++-- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/src/server/request_context.cpp b/src/server/request_context.cpp index 6b435e5ad..c479a6201 100644 --- a/src/server/request_context.cpp +++ b/src/server/request_context.cpp @@ -106,7 +106,7 @@ MHD_Result RequestContext::fill_argument(void *__this, enum MHD_ValueKind kind, const char *key, const char* value) { RequestContext *_this = static_cast(__this); - _this->arguments[key] = value == nullptr ? "" : value; + _this->arguments[key].push_back(value == nullptr ? "" : value); return MHD_YES; } @@ -121,8 +121,14 @@ void RequestContext::print_debug_info() const { printf(" - %s : '%s'\n", it->first.c_str(), it->second.c_str()); } printf("arguments :\n"); - for (auto it=arguments.begin(); it!=arguments.end(); it++) { - printf(" - %s : '%s'\n", it->first.c_str(), it->second.c_str()); + for (auto& pair:arguments) { + printf(" - %s :", pair.first.c_str()); + bool first = true; + for (auto& v: pair.second) { + printf("%s %s", first?"":",", v.c_str()); + first = false; + } + printf("\n"); } printf("Parsed : \n"); printf("full_url: %s\n", full_url.c_str()); @@ -176,7 +182,7 @@ ByteRange RequestContext::get_range() const { template<> std::string RequestContext::get_argument(const std::string& name) const { - return arguments.at(name); + return arguments.at(name)[0]; } std::string RequestContext::get_header(const std::string& name) const { @@ -187,8 +193,10 @@ std::string RequestContext::get_query() const { std::string q; const char* sep = ""; for ( const auto& a : arguments ) { - q += sep + a.first + '=' + a.second; - sep = "&"; + for (const auto& v: a.second) { + q += sep + a.first + '=' + v; + sep = "&"; + } } return q; } diff --git a/src/server/request_context.h b/src/server/request_context.h index 2c4c8902e..6172bcdf0 100644 --- a/src/server/request_context.h +++ b/src/server/request_context.h @@ -25,6 +25,7 @@ #include #include #include +#include #include #include "byte_range.h" @@ -69,7 +70,11 @@ class RequestContext { std::string get_header(const std::string& name) const; template T get_argument(const std::string& name) const { - return extractFromString(arguments.at(name)); + return extractFromString(get_argument(name)); + } + + std::vector get_arguments(const std::string& name) const { + return arguments.at(name); } template @@ -105,7 +110,7 @@ class RequestContext { ByteRange byteRange_; std::map headers; - std::map arguments; + std::map> arguments; private: // functions static MHD_Result fill_header(void *, enum MHD_ValueKind, const char*, const char*); From 22996e4a6be6d130dd5fbbd2e1b89fba75bad041 Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Wed, 20 Apr 2022 10:20:41 +0200 Subject: [PATCH 09/29] Allow user to select multiple books when doing search. --- src/server/internalServer.cpp | 49 +++++++++++++++++++++-------------- src/server/internalServer.h | 6 ++--- 2 files changed, 32 insertions(+), 23 deletions(-) diff --git a/src/server/internalServer.cpp b/src/server/internalServer.cpp index 7fad45d4e..c3740ec5f 100644 --- a/src/server/internalServer.cpp +++ b/src/server/internalServer.cpp @@ -109,8 +109,7 @@ SearchInfo::SearchInfo(const std::string& pattern, GeoQuery geoQuery) SearchInfo::SearchInfo(const RequestContext& request) : pattern(request.get_optional_param("pattern", "")), - geoQuery(), - bookName(request.get_optional_param("content", "")) + geoQuery() { /* Retrive geo search */ try { @@ -124,6 +123,11 @@ SearchInfo::SearchInfo(const RequestContext& request) if (!geoQuery && pattern.empty()) { throw std::invalid_argument("No query provided."); } + + try { + auto content_vector = request.get_arguments("content"); + bookNames = std::set(content_vector.begin(), content_vector.end()); + } catch (const std::out_of_range&) {} } zim::Query SearchInfo::getZimQuery(bool verbose) const { @@ -580,16 +584,6 @@ std::unique_ptr InternalServer::handle_search(const RequestContext& re try { auto searchInfo = SearchInfo(request); - std::string bookId; - if (!searchInfo.bookName.empty()) { - try { - bookId = mp_nameMapper->getIdForName(searchInfo.bookName); - } catch (const std::out_of_range&) { - throw std::invalid_argument("The requested book doesn't exist."); - } - } - - /* Make the search */ // Try to get a search from the searchInfo, else build it std::shared_ptr search; @@ -597,8 +591,15 @@ std::unique_ptr InternalServer::handle_search(const RequestContext& re search = searchCache.getOrPut(searchInfo, [=](){ Library::BookIdSet bookIds; - if(!bookId.empty()) { - bookIds.insert(bookId); + if(!searchInfo.bookNames.empty()) { + for(const auto& bookName: searchInfo.bookNames) { + try { + auto bookId = mp_nameMapper->getIdForName(bookName); + bookIds.insert(bookId); + } catch(const std::out_of_range&) { + throw std::invalid_argument("The requested book doesn't exist."); + } + } } else { for (auto& bookId: mp_library->filter(kiwix::Filter().local(true).valid(true))) { bookIds.insert(bookId); @@ -613,15 +614,18 @@ std::unique_ptr InternalServer::handle_search(const RequestContext& re // (in case of zim file not containing a index) const auto cssUrl = renderUrl(m_root, RESOURCE::templates::url_of_search_results_css); HTTPErrorHtmlResponse response(*this, request, MHD_HTTP_NOT_FOUND, - "fulltext-search-unavailable", - "404-page-heading", + "fulltext-search-unavailable", + "404-page-heading", cssUrl); response += nonParameterizedMessage("no-search-results"); - response += TaskbarInfo(searchInfo.bookName, bookId.empty()?nullptr:mp_library->getArchiveById(bookId).get()); + if(searchInfo.bookNames.size() == 1) { + auto bookName =*searchInfo.bookNames.begin(); + auto bookId = mp_nameMapper->getIdForName(bookName); + response += TaskbarInfo(bookName, mp_library->getArchiveById(bookId).get()); + } return response; } - auto start = 0; try { start = request.get_argument("start"); @@ -642,12 +646,17 @@ std::unique_ptr InternalServer::handle_search(const RequestContext& re SearchRenderer renderer(search->getResults(start, pageLength), mp_nameMapper, mp_library, start, search->getEstimatedMatches()); renderer.setSearchPattern(searchInfo.pattern); - renderer.setSearchContent(searchInfo.bookName); + //[TODO] + //renderer.setSearchContent(searchInfo.bookNames); renderer.setProtocolPrefix(m_root + "/"); renderer.setSearchProtocolPrefix(m_root + "/search"); renderer.setPageLength(pageLength); auto response = ContentResponse::build(*this, renderer.getHtml(), "text/html; charset=utf-8"); - response->set_taskbar(searchInfo.bookName, bookId.empty()?nullptr:mp_library->getArchiveById(bookId).get()); + if(searchInfo.bookNames.size() == 1) { + auto bookName = *searchInfo.bookNames.begin(); + auto bookId = mp_nameMapper->getIdForName(bookName); + response->set_taskbar(bookName, mp_library->getArchiveById(bookId).get()); + } return std::move(response); } catch (const std::invalid_argument& e) { return HTTP400HtmlResponse(*this, request) diff --git a/src/server/internalServer.h b/src/server/internalServer.h index 2d6da476b..280172054 100644 --- a/src/server/internalServer.h +++ b/src/server/internalServer.h @@ -76,14 +76,14 @@ class SearchInfo { friend bool operator<(const SearchInfo& l, const SearchInfo& r) { - return std::tie(l.bookName, l.pattern, l.geoQuery) - < std::tie(r.bookName, r.pattern, r.geoQuery); // keep the same order + return std::tie(l.bookNames, l.pattern, l.geoQuery) + < std::tie(r.bookNames, r.pattern, r.geoQuery); // keep the same order } public: //data std::string pattern; GeoQuery geoQuery; - std::string bookName; + std::set bookNames; }; From 76ebfd7ea4274d93d5f737596fc1d7333044a25a Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Wed, 30 Mar 2022 11:45:42 +0200 Subject: [PATCH 10/29] Move get_search_filter and subrange. --- src/server/internalServer.cpp | 73 ++++++++++++++++------------------- 1 file changed, 34 insertions(+), 39 deletions(-) diff --git a/src/server/internalServer.cpp b/src/server/internalServer.cpp index c3740ec5f..a725674d4 100644 --- a/src/server/internalServer.cpp +++ b/src/server/internalServer.cpp @@ -95,6 +95,40 @@ inline std::string normalizeRootUrl(std::string rootUrl) return rootUrl.empty() ? rootUrl : "/" + rootUrl; } +Filter get_search_filter(const RequestContext& request) +{ + auto filter = kiwix::Filter().valid(true).local(true); + try { + filter.query(request.get_argument("q")); + } catch (const std::out_of_range&) {} + try { + filter.maxSize(request.get_argument("maxsize")); + } catch (...) {} + try { + filter.name(request.get_argument("name")); + } catch (const std::out_of_range&) {} + try { + filter.category(request.get_argument("category")); + } catch (const std::out_of_range&) {} + try { + filter.lang(request.get_argument("lang")); + } catch (const std::out_of_range&) {} + try { + filter.acceptTags(kiwix::split(request.get_argument("tag"), ";")); + } catch (...) {} + try { + filter.rejectTags(kiwix::split(request.get_argument("notag"), ";")); + } catch (...) {} + return filter; +} + +template +std::vector subrange(const std::vector& v, size_t s, size_t n) +{ + const size_t e = std::min(v.size(), s+n); + return std::vector(v.begin()+std::min(v.size(), s), v.begin()+e); +} + } // unnamed namespace SearchInfo::SearchInfo(const std::string& pattern) @@ -764,45 +798,6 @@ std::unique_ptr InternalServer::handle_catalog(const RequestContext& r return std::move(response); } -namespace -{ - -Filter get_search_filter(const RequestContext& request) -{ - auto filter = kiwix::Filter().valid(true).local(true); - try { - filter.query(request.get_argument("q")); - } catch (const std::out_of_range&) {} - try { - filter.maxSize(request.get_argument("maxsize")); - } catch (...) {} - try { - filter.name(request.get_argument("name")); - } catch (const std::out_of_range&) {} - try { - filter.category(request.get_argument("category")); - } catch (const std::out_of_range&) {} - try { - filter.lang(request.get_argument("lang")); - } catch (const std::out_of_range&) {} - try { - filter.acceptTags(kiwix::split(request.get_argument("tag"), ";")); - } catch (...) {} - try { - filter.rejectTags(kiwix::split(request.get_argument("notag"), ";")); - } catch (...) {} - return filter; -} - -template -std::vector subrange(const std::vector& v, size_t s, size_t n) -{ - const size_t e = std::min(v.size(), s+n); - return std::vector(v.begin()+std::min(v.size(), s), v.begin()+e); -} - -} // unnamed namespace - std::vector InternalServer::search_catalog(const RequestContext& request, kiwix::OPDSDumper& opdsDumper) From 4438106c2fb576b74a01850dc6d6cdf179a7664a Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Wed, 20 Apr 2022 10:24:34 +0200 Subject: [PATCH 11/29] Add a prefix in get_search_filter The prefix will be used to parse a "query to select book" in different context. For now we have only one context : selecting books for the catalog search. But we will want to select books to do fulltext search on them (will be done in later commit) --- src/server/internalServer.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/server/internalServer.cpp b/src/server/internalServer.cpp index a725674d4..6a5203089 100644 --- a/src/server/internalServer.cpp +++ b/src/server/internalServer.cpp @@ -95,29 +95,29 @@ inline std::string normalizeRootUrl(std::string rootUrl) return rootUrl.empty() ? rootUrl : "/" + rootUrl; } -Filter get_search_filter(const RequestContext& request) +Filter get_search_filter(const RequestContext& request, const std::string& prefix="") { auto filter = kiwix::Filter().valid(true).local(true); try { - filter.query(request.get_argument("q")); + filter.query(request.get_argument(prefix+"q")); } catch (const std::out_of_range&) {} try { - filter.maxSize(request.get_argument("maxsize")); + filter.maxSize(request.get_argument(prefix+"maxsize")); } catch (...) {} try { - filter.name(request.get_argument("name")); + filter.name(request.get_argument(prefix+"name")); } catch (const std::out_of_range&) {} try { - filter.category(request.get_argument("category")); + filter.category(request.get_argument(prefix+"category")); } catch (const std::out_of_range&) {} try { - filter.lang(request.get_argument("lang")); + filter.lang(request.get_argument(prefix+"lang")); } catch (const std::out_of_range&) {} try { - filter.acceptTags(kiwix::split(request.get_argument("tag"), ";")); + filter.acceptTags(kiwix::split(request.get_argument(prefix+"tag"), ";")); } catch (...) {} try { - filter.rejectTags(kiwix::split(request.get_argument("notag"), ";")); + filter.rejectTags(kiwix::split(request.get_argument(prefix+"notag"), ";")); } catch (...) {} return filter; } From 76d5fafb726a649c580eeea83133dc1817a00765 Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Wed, 30 Mar 2022 11:48:06 +0200 Subject: [PATCH 12/29] Introduce `selectBooks` `selectBooks` allow us to parse a query in a "standard" way to get the book(s) on which the user want to work. --- src/server/internalServer.cpp | 50 +++++++++++++++++++++++++++++++++++ src/server/internalServer.h | 1 + 2 files changed, 51 insertions(+) diff --git a/src/server/internalServer.cpp b/src/server/internalServer.cpp index 6a5203089..b646afdc4 100644 --- a/src/server/internalServer.cpp +++ b/src/server/internalServer.cpp @@ -131,6 +131,56 @@ std::vector subrange(const std::vector& v, size_t s, size_t n) } // unnamed namespace +Library::BookIdSet InternalServer::selectBooks(const RequestContext& request) const +{ + // Try old API + try { + auto bookName = request.get_argument("content"); + try { + return {mp_nameMapper->getIdForName(bookName)}; + } catch (const std::out_of_range&) { + throw std::invalid_argument("The requested book doesn't exist."); + } + } catch(const std::out_of_range&) { + // We've catch the out_of_range of get_argument + // continue + } + + // Does user directly gives us ids ? + try { + auto id_vec = request.get_arguments("books.id"); + if (id_vec.empty()) { + throw std::invalid_argument("You must provide a value for the id."); + } + return Library::BookIdSet(id_vec.begin(), id_vec.end()); + } catch(const std::out_of_range&) {} + + // Use the names + try { + auto name_vec = request.get_arguments("books.name"); + if (name_vec.empty()) { + throw std::invalid_argument("You must provide a value for the name."); + } + Library::BookIdSet bookIds; + for(const auto& bookName: name_vec) { + try { + bookIds.insert(mp_nameMapper->getIdForName(bookName)); + } catch(const std::out_of_range&) { + throw std::invalid_argument("The requested book doesn't exist."); + } + } + return bookIds; + } catch(const std::out_of_range&) {} + + // Check for filtering + Filter filter = get_search_filter(request, "books.filter."); + auto id_vec = mp_library->filter(filter); + if (id_vec.empty()) { + throw std::invalid_argument("No books found."); + } + return Library::BookIdSet(id_vec.begin(), id_vec.end()); +} + SearchInfo::SearchInfo(const std::string& pattern) : pattern(pattern), geoQuery() diff --git a/src/server/internalServer.h b/src/server/internalServer.h index 280172054..57f6975aa 100644 --- a/src/server/internalServer.h +++ b/src/server/internalServer.h @@ -149,6 +149,7 @@ class InternalServer { bool etag_not_needed(const RequestContext& r) const; ETag get_matching_if_none_match_etag(const RequestContext& request) const; + Library::BookIdSet selectBooks(const RequestContext& r) const; private: // data std::string m_addr; From 39d0a56be8b09079039a380f68e4b74f35e72b21 Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Wed, 20 Apr 2022 10:32:24 +0200 Subject: [PATCH 13/29] Use selectBooks in handle_search --- src/server/internalServer.cpp | 59 ++++++++++++----------------------- src/server/internalServer.h | 12 +++---- 2 files changed, 26 insertions(+), 45 deletions(-) diff --git a/src/server/internalServer.cpp b/src/server/internalServer.cpp index b646afdc4..bc8bb6c9a 100644 --- a/src/server/internalServer.cpp +++ b/src/server/internalServer.cpp @@ -181,20 +181,12 @@ Library::BookIdSet InternalServer::selectBooks(const RequestContext& request) co return Library::BookIdSet(id_vec.begin(), id_vec.end()); } -SearchInfo::SearchInfo(const std::string& pattern) - : pattern(pattern), - geoQuery() -{} - -SearchInfo::SearchInfo(const std::string& pattern, GeoQuery geoQuery) - : pattern(pattern), - geoQuery(geoQuery) -{} - -SearchInfo::SearchInfo(const RequestContext& request) - : pattern(request.get_optional_param("pattern", "")), - geoQuery() +SearchInfo InternalServer::getSearchInfo(const RequestContext& request) const { + auto bookIds = selectBooks(request); + auto pattern = request.get_optional_param("pattern", ""); + GeoQuery geoQuery; + /* Retrive geo search */ try { auto latitude = request.get_argument("latitude"); @@ -208,12 +200,15 @@ SearchInfo::SearchInfo(const RequestContext& request) throw std::invalid_argument("No query provided."); } - try { - auto content_vector = request.get_arguments("content"); - bookNames = std::set(content_vector.begin(), content_vector.end()); - } catch (const std::out_of_range&) {} + return SearchInfo(pattern, geoQuery, bookIds); } +SearchInfo::SearchInfo(const std::string& pattern, GeoQuery geoQuery, const Library::BookIdSet& bookIds) + : pattern(pattern), + geoQuery(geoQuery), + bookIds(bookIds) +{} + zim::Query SearchInfo::getZimQuery(bool verbose) const { zim::Query query; if (verbose) { @@ -666,7 +661,8 @@ std::unique_ptr InternalServer::handle_search(const RequestContext& re } try { - auto searchInfo = SearchInfo(request); + auto searchInfo = getSearchInfo(request); + auto bookIds = searchInfo.getBookIds(); /* Make the search */ // Try to get a search from the searchInfo, else build it @@ -674,21 +670,6 @@ std::unique_ptr InternalServer::handle_search(const RequestContext& re try { search = searchCache.getOrPut(searchInfo, [=](){ - Library::BookIdSet bookIds; - if(!searchInfo.bookNames.empty()) { - for(const auto& bookName: searchInfo.bookNames) { - try { - auto bookId = mp_nameMapper->getIdForName(bookName); - bookIds.insert(bookId); - } catch(const std::out_of_range&) { - throw std::invalid_argument("The requested book doesn't exist."); - } - } - } else { - for (auto& bookId: mp_library->filter(kiwix::Filter().local(true).valid(true))) { - bookIds.insert(bookId); - } - } auto searcher = mp_library->getSearcherByIds(bookIds); return make_shared(searcher->search(searchInfo.getZimQuery(m_verbose.load()))); } @@ -702,9 +683,9 @@ std::unique_ptr InternalServer::handle_search(const RequestContext& re "404-page-heading", cssUrl); response += nonParameterizedMessage("no-search-results"); - if(searchInfo.bookNames.size() == 1) { - auto bookName =*searchInfo.bookNames.begin(); - auto bookId = mp_nameMapper->getIdForName(bookName); + if(bookIds.size() == 1) { + auto bookId = *bookIds.begin(); + auto bookName = mp_nameMapper->getNameForId(bookId); response += TaskbarInfo(bookName, mp_library->getArchiveById(bookId).get()); } return response; @@ -736,9 +717,9 @@ std::unique_ptr InternalServer::handle_search(const RequestContext& re renderer.setSearchProtocolPrefix(m_root + "/search"); renderer.setPageLength(pageLength); auto response = ContentResponse::build(*this, renderer.getHtml(), "text/html; charset=utf-8"); - if(searchInfo.bookNames.size() == 1) { - auto bookName = *searchInfo.bookNames.begin(); - auto bookId = mp_nameMapper->getIdForName(bookName); + if(bookIds.size() == 1) { + auto bookId = *bookIds.begin(); + auto bookName = mp_nameMapper->getNameForId(bookId); response->set_taskbar(bookName, mp_library->getArchiveById(bookId).get()); } return std::move(response); diff --git a/src/server/internalServer.h b/src/server/internalServer.h index 57f6975aa..6200e2564 100644 --- a/src/server/internalServer.h +++ b/src/server/internalServer.h @@ -68,22 +68,21 @@ struct GeoQuery { class SearchInfo { public: - SearchInfo(const std::string& pattern); - SearchInfo(const std::string& pattern, GeoQuery geoQuery); - SearchInfo(const RequestContext& request); + SearchInfo(const std::string& pattern, GeoQuery geoQuery, const Library::BookIdSet& bookIds); zim::Query getZimQuery(bool verbose) const; + const Library::BookIdSet& getBookIds() const { return bookIds; } friend bool operator<(const SearchInfo& l, const SearchInfo& r) { - return std::tie(l.bookNames, l.pattern, l.geoQuery) - < std::tie(r.bookNames, r.pattern, r.geoQuery); // keep the same order + return std::tie(l.bookIds, l.pattern, l.geoQuery) + < std::tie(r.bookIds, r.pattern, r.geoQuery); // keep the same order } public: //data std::string pattern; GeoQuery geoQuery; - std::set bookNames; + Library::BookIdSet bookIds; }; @@ -150,6 +149,7 @@ class InternalServer { bool etag_not_needed(const RequestContext& r) const; ETag get_matching_if_none_match_etag(const RequestContext& request) const; Library::BookIdSet selectBooks(const RequestContext& r) const; + SearchInfo getSearchInfo(const RequestContext& r) const; private: // data std::string m_addr; From 077ceac5a54b66fe3b3af270ea789de2c3b1a561 Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Tue, 22 Mar 2022 16:38:47 +0100 Subject: [PATCH 14/29] Make the search_rendered handle multizim search. This introduce a intermediate mustache object to store information about the request made by the user. --- include/search_renderer.h | 6 +++--- src/search_renderer.cpp | 12 +++++++----- src/server/internalServer.cpp | 3 +-- test/server.cpp | 2 +- 4 files changed, 12 insertions(+), 11 deletions(-) diff --git a/include/search_renderer.h b/include/search_renderer.h index cd499d13d..10721be07 100644 --- a/include/search_renderer.h +++ b/include/search_renderer.h @@ -81,9 +81,9 @@ class SearchRenderer void setSearchPattern(const std::string& pattern); /** - * Set the search content id. + * Set the book names used to do the search. */ - void setSearchContent(const std::string& name); + void setSearchBookIds(const std::set& bookIds); /** * Set protocol prefix. @@ -112,7 +112,7 @@ class SearchRenderer zim::SearchResultSet m_srs; NameMapper* mp_nameMapper; Library* mp_library; - std::string searchContent; + std::set searchBookIds; std::string searchPattern; std::string protocolPrefix; std::string searchProtocolPrefix; diff --git a/src/search_renderer.cpp b/src/search_renderer.cpp index 8f9685563..2c4e29f0b 100644 --- a/src/search_renderer.cpp +++ b/src/search_renderer.cpp @@ -71,9 +71,9 @@ void SearchRenderer::setSearchPattern(const std::string& pattern) searchPattern = pattern; } -void SearchRenderer::setSearchContent(const std::string& content) +void SearchRenderer::setSearchBookIds(const std::set& bookIds) { - searchContent = content; + searchBookIds = bookIds; } void SearchRenderer::setProtocolPrefix(const std::string& prefix) @@ -90,13 +90,15 @@ kainjow::mustache::data buildQueryData ( const std::string& searchProtocolPrefix, const std::string& pattern, - const std::string& searchContent + const std::set& bookIds ) { kainjow::mustache::data query; query.set("pattern", kiwix::encodeDiples(pattern)); std::ostringstream ss; ss << searchProtocolPrefix << "?pattern=" << urlEncode(pattern, true); - ss << "&content=" << urlEncode(searchContent, true); + for (auto& bookId: bookIds) { + ss << "&books.id="< InternalServer::handle_search(const RequestContext& re SearchRenderer renderer(search->getResults(start, pageLength), mp_nameMapper, mp_library, start, search->getEstimatedMatches()); renderer.setSearchPattern(searchInfo.pattern); - //[TODO] - //renderer.setSearchContent(searchInfo.bookNames); + renderer.setSearchBookIds(bookIds); renderer.setProtocolPrefix(m_root + "/"); renderer.setSearchProtocolPrefix(m_root + "/search"); renderer.setPageLength(pageLength); diff --git a/test/server.cpp b/test/server.cpp index 4054a49ba..8b4e6b48b 100644 --- a/test/server.cpp +++ b/test/server.cpp @@ -1611,7 +1611,7 @@ TEST_F(TaskbarlessServerTest, searchResults) static std::string makeUrl(const std::string pattern, int start, size_t resultsPerPage) { - std::string url = "/ROOT/search?pattern=" + pattern + "&content=zimfile"; + std::string url = "/ROOT/search?pattern=" + pattern + "&books.id=6f1d19d0-633f-087b-fb55-7ac324ff9baf"; if ( start >= 0 ) { url += "&start=" + to_string(start); From c72132054dfaa6715d6eef087f24f0e3dd4c9ae1 Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Thu, 21 Apr 2022 15:40:18 +0200 Subject: [PATCH 15/29] Move i18n helper functions --- src/server/internalServer.cpp | 95 +++++++++++++++++------------------ 1 file changed, 45 insertions(+), 50 deletions(-) diff --git a/src/server/internalServer.cpp b/src/server/internalServer.cpp index 3f31b1ce6..6082bdbee 100644 --- a/src/server/internalServer.cpp +++ b/src/server/internalServer.cpp @@ -129,6 +129,51 @@ std::vector subrange(const std::vector& v, size_t s, size_t n) return std::vector(v.begin()+std::min(v.size(), s), v.begin()+e); } +std::string renderUrl(const std::string& root, const std::string& urlTemplate) +{ + MustacheData data; + data.set("root", root); + auto url = kainjow::mustache::mustache(urlTemplate).render(data); + if ( url.back() == '\n' ) + url.pop_back(); + return url; +} + +std::string makeFulltextSearchSuggestion(const std::string& lang, const std::string& queryString) +{ + return i18n::expandParameterizedString(lang, "suggest-full-text-search", + { + {"SEARCH_TERMS", queryString} + } + ); +} + +ParameterizedMessage noSuchBookErrorMsg(const std::string& bookName) +{ + return ParameterizedMessage("no-such-book", { {"BOOK_NAME", bookName} }); +} + +ParameterizedMessage invalidRawAccessMsg(const std::string& dt) +{ + return ParameterizedMessage("invalid-raw-data-type", { {"DATATYPE", dt} }); +} + +ParameterizedMessage rawEntryNotFoundMsg(const std::string& dt, const std::string& entry) +{ + return ParameterizedMessage("raw-entry-not-found", + { + {"DATATYPE", dt}, + {"ENTRY", entry}, + } + ); +} + +ParameterizedMessage nonParameterizedMessage(const std::string& msgId) +{ + const ParameterizedMessage::Parameters noParams; + return ParameterizedMessage(msgId, noParams); +} + } // unnamed namespace Library::BookIdSet InternalServer::selectBooks(const RequestContext& request) const @@ -509,56 +554,6 @@ SuggestionsList_t getSuggestions(SuggestionSearcherCache& cache, const zim::Arch return suggestions; } -namespace -{ - -std::string renderUrl(const std::string& root, const std::string& urlTemplate) -{ - MustacheData data; - data.set("root", root); - auto url = kainjow::mustache::mustache(urlTemplate).render(data); - if ( url.back() == '\n' ) - url.pop_back(); - return url; -} - -std::string makeFulltextSearchSuggestion(const std::string& lang, const std::string& queryString) -{ - return i18n::expandParameterizedString(lang, "suggest-full-text-search", - { - {"SEARCH_TERMS", queryString} - } - ); -} - -ParameterizedMessage noSuchBookErrorMsg(const std::string& bookName) -{ - return ParameterizedMessage("no-such-book", { {"BOOK_NAME", bookName} }); -} - -ParameterizedMessage invalidRawAccessMsg(const std::string& dt) -{ - return ParameterizedMessage("invalid-raw-data-type", { {"DATATYPE", dt} }); -} - -ParameterizedMessage rawEntryNotFoundMsg(const std::string& dt, const std::string& entry) -{ - return ParameterizedMessage("raw-entry-not-found", - { - {"DATATYPE", dt}, - {"ENTRY", entry}, - } - ); -} - -ParameterizedMessage nonParameterizedMessage(const std::string& msgId) -{ - const ParameterizedMessage::Parameters noParams; - return ParameterizedMessage(msgId, noParams); -} - -} // unnamed namespace - std::unique_ptr InternalServer::handle_suggest(const RequestContext& request) { if (m_verbose.load()) { From f0065fdd6fde0d8dbdefed4ac657baedfe41c314 Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Thu, 21 Apr 2022 15:41:14 +0200 Subject: [PATCH 16/29] Introduce Error exception to do i18n --- src/server/internalServer.cpp | 35 +++++++++++++++++++++++++++-------- static/i18n/en.json | 7 +++++-- static/i18n/qqq.json | 6 +++++- test/server.cpp | 4 ++-- 4 files changed, 39 insertions(+), 13 deletions(-) diff --git a/src/server/internalServer.cpp b/src/server/internalServer.cpp index 6082bdbee..b667d47b9 100644 --- a/src/server/internalServer.cpp +++ b/src/server/internalServer.cpp @@ -158,6 +158,11 @@ ParameterizedMessage invalidRawAccessMsg(const std::string& dt) return ParameterizedMessage("invalid-raw-data-type", { {"DATATYPE", dt} }); } +ParameterizedMessage noValueForArgMsg(const std::string& argument) +{ + return ParameterizedMessage("no-value-for-arg", { {"ARGUMENT", argument} }); +} + ParameterizedMessage rawEntryNotFoundMsg(const std::string& dt, const std::string& entry) { return ParameterizedMessage("raw-entry-not-found", @@ -174,6 +179,20 @@ ParameterizedMessage nonParameterizedMessage(const std::string& msgId) return ParameterizedMessage(msgId, noParams); } +struct Error : public std::runtime_error { + explicit Error(const ParameterizedMessage& message) + : std::runtime_error("Error while handling request"), + _message(message) + {} + + const ParameterizedMessage& message() const + { + return _message; + } + + const ParameterizedMessage _message; +}; + } // unnamed namespace Library::BookIdSet InternalServer::selectBooks(const RequestContext& request) const @@ -184,7 +203,7 @@ Library::BookIdSet InternalServer::selectBooks(const RequestContext& request) co try { return {mp_nameMapper->getIdForName(bookName)}; } catch (const std::out_of_range&) { - throw std::invalid_argument("The requested book doesn't exist."); + throw Error(noSuchBookErrorMsg(bookName)); } } catch(const std::out_of_range&) { // We've catch the out_of_range of get_argument @@ -195,7 +214,7 @@ Library::BookIdSet InternalServer::selectBooks(const RequestContext& request) co try { auto id_vec = request.get_arguments("books.id"); if (id_vec.empty()) { - throw std::invalid_argument("You must provide a value for the id."); + throw Error(noValueForArgMsg("books.id")); } return Library::BookIdSet(id_vec.begin(), id_vec.end()); } catch(const std::out_of_range&) {} @@ -204,14 +223,14 @@ Library::BookIdSet InternalServer::selectBooks(const RequestContext& request) co try { auto name_vec = request.get_arguments("books.name"); if (name_vec.empty()) { - throw std::invalid_argument("You must provide a value for the name."); + throw Error(noValueForArgMsg("books.name")); } Library::BookIdSet bookIds; for(const auto& bookName: name_vec) { try { bookIds.insert(mp_nameMapper->getIdForName(bookName)); } catch(const std::out_of_range&) { - throw std::invalid_argument("The requested book doesn't exist."); + throw Error(noSuchBookErrorMsg(bookName)); } } return bookIds; @@ -221,7 +240,7 @@ Library::BookIdSet InternalServer::selectBooks(const RequestContext& request) co Filter filter = get_search_filter(request, "books.filter."); auto id_vec = mp_library->filter(filter); if (id_vec.empty()) { - throw std::invalid_argument("No books found."); + throw Error(nonParameterizedMessage("no-book-found")); } return Library::BookIdSet(id_vec.begin(), id_vec.end()); } @@ -242,7 +261,7 @@ SearchInfo InternalServer::getSearchInfo(const RequestContext& request) const catch(const std::invalid_argument&) {} if (!geoQuery && pattern.empty()) { - throw std::invalid_argument("No query provided."); + throw Error(nonParameterizedMessage("no-query")); } return SearchInfo(pattern, geoQuery, bookIds); @@ -717,10 +736,10 @@ std::unique_ptr InternalServer::handle_search(const RequestContext& re response->set_taskbar(bookName, mp_library->getArchiveById(bookId).get()); } return std::move(response); - } catch (const std::invalid_argument& e) { + } catch (const Error& e) { return HTTP400HtmlResponse(*this, request) + invalidUrlMsg - + std::string(e.what()); + + e.message(); } } diff --git a/static/i18n/en.json b/static/i18n/en.json index 183f8b9f4..c67e190c4 100644 --- a/static/i18n/en.json +++ b/static/i18n/en.json @@ -4,12 +4,15 @@ ] }, "name":"English", - "suggest-full-text-search": "containing '{{{SEARCH_TERMS}}}'..." - , "no-such-book": "No such book: {{BOOK_NAME}}" + "suggest-full-text-search" : "containing '{{{SEARCH_TERMS}}}'..." + , "no-such-book" : "No such book: {{BOOK_NAME}}" + , "no-book-found" : "No book matches selection criteria" , "url-not-found" : "The requested URL \"{{url}}\" was not found on this server." , "suggest-search" : "Make a full text search for {{PATTERN}}" , "random-article-failure" : "Oops! Failed to pick a random article :(" , "invalid-raw-data-type" : "{{DATATYPE}} is not a valid request for raw content." + , "no-value-for-arg": "No value provided for argument {{ARGUMENT}}" + , "no-query" : "No query provided." , "raw-entry-not-found" : "Cannot find {{DATATYPE}} entry {{ENTRY}}" , "400-page-title" : "Invalid request" , "400-page-heading" : "Invalid request" diff --git a/static/i18n/qqq.json b/static/i18n/qqq.json index edebb31dc..6a3f10caa 100644 --- a/static/i18n/qqq.json +++ b/static/i18n/qqq.json @@ -2,16 +2,20 @@ "@metadata": { "authors": [ "Veloman Yunkan", - "Verdy p" + "Verdy p", + "Matthieu Gautier" ] }, "name": "{{Doc-important|Don't write \"English\" in your language!}}\n\n'''Write the name of ''your'' language in its native script.'''\n\nCurrent language to which the string is being translated to.\n\nFor example, write \"français\" when translating to French, or \"Deutsch\" when translating to German.\n\n'''Important:''' Do not use your language’s word for “English”. Use the word that your language uses to refer to itself. If you translate this message to mean “English” in your language, your change will be reverted.", "suggest-full-text-search": "Text appearing in the suggestion list that, when selected, runs a full text search instead of the title search", "no-such-book": "Error text when the requested book is not found in the library", "url-not-found": "Error text about wrong URL for an HTTP 404 error", + "no-book-found": "Error text when no book matches the selection criteria", "suggest-search": "Suggest a search when the URL points to a non existing article", "random-article-failure": "Failure of the random article selection procedure", "invalid-raw-data-type": "Invalid DATATYPE was used with the /raw endpoint (/raw//DATATYPE/...); allowed values are 'meta' and 'content'", + "no-value-for-arg" : "Error text when no value has been provided for ARGUMENT in the request's query string", + "no-query" : "Error text when no query has been provided for fulltext search", "raw-entry-not-found": "Entry requested via the /raw endpoint was not found", "400-page-title": "Title of the 400 error page", "400-page-heading": "Heading of the 400 error page", diff --git a/test/server.cpp b/test/server.cpp index 8b4e6b48b..128ed59ef 100644 --- a/test/server.cpp +++ b/test/server.cpp @@ -919,7 +919,7 @@ TEST_F(ServerTest, 400WithBodyTesting) The requested URL "/ROOT/search?content=non-existing-book&pattern=asdfqwerty" is not a valid request.

- The requested book doesn't exist. + No such book: non-existing-book

)" }, { /* url */ "/ROOT/search?content=non-existing-book&pattern=a\"