diff --git a/src/book.cpp b/src/book.cpp index 22b9e6931..cf340c04a 100644 --- a/src/book.cpp +++ b/src/book.cpp @@ -66,7 +66,7 @@ bool Book::update(const kiwix::Book& other) void Book::update(const zim::Archive& archive) { m_path = archive.getFilename(); m_pathValid = true; - m_id = getArchiveId(archive); + m_id = std::string(archive.getUuid()); m_title = getArchiveTitle(archive); m_description = getMetaDescription(archive); m_language = getMetaLanguage(archive); diff --git a/src/server/internalServer.cpp b/src/server/internalServer.cpp index dd65d7e17..378f516fd 100644 --- a/src/server/internalServer.cpp +++ b/src/server/internalServer.cpp @@ -77,7 +77,6 @@ extern "C" { #include "request_context.h" #include "response.h" -#define MAX_SEARCH_LEN 140 #define DEFAULT_CACHE_SIZE 2 namespace kiwix { @@ -212,6 +211,16 @@ void checkBookNumber(const Library::BookIdSet& bookIds, size_t limit) { } } +typedef std::set Languages; + +Languages getLanguages(const Library& lib, const Library::BookIdSet& bookIds) { + Languages langs; + for ( const auto& b : bookIds ) { + langs.insert(lib.getBookById(b).getLanguage()); + } + return langs; +} + struct CustomizedResourceData { std::string mimeType; @@ -307,6 +316,10 @@ SearchInfo InternalServer::getSearchInfo(const RequestContext& request) const { auto bookIds = selectBooks(request); checkBookNumber(bookIds.second, m_multizimSearchLimit); + if ( getLanguages(*mp_library, bookIds.second).size() != 1 ) { + throw Error(nonParameterizedMessage("confusion-of-tongues")); + } + auto pattern = request.get_optional_param("pattern", ""); GeoQuery geoQuery; @@ -813,86 +826,93 @@ std::unique_ptr InternalServer::handle_search(const RequestContext& re } try { - auto searchInfo = getSearchInfo(request); - auto bookIds = searchInfo.getBookIds(); + return handle_search_request(request); + } catch (const Error& e) { + return HTTP400Response(*this, request) + + invalidUrlMsg + + e.message(); + } +} - /* Make the search */ - // Try to get a search from the searchInfo, else build it - auto searcher = mp_library->getSearcherByIds(bookIds); - auto lock(searcher->getLock()); +namespace +{ - std::shared_ptr search; - try { - search = searchCache.getOrPut(searchInfo, - [=](){ - return make_shared(searcher->search(searchInfo.getZimQuery(m_verbose.load()))); - } - ); - } catch(std::runtime_error& e) { - // Searcher->search will throw a runtime error if there is no valid xapian database to do the search. - // (in case of zim file not containing a index) - const auto cssUrl = renderUrl(m_root, RESOURCE::templates::url_of_search_results_css); - HTTPErrorResponse response(*this, request, MHD_HTTP_NOT_FOUND, - "fulltext-search-unavailable", - "404-page-heading", - cssUrl); - response += nonParameterizedMessage("no-search-results"); - // XXX: Now this has to be handled by the iframe-based viewer which - // XXX: has to resolve if the book selection resulted in a single book. - /* - if(bookIds.size() == 1) { - auto bookId = *bookIds.begin(); - auto bookName = mp_nameMapper->getNameForId(bookId); - response += TaskbarInfo(bookName, mp_library->getArchiveById(bookId).get()); +unsigned getSearchPageSize(const RequestContext& r) +{ + const auto DEFAULT_PAGE_LEN = 25u; + const auto MAX_PAGE_LEN = 140u; + + const auto pageLength = r.get_optional_param("pageLength", DEFAULT_PAGE_LEN); + return pageLength == 0 + ? DEFAULT_PAGE_LEN + : min(MAX_PAGE_LEN, pageLength); +} + +} // unnamed namespace + +std::unique_ptr InternalServer::handle_search_request(const RequestContext& request) +{ + auto searchInfo = getSearchInfo(request); + auto bookIds = searchInfo.getBookIds(); + + /* Make the search */ + // Try to get a search from the searchInfo, else build it + auto searcher = mp_library->getSearcherByIds(bookIds); + auto lock(searcher->getLock()); + + std::shared_ptr search; + try { + search = searchCache.getOrPut(searchInfo, + [=](){ + return make_shared(searcher->search(searchInfo.getZimQuery(m_verbose.load()))); } - */ - return response; - } - - auto start = 1; - try { - start = request.get_argument("start"); - } catch (const std::exception&) {} - start = max(1, start); - - auto pageLength = 25; - try { - pageLength = request.get_argument("pageLength"); - } catch (const std::exception&) {} - if (pageLength > MAX_SEARCH_LEN) { - pageLength = MAX_SEARCH_LEN; - } - if (pageLength == 0) { - pageLength = 25; - } - - /* Get the results */ - SearchRenderer renderer(search->getResults(start-1, pageLength), mp_nameMapper, mp_library, start, - search->getEstimatedMatches()); - renderer.setSearchPattern(searchInfo.pattern); - renderer.setSearchBookQuery(searchInfo.bookFilterQuery); - renderer.setProtocolPrefix(m_root + "/content/"); - renderer.setSearchProtocolPrefix(m_root + "/search"); - renderer.setPageLength(pageLength); - if (request.get_requested_format() == "xml") { - return ContentResponse::build(*this, renderer.getXml(), "application/rss+xml; charset=utf-8"); - } - auto response = ContentResponse::build(*this, renderer.getHtml(), "text/html; charset=utf-8"); + ); + } catch(std::runtime_error& e) { + // Searcher->search will throw a runtime error if there is no valid xapian database to do the search. + // (in case of zim file not containing a index) + const auto cssUrl = renderUrl(m_root, RESOURCE::templates::url_of_search_results_css); + HTTPErrorResponse response(*this, request, MHD_HTTP_NOT_FOUND, + "fulltext-search-unavailable", + "404-page-heading", + cssUrl); + response += nonParameterizedMessage("no-search-results"); // XXX: Now this has to be handled by the iframe-based viewer which // XXX: has to resolve if the book selection resulted in a single book. /* if(bookIds.size() == 1) { auto bookId = *bookIds.begin(); auto bookName = mp_nameMapper->getNameForId(bookId); - response->set_taskbar(bookName, mp_library->getArchiveById(bookId).get()); + response += TaskbarInfo(bookName, mp_library->getArchiveById(bookId).get()); } */ - return std::move(response); - } catch (const Error& e) { - return HTTP400Response(*this, request) - + invalidUrlMsg - + e.message(); + return response; } + + const auto start = max(1u, request.get_optional_param("start", 1u)); + const auto pageLength = getSearchPageSize(request); + + /* Get the results */ + SearchRenderer renderer(search->getResults(start-1, pageLength), mp_nameMapper, mp_library, start, + search->getEstimatedMatches()); + renderer.setSearchPattern(searchInfo.pattern); + renderer.setSearchBookQuery(searchInfo.bookFilterQuery); + renderer.setProtocolPrefix(m_root + "/content/"); + renderer.setSearchProtocolPrefix(m_root + "/search"); + renderer.setPageLength(pageLength); + if (request.get_requested_format() == "xml") { + return ContentResponse::build(*this, renderer.getXml(), "application/rss+xml; charset=utf-8"); + } + auto response = ContentResponse::build(*this, renderer.getHtml(), "text/html; charset=utf-8"); + // XXX: Now this has to be handled by the iframe-based viewer which + // XXX: has to resolve if the book selection resulted in a single book. + /* + if(bookIds.size() == 1) { + auto bookId = *bookIds.begin(); + auto bookName = mp_nameMapper->getNameForId(bookId); + response->set_taskbar(bookName, mp_library->getArchiveById(bookId).get()); + } + */ + return std::move(response); } std::unique_ptr InternalServer::handle_random(const RequestContext& request) diff --git a/src/server/internalServer.h b/src/server/internalServer.h index 02360443a..c3990c44d 100644 --- a/src/server/internalServer.h +++ b/src/server/internalServer.h @@ -134,6 +134,7 @@ class InternalServer { std::unique_ptr handle_catalog_v2_languages(const RequestContext& request); std::unique_ptr handle_catalog_v2_illustration(const RequestContext& request); std::unique_ptr handle_search(const RequestContext& request); + std::unique_ptr handle_search_request(const RequestContext& request); std::unique_ptr handle_suggest(const RequestContext& request); std::unique_ptr handle_random(const RequestContext& request); std::unique_ptr handle_catch(const RequestContext& request); diff --git a/src/server/request_context.cpp b/src/server/request_context.cpp index 0b9a1a539..5e191afa7 100644 --- a/src/server/request_context.cpp +++ b/src/server/request_context.cpp @@ -107,6 +107,14 @@ MHD_Result RequestContext::fill_argument(void *__this, enum MHD_ValueKind kind, { RequestContext *_this = static_cast(__this); _this->arguments[key].push_back(value == nullptr ? "" : value); + if ( ! _this->queryString.empty() ) { + _this->queryString += "&"; + } + _this->queryString += key; + if ( value ) { + _this->queryString += "="; + _this->queryString += value; + } return MHD_YES; } diff --git a/src/server/request_context.h b/src/server/request_context.h index f63f89810..de02d465f 100644 --- a/src/server/request_context.h +++ b/src/server/request_context.h @@ -92,9 +92,7 @@ class RequestContext { std::string get_url_part(int part) const; std::string get_full_url() const; - std::string get_query(bool mustEncode = false) const { - return get_query([](const std::string& key) {return true;}, mustEncode); - } + std::string get_query() const { return queryString; } template std::string get_query(F filter, bool mustEncode) const { @@ -132,6 +130,7 @@ class RequestContext { ByteRange byteRange_; std::map headers; std::map> arguments; + std::string queryString; private: // functions static MHD_Result fill_header(void *, enum MHD_ValueKind, const char*, const char*); diff --git a/src/tools/archiveTools.cpp b/src/tools/archiveTools.cpp index 5971ec495..6fceeb97a 100644 --- a/src/tools/archiveTools.cpp +++ b/src/tools/archiveTools.cpp @@ -93,10 +93,6 @@ std::string getMetaFlavour(const zim::Archive& archive) { return getMetadata(archive, "Flavour"); } -std::string getArchiveId(const zim::Archive& archive) { - return (std::string) archive.getUuid(); -} - bool getArchiveFavicon(const zim::Archive& archive, unsigned size, std::string& content, std::string& mimeType){ try { diff --git a/src/tools/archiveTools.h b/src/tools/archiveTools.h index 0e2108045..5b159df34 100644 --- a/src/tools/archiveTools.h +++ b/src/tools/archiveTools.h @@ -40,7 +40,6 @@ namespace kiwix std::string getMetaCreator(const zim::Archive& archive); std::string getMetaPublisher(const zim::Archive& archive); std::string getMetaFlavour(const zim::Archive& archive); - std::string getArchiveId(const zim::Archive& archive); bool getArchiveFavicon(const zim::Archive& archive, unsigned size, std::string& content, std::string& mimeType); diff --git a/static/i18n/en.json b/static/i18n/en.json index 89c83c915..bf8441fc1 100644 --- a/static/i18n/en.json +++ b/static/i18n/en.json @@ -27,4 +27,5 @@ , "home-button-text": "Go to the main page of '{{BOOK_TITLE}}'" , "random-page-button-text": "Go to a randomly selected page" , "searchbox-tooltip": "Search '{{BOOK_TITLE}}'" + , "confusion-of-tongues": "Two or more books in different languages would participate in search, which may lead to confusing results." } diff --git a/test/data/lib_for_server_search_test.xml b/test/data/lib_for_server_search_test.xml new file mode 100644 index 000000000..994c6a6fa --- /dev/null +++ b/test/data/lib_for_server_search_test.xml @@ -0,0 +1,4 @@ + + + + diff --git a/test/library_server.cpp b/test/library_server.cpp index fda5916b4..4afcbc10b 100644 --- a/test/library_server.cpp +++ b/test/library_server.cpp @@ -359,7 +359,7 @@ TEST_F(LibraryServerTest, catalog_search_results_pagination) EXPECT_EQ(maskVariableOPDSFeedData(r->body), OPDS_FEED_TAG " 12345678-90ab-cdef-1234-567890abcdef\n" - " Filtered zims (count=1&start=1)\n" + " Filtered zims (start=1&count=1)\n" " YYYY-MM-DDThh:mm:ssZ\n" " 3\n" " 1\n" @@ -375,7 +375,7 @@ TEST_F(LibraryServerTest, catalog_search_results_pagination) EXPECT_EQ(maskVariableOPDSFeedData(r->body), OPDS_FEED_TAG " 12345678-90ab-cdef-1234-567890abcdef\n" - " Filtered zims (count=10&start=100)\n" + " Filtered zims (start=100&count=10)\n" " YYYY-MM-DDThh:mm:ssZ\n" " 3\n" " 100\n" @@ -638,8 +638,8 @@ TEST_F(LibraryServerTest, catalog_v2_entries_filtered_by_range) const auto r = zfs1_->GET("/ROOT/catalog/v2/entries?start=1&count=1"); EXPECT_EQ(r->status, 200); EXPECT_EQ(maskVariableOPDSFeedData(r->body), - CATALOG_V2_ENTRIES_PREAMBLE("?count=1&start=1") - " Filtered Entries (count=1&start=1)\n" + CATALOG_V2_ENTRIES_PREAMBLE("?start=1&count=1") + " Filtered Entries (start=1&count=1)\n" " YYYY-MM-DDThh:mm:ssZ\n" " 3\n" " 1\n" diff --git a/test/meson.build b/test/meson.build index b91f8cd0f..90b7ce892 100644 --- a/test/meson.build +++ b/test/meson.build @@ -37,6 +37,7 @@ if gtest_dep.found() and not meson.is_cross_build() 'corner_cases.zim', 'poor.zim', 'library.xml', + 'lib_for_server_search_test.xml', 'customized_resources.txt', 'helloworld.txt', 'welcome.html', diff --git a/test/server.cpp b/test/server.cpp index c774d0f1a..76541d0b3 100644 --- a/test/server.cpp +++ b/test/server.cpp @@ -839,7 +839,7 @@ TEST_F(ServerTest, Http400HtmlError) expected_body==R"(

Invalid request

- The requested URL "/ROOT/search?books.filter.lang=eng&pattern=" is not a valid request. + The requested URL "/ROOT/search?books.filter.lang=eng&pattern" is not a valid request.

No query provided. @@ -896,21 +896,21 @@ TEST_F(ServerTest, HttpXmlError) /* HTTP status code */ 400, /* expected response XML */ R"( Invalid request -The requested URL "/ROOT/search?content=zimfile&format=xml" is not a valid request. +The requested URL "/ROOT/search?format=xml&content=zimfile" is not a valid request. No query provided. )" }, { /* url */ "/ROOT/search?format=xml&content=non-existing-book&pattern=asdfqwerty", /* HTTP status code */ 400, /* expected response XML */ R"( Invalid request -The requested URL "/ROOT/search?content=non-existing-book&format=xml&pattern=asdfqwerty" is not a valid request. +The requested URL "/ROOT/search?format=xml&content=non-existing-book&pattern=asdfqwerty" is not a valid request. No such book: non-existing-book )" }, { /* url */ "/ROOT/search?format=xml&content=non-existing-book&pattern=a\"