From f7b853373cd1f13c42d4978196bbd3ef6bd6fdfe Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Sat, 11 Dec 2021 20:54:16 +0400 Subject: [PATCH 1/5] Less confusing 404 errors from /random endpoint Before this fix the /random endpoint could return a 404 Not Found page saying The requested URL "/random" was not found on this server. Error cases producing such a result were: - `/random?content=NON-EXISTING-BOOK` (can happen when a server is restarted or the library is reloaded and the current book is no longer available). - Failure of the libkiwix routine for picking a random article. Now a proper message is shown for each of those cases. --- src/server/internalServer.cpp | 8 +++++--- src/server/response.cpp | 4 +++- static/templates/404.html | 2 ++ 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/server/internalServer.cpp b/src/server/internalServer.cpp index 6ff949380..11333f30c 100644 --- a/src/server/internalServer.cpp +++ b/src/server/internalServer.cpp @@ -668,18 +668,20 @@ std::unique_ptr InternalServer::handle_random(const RequestContext& re bookId = mp_nameMapper->getIdForName(bookName); archive = mp_library->getArchiveById(bookId); } catch (const std::out_of_range&) { - return Response::build_404(*this, request, bookName, ""); + // error handled by the archive == nullptr check below } if (archive == nullptr) { - return Response::build_404(*this, request, bookName, ""); + const std::string error_details = "No such book: " + bookName; + return Response::build_404(*this, request, bookName, "", error_details); } try { auto entry = archive->getRandomEntry(); return build_redirect(bookName, getFinalItem(*archive, entry)); } catch(zim::EntryNotFound& e) { - return Response::build_404(*this, request, bookName, ""); + const std::string error_details = "Oops! Failed to pick a random article :("; + return Response::build_404(*this, request, bookName, getArchiveTitle(*archive), error_details); } } diff --git a/src/server/response.cpp b/src/server/response.cpp index 69408f0de..9fefd5f88 100644 --- a/src/server/response.cpp +++ b/src/server/response.cpp @@ -86,7 +86,9 @@ std::unique_ptr Response::build_304(const InternalServer& server, cons std::unique_ptr Response::build_404(const InternalServer& server, const RequestContext& request, const std::string& bookName, const std::string& bookTitle, const std::string& details) { MustacheData results; - results.set("url", request.get_full_url()); + if ( request.get_url() != "/random" ) { + results.set("url", request.get_full_url()); + } results.set("details", details); auto response = ContentResponse::build(server, RESOURCE::templates::_404_html, results, "text/html"); diff --git a/static/templates/404.html b/static/templates/404.html index 501c6ebb3..55645dbd7 100644 --- a/static/templates/404.html +++ b/static/templates/404.html @@ -6,9 +6,11 @@

Not Found

+ {{#url}}

The requested URL "{{url}}" was not found on this server.

+ {{/url}} {{#details}}

{{{details}}} From d8c525289be28f52c066f3c02cc2b121bd6221cc Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Sat, 11 Dec 2021 21:36:24 +0400 Subject: [PATCH 2/5] Changed the signature of Response::build_404() Now Response::build_404() takes the URL instead of the entire RequestContext object. An empty url suppresses the The requested URL "url" was not found on this server. part of the error text. --- src/server/internalServer.cpp | 28 ++++++++++++------------ src/server/internalServer_catalog_v2.cpp | 6 ++--- src/server/response.cpp | 6 ++--- src/server/response.h | 2 +- 4 files changed, 21 insertions(+), 21 deletions(-) diff --git a/src/server/internalServer.cpp b/src/server/internalServer.cpp index 11333f30c..b2f4ead56 100644 --- a/src/server/internalServer.cpp +++ b/src/server/internalServer.cpp @@ -271,7 +271,7 @@ std::unique_ptr InternalServer::handle_request(const RequestContext& r { try { if (! request.is_valid_url()) - return Response::build_404(*this, request, "", ""); + return Response::build_404(*this, request.get_full_url(), "", ""); const ETag etag = get_matching_if_none_match_etag(request); if ( etag ) @@ -394,11 +394,11 @@ std::unique_ptr InternalServer::handle_meta(const RequestContext& requ meta_name = request.get_argument("name"); archive = mp_library->getArchiveById(bookId); } catch (const std::out_of_range& e) { - return Response::build_404(*this, request, bookName, ""); + // error handled by the archive == nullptr check below } if (archive == nullptr) { - return Response::build_404(*this, request, bookName, ""); + return Response::build_404(*this, request.get_full_url(), bookName, ""); } std::string content; @@ -425,7 +425,7 @@ std::unique_ptr InternalServer::handle_meta(const RequestContext& requ } else if (const unsigned illustrationSize = parseIllustration(meta_name)) { getArchiveFavicon(*archive, illustrationSize, content, mimeType); } else { - return Response::build_404(*this, request, bookName, ""); + return Response::build_404(*this, request.get_full_url(), bookName, ""); } auto response = ContentResponse::build(*this, content, mimeType); @@ -452,7 +452,7 @@ std::unique_ptr InternalServer::handle_suggest(const RequestContext& r queryString = request.get_argument("term"); archive = mp_library->getArchiveById(bookId); } catch (const std::out_of_range&) { - return Response::build_404(*this, request, bookName, ""); + return Response::build_404(*this, "", bookName, ""); } auto start = 0; @@ -470,7 +470,7 @@ std::unique_ptr InternalServer::handle_suggest(const RequestContext& r } if (archive == nullptr) { - return Response::build_404(*this, request, bookName, ""); + return Response::build_404(*this, "", bookName, ""); } if (m_verbose.load()) { @@ -532,7 +532,7 @@ std::unique_ptr InternalServer::handle_skin(const RequestContext& requ response->set_cacheable(); return std::move(response); } catch (const ResourceNotFound& e) { - return Response::build_404(*this, request, "", ""); + return Response::build_404(*this, request.get_full_url(), "", ""); } } @@ -673,7 +673,7 @@ std::unique_ptr InternalServer::handle_random(const RequestContext& re if (archive == nullptr) { const std::string error_details = "No such book: " + bookName; - return Response::build_404(*this, request, bookName, "", error_details); + return Response::build_404(*this, "", bookName, "", error_details); } try { @@ -681,7 +681,7 @@ std::unique_ptr InternalServer::handle_random(const RequestContext& re return build_redirect(bookName, getFinalItem(*archive, entry)); } catch(zim::EntryNotFound& e) { const std::string error_details = "Oops! Failed to pick a random article :("; - return Response::build_404(*this, request, bookName, getArchiveTitle(*archive), error_details); + return Response::build_404(*this, "", bookName, getArchiveTitle(*archive), error_details); } } @@ -693,7 +693,7 @@ std::unique_ptr InternalServer::handle_captured_external(const Request } catch (const std::out_of_range& e) {} if (source.empty()) - return Response::build_404(*this, request, "", ""); + return Response::build_404(*this, request.get_full_url(), "", ""); auto data = get_default_data(); data.set("source", source); @@ -712,7 +712,7 @@ std::unique_ptr InternalServer::handle_catalog(const RequestContext& r host = request.get_header("Host"); url = request.get_url_part(1); } catch (const std::out_of_range&) { - return Response::build_404(*this, request, "", ""); + return Response::build_404(*this, request.get_full_url(), "", ""); } if (url == "v2") { @@ -720,7 +720,7 @@ std::unique_ptr InternalServer::handle_catalog(const RequestContext& r } if (url != "searchdescription.xml" && url != "root.xml" && url != "search") { - return Response::build_404(*this, request, "", ""); + return Response::build_404(*this, request.get_full_url(), "", ""); } if (url == "searchdescription.xml") { @@ -856,7 +856,7 @@ std::unique_ptr InternalServer::handle_content(const RequestContext& r std::string searchURL = m_root+"/search?pattern="+pattern; // Make a full search on the entire library. const std::string details = searchSuggestionHTML(searchURL, kiwix::urlDecode(pattern)); - return Response::build_404(*this, request, bookName, "", details); + return Response::build_404(*this, request.get_full_url(), bookName, "", details); } auto urlStr = request.get_url().substr(bookName.size()+1); @@ -889,7 +889,7 @@ std::unique_ptr InternalServer::handle_content(const RequestContext& r std::string searchURL = m_root+"/search?content="+bookName+"&pattern="+pattern; // Make a search on this specific book only. const std::string details = searchSuggestionHTML(searchURL, kiwix::urlDecode(pattern)); - return Response::build_404(*this, request, bookName, getArchiveTitle(*archive), details); + return Response::build_404(*this, request.get_full_url(), bookName, getArchiveTitle(*archive), details); } } diff --git a/src/server/internalServer_catalog_v2.cpp b/src/server/internalServer_catalog_v2.cpp index 7e49f76ea..475e16cb4 100644 --- a/src/server/internalServer_catalog_v2.cpp +++ b/src/server/internalServer_catalog_v2.cpp @@ -43,7 +43,7 @@ std::unique_ptr InternalServer::handle_catalog_v2(const RequestContext try { url = request.get_url_part(2); } catch (const std::out_of_range&) { - return Response::build_404(*this, request, "", ""); + return Response::build_404(*this, request.get_full_url(), "", ""); } if (url == "root.xml") { @@ -67,7 +67,7 @@ std::unique_ptr InternalServer::handle_catalog_v2(const RequestContext } else if (url == "languages") { return handle_catalog_v2_languages(request); } else { - return Response::build_404(*this, request, "", ""); + return Response::build_404(*this, request.get_full_url(), "", ""); } } @@ -108,7 +108,7 @@ std::unique_ptr InternalServer::handle_catalog_v2_complete_entry(const try { mp_library->getBookById(entryId); } catch (const std::out_of_range&) { - return Response::build_404(*this, request, "", ""); + return Response::build_404(*this, request.get_full_url(), "", ""); } OPDSDumper opdsDumper(mp_library); diff --git a/src/server/response.cpp b/src/server/response.cpp index 9fefd5f88..36f9339f8 100644 --- a/src/server/response.cpp +++ b/src/server/response.cpp @@ -83,11 +83,11 @@ std::unique_ptr Response::build_304(const InternalServer& server, cons return response; } -std::unique_ptr Response::build_404(const InternalServer& server, const RequestContext& request, const std::string& bookName, const std::string& bookTitle, const std::string& details) +std::unique_ptr Response::build_404(const InternalServer& server, const std::string& url, const std::string& bookName, const std::string& bookTitle, const std::string& details) { MustacheData results; - if ( request.get_url() != "/random" ) { - results.set("url", request.get_full_url()); + if ( !url.empty() ) { + results.set("url", url); } results.set("details", details); diff --git a/src/server/response.h b/src/server/response.h index 49d4d11e6..2a8f6de4a 100644 --- a/src/server/response.h +++ b/src/server/response.h @@ -47,7 +47,7 @@ class Response { static std::unique_ptr build(const InternalServer& server); static std::unique_ptr build_304(const InternalServer& server, const ETag& etag); - static std::unique_ptr build_404(const InternalServer& server, const RequestContext& request, const std::string& bookName, const std::string& bookTitle, const std::string& details=""); + static std::unique_ptr build_404(const InternalServer& server, const std::string& url, const std::string& bookName, const std::string& bookTitle, const std::string& details=""); static std::unique_ptr build_416(const InternalServer& server, size_t resourceLength); static std::unique_ptr build_500(const InternalServer& server, const std::string& msg); static std::unique_ptr build_redirect(const InternalServer& server, const std::string& redirectUrl); From 20b5a2b9716946e8ebf9c6a6ecca23cbcbc7ac2a Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Sat, 11 Dec 2021 21:45:12 +0400 Subject: [PATCH 3/5] Less confusing 404 errors from /meta endpoint Before this fix the /meta endpoint could return a 404 Not Found page saying The requested URL "/meta" was not found on this server. Error cases producing such a result were: - `/meta?content=NON-EXISTING-BOOK&name=metaname` - `/meta?content=book&name=BAD-META-NAME` Now a proper message is shown for each of those cases. This fix is being done just for consistency (the /meta endpoint is not a user-facing one and the scripts don't bother about error texts). --- src/server/internalServer.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/server/internalServer.cpp b/src/server/internalServer.cpp index b2f4ead56..257ef5312 100644 --- a/src/server/internalServer.cpp +++ b/src/server/internalServer.cpp @@ -386,23 +386,23 @@ std::unique_ptr InternalServer::handle_meta(const RequestContext& requ { std::string bookName; std::string bookId; - std::string meta_name; std::shared_ptr archive; try { bookName = request.get_argument("content"); bookId = mp_nameMapper->getIdForName(bookName); - meta_name = request.get_argument("name"); archive = mp_library->getArchiveById(bookId); } catch (const std::out_of_range& e) { // error handled by the archive == nullptr check below } if (archive == nullptr) { - return Response::build_404(*this, request.get_full_url(), bookName, ""); + const std::string error_details = "No such book: " + bookName; + return Response::build_404(*this, "", bookName, "", error_details); } std::string content; std::string mimeType = "text"; + const auto meta_name = request.get_optional_param("name", std::string()); if (meta_name == "title") { content = getArchiveTitle(*archive); @@ -425,7 +425,8 @@ std::unique_ptr InternalServer::handle_meta(const RequestContext& requ } else if (const unsigned illustrationSize = parseIllustration(meta_name)) { getArchiveFavicon(*archive, illustrationSize, content, mimeType); } else { - return Response::build_404(*this, request.get_full_url(), bookName, ""); + const std::string error_details = "No such metadata item: " + meta_name; + return Response::build_404(*this, "", bookName, "", error_details); } auto response = ContentResponse::build(*this, content, mimeType); From 872ddd9cb32a21013a3f05fe3be9fd954484493b Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Sat, 11 Dec 2021 22:04:13 +0400 Subject: [PATCH 4/5] Cleaned up InternalServer::handle_suggest() As a result of this clean-up the /suggest endpoint too stopped generating confusing 404 Not Found errors (which, like in /meta's case is not that important). Another functional change is that the "term" parameter became optional. --- src/server/internalServer.cpp | 34 +++++++++++----------------------- test/server.cpp | 2 +- 2 files changed, 12 insertions(+), 24 deletions(-) diff --git a/src/server/internalServer.cpp b/src/server/internalServer.cpp index 257ef5312..a375940ad 100644 --- a/src/server/internalServer.cpp +++ b/src/server/internalServer.cpp @@ -440,38 +440,26 @@ std::unique_ptr InternalServer::handle_suggest(const RequestContext& r printf("** running handle_suggest\n"); } - std::string content; - std::string mimeType; - std::string bookName; - std::string bookId; - std::string queryString; std::shared_ptr archive; try { bookName = request.get_argument("content"); - bookId = mp_nameMapper->getIdForName(bookName); - queryString = request.get_argument("term"); + const std::string bookId = mp_nameMapper->getIdForName(bookName); archive = mp_library->getArchiveById(bookId); } catch (const std::out_of_range&) { - return Response::build_404(*this, "", bookName, ""); - } - - auto start = 0; - try { - start = request.get_argument("start"); - } catch (const std::exception&) {} - - unsigned int count = 10; - try { - count = request.get_argument("count"); - } catch (const std::exception&) {} - - if (count == 0) { - count = 10; + // error handled by the archive == nullptr check below } if (archive == nullptr) { - return Response::build_404(*this, "", bookName, ""); + const std::string error_details = "No such book: " + bookName; + return Response::build_404(*this, "", bookName, "", error_details); + } + + const auto queryString = request.get_optional_param("term", std::string()); + const auto start = request.get_optional_param("start", 0); + unsigned int count = request.get_optional_param("count", 10); + if (count == 0) { + count = 10; } if (m_verbose.load()) { diff --git a/test/server.cpp b/test/server.cpp index afffee224..49cfab5a1 100644 --- a/test/server.cpp +++ b/test/server.cpp @@ -179,6 +179,7 @@ const ResourceCollection resources200Compressible{ { NO_ETAG, "/search?content=zimfile&pattern=a" }, + { NO_ETAG, "/suggest?content=zimfile" }, { NO_ETAG, "/suggest?content=zimfile&term=ray" }, { NO_ETAG, "/catch/external?source=www.example.com" }, @@ -282,7 +283,6 @@ const char* urls404[] = { "/random?content=non-existent-book", "/search", "/suggest", - "/suggest?content=zimfile", "/suggest?content=non-existent-book&term=abcd", "/catch/external", "/zimfile/A/non-existent-article", From ed2f914e1046a14873c9d52acfd0b10f4d5be4dd Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Fri, 17 Dec 2021 20:45:45 +0400 Subject: [PATCH 5/5] Minor cleanup The code for obtaining the archive now looks the same for the /meta, /suggest, /search and /random endpoints. --- src/server/internalServer.cpp | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/src/server/internalServer.cpp b/src/server/internalServer.cpp index a375940ad..d59de4d70 100644 --- a/src/server/internalServer.cpp +++ b/src/server/internalServer.cpp @@ -385,11 +385,10 @@ SuggestionsList_t getSuggestions(const zim::Archive* const archive, std::unique_ptr InternalServer::handle_meta(const RequestContext& request) { std::string bookName; - std::string bookId; std::shared_ptr archive; try { bookName = request.get_argument("content"); - bookId = mp_nameMapper->getIdForName(bookName); + const std::string bookId = mp_nameMapper->getIdForName(bookName); archive = mp_library->getArchiveById(bookId); } catch (const std::out_of_range& e) { // error handled by the archive == nullptr check below @@ -531,13 +530,6 @@ std::unique_ptr InternalServer::handle_search(const RequestContext& re printf("** running handle_search\n"); } - std::string bookName; - std::string bookId; - try { - bookName = request.get_argument("content"); - bookId = mp_nameMapper->getIdForName(bookName); - } catch (const std::out_of_range&) {} - std::string patternString; try { patternString = request.get_argument("pattern"); @@ -556,8 +548,11 @@ std::unique_ptr InternalServer::handle_search(const RequestContext& re } catch(const std::out_of_range&) {} catch(const std::invalid_argument&) {} + std::string bookName; std::shared_ptr archive; try { + bookName = request.get_argument("content"); + const std::string bookId = mp_nameMapper->getIdForName(bookName); archive = mp_library->getArchiveById(bookId); } catch (const std::out_of_range&) {} @@ -650,11 +645,10 @@ std::unique_ptr InternalServer::handle_random(const RequestContext& re } std::string bookName; - std::string bookId; std::shared_ptr archive; try { bookName = request.get_argument("content"); - bookId = mp_nameMapper->getIdForName(bookName); + const std::string bookId = mp_nameMapper->getIdForName(bookName); archive = mp_library->getArchiveById(bookId); } catch (const std::out_of_range&) { // error handled by the archive == nullptr check below