diff --git a/src/meson.build b/src/meson.build index 7418fd28e..411f14f28 100644 --- a/src/meson.build +++ b/src/meson.build @@ -21,6 +21,7 @@ kiwix_sources = [ 'tools/otherTools.cpp', 'kiwixserve.cpp', 'name_mapper.cpp', + 'server/etag.cpp', 'server/request_context.cpp', 'server/response.cpp' ] diff --git a/src/server.cpp b/src/server.cpp index e940b7924..ddd057838 100644 --- a/src/server.cpp +++ b/src/server.cpp @@ -131,6 +131,7 @@ class InternalServer { Response get_default_response() const; std::shared_ptr get_reader(const std::string& bookName) const; + bool etag_not_needed(const RequestContext& r) const; private: // data std::string m_addr; @@ -145,6 +146,8 @@ class InternalServer { Library* mp_library; NameMapper* mp_nameMapper; + + std::string m_server_id; }; @@ -252,6 +255,8 @@ bool InternalServer::start() { << std::endl; return false; } + auto server_start_time = std::chrono::system_clock::now().time_since_epoch(); + m_server_id = kiwix::to_string(server_start_time.count()); return true; } @@ -319,6 +324,9 @@ int InternalServer::handlerCallback(struct MHD_Connection* connection, } } + if (response.getReturnCode() == MHD_HTTP_OK && !etag_not_needed(request)) + response.set_server_id(m_server_id); + auto ret = response.send(request, connection); auto end_time = std::chrono::steady_clock::now(); auto time_span = std::chrono::duration_cast>(end_time - start_time); @@ -428,6 +436,16 @@ MustacheData InternalServer::homepage_data() const return data; } +bool InternalServer::etag_not_needed(const RequestContext& request) const +{ + const std::string url = request.get_url(); + return kiwix::startsWith(url, "/catalog") + || url == "/search" + || url == "/suggest" + || url == "/random" + || url == "/catch/external"; +} + Response InternalServer::build_homepage(const RequestContext& request) { auto response = get_default_response(); @@ -486,7 +504,7 @@ Response InternalServer::handle_meta(const RequestContext& request) response.set_content(content); 
response.set_mimeType(mimeType); response.set_compress(false); - response.set_cache(true); + response.set_cacheable(); return response; } @@ -570,7 +588,7 @@ Response InternalServer::handle_skin(const RequestContext& request) } response.set_mimeType(getMimeTypeForFile(resourceName)); response.set_compress(true); - response.set_cache(true); + response.set_cacheable(); return response; } diff --git a/src/server/etag.cpp b/src/server/etag.cpp new file mode 100644 index 000000000..bf64348f3 --- /dev/null +++ b/src/server/etag.cpp @@ -0,0 +1,64 @@ +/* + * Copyright 2020 Veloman Yunkan + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. + */ + + +#include "etag.h" + +#include + +namespace kiwix { + +namespace { + +// Characters in the options part of the ETag could in principle be picked up +// from the latin alphabet in natural order (the character corresponding to +// ETag::Option opt would be 'a'+opt; that would somewhat simplify the code in +// this file). However it is better to have some mnemonics in the option names, +// hence the variable below: all_options[opt] corresponds to the character going +// into the ETag for ETag::Option opt. +const char all_options[] = "cz"; + +static_assert(ETag::OPTION_COUNT == sizeof(all_options) - 1, ""); + +} // namespace + + +void ETag::set_option(Option opt) +{ + if ( ! 
get_option(opt) ) + { + m_options.push_back(all_options[opt]); + std::sort(m_options.begin(), m_options.end()); + } +} + +bool ETag::get_option(Option opt) const +{ + return m_options.find(all_options[opt]) != std::string::npos; +} + +std::string ETag::get_etag() const +{ + if ( m_serverId.empty() ) + return std::string(); + + return "\"" + m_serverId + "/" + m_options + "\""; +} + +} // namespace kiwix diff --git a/src/server/etag.h b/src/server/etag.h new file mode 100644 index 000000000..ec3b0077e --- /dev/null +++ b/src/server/etag.h @@ -0,0 +1,77 @@ +/* + * Copyright 2020 Veloman Yunkan + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. + */ + + +#ifndef KIWIXLIB_SERVER_ETAG_H +#define KIWIXLIB_SERVER_ETAG_H + +#include + +namespace kiwix { + +// The ETag string used by Kiwix server (more precisely, its value inside the +// double quotes) consists of two parts: +// +// 1. ServerId - The string obtained on server start up +// +// 2. Options - Zero or more characters encoding the values of some of the +// headers of the response +// +// The two parts are separated with a slash (/) symbol (which is always present, +// even when the options part is empty). Neither portion of a Kiwix ETag +// may contain the slash symbol. 
+// Examples of valid Kiwix server ETags (including the double quotes): +// +// "abcdefghijklmn/" +// "1234567890/z" +// "1234567890/cz" +// +// The options part of the Kiwix ETag makes it possible to correctly set the required +// headers when responding to a conditional If-None-Match request with a 304 +// (Not Modified) response without following the full code path that would +// discover the necessary options. + +class ETag +{ + public: // types + enum Option { + CACHEABLE_ENTITY, + COMPRESSED_CONTENT, + OPTION_COUNT + }; + + public: // functions + ETag() {} + + void set_server_id(const std::string& id) { m_serverId = id; } + void set_option(Option opt); + + explicit operator bool() const { return !m_serverId.empty(); } + + bool get_option(Option opt) const; + std::string get_etag() const; + + private: // data + std::string m_serverId; + std::string m_options; +}; + +} // namespace kiwix + +#endif // KIWIXLIB_SERVER_ETAG_H diff --git a/src/server/response.cpp b/src/server/response.cpp index 7814f5d75..7992dece7 100644 --- a/src/server/response.cpp +++ b/src/server/response.cpp @@ -55,7 +55,6 @@ Response::Response(const std::string& root, bool verbose, bool withTaskbar, bool m_withTaskbar(withTaskbar), m_withLibraryButton(withLibraryButton), m_blockExternalLinks(blockExternalLinks), - m_useCache(false), m_addTaskbar(false), m_bookName(""), m_startRange(0), @@ -168,6 +167,14 @@ void Response::inject_externallinks_blocker() script_tag); } +bool +Response::can_compress(const RequestContext& request) const +{ + return request.can_compress() + && is_compressible_mime_type(m_mimeType) + && (m_content.size() > KIWIX_MIN_CONTENT_SIZE_TO_DEFLATE); +} + MHD_Response* Response::create_raw_content_mhd_response(const RequestContext& request) { @@ -178,10 +185,7 @@ Response::create_raw_content_mhd_response(const RequestContext& request) inject_externallinks_blocker(); } - bool shouldCompress = m_compress && request.can_compress(); - shouldCompress &= 
is_compressible_mime_type(m_mimeType); - shouldCompress &= (m_content.size() > KIWIX_MIN_CONTENT_SIZE_TO_DEFLATE); - + bool shouldCompress = m_compress && can_compress(request); if (shouldCompress) { std::vector compr_buffer(compressBound(m_content.size())); uLongf comprLen = compr_buffer.capacity(); @@ -196,6 +200,7 @@ Response::create_raw_content_mhd_response(const RequestContext& request) It has no incidence on other browsers See http://www.subbu.org/blog/2008/03/ie7-deflate-or-not and comments */ m_content = string((char*)&compr_buffer[2], comprLen - 2); + m_etag.set_option(ETag::COMPRESSED_CONTENT); } else { shouldCompress = false; } @@ -204,9 +209,11 @@ Response::create_raw_content_mhd_response(const RequestContext& request) MHD_Response* response = MHD_create_response_from_buffer( m_content.size(), const_cast(m_content.data()), MHD_RESPMEM_MUST_COPY); - if (shouldCompress) { + if ( m_etag.get_option(ETag::COMPRESSED_CONTENT) ) { MHD_add_response_header( response, MHD_HTTP_HEADER_VARY, "Accept-Encoding"); + } + if (shouldCompress) { MHD_add_response_header( response, MHD_HTTP_HEADER_CONTENT_ENCODING, "deflate"); } @@ -267,7 +274,10 @@ int Response::send(const RequestContext& request, MHD_Connection* connection) MHD_add_response_header(response, "Access-Control-Allow-Origin", "*"); MHD_add_response_header(response, MHD_HTTP_HEADER_CACHE_CONTROL, - m_useCache ? "max-age=2723040, public" : "no-cache, no-store, must-revalidate"); + m_etag.get_option(ETag::CACHEABLE_ENTITY) ? "max-age=2723040, public" : "no-cache, no-store, must-revalidate"); + const std::string etag = m_etag.get_etag(); + if ( ! 
etag.empty() ) + MHD_add_response_header(response, MHD_HTTP_HEADER_ETAG, etag.c_str()); if (m_returnCode == MHD_HTTP_OK && request.has_range()) m_returnCode = MHD_HTTP_PARTIAL_CONTENT; @@ -301,7 +311,7 @@ void Response::set_entry(const Entry& entry, const RequestContext& request) { const std::string mimeType = get_mime_type(entry); set_mimeType(mimeType); - set_cache(true); + set_cacheable(); if ( is_compressible_mime_type(mimeType) ) { zim::Blob raw_content = entry.getBlob(); diff --git a/src/server/response.h b/src/server/response.h index 71364c638..bd15f9543 100644 --- a/src/server/response.h +++ b/src/server/response.h @@ -25,6 +25,7 @@ #include #include "entry.h" +#include "etag.h" extern "C" { #include @@ -55,18 +56,22 @@ class Response { void set_mimeType(const std::string& mimeType) { m_mimeType = mimeType; } void set_code(int code) { m_returnCode = code; } - void set_cache(bool cache) { m_useCache = cache; } + void set_cacheable() { m_etag.set_option(ETag::CACHEABLE_ENTITY); } + void set_server_id(const std::string& id) { m_etag.set_server_id(id); } + void set_etag(const ETag& etag) { m_etag = etag; } void set_compress(bool compress) { m_compress = compress; } void set_taskbar(const std::string& bookName, const std::string& bookTitle); void set_range_first(uint64_t start) { m_startRange = start; } void set_range_len(uint64_t len) { m_lenRange = len; } - int getReturnCode() { return m_returnCode; } + int getReturnCode() const { return m_returnCode; } std::string get_mimeType() const { return m_mimeType; } void introduce_taskbar(); void inject_externallinks_blocker(); + bool can_compress(const RequestContext& request) const; + private: // functions MHD_Response* create_mhd_response(const RequestContext& request); MHD_Response* create_raw_content_mhd_response(const RequestContext& request); @@ -84,13 +89,13 @@ class Response { bool m_withTaskbar; bool m_withLibraryButton; bool m_blockExternalLinks; - bool m_useCache; bool m_compress; bool m_addTaskbar; 
std::string m_bookName; std::string m_bookTitle; uint64_t m_startRange; uint64_t m_lenRange; + ETag m_etag; }; } diff --git a/test/server.cpp b/test/server.cpp index 26a9b603f..cc157992e 100644 --- a/test/server.cpp +++ b/test/server.cpp @@ -6,6 +6,13 @@ #include "./httplib.h" +bool is_valid_etag(const std::string& etag) +{ + return etag.size() >= 2 && + etag.front() == '"' && + etag.back() == '"'; +} + template T1 concat(T1 a, const T2& b) { @@ -92,8 +99,12 @@ protected: } }; +const bool WITH_ETAG = true; +const bool NO_ETAG = false; + struct Resource { + bool etag_expected; const char* url; }; @@ -106,59 +117,46 @@ std::ostream& operator<<(std::ostream& out, const Resource& r) typedef std::vector ResourceCollection; const ResourceCollection resources200Compressible{ - { "/" }, + { WITH_ETAG, "/" }, - { "/skin/jquery-ui/jquery-ui.structure.min.css" }, - { "/skin/jquery-ui/jquery-ui.min.js" }, - { "/skin/jquery-ui/external/jquery/jquery.js" }, - { "/skin/jquery-ui/jquery-ui.theme.min.css" }, - { "/skin/jquery-ui/jquery-ui.min.css" }, - { "/skin/taskbar.js" }, - { "/skin/taskbar.css" }, - { "/skin/block_external.js" }, + { WITH_ETAG, "/skin/jquery-ui/jquery-ui.structure.min.css" }, + { WITH_ETAG, "/skin/jquery-ui/jquery-ui.min.js" }, + { WITH_ETAG, "/skin/jquery-ui/external/jquery/jquery.js" }, + { WITH_ETAG, "/skin/jquery-ui/jquery-ui.theme.min.css" }, + { WITH_ETAG, "/skin/jquery-ui/jquery-ui.min.css" }, + { WITH_ETAG, "/skin/taskbar.js" }, + { WITH_ETAG, "/skin/taskbar.css" }, + { WITH_ETAG, "/skin/block_external.js" }, - { "/search?content=zimfile&pattern=abcd" }, + { NO_ETAG, "/search?content=zimfile&pattern=abcd" }, - { "/suggest?content=zimfile&term=ray" }, + { NO_ETAG, "/suggest?content=zimfile&term=ray" }, - { "/catch/external?source=www.example.com" }, + { NO_ETAG, "/catch/external?source=www.example.com" }, - { "/zimfile/A/index" }, - { "/zimfile/A/Ray_Charles" }, + { WITH_ETAG, "/zimfile/A/index" }, + { WITH_ETAG, "/zimfile/A/Ray_Charles" }, }; const 
ResourceCollection resources200Uncompressible{ - { "/skin/jquery-ui/images/ui-bg_flat_0_aaaaaa_40x100.png" }, - { "/skin/jquery-ui/images/ui-bg_flat_75_ffffff_40x100.png" }, - { "/skin/jquery-ui/images/ui-icons_222222_256x240.png" }, - { "/skin/jquery-ui/images/ui-bg_glass_55_fbf9ee_1x400.png" }, - { "/skin/jquery-ui/images/ui-bg_highlight-soft_75_cccccc_1x100.png" }, - { "/skin/jquery-ui/images/ui-bg_glass_65_ffffff_1x400.png" }, - { "/skin/jquery-ui/images/ui-icons_2e83ff_256x240.png" }, - { "/skin/jquery-ui/images/ui-icons_cd0a0a_256x240.png" }, - { "/skin/jquery-ui/images/ui-icons_888888_256x240.png" }, - { "/skin/jquery-ui/images/ui-bg_glass_75_e6e6e6_1x400.png" }, - { "/skin/jquery-ui/images/animated-overlay.gif" }, - { "/skin/jquery-ui/images/ui-bg_glass_75_dadada_1x400.png" }, - { "/skin/jquery-ui/images/ui-icons_454545_256x240.png" }, - { "/skin/jquery-ui/images/ui-bg_glass_95_fef1ec_1x400.png" }, - { "/skin/caret.png" }, + { WITH_ETAG, "/skin/jquery-ui/images/animated-overlay.gif" }, + { WITH_ETAG, "/skin/caret.png" }, - { "/catalog/root.xml" }, - { "/catalog/searchdescription.xml" }, - { "/catalog/search" }, + { NO_ETAG, "/catalog/root.xml" }, + { NO_ETAG, "/catalog/searchdescription.xml" }, + { NO_ETAG, "/catalog/search" }, - { "/meta?content=zimfile&name=title" }, - { "/meta?content=zimfile&name=description" }, - { "/meta?content=zimfile&name=language" }, - { "/meta?content=zimfile&name=name" }, - { "/meta?content=zimfile&name=tags" }, - { "/meta?content=zimfile&name=date" }, - { "/meta?content=zimfile&name=creator" }, - { "/meta?content=zimfile&name=publisher" }, - { "/meta?content=zimfile&name=favicon" }, + { WITH_ETAG, "/meta?content=zimfile&name=title" }, + { WITH_ETAG, "/meta?content=zimfile&name=description" }, + { WITH_ETAG, "/meta?content=zimfile&name=language" }, + { WITH_ETAG, "/meta?content=zimfile&name=name" }, + { WITH_ETAG, "/meta?content=zimfile&name=tags" }, + { WITH_ETAG, "/meta?content=zimfile&name=date" }, + { WITH_ETAG, 
"/meta?content=zimfile&name=creator" }, + { WITH_ETAG, "/meta?content=zimfile&name=publisher" }, + { WITH_ETAG, "/meta?content=zimfile&name=favicon" }, - { "/zimfile/I/m/Ray_Charles_classic_piano_pose.jpg" }, + { WITH_ETAG, "/zimfile/I/m/Ray_Charles_classic_piano_pose.jpg" }, }; ResourceCollection all200Resources() @@ -260,3 +258,68 @@ TEST_F(ServerTest, HeadersAreTheSameInResponsesToHeadAndGetRequests) EXPECT_EQ(invariantHeaders(g), invariantHeaders(h)) << res; } } + +TEST_F(ServerTest, ETagHeaderIsSetAsNeeded) +{ + for ( const Resource& res : all200Resources() ) { + const auto responseToGet = zfs1_->GET(res.url); + EXPECT_EQ(res.etag_expected, responseToGet->has_header("ETag")) << res; + if ( res.etag_expected ) + EXPECT_TRUE(is_valid_etag(responseToGet->get_header_value("ETag"))); + } +} + +TEST_F(ServerTest, ETagIsTheSameInResponsesToDifferentRequestsOfTheSameURL) +{ + for ( const Resource& res : all200Resources() ) { + const auto h1 = zfs1_->HEAD(res.url); + const auto h2 = zfs1_->HEAD(res.url); + EXPECT_EQ(h1->get_header_value("ETag"), h2->get_header_value("ETag")); + } +} + +TEST_F(ServerTest, ETagIsTheSameAcrossHeadAndGet) +{ + for ( const Resource& res : all200Resources() ) { + const auto g = zfs1_->GET(res.url); + const auto h = zfs1_->HEAD(res.url); + EXPECT_EQ(h->get_header_value("ETag"), g->get_header_value("ETag")); + } +} + +TEST_F(ServerTest, DifferentServerInstancesProduceDifferentETags) +{ + ZimFileServer zfs2(PORT + 1, ZIMFILE); + for ( const Resource& res : all200Resources() ) { + if ( !res.etag_expected ) continue; + const auto h1 = zfs1_->HEAD(res.url); + const auto h2 = zfs2.HEAD(res.url); + EXPECT_NE(h1->get_header_value("ETag"), h2->get_header_value("ETag")); + } +} + +TEST_F(ServerTest, CompressionInfluencesETag) +{ + for ( const Resource& res : resources200Compressible ) { + if ( ! 
res.etag_expected ) continue; + const auto g1 = zfs1_->GET(res.url); + const auto g2 = zfs1_->GET(res.url, { {"Accept-Encoding", ""} } ); + const auto g3 = zfs1_->GET(res.url, { {"Accept-Encoding", "deflate"} } ); + const auto etag = g1->get_header_value("ETag"); + EXPECT_EQ(etag, g2->get_header_value("ETag")); + EXPECT_NE(etag, g3->get_header_value("ETag")); + } +} + +TEST_F(ServerTest, ETagOfUncompressibleContentIsNotAffectedByAcceptEncoding) +{ + for ( const Resource& res : resources200Uncompressible ) { + if ( ! res.etag_expected ) continue; + const auto g1 = zfs1_->GET(res.url); + const auto g2 = zfs1_->GET(res.url, { {"Accept-Encoding", ""} } ); + const auto g3 = zfs1_->GET(res.url, { {"Accept-Encoding", "deflate"} } ); + const auto etag = g1->get_header_value("ETag"); + EXPECT_EQ(etag, g2->get_header_value("ETag")) << res; + EXPECT_EQ(etag, g3->get_header_value("ETag")) << res; + } +}