ETags are set in the response as needed

Also added server-unit tests related to ETags in the response.
This commit is contained in:
Veloman Yunkan 2020-05-14 16:35:35 +04:00
parent 3d08ef43f2
commit 95a5cde359
7 changed files with 293 additions and 55 deletions

View File

@ -21,6 +21,7 @@ kiwix_sources = [
'tools/otherTools.cpp',
'kiwixserve.cpp',
'name_mapper.cpp',
'server/etag.cpp',
'server/request_context.cpp',
'server/response.cpp'
]

View File

@ -131,6 +131,7 @@ class InternalServer {
Response get_default_response() const;
std::shared_ptr<Reader> get_reader(const std::string& bookName) const;
bool etag_not_needed(const RequestContext& r) const;
private: // data
std::string m_addr;
@ -145,6 +146,8 @@ class InternalServer {
Library* mp_library;
NameMapper* mp_nameMapper;
std::string m_server_id;
};
@ -252,6 +255,8 @@ bool InternalServer::start() {
<< std::endl;
return false;
}
auto server_start_time = std::chrono::system_clock::now().time_since_epoch();
m_server_id = kiwix::to_string(server_start_time.count());
return true;
}
@ -319,6 +324,9 @@ int InternalServer::handlerCallback(struct MHD_Connection* connection,
}
}
if (response.getReturnCode() == MHD_HTTP_OK && !etag_not_needed(request))
response.set_server_id(m_server_id);
auto ret = response.send(request, connection);
auto end_time = std::chrono::steady_clock::now();
auto time_span = std::chrono::duration_cast<std::chrono::duration<double>>(end_time - start_time);
@ -428,6 +436,16 @@ MustacheData InternalServer::homepage_data() const
return data;
}
bool InternalServer::etag_not_needed(const RequestContext& request) const
{
const std::string url = request.get_url();
return kiwix::startsWith(url, "/catalog")
|| url == "/search"
|| url == "/suggest"
|| url == "/random"
|| url == "/catch/external";
}
Response InternalServer::build_homepage(const RequestContext& request)
{
auto response = get_default_response();
@ -486,7 +504,7 @@ Response InternalServer::handle_meta(const RequestContext& request)
response.set_content(content);
response.set_mimeType(mimeType);
response.set_compress(false);
response.set_cache(true);
response.set_cacheable();
return response;
}
@ -570,7 +588,7 @@ Response InternalServer::handle_skin(const RequestContext& request)
}
response.set_mimeType(getMimeTypeForFile(resourceName));
response.set_compress(true);
response.set_cache(true);
response.set_cacheable();
return response;
}

64
src/server/etag.cpp Normal file
View File

@ -0,0 +1,64 @@
/*
* Copyright 2020 Veloman Yunkan <veloman.yunkan@gmail.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#include "etag.h"
#include <algorithm>
namespace kiwix {
namespace {
// Characters in the options part of the ETag could in principle be picked up
// from the latin alphabet in natural order (the character corresponding to
// ETag::Option opt would be 'a'+opt; that would somewhat simplify the code in
// this file). However it is better to have some mnemonics in the option names,
// hence below variable: all_options[opt] corresponds to the character going
// into the ETag for ETag::Option opt.
const char all_options[] = "cz";
static_assert(ETag::OPTION_COUNT == sizeof(all_options) - 1, "");
} // namespace
void ETag::set_option(Option opt)
{
if ( ! get_option(opt) )
{
m_options.push_back(all_options[opt]);
std::sort(m_options.begin(), m_options.end());
}
}
bool ETag::get_option(Option opt) const
{
return m_options.find(all_options[opt]) != std::string::npos;
}
std::string ETag::get_etag() const
{
if ( m_serverId.empty() )
return std::string();
return "\"" + m_serverId + "/" + m_options + "\"";
}
} // namespace kiwix

77
src/server/etag.h Normal file
View File

@ -0,0 +1,77 @@
/*
* Copyright 2020 Veloman Yunkan <veloman.yunkan@gmail.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#ifndef KIWIXLIB_SERVER_ETAG_H
#define KIWIXLIB_SERVER_ETAG_H
#include <string>
namespace kiwix {
// The ETag string used by Kiwix server (more precisely, its value inside the
// double quotes) consists of two parts:
//
// 1. ServerId - The string obtained on server start up
//
// 2. Options - Zero or more characters encoding the values of some of the
// headers of the response
//
// The two parts are separated with a slash (/) symbol (which is always present,
// even when the the options part is empty). Neither portion of a Kiwix ETag
// may contain the slash symbol.
// Examples of valid Kiwix server ETags (including the double quotes):
//
// "abcdefghijklmn/"
// "1234567890/z"
// "1234567890/cz"
//
// The options part of the Kiwix ETag allows to correctly set the required
// headers when responding to a conditional If-None-Match request with a 304
// (Not Modified) response without following the full code path that would
// discover the necessary options.
class ETag
{
public: // types
enum Option {
CACHEABLE_ENTITY,
COMPRESSED_CONTENT,
OPTION_COUNT
};
public: // functions
ETag() {}
void set_server_id(const std::string& id) { m_serverId = id; }
void set_option(Option opt);
explicit operator bool() const { return !m_serverId.empty(); }
bool get_option(Option opt) const;
std::string get_etag() const;
private: // data
std::string m_serverId;
std::string m_options;
};
} // namespace kiwix
#endif // KIWIXLIB_SERVER_ETAG_H

View File

@ -55,7 +55,6 @@ Response::Response(const std::string& root, bool verbose, bool withTaskbar, bool
m_withTaskbar(withTaskbar),
m_withLibraryButton(withLibraryButton),
m_blockExternalLinks(blockExternalLinks),
m_useCache(false),
m_addTaskbar(false),
m_bookName(""),
m_startRange(0),
@ -168,6 +167,14 @@ void Response::inject_externallinks_blocker()
script_tag);
}
bool
Response::can_compress(const RequestContext& request) const
{
return request.can_compress()
&& is_compressible_mime_type(m_mimeType)
&& (m_content.size() > KIWIX_MIN_CONTENT_SIZE_TO_DEFLATE);
}
MHD_Response*
Response::create_raw_content_mhd_response(const RequestContext& request)
{
@ -178,10 +185,7 @@ Response::create_raw_content_mhd_response(const RequestContext& request)
inject_externallinks_blocker();
}
bool shouldCompress = m_compress && request.can_compress();
shouldCompress &= is_compressible_mime_type(m_mimeType);
shouldCompress &= (m_content.size() > KIWIX_MIN_CONTENT_SIZE_TO_DEFLATE);
bool shouldCompress = m_compress && can_compress(request);
if (shouldCompress) {
std::vector<Bytef> compr_buffer(compressBound(m_content.size()));
uLongf comprLen = compr_buffer.capacity();
@ -196,6 +200,7 @@ Response::create_raw_content_mhd_response(const RequestContext& request)
It has no incidence on other browsers
See http://www.subbu.org/blog/2008/03/ie7-deflate-or-not and comments */
m_content = string((char*)&compr_buffer[2], comprLen - 2);
m_etag.set_option(ETag::COMPRESSED_CONTENT);
} else {
shouldCompress = false;
}
@ -204,9 +209,11 @@ Response::create_raw_content_mhd_response(const RequestContext& request)
MHD_Response* response = MHD_create_response_from_buffer(
m_content.size(), const_cast<char*>(m_content.data()), MHD_RESPMEM_MUST_COPY);
if (shouldCompress) {
if ( m_etag.get_option(ETag::COMPRESSED_CONTENT) ) {
MHD_add_response_header(
response, MHD_HTTP_HEADER_VARY, "Accept-Encoding");
}
if (shouldCompress) {
MHD_add_response_header(
response, MHD_HTTP_HEADER_CONTENT_ENCODING, "deflate");
}
@ -267,7 +274,10 @@ int Response::send(const RequestContext& request, MHD_Connection* connection)
MHD_add_response_header(response, "Access-Control-Allow-Origin", "*");
MHD_add_response_header(response, MHD_HTTP_HEADER_CACHE_CONTROL,
m_useCache ? "max-age=2723040, public" : "no-cache, no-store, must-revalidate");
m_etag.get_option(ETag::CACHEABLE_ENTITY) ? "max-age=2723040, public" : "no-cache, no-store, must-revalidate");
const std::string etag = m_etag.get_etag();
if ( ! etag.empty() )
MHD_add_response_header(response, MHD_HTTP_HEADER_ETAG, etag.c_str());
if (m_returnCode == MHD_HTTP_OK && request.has_range())
m_returnCode = MHD_HTTP_PARTIAL_CONTENT;
@ -301,7 +311,7 @@ void Response::set_entry(const Entry& entry, const RequestContext& request) {
const std::string mimeType = get_mime_type(entry);
set_mimeType(mimeType);
set_cache(true);
set_cacheable();
if ( is_compressible_mime_type(mimeType) ) {
zim::Blob raw_content = entry.getBlob();

View File

@ -25,6 +25,7 @@
#include <mustache.hpp>
#include "entry.h"
#include "etag.h"
extern "C" {
#include <microhttpd.h>
@ -55,18 +56,22 @@ class Response {
void set_mimeType(const std::string& mimeType) { m_mimeType = mimeType; }
void set_code(int code) { m_returnCode = code; }
void set_cache(bool cache) { m_useCache = cache; }
void set_cacheable() { m_etag.set_option(ETag::CACHEABLE_ENTITY); }
void set_server_id(const std::string& id) { m_etag.set_server_id(id); }
void set_etag(const ETag& etag) { m_etag = etag; }
void set_compress(bool compress) { m_compress = compress; }
void set_taskbar(const std::string& bookName, const std::string& bookTitle);
void set_range_first(uint64_t start) { m_startRange = start; }
void set_range_len(uint64_t len) { m_lenRange = len; }
int getReturnCode() { return m_returnCode; }
int getReturnCode() const { return m_returnCode; }
std::string get_mimeType() const { return m_mimeType; }
void introduce_taskbar();
void inject_externallinks_blocker();
bool can_compress(const RequestContext& request) const;
private: // functions
MHD_Response* create_mhd_response(const RequestContext& request);
MHD_Response* create_raw_content_mhd_response(const RequestContext& request);
@ -84,13 +89,13 @@ class Response {
bool m_withTaskbar;
bool m_withLibraryButton;
bool m_blockExternalLinks;
bool m_useCache;
bool m_compress;
bool m_addTaskbar;
std::string m_bookName;
std::string m_bookTitle;
uint64_t m_startRange;
uint64_t m_lenRange;
ETag m_etag;
};
}

View File

@ -6,6 +6,13 @@
#include "./httplib.h"
bool is_valid_etag(const std::string& etag)
{
return etag.size() >= 2 &&
etag.front() == '"' &&
etag.back() == '"';
}
template<class T1, class T2>
T1 concat(T1 a, const T2& b)
{
@ -92,8 +99,12 @@ protected:
}
};
const bool WITH_ETAG = true;
const bool NO_ETAG = false;
struct Resource
{
bool etag_expected;
const char* url;
};
@ -106,59 +117,46 @@ std::ostream& operator<<(std::ostream& out, const Resource& r)
typedef std::vector<Resource> ResourceCollection;
const ResourceCollection resources200Compressible{
{ "/" },
{ WITH_ETAG, "/" },
{ "/skin/jquery-ui/jquery-ui.structure.min.css" },
{ "/skin/jquery-ui/jquery-ui.min.js" },
{ "/skin/jquery-ui/external/jquery/jquery.js" },
{ "/skin/jquery-ui/jquery-ui.theme.min.css" },
{ "/skin/jquery-ui/jquery-ui.min.css" },
{ "/skin/taskbar.js" },
{ "/skin/taskbar.css" },
{ "/skin/block_external.js" },
{ WITH_ETAG, "/skin/jquery-ui/jquery-ui.structure.min.css" },
{ WITH_ETAG, "/skin/jquery-ui/jquery-ui.min.js" },
{ WITH_ETAG, "/skin/jquery-ui/external/jquery/jquery.js" },
{ WITH_ETAG, "/skin/jquery-ui/jquery-ui.theme.min.css" },
{ WITH_ETAG, "/skin/jquery-ui/jquery-ui.min.css" },
{ WITH_ETAG, "/skin/taskbar.js" },
{ WITH_ETAG, "/skin/taskbar.css" },
{ WITH_ETAG, "/skin/block_external.js" },
{ "/search?content=zimfile&pattern=abcd" },
{ NO_ETAG, "/search?content=zimfile&pattern=abcd" },
{ "/suggest?content=zimfile&term=ray" },
{ NO_ETAG, "/suggest?content=zimfile&term=ray" },
{ "/catch/external?source=www.example.com" },
{ NO_ETAG, "/catch/external?source=www.example.com" },
{ "/zimfile/A/index" },
{ "/zimfile/A/Ray_Charles" },
{ WITH_ETAG, "/zimfile/A/index" },
{ WITH_ETAG, "/zimfile/A/Ray_Charles" },
};
const ResourceCollection resources200Uncompressible{
{ "/skin/jquery-ui/images/ui-bg_flat_0_aaaaaa_40x100.png" },
{ "/skin/jquery-ui/images/ui-bg_flat_75_ffffff_40x100.png" },
{ "/skin/jquery-ui/images/ui-icons_222222_256x240.png" },
{ "/skin/jquery-ui/images/ui-bg_glass_55_fbf9ee_1x400.png" },
{ "/skin/jquery-ui/images/ui-bg_highlight-soft_75_cccccc_1x100.png" },
{ "/skin/jquery-ui/images/ui-bg_glass_65_ffffff_1x400.png" },
{ "/skin/jquery-ui/images/ui-icons_2e83ff_256x240.png" },
{ "/skin/jquery-ui/images/ui-icons_cd0a0a_256x240.png" },
{ "/skin/jquery-ui/images/ui-icons_888888_256x240.png" },
{ "/skin/jquery-ui/images/ui-bg_glass_75_e6e6e6_1x400.png" },
{ "/skin/jquery-ui/images/animated-overlay.gif" },
{ "/skin/jquery-ui/images/ui-bg_glass_75_dadada_1x400.png" },
{ "/skin/jquery-ui/images/ui-icons_454545_256x240.png" },
{ "/skin/jquery-ui/images/ui-bg_glass_95_fef1ec_1x400.png" },
{ "/skin/caret.png" },
{ WITH_ETAG, "/skin/jquery-ui/images/animated-overlay.gif" },
{ WITH_ETAG, "/skin/caret.png" },
{ "/catalog/root.xml" },
{ "/catalog/searchdescription.xml" },
{ "/catalog/search" },
{ NO_ETAG, "/catalog/root.xml" },
{ NO_ETAG, "/catalog/searchdescription.xml" },
{ NO_ETAG, "/catalog/search" },
{ "/meta?content=zimfile&name=title" },
{ "/meta?content=zimfile&name=description" },
{ "/meta?content=zimfile&name=language" },
{ "/meta?content=zimfile&name=name" },
{ "/meta?content=zimfile&name=tags" },
{ "/meta?content=zimfile&name=date" },
{ "/meta?content=zimfile&name=creator" },
{ "/meta?content=zimfile&name=publisher" },
{ "/meta?content=zimfile&name=favicon" },
{ WITH_ETAG, "/meta?content=zimfile&name=title" },
{ WITH_ETAG, "/meta?content=zimfile&name=description" },
{ WITH_ETAG, "/meta?content=zimfile&name=language" },
{ WITH_ETAG, "/meta?content=zimfile&name=name" },
{ WITH_ETAG, "/meta?content=zimfile&name=tags" },
{ WITH_ETAG, "/meta?content=zimfile&name=date" },
{ WITH_ETAG, "/meta?content=zimfile&name=creator" },
{ WITH_ETAG, "/meta?content=zimfile&name=publisher" },
{ WITH_ETAG, "/meta?content=zimfile&name=favicon" },
{ "/zimfile/I/m/Ray_Charles_classic_piano_pose.jpg" },
{ WITH_ETAG, "/zimfile/I/m/Ray_Charles_classic_piano_pose.jpg" },
};
ResourceCollection all200Resources()
@ -260,3 +258,68 @@ TEST_F(ServerTest, HeadersAreTheSameInResponsesToHeadAndGetRequests)
EXPECT_EQ(invariantHeaders(g), invariantHeaders(h)) << res;
}
}
TEST_F(ServerTest, ETagHeaderIsSetAsNeeded)
{
for ( const Resource& res : all200Resources() ) {
const auto responseToGet = zfs1_->GET(res.url);
EXPECT_EQ(res.etag_expected, responseToGet->has_header("ETag")) << res;
if ( res.etag_expected )
EXPECT_TRUE(is_valid_etag(responseToGet->get_header_value("ETag")));
}
}
TEST_F(ServerTest, ETagIsTheSameInResponsesToDifferentRequestsOfTheSameURL)
{
for ( const Resource& res : all200Resources() ) {
const auto h1 = zfs1_->HEAD(res.url);
const auto h2 = zfs1_->HEAD(res.url);
EXPECT_EQ(h1->get_header_value("ETag"), h2->get_header_value("ETag"));
}
}
TEST_F(ServerTest, ETagIsTheSameAcrossHeadAndGet)
{
for ( const Resource& res : all200Resources() ) {
const auto g = zfs1_->GET(res.url);
const auto h = zfs1_->HEAD(res.url);
EXPECT_EQ(h->get_header_value("ETag"), g->get_header_value("ETag"));
}
}
TEST_F(ServerTest, DifferentServerInstancesProduceDifferentETags)
{
ZimFileServer zfs2(PORT + 1, ZIMFILE);
for ( const Resource& res : all200Resources() ) {
if ( !res.etag_expected ) continue;
const auto h1 = zfs1_->HEAD(res.url);
const auto h2 = zfs2.HEAD(res.url);
EXPECT_NE(h1->get_header_value("ETag"), h2->get_header_value("ETag"));
}
}
TEST_F(ServerTest, CompressionInfluencesETag)
{
for ( const Resource& res : resources200Compressible ) {
if ( ! res.etag_expected ) continue;
const auto g1 = zfs1_->GET(res.url);
const auto g2 = zfs1_->GET(res.url, { {"Accept-Encoding", ""} } );
const auto g3 = zfs1_->GET(res.url, { {"Accept-Encoding", "deflate"} } );
const auto etag = g1->get_header_value("ETag");
EXPECT_EQ(etag, g2->get_header_value("ETag"));
EXPECT_NE(etag, g3->get_header_value("ETag"));
}
}
TEST_F(ServerTest, ETagOfUncompressibleContentIsNotAffectedByAcceptEncoding)
{
for ( const Resource& res : resources200Uncompressible ) {
if ( ! res.etag_expected ) continue;
const auto g1 = zfs1_->GET(res.url);
const auto g2 = zfs1_->GET(res.url, { {"Accept-Encoding", ""} } );
const auto g3 = zfs1_->GET(res.url, { {"Accept-Encoding", "deflate"} } );
const auto etag = g1->get_header_value("ETag");
EXPECT_EQ(etag, g2->get_header_value("ETag")) << res;
EXPECT_EQ(etag, g3->get_header_value("ETag")) << res;
}
}