Merge pull request #757 from kiwix/gzip_compression

Matthieu Gautier 2022-04-28 14:36:51 +02:00 committed by GitHub
commit f90cc39a52
5 changed files with 68 additions and 42 deletions

View File

@@ -78,7 +78,6 @@ extern "C" {
 #include "response.h"
 #define MAX_SEARCH_LEN 140
-#define KIWIX_MIN_CONTENT_SIZE_TO_DEFLATE 100
 #define DEFAULT_CACHE_SIZE 2
 namespace kiwix {

View File

@@ -75,15 +75,15 @@ RequestContext::RequestContext(struct MHD_Connection* connection,
     method(str2RequestMethod(_method)),
     version(version),
     requestIndex(s_requestIndex++),
-    acceptEncodingDeflate(false),
+    acceptEncodingGzip(false),
     byteRange_()
 {
   MHD_get_connection_values(connection, MHD_HEADER_KIND, &RequestContext::fill_header, this);
   MHD_get_connection_values(connection, MHD_GET_ARGUMENT_KIND, &RequestContext::fill_argument, this);
   try {
-    acceptEncodingDeflate =
-      (get_header(MHD_HTTP_HEADER_ACCEPT_ENCODING).find("deflate") != std::string::npos);
+    acceptEncodingGzip =
+      (get_header(MHD_HTTP_HEADER_ACCEPT_ENCODING).find("gzip") != std::string::npos);
   } catch (const std::out_of_range&) {}
   try {
@@ -127,7 +127,7 @@ void RequestContext::print_debug_info() const {
   printf("Parsed : \n");
   printf("full_url: %s\n", full_url.c_str());
   printf("url : %s\n", url.c_str());
-  printf("acceptEncodingDeflate : %d\n", acceptEncodingDeflate);
+  printf("acceptEncodingGzip : %d\n", acceptEncodingGzip);
   printf("has_range : %d\n", byteRange_.kind() != ByteRange::NONE);
   printf("is_valid_url : %d\n", is_valid_url());
   printf(".............\n");

View File

@@ -92,7 +92,7 @@ class RequestContext {
     ByteRange get_range() const;
-    bool can_compress() const { return acceptEncodingDeflate; }
+    bool can_compress() const { return acceptEncodingGzip; }
     std::string get_user_language() const;
@@ -103,7 +103,7 @@ class RequestContext {
     std::string version;
     unsigned long long requestIndex;
-    bool acceptEncodingDeflate;
+    bool acceptEncodingGzip;
     ByteRange byteRange_;
     std::map<std::string, std::string> headers;

View File

@@ -31,8 +31,17 @@
 #include <mustache.hpp>
 #include <zlib.h>
+#include <array>
 
-#define KIWIX_MIN_CONTENT_SIZE_TO_DEFLATE 100
+// This is somewhat of a magic value.
+// If this value is too small, we will compress (and lose cpu time on) too much
+// content.
+// If this value is too big, we will not compress enough content and send too
+// much data.
+// If we assume that the MTU is 1500 bytes, it is useless to compress smaller
+// content, as it will be sent in one packet anyway.
+// 1400 bytes seems to be a commonly accepted limit.
+#define KIWIX_MIN_CONTENT_SIZE_TO_COMPRESS 1400
 
 namespace kiwix {
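
Note on the 1400-byte threshold: with a typical 1500-byte Ethernet MTU, roughly 40 bytes go to the IPv4 and TCP headers (20 bytes each, without options), leaving about 1460 bytes of payload per segment. A response body below roughly 1400 bytes therefore fits in a single packet whether it is compressed or not, which is the reasoning behind the commonly cited limit.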
@@ -58,6 +67,41 @@ bool is_compressible_mime_type(const std::string& mimeType)
       || mimeType.find("application/json") != string::npos;
 }
 
+bool compress(std::string &content) {
+  z_stream strm;
+  strm.zalloc = Z_NULL;
+  strm.zfree = Z_NULL;
+  strm.opaque = Z_NULL;
+  auto ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED, 31, 8,
+                          Z_DEFAULT_STRATEGY);
+  if (ret != Z_OK) { return false; }
+
+  strm.avail_in = static_cast<decltype(strm.avail_in)>(content.size());
+  strm.next_in =
+      const_cast<Bytef *>(reinterpret_cast<const Bytef *>(content.data()));
+
+  std::string compressed;
+  std::array<char, 16384> buff{};
+  do {
+    strm.avail_out = buff.size();
+    strm.next_out = reinterpret_cast<Bytef *>(buff.data());
+    ret = deflate(&strm, Z_FINISH);
+    assert(ret != Z_STREAM_ERROR);
+    compressed.append(buff.data(), buff.size() - strm.avail_out);
+  } while (strm.avail_out == 0);
+  assert(ret == Z_STREAM_END);
+  assert(strm.avail_in == 0);
+
+  content.swap(compressed);
+  deflateEnd(&strm);
+  return true;
+}
+
 } // unnamed namespace
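
For readers unfamiliar with the zlib API used in the new compress() helper: the windowBits value of 31 passed to deflateInit2() is 15 + 16, which asks zlib to emit a gzip wrapper instead of a raw zlib stream. A minimal standalone sketch (not part of this commit; the single-shot buffer handling is illustrative only) demonstrating this by checking for the gzip magic bytes 0x1f 0x8b:

#include <zlib.h>
#include <cassert>
#include <string>

int main() {
  const std::string data(2000, 'a');   // above the 1400-byte threshold
  z_stream strm{};                     // zalloc/zfree/opaque zero-initialized (Z_NULL)
  int ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
                         15 + 16 /* = 31: gzip wrapper */, 8, Z_DEFAULT_STRATEGY);
  assert(ret == Z_OK);

  std::string out(deflateBound(&strm, data.size()), '\0');  // worst-case output size
  strm.next_in   = const_cast<Bytef*>(reinterpret_cast<const Bytef*>(data.data()));
  strm.avail_in  = static_cast<uInt>(data.size());
  strm.next_out  = reinterpret_cast<Bytef*>(&out[0]);
  strm.avail_out = static_cast<uInt>(out.size());

  ret = deflate(&strm, Z_FINISH);      // whole input fits, so one call ends the stream
  assert(ret == Z_STREAM_END);
  out.resize(out.size() - strm.avail_out);
  deflateEnd(&strm);

  // A gzip stream always starts with the magic bytes 0x1f 0x8b.
  assert(static_cast<unsigned char>(out[0]) == 0x1f);
  assert(static_cast<unsigned char>(out[1]) == 0x8b);
  return 0;
}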
@@ -331,7 +375,7 @@ ContentResponse::can_compress(const RequestContext& request) const
 {
   return request.can_compress()
          && is_compressible_mime_type(m_mimeType)
-         && (m_content.size() > KIWIX_MIN_CONTENT_SIZE_TO_DEFLATE);
+         && (m_content.size() > KIWIX_MIN_CONTENT_SIZE_TO_COMPRESS);
 }
 
 bool
@@ -365,35 +409,17 @@ ContentResponse::create_mhd_response(const RequestContext& request)
     }
   }
 
-  bool shouldCompress = can_compress(request);
-  if (shouldCompress) {
-    std::vector<Bytef> compr_buffer(compressBound(m_content.size()));
-    uLongf comprLen = compr_buffer.capacity();
-    int err = compress(&compr_buffer[0],
-                       &comprLen,
-                       (const Bytef*)(m_content.data()),
-                       m_content.size());
-    if (err == Z_OK && comprLen > 2 && comprLen < (m_content.size() + 2)) {
-      /* /!\ Internet Explorer has a bug with deflate compression.
-         It can not handle the first two bytes (compression headers)
-         We need to chunk them off (move the content 2bytes)
-         It has no incidence on other browsers
-         See http://www.subbu.org/blog/2008/03/ie7-deflate-or-not and comments */
-      m_content = string((char*)&compr_buffer[2], comprLen - 2);
-      m_etag.set_option(ETag::COMPRESSED_CONTENT);
-    } else {
-      shouldCompress = false;
-    }
-  }
+  const bool isCompressed = can_compress(request) && compress(m_content);
 
   MHD_Response* response = MHD_create_response_from_buffer(
     m_content.size(), const_cast<char*>(m_content.data()), MHD_RESPMEM_MUST_COPY);
 
-  if (shouldCompress) {
+  if (isCompressed) {
+    m_etag.set_option(ETag::COMPRESSED_CONTENT);
     MHD_add_response_header(
         response, MHD_HTTP_HEADER_VARY, "Accept-Encoding");
     MHD_add_response_header(
-        response, MHD_HTTP_HEADER_CONTENT_ENCODING, "deflate");
+        response, MHD_HTTP_HEADER_CONTENT_ENCODING, "gzip");
   }
 
   return response;
 }
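
Since create_mhd_response() now advertises the body with "Content-Encoding: gzip", any standard gzip decoder can restore the original content. Below is a small sketch (not part of this commit; the gunzip() helper name is purely illustrative) of the inverse operation with zlib, e.g. for verifying a round trip in a test. inflateInit2() with windowBits 15 + 16 mirrors the deflateInit2() call above:

#include <zlib.h>
#include <array>
#include <stdexcept>
#include <string>

// Inflate a gzip-encoded buffer (windowBits 15 + 16 expects a gzip wrapper).
std::string gunzip(const std::string& compressed) {
  z_stream strm{};
  if (inflateInit2(&strm, 15 + 16) != Z_OK)
    throw std::runtime_error("inflateInit2 failed");

  strm.next_in = const_cast<Bytef*>(
      reinterpret_cast<const Bytef*>(compressed.data()));
  strm.avail_in = static_cast<uInt>(compressed.size());

  std::string out;
  std::array<char, 16384> buff{};
  int ret = Z_OK;
  do {
    strm.next_out  = reinterpret_cast<Bytef*>(buff.data());
    strm.avail_out = static_cast<uInt>(buff.size());
    ret = inflate(&strm, Z_NO_FLUSH);
    if (ret != Z_OK && ret != Z_STREAM_END) {   // Z_DATA_ERROR, Z_BUF_ERROR, ...
      inflateEnd(&strm);
      throw std::runtime_error("inflate failed");
    }
    out.append(buff.data(), buff.size() - strm.avail_out);
  } while (ret != Z_STREAM_END);

  inflateEnd(&strm);
  return out;
}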

View File

@@ -1,4 +1,5 @@
+#define CPPHTTPLIB_ZLIB_SUPPORT 1
 #include "./httplib.h"
 #include "gtest/gtest.h"
@@ -184,13 +185,10 @@ const ResourceCollection resources200Compressible{
   { WITH_ETAG, "/ROOT/skin/taskbar.css" },
   { WITH_ETAG, "/ROOT/skin/block_external.js" },
-  { NO_ETAG, "/ROOT/catalog/root.xml" },
-  { NO_ETAG, "/ROOT/catalog/searchdescription.xml" },
   { NO_ETAG, "/ROOT/catalog/search" },
   { NO_ETAG, "/ROOT/search?content=zimfile&pattern=a" },
-  { NO_ETAG, "/ROOT/suggest?content=zimfile" },
   { NO_ETAG, "/ROOT/suggest?content=zimfile&term=ray" },
   { NO_ETAG, "/ROOT/catch/external?source=www.example.com" },
@@ -223,7 +221,10 @@ const ResourceCollection resources200Uncompressible{
   { WITH_ETAG, "/ROOT/corner_cases/-/empty.css" },
   { WITH_ETAG, "/ROOT/corner_cases/-/empty.js" },
 
-  // The title and creator are too small to be compressed
+  // The following URLs' responses are too small to be compressed
+  { NO_ETAG, "/ROOT/catalog/root.xml" },
+  { NO_ETAG, "/ROOT/catalog/searchdescription.xml" },
+  { NO_ETAG, "/ROOT/suggest?content=zimfile" },
   { WITH_ETAG, "/ROOT/raw/zimfile/meta/Creator" },
   { WITH_ETAG, "/ROOT/raw/zimfile/meta/Title" },
 };
@@ -271,9 +272,9 @@ TEST_F(ServerTest, 200)
 TEST_F(ServerTest, CompressibleContentIsCompressedIfAcceptable)
 {
   for ( const Resource& res : resources200Compressible ) {
-    const auto x = zfs1_->GET(res.url, { {"Accept-Encoding", "deflate"} });
+    const auto x = zfs1_->GET(res.url, { {"Accept-Encoding", "gzip"} });
     EXPECT_EQ(200, x->status) << res;
-    EXPECT_EQ("deflate", x->get_header_value("Content-Encoding")) << res;
+    EXPECT_EQ("gzip", x->get_header_value("Content-Encoding")) << res;
     EXPECT_EQ("Accept-Encoding", x->get_header_value("Vary")) << res;
   }
 }
@@ -281,7 +282,7 @@ TEST_F(ServerTest, CompressibleContentIsCompressedIfAcceptable)
 TEST_F(ServerTest, UncompressibleContentIsNotCompressed)
 {
   for ( const Resource& res : resources200Uncompressible ) {
-    const auto x = zfs1_->GET(res.url, { {"Accept-Encoding", "deflate"} });
+    const auto x = zfs1_->GET(res.url, { {"Accept-Encoding", "gzip"} });
     EXPECT_EQ(200, x->status) << res;
     EXPECT_EQ("", x->get_header_value("Content-Encoding")) << res;
   }
@@ -1062,7 +1063,7 @@ TEST_F(ServerTest, CompressionInfluencesETag)
     if ( ! res.etag_expected ) continue;
     const auto g1 = zfs1_->GET(res.url);
     const auto g2 = zfs1_->GET(res.url, { {"Accept-Encoding", ""} } );
-    const auto g3 = zfs1_->GET(res.url, { {"Accept-Encoding", "deflate"} } );
+    const auto g3 = zfs1_->GET(res.url, { {"Accept-Encoding", "gzip"} } );
     const auto etag = g1->get_header_value("ETag");
     EXPECT_EQ(etag, g2->get_header_value("ETag"));
     EXPECT_NE(etag, g3->get_header_value("ETag"));
@@ -1075,7 +1076,7 @@ TEST_F(ServerTest, ETagOfUncompressibleContentIsNotAffectedByAcceptEncoding)
     if ( ! res.etag_expected ) continue;
     const auto g1 = zfs1_->GET(res.url);
     const auto g2 = zfs1_->GET(res.url, { {"Accept-Encoding", ""} } );
-    const auto g3 = zfs1_->GET(res.url, { {"Accept-Encoding", "deflate"} } );
+    const auto g3 = zfs1_->GET(res.url, { {"Accept-Encoding", "gzip"} } );
     const auto etag = g1->get_header_value("ETag");
     EXPECT_EQ(etag, g2->get_header_value("ETag")) << res;
     EXPECT_EQ(etag, g3->get_header_value("ETag")) << res;
@@ -1114,7 +1115,7 @@ std::string make_etag_list(const std::string& etag)
 TEST_F(ServerTest, IfNoneMatchRequestsWithMatchingETagResultIn304Responses)
 {
-  const char* const encodings[] = { "", "deflate" };
+  const char* const encodings[] = { "", "gzip" };
   for ( const Resource& res : all200Resources() ) {
     for ( const char* enc: encodings ) {
       if ( ! res.etag_expected ) continue;
@@ -1245,7 +1246,7 @@ TEST_F(ServerTest, RangeHasPrecedenceOverCompression)
   const Headers onlyRange{ {"Range", "bytes=123-456"} };
   Headers rangeAndCompression(onlyRange);
-  rangeAndCompression.insert({"Accept-Encoding", "deflate"});
+  rangeAndCompression.insert({"Accept-Encoding", "gzip"});
 
   const auto p1 = zfs1_->GET(url, onlyRange);
   const auto p2 = zfs1_->GET(url, rangeAndCompression);