mirror of https://github.com/kiwix/libkiwix.git
Merge pull request #757 from kiwix/gzip_compression
This commit is contained in:
commit
f90cc39a52
|
@ -78,7 +78,6 @@ extern "C" {
|
|||
#include "response.h"
|
||||
|
||||
#define MAX_SEARCH_LEN 140
|
||||
#define KIWIX_MIN_CONTENT_SIZE_TO_DEFLATE 100
|
||||
#define DEFAULT_CACHE_SIZE 2
|
||||
|
||||
namespace kiwix {
|
||||
|
|
|
@ -75,15 +75,15 @@ RequestContext::RequestContext(struct MHD_Connection* connection,
|
|||
method(str2RequestMethod(_method)),
|
||||
version(version),
|
||||
requestIndex(s_requestIndex++),
|
||||
acceptEncodingDeflate(false),
|
||||
acceptEncodingGzip(false),
|
||||
byteRange_()
|
||||
{
|
||||
MHD_get_connection_values(connection, MHD_HEADER_KIND, &RequestContext::fill_header, this);
|
||||
MHD_get_connection_values(connection, MHD_GET_ARGUMENT_KIND, &RequestContext::fill_argument, this);
|
||||
|
||||
try {
|
||||
acceptEncodingDeflate =
|
||||
(get_header(MHD_HTTP_HEADER_ACCEPT_ENCODING).find("deflate") != std::string::npos);
|
||||
acceptEncodingGzip =
|
||||
(get_header(MHD_HTTP_HEADER_ACCEPT_ENCODING).find("gzip") != std::string::npos);
|
||||
} catch (const std::out_of_range&) {}
|
||||
|
||||
try {
|
||||
|
@ -127,7 +127,7 @@ void RequestContext::print_debug_info() const {
|
|||
printf("Parsed : \n");
|
||||
printf("full_url: %s\n", full_url.c_str());
|
||||
printf("url : %s\n", url.c_str());
|
||||
printf("acceptEncodingDeflate : %d\n", acceptEncodingDeflate);
|
||||
printf("acceptEncodingGzip : %d\n", acceptEncodingGzip);
|
||||
printf("has_range : %d\n", byteRange_.kind() != ByteRange::NONE);
|
||||
printf("is_valid_url : %d\n", is_valid_url());
|
||||
printf(".............\n");
|
||||
|
|
|
@ -92,7 +92,7 @@ class RequestContext {
|
|||
|
||||
ByteRange get_range() const;
|
||||
|
||||
// Tells whether the response may be sent compressed: true when the
// request's Accept-Encoding header mentioned "deflate".
bool can_compress() const
{
  return acceptEncodingDeflate;
}
|
||||
// Tells whether the response may be sent compressed: true when the
// request's Accept-Encoding header mentioned "gzip".
bool can_compress() const
{
  return acceptEncodingGzip;
}
|
||||
|
||||
std::string get_user_language() const;
|
||||
|
||||
|
@ -103,7 +103,7 @@ class RequestContext {
|
|||
std::string version;
|
||||
unsigned long long requestIndex;
|
||||
|
||||
bool acceptEncodingDeflate;
|
||||
bool acceptEncodingGzip;
|
||||
|
||||
ByteRange byteRange_;
|
||||
std::map<std::string, std::string> headers;
|
||||
|
|
|
@ -31,8 +31,17 @@
|
|||
#include <mustache.hpp>
|
||||
#include <zlib.h>
|
||||
|
||||
#include <array>
|
||||
|
||||
#define KIWIX_MIN_CONTENT_SIZE_TO_DEFLATE 100
|
||||
// This is somewhat of a magic value.
|
||||
// If this value is too small, we will compress (and lose CPU time on) too much
|
||||
// content.
|
||||
// If this value is too big, we will not compress enough content and send too
|
||||
// much data.
|
||||
// If we assume that MTU is 1500 Bytes it is useless to compress
|
||||
// content smaller than that, as it will be sent in one packet anyway.
|
||||
// 1400 bytes seems to be a commonly accepted limit.
|
||||
#define KIWIX_MIN_CONTENT_SIZE_TO_COMPRESS 1400
|
||||
|
||||
namespace kiwix {
|
||||
|
||||
|
@ -58,6 +67,41 @@ bool is_compressible_mime_type(const std::string& mimeType)
|
|||
|| mimeType.find("application/json") != string::npos;
|
||||
}
|
||||
|
||||
bool compress(std::string &content) {
|
||||
z_stream strm;
|
||||
strm.zalloc = Z_NULL;
|
||||
strm.zfree = Z_NULL;
|
||||
strm.opaque = Z_NULL;
|
||||
|
||||
auto ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED, 31, 8,
|
||||
Z_DEFAULT_STRATEGY);
|
||||
if (ret != Z_OK) { return false; }
|
||||
|
||||
strm.avail_in = static_cast<decltype(strm.avail_in)>(content.size());
|
||||
strm.next_in =
|
||||
const_cast<Bytef *>(reinterpret_cast<const Bytef *>(content.data()));
|
||||
|
||||
std::string compressed;
|
||||
|
||||
std::array<char, 16384> buff{};
|
||||
do {
|
||||
strm.avail_out = buff.size();
|
||||
strm.next_out = reinterpret_cast<Bytef *>(buff.data());
|
||||
ret = deflate(&strm, Z_FINISH);
|
||||
assert(ret != Z_STREAM_ERROR);
|
||||
compressed.append(buff.data(), buff.size() - strm.avail_out);
|
||||
} while (strm.avail_out == 0);
|
||||
|
||||
assert(ret == Z_STREAM_END);
|
||||
assert(strm.avail_in == 0);
|
||||
|
||||
content.swap(compressed);
|
||||
|
||||
deflateEnd(&strm);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
|
||||
} // unnamed namespace
|
||||
|
||||
|
@ -331,7 +375,7 @@ ContentResponse::can_compress(const RequestContext& request) const
|
|||
{
|
||||
return request.can_compress()
|
||||
&& is_compressible_mime_type(m_mimeType)
|
||||
&& (m_content.size() > KIWIX_MIN_CONTENT_SIZE_TO_DEFLATE);
|
||||
&& (m_content.size() > KIWIX_MIN_CONTENT_SIZE_TO_COMPRESS);
|
||||
}
|
||||
|
||||
bool
|
||||
|
@ -365,35 +409,17 @@ ContentResponse::create_mhd_response(const RequestContext& request)
|
|||
}
|
||||
}
|
||||
|
||||
bool shouldCompress = can_compress(request);
|
||||
if (shouldCompress) {
|
||||
std::vector<Bytef> compr_buffer(compressBound(m_content.size()));
|
||||
uLongf comprLen = compr_buffer.capacity();
|
||||
int err = compress(&compr_buffer[0],
|
||||
&comprLen,
|
||||
(const Bytef*)(m_content.data()),
|
||||
m_content.size());
|
||||
if (err == Z_OK && comprLen > 2 && comprLen < (m_content.size() + 2)) {
|
||||
/* /!\ Internet Explorer has a bug with deflate compression.
|
||||
It can not handle the first two bytes (compression headers)
|
||||
We need to chunk them off (move the content 2bytes)
|
||||
It has no incidence on other browsers
|
||||
See http://www.subbu.org/blog/2008/03/ie7-deflate-or-not and comments */
|
||||
m_content = string((char*)&compr_buffer[2], comprLen - 2);
|
||||
m_etag.set_option(ETag::COMPRESSED_CONTENT);
|
||||
} else {
|
||||
shouldCompress = false;
|
||||
}
|
||||
}
|
||||
const bool isCompressed = can_compress(request) && compress(m_content);
|
||||
|
||||
MHD_Response* response = MHD_create_response_from_buffer(
|
||||
m_content.size(), const_cast<char*>(m_content.data()), MHD_RESPMEM_MUST_COPY);
|
||||
|
||||
if (shouldCompress) {
|
||||
if (isCompressed) {
|
||||
m_etag.set_option(ETag::COMPRESSED_CONTENT);
|
||||
MHD_add_response_header(
|
||||
response, MHD_HTTP_HEADER_VARY, "Accept-Encoding");
|
||||
MHD_add_response_header(
|
||||
response, MHD_HTTP_HEADER_CONTENT_ENCODING, "deflate");
|
||||
response, MHD_HTTP_HEADER_CONTENT_ENCODING, "gzip");
|
||||
}
|
||||
return response;
|
||||
}
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
|
||||
#define CPPHTTPLIB_ZLIB_SUPPORT 1
|
||||
#include "./httplib.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
|
@ -184,13 +185,10 @@ const ResourceCollection resources200Compressible{
|
|||
{ WITH_ETAG, "/ROOT/skin/taskbar.css" },
|
||||
{ WITH_ETAG, "/ROOT/skin/block_external.js" },
|
||||
|
||||
{ NO_ETAG, "/ROOT/catalog/root.xml" },
|
||||
{ NO_ETAG, "/ROOT/catalog/searchdescription.xml" },
|
||||
{ NO_ETAG, "/ROOT/catalog/search" },
|
||||
|
||||
{ NO_ETAG, "/ROOT/search?content=zimfile&pattern=a" },
|
||||
|
||||
{ NO_ETAG, "/ROOT/suggest?content=zimfile" },
|
||||
{ NO_ETAG, "/ROOT/suggest?content=zimfile&term=ray" },
|
||||
|
||||
{ NO_ETAG, "/ROOT/catch/external?source=www.example.com" },
|
||||
|
@ -223,7 +221,10 @@ const ResourceCollection resources200Uncompressible{
|
|||
{ WITH_ETAG, "/ROOT/corner_cases/-/empty.css" },
|
||||
{ WITH_ETAG, "/ROOT/corner_cases/-/empty.js" },
|
||||
|
||||
// The title and creator are too small to be compressed
|
||||
// The following url's responses are too small to be compressed
|
||||
{ NO_ETAG, "/ROOT/catalog/root.xml" },
|
||||
{ NO_ETAG, "/ROOT/catalog/searchdescription.xml" },
|
||||
{ NO_ETAG, "/ROOT/suggest?content=zimfile" },
|
||||
{ WITH_ETAG, "/ROOT/raw/zimfile/meta/Creator" },
|
||||
{ WITH_ETAG, "/ROOT/raw/zimfile/meta/Title" },
|
||||
};
|
||||
|
@ -271,9 +272,9 @@ TEST_F(ServerTest, 200)
|
|||
TEST_F(ServerTest, CompressibleContentIsCompressedIfAcceptable)
|
||||
{
|
||||
for ( const Resource& res : resources200Compressible ) {
|
||||
const auto x = zfs1_->GET(res.url, { {"Accept-Encoding", "deflate"} });
|
||||
const auto x = zfs1_->GET(res.url, { {"Accept-Encoding", "gzip"} });
|
||||
EXPECT_EQ(200, x->status) << res;
|
||||
EXPECT_EQ("deflate", x->get_header_value("Content-Encoding")) << res;
|
||||
EXPECT_EQ("gzip", x->get_header_value("Content-Encoding")) << res;
|
||||
EXPECT_EQ("Accept-Encoding", x->get_header_value("Vary")) << res;
|
||||
}
|
||||
}
|
||||
|
@ -281,7 +282,7 @@ TEST_F(ServerTest, CompressibleContentIsCompressedIfAcceptable)
|
|||
TEST_F(ServerTest, UncompressibleContentIsNotCompressed)
|
||||
{
|
||||
for ( const Resource& res : resources200Uncompressible ) {
|
||||
const auto x = zfs1_->GET(res.url, { {"Accept-Encoding", "deflate"} });
|
||||
const auto x = zfs1_->GET(res.url, { {"Accept-Encoding", "gzip"} });
|
||||
EXPECT_EQ(200, x->status) << res;
|
||||
EXPECT_EQ("", x->get_header_value("Content-Encoding")) << res;
|
||||
}
|
||||
|
@ -1062,7 +1063,7 @@ TEST_F(ServerTest, CompressionInfluencesETag)
|
|||
if ( ! res.etag_expected ) continue;
|
||||
const auto g1 = zfs1_->GET(res.url);
|
||||
const auto g2 = zfs1_->GET(res.url, { {"Accept-Encoding", ""} } );
|
||||
const auto g3 = zfs1_->GET(res.url, { {"Accept-Encoding", "deflate"} } );
|
||||
const auto g3 = zfs1_->GET(res.url, { {"Accept-Encoding", "gzip"} } );
|
||||
const auto etag = g1->get_header_value("ETag");
|
||||
EXPECT_EQ(etag, g2->get_header_value("ETag"));
|
||||
EXPECT_NE(etag, g3->get_header_value("ETag"));
|
||||
|
@ -1075,7 +1076,7 @@ TEST_F(ServerTest, ETagOfUncompressibleContentIsNotAffectedByAcceptEncoding)
|
|||
if ( ! res.etag_expected ) continue;
|
||||
const auto g1 = zfs1_->GET(res.url);
|
||||
const auto g2 = zfs1_->GET(res.url, { {"Accept-Encoding", ""} } );
|
||||
const auto g3 = zfs1_->GET(res.url, { {"Accept-Encoding", "deflate"} } );
|
||||
const auto g3 = zfs1_->GET(res.url, { {"Accept-Encoding", "gzip"} } );
|
||||
const auto etag = g1->get_header_value("ETag");
|
||||
EXPECT_EQ(etag, g2->get_header_value("ETag")) << res;
|
||||
EXPECT_EQ(etag, g3->get_header_value("ETag")) << res;
|
||||
|
@ -1114,7 +1115,7 @@ std::string make_etag_list(const std::string& etag)
|
|||
|
||||
TEST_F(ServerTest, IfNoneMatchRequestsWithMatchingETagResultIn304Responses)
|
||||
{
|
||||
const char* const encodings[] = { "", "deflate" };
|
||||
const char* const encodings[] = { "", "gzip" };
|
||||
for ( const Resource& res : all200Resources() ) {
|
||||
for ( const char* enc: encodings ) {
|
||||
if ( ! res.etag_expected ) continue;
|
||||
|
@ -1245,7 +1246,7 @@ TEST_F(ServerTest, RangeHasPrecedenceOverCompression)
|
|||
|
||||
const Headers onlyRange{ {"Range", "bytes=123-456"} };
|
||||
Headers rangeAndCompression(onlyRange);
|
||||
rangeAndCompression.insert({"Accept-Encoding", "deflate"});
|
||||
rangeAndCompression.insert({"Accept-Encoding", "gzip"});
|
||||
|
||||
const auto p1 = zfs1_->GET(url, onlyRange);
|
||||
const auto p2 = zfs1_->GET(url, rangeAndCompression);
|
||||
|
|
Loading…
Reference in New Issue