mirror of https://github.com/kiwix/libkiwix.git
Merge pull request #757 from kiwix/gzip_compression
This commit is contained in:
commit
f90cc39a52
|
@ -78,7 +78,6 @@ extern "C" {
|
||||||
#include "response.h"
|
#include "response.h"
|
||||||
|
|
||||||
#define MAX_SEARCH_LEN 140
|
#define MAX_SEARCH_LEN 140
|
||||||
#define KIWIX_MIN_CONTENT_SIZE_TO_DEFLATE 100
|
|
||||||
#define DEFAULT_CACHE_SIZE 2
|
#define DEFAULT_CACHE_SIZE 2
|
||||||
|
|
||||||
namespace kiwix {
|
namespace kiwix {
|
||||||
|
|
|
@ -75,15 +75,15 @@ RequestContext::RequestContext(struct MHD_Connection* connection,
|
||||||
method(str2RequestMethod(_method)),
|
method(str2RequestMethod(_method)),
|
||||||
version(version),
|
version(version),
|
||||||
requestIndex(s_requestIndex++),
|
requestIndex(s_requestIndex++),
|
||||||
acceptEncodingDeflate(false),
|
acceptEncodingGzip(false),
|
||||||
byteRange_()
|
byteRange_()
|
||||||
{
|
{
|
||||||
MHD_get_connection_values(connection, MHD_HEADER_KIND, &RequestContext::fill_header, this);
|
MHD_get_connection_values(connection, MHD_HEADER_KIND, &RequestContext::fill_header, this);
|
||||||
MHD_get_connection_values(connection, MHD_GET_ARGUMENT_KIND, &RequestContext::fill_argument, this);
|
MHD_get_connection_values(connection, MHD_GET_ARGUMENT_KIND, &RequestContext::fill_argument, this);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
acceptEncodingDeflate =
|
acceptEncodingGzip =
|
||||||
(get_header(MHD_HTTP_HEADER_ACCEPT_ENCODING).find("deflate") != std::string::npos);
|
(get_header(MHD_HTTP_HEADER_ACCEPT_ENCODING).find("gzip") != std::string::npos);
|
||||||
} catch (const std::out_of_range&) {}
|
} catch (const std::out_of_range&) {}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
@ -127,7 +127,7 @@ void RequestContext::print_debug_info() const {
|
||||||
printf("Parsed : \n");
|
printf("Parsed : \n");
|
||||||
printf("full_url: %s\n", full_url.c_str());
|
printf("full_url: %s\n", full_url.c_str());
|
||||||
printf("url : %s\n", url.c_str());
|
printf("url : %s\n", url.c_str());
|
||||||
printf("acceptEncodingDeflate : %d\n", acceptEncodingDeflate);
|
printf("acceptEncodingGzip : %d\n", acceptEncodingGzip);
|
||||||
printf("has_range : %d\n", byteRange_.kind() != ByteRange::NONE);
|
printf("has_range : %d\n", byteRange_.kind() != ByteRange::NONE);
|
||||||
printf("is_valid_url : %d\n", is_valid_url());
|
printf("is_valid_url : %d\n", is_valid_url());
|
||||||
printf(".............\n");
|
printf(".............\n");
|
||||||
|
|
|
@ -92,7 +92,7 @@ class RequestContext {
|
||||||
|
|
||||||
ByteRange get_range() const;
|
ByteRange get_range() const;
|
||||||
|
|
||||||
bool can_compress() const { return acceptEncodingDeflate; }
|
bool can_compress() const { return acceptEncodingGzip; }
|
||||||
|
|
||||||
std::string get_user_language() const;
|
std::string get_user_language() const;
|
||||||
|
|
||||||
|
@ -103,7 +103,7 @@ class RequestContext {
|
||||||
std::string version;
|
std::string version;
|
||||||
unsigned long long requestIndex;
|
unsigned long long requestIndex;
|
||||||
|
|
||||||
bool acceptEncodingDeflate;
|
bool acceptEncodingGzip;
|
||||||
|
|
||||||
ByteRange byteRange_;
|
ByteRange byteRange_;
|
||||||
std::map<std::string, std::string> headers;
|
std::map<std::string, std::string> headers;
|
||||||
|
|
|
@ -31,8 +31,17 @@
|
||||||
#include <mustache.hpp>
|
#include <mustache.hpp>
|
||||||
#include <zlib.h>
|
#include <zlib.h>
|
||||||
|
|
||||||
|
#include <array>
|
||||||
|
|
||||||
#define KIWIX_MIN_CONTENT_SIZE_TO_DEFLATE 100
|
// This is somewhat of a magic value.
|
||||||
|
// If this value is too small, we will compress (and lose CPU time on) too much
|
||||||
|
// content.
|
||||||
|
// If this value is too big, we will not compress enough content and send too
|
||||||
|
// much data.
|
||||||
|
// If we assume that MTU is 1500 Bytes it is useless to compress
|
||||||
|
// content smaller than that, as it will be sent in one packet anyway.
|
||||||
|
// 1400 Bytes seems to be a commonly accepted limit.
|
||||||
|
#define KIWIX_MIN_CONTENT_SIZE_TO_COMPRESS 1400
|
||||||
|
|
||||||
namespace kiwix {
|
namespace kiwix {
|
||||||
|
|
||||||
|
@ -58,6 +67,41 @@ bool is_compressible_mime_type(const std::string& mimeType)
|
||||||
|| mimeType.find("application/json") != string::npos;
|
|| mimeType.find("application/json") != string::npos;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool compress(std::string &content) {
|
||||||
|
z_stream strm;
|
||||||
|
strm.zalloc = Z_NULL;
|
||||||
|
strm.zfree = Z_NULL;
|
||||||
|
strm.opaque = Z_NULL;
|
||||||
|
|
||||||
|
auto ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED, 31, 8,
|
||||||
|
Z_DEFAULT_STRATEGY);
|
||||||
|
if (ret != Z_OK) { return false; }
|
||||||
|
|
||||||
|
strm.avail_in = static_cast<decltype(strm.avail_in)>(content.size());
|
||||||
|
strm.next_in =
|
||||||
|
const_cast<Bytef *>(reinterpret_cast<const Bytef *>(content.data()));
|
||||||
|
|
||||||
|
std::string compressed;
|
||||||
|
|
||||||
|
std::array<char, 16384> buff{};
|
||||||
|
do {
|
||||||
|
strm.avail_out = buff.size();
|
||||||
|
strm.next_out = reinterpret_cast<Bytef *>(buff.data());
|
||||||
|
ret = deflate(&strm, Z_FINISH);
|
||||||
|
assert(ret != Z_STREAM_ERROR);
|
||||||
|
compressed.append(buff.data(), buff.size() - strm.avail_out);
|
||||||
|
} while (strm.avail_out == 0);
|
||||||
|
|
||||||
|
assert(ret == Z_STREAM_END);
|
||||||
|
assert(strm.avail_in == 0);
|
||||||
|
|
||||||
|
content.swap(compressed);
|
||||||
|
|
||||||
|
deflateEnd(&strm);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
} // unnamed namespace
|
} // unnamed namespace
|
||||||
|
|
||||||
|
@ -331,7 +375,7 @@ ContentResponse::can_compress(const RequestContext& request) const
|
||||||
{
|
{
|
||||||
return request.can_compress()
|
return request.can_compress()
|
||||||
&& is_compressible_mime_type(m_mimeType)
|
&& is_compressible_mime_type(m_mimeType)
|
||||||
&& (m_content.size() > KIWIX_MIN_CONTENT_SIZE_TO_DEFLATE);
|
&& (m_content.size() > KIWIX_MIN_CONTENT_SIZE_TO_COMPRESS);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool
|
bool
|
||||||
|
@ -365,35 +409,17 @@ ContentResponse::create_mhd_response(const RequestContext& request)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool shouldCompress = can_compress(request);
|
const bool isCompressed = can_compress(request) && compress(m_content);
|
||||||
if (shouldCompress) {
|
|
||||||
std::vector<Bytef> compr_buffer(compressBound(m_content.size()));
|
|
||||||
uLongf comprLen = compr_buffer.capacity();
|
|
||||||
int err = compress(&compr_buffer[0],
|
|
||||||
&comprLen,
|
|
||||||
(const Bytef*)(m_content.data()),
|
|
||||||
m_content.size());
|
|
||||||
if (err == Z_OK && comprLen > 2 && comprLen < (m_content.size() + 2)) {
|
|
||||||
/* /!\ Internet Explorer has a bug with deflate compression.
|
|
||||||
It can not handle the first two bytes (compression headers)
|
|
||||||
We need to chunk them off (move the content 2bytes)
|
|
||||||
It has no incidence on other browsers
|
|
||||||
See http://www.subbu.org/blog/2008/03/ie7-deflate-or-not and comments */
|
|
||||||
m_content = string((char*)&compr_buffer[2], comprLen - 2);
|
|
||||||
m_etag.set_option(ETag::COMPRESSED_CONTENT);
|
|
||||||
} else {
|
|
||||||
shouldCompress = false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
MHD_Response* response = MHD_create_response_from_buffer(
|
MHD_Response* response = MHD_create_response_from_buffer(
|
||||||
m_content.size(), const_cast<char*>(m_content.data()), MHD_RESPMEM_MUST_COPY);
|
m_content.size(), const_cast<char*>(m_content.data()), MHD_RESPMEM_MUST_COPY);
|
||||||
|
|
||||||
if (shouldCompress) {
|
if (isCompressed) {
|
||||||
|
m_etag.set_option(ETag::COMPRESSED_CONTENT);
|
||||||
MHD_add_response_header(
|
MHD_add_response_header(
|
||||||
response, MHD_HTTP_HEADER_VARY, "Accept-Encoding");
|
response, MHD_HTTP_HEADER_VARY, "Accept-Encoding");
|
||||||
MHD_add_response_header(
|
MHD_add_response_header(
|
||||||
response, MHD_HTTP_HEADER_CONTENT_ENCODING, "deflate");
|
response, MHD_HTTP_HEADER_CONTENT_ENCODING, "gzip");
|
||||||
}
|
}
|
||||||
return response;
|
return response;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
|
|
||||||
|
#define CPPHTTPLIB_ZLIB_SUPPORT 1
|
||||||
#include "./httplib.h"
|
#include "./httplib.h"
|
||||||
#include "gtest/gtest.h"
|
#include "gtest/gtest.h"
|
||||||
|
|
||||||
|
@ -184,13 +185,10 @@ const ResourceCollection resources200Compressible{
|
||||||
{ WITH_ETAG, "/ROOT/skin/taskbar.css" },
|
{ WITH_ETAG, "/ROOT/skin/taskbar.css" },
|
||||||
{ WITH_ETAG, "/ROOT/skin/block_external.js" },
|
{ WITH_ETAG, "/ROOT/skin/block_external.js" },
|
||||||
|
|
||||||
{ NO_ETAG, "/ROOT/catalog/root.xml" },
|
|
||||||
{ NO_ETAG, "/ROOT/catalog/searchdescription.xml" },
|
|
||||||
{ NO_ETAG, "/ROOT/catalog/search" },
|
{ NO_ETAG, "/ROOT/catalog/search" },
|
||||||
|
|
||||||
{ NO_ETAG, "/ROOT/search?content=zimfile&pattern=a" },
|
{ NO_ETAG, "/ROOT/search?content=zimfile&pattern=a" },
|
||||||
|
|
||||||
{ NO_ETAG, "/ROOT/suggest?content=zimfile" },
|
|
||||||
{ NO_ETAG, "/ROOT/suggest?content=zimfile&term=ray" },
|
{ NO_ETAG, "/ROOT/suggest?content=zimfile&term=ray" },
|
||||||
|
|
||||||
{ NO_ETAG, "/ROOT/catch/external?source=www.example.com" },
|
{ NO_ETAG, "/ROOT/catch/external?source=www.example.com" },
|
||||||
|
@ -223,7 +221,10 @@ const ResourceCollection resources200Uncompressible{
|
||||||
{ WITH_ETAG, "/ROOT/corner_cases/-/empty.css" },
|
{ WITH_ETAG, "/ROOT/corner_cases/-/empty.css" },
|
||||||
{ WITH_ETAG, "/ROOT/corner_cases/-/empty.js" },
|
{ WITH_ETAG, "/ROOT/corner_cases/-/empty.js" },
|
||||||
|
|
||||||
// The title and creator are too small to be compressed
|
// The responses for the following URLs are too small to be compressed
|
||||||
|
{ NO_ETAG, "/ROOT/catalog/root.xml" },
|
||||||
|
{ NO_ETAG, "/ROOT/catalog/searchdescription.xml" },
|
||||||
|
{ NO_ETAG, "/ROOT/suggest?content=zimfile" },
|
||||||
{ WITH_ETAG, "/ROOT/raw/zimfile/meta/Creator" },
|
{ WITH_ETAG, "/ROOT/raw/zimfile/meta/Creator" },
|
||||||
{ WITH_ETAG, "/ROOT/raw/zimfile/meta/Title" },
|
{ WITH_ETAG, "/ROOT/raw/zimfile/meta/Title" },
|
||||||
};
|
};
|
||||||
|
@ -271,9 +272,9 @@ TEST_F(ServerTest, 200)
|
||||||
TEST_F(ServerTest, CompressibleContentIsCompressedIfAcceptable)
|
TEST_F(ServerTest, CompressibleContentIsCompressedIfAcceptable)
|
||||||
{
|
{
|
||||||
for ( const Resource& res : resources200Compressible ) {
|
for ( const Resource& res : resources200Compressible ) {
|
||||||
const auto x = zfs1_->GET(res.url, { {"Accept-Encoding", "deflate"} });
|
const auto x = zfs1_->GET(res.url, { {"Accept-Encoding", "gzip"} });
|
||||||
EXPECT_EQ(200, x->status) << res;
|
EXPECT_EQ(200, x->status) << res;
|
||||||
EXPECT_EQ("deflate", x->get_header_value("Content-Encoding")) << res;
|
EXPECT_EQ("gzip", x->get_header_value("Content-Encoding")) << res;
|
||||||
EXPECT_EQ("Accept-Encoding", x->get_header_value("Vary")) << res;
|
EXPECT_EQ("Accept-Encoding", x->get_header_value("Vary")) << res;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -281,7 +282,7 @@ TEST_F(ServerTest, CompressibleContentIsCompressedIfAcceptable)
|
||||||
TEST_F(ServerTest, UncompressibleContentIsNotCompressed)
|
TEST_F(ServerTest, UncompressibleContentIsNotCompressed)
|
||||||
{
|
{
|
||||||
for ( const Resource& res : resources200Uncompressible ) {
|
for ( const Resource& res : resources200Uncompressible ) {
|
||||||
const auto x = zfs1_->GET(res.url, { {"Accept-Encoding", "deflate"} });
|
const auto x = zfs1_->GET(res.url, { {"Accept-Encoding", "gzip"} });
|
||||||
EXPECT_EQ(200, x->status) << res;
|
EXPECT_EQ(200, x->status) << res;
|
||||||
EXPECT_EQ("", x->get_header_value("Content-Encoding")) << res;
|
EXPECT_EQ("", x->get_header_value("Content-Encoding")) << res;
|
||||||
}
|
}
|
||||||
|
@ -1062,7 +1063,7 @@ TEST_F(ServerTest, CompressionInfluencesETag)
|
||||||
if ( ! res.etag_expected ) continue;
|
if ( ! res.etag_expected ) continue;
|
||||||
const auto g1 = zfs1_->GET(res.url);
|
const auto g1 = zfs1_->GET(res.url);
|
||||||
const auto g2 = zfs1_->GET(res.url, { {"Accept-Encoding", ""} } );
|
const auto g2 = zfs1_->GET(res.url, { {"Accept-Encoding", ""} } );
|
||||||
const auto g3 = zfs1_->GET(res.url, { {"Accept-Encoding", "deflate"} } );
|
const auto g3 = zfs1_->GET(res.url, { {"Accept-Encoding", "gzip"} } );
|
||||||
const auto etag = g1->get_header_value("ETag");
|
const auto etag = g1->get_header_value("ETag");
|
||||||
EXPECT_EQ(etag, g2->get_header_value("ETag"));
|
EXPECT_EQ(etag, g2->get_header_value("ETag"));
|
||||||
EXPECT_NE(etag, g3->get_header_value("ETag"));
|
EXPECT_NE(etag, g3->get_header_value("ETag"));
|
||||||
|
@ -1075,7 +1076,7 @@ TEST_F(ServerTest, ETagOfUncompressibleContentIsNotAffectedByAcceptEncoding)
|
||||||
if ( ! res.etag_expected ) continue;
|
if ( ! res.etag_expected ) continue;
|
||||||
const auto g1 = zfs1_->GET(res.url);
|
const auto g1 = zfs1_->GET(res.url);
|
||||||
const auto g2 = zfs1_->GET(res.url, { {"Accept-Encoding", ""} } );
|
const auto g2 = zfs1_->GET(res.url, { {"Accept-Encoding", ""} } );
|
||||||
const auto g3 = zfs1_->GET(res.url, { {"Accept-Encoding", "deflate"} } );
|
const auto g3 = zfs1_->GET(res.url, { {"Accept-Encoding", "gzip"} } );
|
||||||
const auto etag = g1->get_header_value("ETag");
|
const auto etag = g1->get_header_value("ETag");
|
||||||
EXPECT_EQ(etag, g2->get_header_value("ETag")) << res;
|
EXPECT_EQ(etag, g2->get_header_value("ETag")) << res;
|
||||||
EXPECT_EQ(etag, g3->get_header_value("ETag")) << res;
|
EXPECT_EQ(etag, g3->get_header_value("ETag")) << res;
|
||||||
|
@ -1114,7 +1115,7 @@ std::string make_etag_list(const std::string& etag)
|
||||||
|
|
||||||
TEST_F(ServerTest, IfNoneMatchRequestsWithMatchingETagResultIn304Responses)
|
TEST_F(ServerTest, IfNoneMatchRequestsWithMatchingETagResultIn304Responses)
|
||||||
{
|
{
|
||||||
const char* const encodings[] = { "", "deflate" };
|
const char* const encodings[] = { "", "gzip" };
|
||||||
for ( const Resource& res : all200Resources() ) {
|
for ( const Resource& res : all200Resources() ) {
|
||||||
for ( const char* enc: encodings ) {
|
for ( const char* enc: encodings ) {
|
||||||
if ( ! res.etag_expected ) continue;
|
if ( ! res.etag_expected ) continue;
|
||||||
|
@ -1245,7 +1246,7 @@ TEST_F(ServerTest, RangeHasPrecedenceOverCompression)
|
||||||
|
|
||||||
const Headers onlyRange{ {"Range", "bytes=123-456"} };
|
const Headers onlyRange{ {"Range", "bytes=123-456"} };
|
||||||
Headers rangeAndCompression(onlyRange);
|
Headers rangeAndCompression(onlyRange);
|
||||||
rangeAndCompression.insert({"Accept-Encoding", "deflate"});
|
rangeAndCompression.insert({"Accept-Encoding", "gzip"});
|
||||||
|
|
||||||
const auto p1 = zfs1_->GET(url, onlyRange);
|
const auto p1 = zfs1_->GET(url, onlyRange);
|
||||||
const auto p2 = zfs1_->GET(url, rangeAndCompression);
|
const auto p2 = zfs1_->GET(url, rangeAndCompression);
|
||||||
|
|
Loading…
Reference in New Issue