diff --git a/src/server/internalServer.cpp b/src/server/internalServer.cpp index a60de9e9f..748765615 100644 --- a/src/server/internalServer.cpp +++ b/src/server/internalServer.cpp @@ -78,7 +78,6 @@ extern "C" { #include "response.h" #define MAX_SEARCH_LEN 140 -#define KIWIX_MIN_CONTENT_SIZE_TO_DEFLATE 100 #define DEFAULT_CACHE_SIZE 2 namespace kiwix { diff --git a/src/server/request_context.cpp b/src/server/request_context.cpp index 0946b123b..6b435e5ad 100644 --- a/src/server/request_context.cpp +++ b/src/server/request_context.cpp @@ -75,15 +75,15 @@ RequestContext::RequestContext(struct MHD_Connection* connection, method(str2RequestMethod(_method)), version(version), requestIndex(s_requestIndex++), - acceptEncodingDeflate(false), + acceptEncodingGzip(false), byteRange_() { MHD_get_connection_values(connection, MHD_HEADER_KIND, &RequestContext::fill_header, this); MHD_get_connection_values(connection, MHD_GET_ARGUMENT_KIND, &RequestContext::fill_argument, this); try { - acceptEncodingDeflate = - (get_header(MHD_HTTP_HEADER_ACCEPT_ENCODING).find("deflate") != std::string::npos); + acceptEncodingGzip = + (get_header(MHD_HTTP_HEADER_ACCEPT_ENCODING).find("gzip") != std::string::npos); } catch (const std::out_of_range&) {} try { @@ -127,7 +127,7 @@ void RequestContext::print_debug_info() const { printf("Parsed : \n"); printf("full_url: %s\n", full_url.c_str()); printf("url : %s\n", url.c_str()); - printf("acceptEncodingDeflate : %d\n", acceptEncodingDeflate); + printf("acceptEncodingGzip : %d\n", acceptEncodingGzip); printf("has_range : %d\n", byteRange_.kind() != ByteRange::NONE); printf("is_valid_url : %d\n", is_valid_url()); printf(".............\n"); diff --git a/src/server/request_context.h b/src/server/request_context.h index 7bdd7d87c..79b0a60e9 100644 --- a/src/server/request_context.h +++ b/src/server/request_context.h @@ -92,7 +92,7 @@ class RequestContext { ByteRange get_range() const; - bool can_compress() const { return acceptEncodingDeflate; } + bool 
can_compress() const { return acceptEncodingGzip; } std::string get_user_language() const; @@ -103,7 +103,7 @@ class RequestContext { std::string version; unsigned long long requestIndex; - bool acceptEncodingDeflate; + bool acceptEncodingGzip; ByteRange byteRange_; std::map headers; diff --git a/src/server/response.cpp b/src/server/response.cpp index 20a236020..ae80a5846 100644 --- a/src/server/response.cpp +++ b/src/server/response.cpp @@ -31,8 +31,17 @@ #include #include +#include -#define KIWIX_MIN_CONTENT_SIZE_TO_DEFLATE 100 +// This is a somewhat arbitrary ("magic") value. +// If this value is too small, we will compress too much content (and waste +// CPU time). +// If this value is too big, we will not compress enough content and send too +// much data. +// If we assume that the MTU is 1500 bytes, it is useless to compress +// smaller content, as it will be sent in one packet anyway. +// 1400 bytes seems to be a commonly accepted limit. +#define KIWIX_MIN_CONTENT_SIZE_TO_COMPRESS 1400 namespace kiwix { @@ -58,6 +67,41 @@ bool is_compressible_mime_type(const std::string& mimeType) || mimeType.find("application/json") != string::npos; } +bool compress(std::string &content) { + z_stream strm; + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + + auto ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED, 31, 8, + Z_DEFAULT_STRATEGY); + if (ret != Z_OK) { return false; } + + strm.avail_in = static_cast(content.size()); + strm.next_in = + const_cast(reinterpret_cast(content.data())); + + std::string compressed; + + std::array buff{}; + do { + strm.avail_out = buff.size(); + strm.next_out = reinterpret_cast(buff.data()); + ret = deflate(&strm, Z_FINISH); + assert(ret != Z_STREAM_ERROR); + compressed.append(buff.data(), buff.size() - strm.avail_out); + } while (strm.avail_out == 0); + + assert(ret == Z_STREAM_END); + assert(strm.avail_in == 0); + + content.swap(compressed); + + deflateEnd(&strm); + return true; +} + + } // unnamed namespace @@ -331,7 
+375,7 @@ ContentResponse::can_compress(const RequestContext& request) const { return request.can_compress() && is_compressible_mime_type(m_mimeType) - && (m_content.size() > KIWIX_MIN_CONTENT_SIZE_TO_DEFLATE); + && (m_content.size() > KIWIX_MIN_CONTENT_SIZE_TO_COMPRESS); } bool @@ -365,35 +409,17 @@ ContentResponse::create_mhd_response(const RequestContext& request) } } - bool shouldCompress = can_compress(request); - if (shouldCompress) { - std::vector compr_buffer(compressBound(m_content.size())); - uLongf comprLen = compr_buffer.capacity(); - int err = compress(&compr_buffer[0], - &comprLen, - (const Bytef*)(m_content.data()), - m_content.size()); - if (err == Z_OK && comprLen > 2 && comprLen < (m_content.size() + 2)) { - /* /!\ Internet Explorer has a bug with deflate compression. - It can not handle the first two bytes (compression headers) - We need to chunk them off (move the content 2bytes) - It has no incidence on other browsers - See http://www.subbu.org/blog/2008/03/ie7-deflate-or-not and comments */ - m_content = string((char*)&compr_buffer[2], comprLen - 2); - m_etag.set_option(ETag::COMPRESSED_CONTENT); - } else { - shouldCompress = false; - } - } + const bool isCompressed = can_compress(request) && compress(m_content); MHD_Response* response = MHD_create_response_from_buffer( m_content.size(), const_cast(m_content.data()), MHD_RESPMEM_MUST_COPY); - if (shouldCompress) { + if (isCompressed) { + m_etag.set_option(ETag::COMPRESSED_CONTENT); MHD_add_response_header( response, MHD_HTTP_HEADER_VARY, "Accept-Encoding"); MHD_add_response_header( - response, MHD_HTTP_HEADER_CONTENT_ENCODING, "deflate"); + response, MHD_HTTP_HEADER_CONTENT_ENCODING, "gzip"); } return response; } diff --git a/test/server.cpp b/test/server.cpp index 2fa31f203..9f4356b5a 100644 --- a/test/server.cpp +++ b/test/server.cpp @@ -1,4 +1,5 @@ +#define CPPHTTPLIB_ZLIB_SUPPORT 1 #include "./httplib.h" #include "gtest/gtest.h" @@ -184,13 +185,10 @@ const ResourceCollection 
resources200Compressible{ { WITH_ETAG, "/ROOT/skin/taskbar.css" }, { WITH_ETAG, "/ROOT/skin/block_external.js" }, - { NO_ETAG, "/ROOT/catalog/root.xml" }, - { NO_ETAG, "/ROOT/catalog/searchdescription.xml" }, { NO_ETAG, "/ROOT/catalog/search" }, { NO_ETAG, "/ROOT/search?content=zimfile&pattern=a" }, - { NO_ETAG, "/ROOT/suggest?content=zimfile" }, { NO_ETAG, "/ROOT/suggest?content=zimfile&term=ray" }, { NO_ETAG, "/ROOT/catch/external?source=www.example.com" }, @@ -223,7 +221,10 @@ const ResourceCollection resources200Uncompressible{ { WITH_ETAG, "/ROOT/corner_cases/-/empty.css" }, { WITH_ETAG, "/ROOT/corner_cases/-/empty.js" }, - // The title and creator are too small to be compressed + // The responses for the following URLs are too small to be compressed + { NO_ETAG, "/ROOT/catalog/root.xml" }, + { NO_ETAG, "/ROOT/catalog/searchdescription.xml" }, + { NO_ETAG, "/ROOT/suggest?content=zimfile" }, { WITH_ETAG, "/ROOT/raw/zimfile/meta/Creator" }, { WITH_ETAG, "/ROOT/raw/zimfile/meta/Title" }, }; @@ -271,9 +272,9 @@ TEST_F(ServerTest, 200) TEST_F(ServerTest, CompressibleContentIsCompressedIfAcceptable) { for ( const Resource& res : resources200Compressible ) { - const auto x = zfs1_->GET(res.url, { {"Accept-Encoding", "deflate"} }); + const auto x = zfs1_->GET(res.url, { {"Accept-Encoding", "gzip"} }); EXPECT_EQ(200, x->status) << res; - EXPECT_EQ("deflate", x->get_header_value("Content-Encoding")) << res; + EXPECT_EQ("gzip", x->get_header_value("Content-Encoding")) << res; EXPECT_EQ("Accept-Encoding", x->get_header_value("Vary")) << res; } } @@ -281,7 +282,7 @@ TEST_F(ServerTest, CompressibleContentIsCompressedIfAcceptable) TEST_F(ServerTest, UncompressibleContentIsNotCompressed) { for ( const Resource& res : resources200Uncompressible ) { - const auto x = zfs1_->GET(res.url, { {"Accept-Encoding", "deflate"} }); + const auto x = zfs1_->GET(res.url, { {"Accept-Encoding", "gzip"} }); EXPECT_EQ(200, x->status) << res; EXPECT_EQ("", x->get_header_value("Content-Encoding")) << 
res; } @@ -1062,7 +1063,7 @@ TEST_F(ServerTest, CompressionInfluencesETag) if ( ! res.etag_expected ) continue; const auto g1 = zfs1_->GET(res.url); const auto g2 = zfs1_->GET(res.url, { {"Accept-Encoding", ""} } ); - const auto g3 = zfs1_->GET(res.url, { {"Accept-Encoding", "deflate"} } ); + const auto g3 = zfs1_->GET(res.url, { {"Accept-Encoding", "gzip"} } ); const auto etag = g1->get_header_value("ETag"); EXPECT_EQ(etag, g2->get_header_value("ETag")); EXPECT_NE(etag, g3->get_header_value("ETag")); @@ -1075,7 +1076,7 @@ TEST_F(ServerTest, ETagOfUncompressibleContentIsNotAffectedByAcceptEncoding) if ( ! res.etag_expected ) continue; const auto g1 = zfs1_->GET(res.url); const auto g2 = zfs1_->GET(res.url, { {"Accept-Encoding", ""} } ); - const auto g3 = zfs1_->GET(res.url, { {"Accept-Encoding", "deflate"} } ); + const auto g3 = zfs1_->GET(res.url, { {"Accept-Encoding", "gzip"} } ); const auto etag = g1->get_header_value("ETag"); EXPECT_EQ(etag, g2->get_header_value("ETag")) << res; EXPECT_EQ(etag, g3->get_header_value("ETag")) << res; @@ -1114,7 +1115,7 @@ std::string make_etag_list(const std::string& etag) TEST_F(ServerTest, IfNoneMatchRequestsWithMatchingETagResultIn304Responses) { - const char* const encodings[] = { "", "deflate" }; + const char* const encodings[] = { "", "gzip" }; for ( const Resource& res : all200Resources() ) { for ( const char* enc: encodings ) { if ( ! res.etag_expected ) continue; @@ -1245,7 +1246,7 @@ TEST_F(ServerTest, RangeHasPrecedenceOverCompression) const Headers onlyRange{ {"Range", "bytes=123-456"} }; Headers rangeAndCompression(onlyRange); - rangeAndCompression.insert({"Accept-Encoding", "deflate"}); + rangeAndCompression.insert({"Accept-Encoding", "gzip"}); const auto p1 = zfs1_->GET(url, onlyRange); const auto p2 = zfs1_->GET(url, rangeAndCompression);