diff --git a/src/meson.build b/src/meson.build index 411f14f28..ddb188119 100644 --- a/src/meson.build +++ b/src/meson.build @@ -21,6 +21,7 @@ kiwix_sources = [ 'tools/otherTools.cpp', 'kiwixserve.cpp', 'name_mapper.cpp', + 'server/byte_range.cpp', 'server/etag.cpp', 'server/request_context.cpp', 'server/response.cpp' diff --git a/src/server/byte_range.cpp b/src/server/byte_range.cpp new file mode 100644 index 000000000..e43faef1d --- /dev/null +++ b/src/server/byte_range.cpp @@ -0,0 +1,126 @@ +/* + * Copyright 2020 Veloman Yunkan + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. 
+ */ + + +#include "byte_range.h" + +#include "tools/stringTools.h" + +#include <algorithm> + +namespace kiwix { + +namespace { + +ByteRange parseByteRange(const std::string& rangeStr) +{ + std::istringstream iss(rangeStr); + + int64_t start, end = INT64_MAX; + if (iss >> start) { + if ( start < 0 ) { + if ( iss.eof() && start > INT64_MIN ) // negating INT64_MIN would overflow + return ByteRange(-start); + } else { + char c; + if (iss >> c && c=='-') { + iss >> end; // if this fails, end is not modified, which is OK + if (iss.eof() && start <= end) + return ByteRange(ByteRange::PARSED, start, end); + } + } + } + + return ByteRange(ByteRange::INVALID, 0, INT64_MAX); +} + +} // unnamed namespace + +ByteRange::ByteRange() + : kind_(NONE) + , first_(0) + , last_(INT64_MAX) +{} + +ByteRange::ByteRange(Kind kind, int64_t first, int64_t last) + : kind_(kind) + , first_(first) + , last_(last) +{ + assert(kind != NONE); + assert(first >= 0); + assert(last >= first || (first == 0 && last == -1)); // resolve() yields [0, -1] for empty content +} + +ByteRange::ByteRange(int64_t suffix_length) + : kind_(PARSED) + , first_(-suffix_length) + , last_(INT64_MAX) +{ + assert(suffix_length > 0); +} + +int64_t ByteRange::first() const +{ + assert(kind_ > PARSED); + return first_; +} + +int64_t ByteRange::last() const +{ + assert(kind_ > PARSED); + return last_; +} + +int64_t ByteRange::length() const +{ + assert(kind_ > PARSED); + return last_ + 1 - first_; +} + +ByteRange ByteRange::parse(const std::string& rangeStr) +{ + const std::string byteUnitSpec("bytes="); + if ( ! kiwix::startsWith(rangeStr, byteUnitSpec) ) + return ByteRange(INVALID, 0, INT64_MAX); + + return parseByteRange(rangeStr.substr(byteUnitSpec.size())); +} + +ByteRange ByteRange::resolve(int64_t contentSize) const +{ + if ( kind() == NONE ) + return ByteRange(RESOLVED_FULL_CONTENT, 0, contentSize-1); + + if ( kind() == INVALID ) + return ByteRange(RESOLVED_UNSATISFIABLE, 0, contentSize-1); + + const int64_t resolved_first = first_ < 0 + ? 
std::max(int64_t(0), contentSize + first_) + : first_; + + const int64_t resolved_last = std::min(contentSize-1, last_); + + if ( resolved_first > resolved_last ) + return ByteRange(RESOLVED_UNSATISFIABLE, 0, contentSize-1); + + return ByteRange(RESOLVED_PARTIAL_CONTENT, resolved_first, resolved_last); +} + +} // namespace kiwix diff --git a/src/server/byte_range.h b/src/server/byte_range.h new file mode 100644 index 000000000..bba0ccb2f --- /dev/null +++ b/src/server/byte_range.h @@ -0,0 +1,86 @@ +/* + * Copyright 2020 Veloman Yunkan + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. + */ + + +#ifndef KIWIXLIB_SERVER_BYTE_RANGE_H +#define KIWIXLIB_SERVER_BYTE_RANGE_H + +#include +#include + +namespace kiwix { + +class ByteRange +{ + public: // types + // ByteRange is parsed in a request, then it must be resolved (taking + // into account the actual size of the requested resource) before + // being applied in the response. + // The Kind enum represents possible states in such a lifecycle. + enum Kind { + // The request is not a range request (no Range header) + NONE, + + // The value of the Range header is not a valid continuous + // range. Note that a valid (according to RFC7233) sequence of multiple + // byte ranges is considered invalid in the current implementation + // (i.e. 
only single-range partial requests are supported). + INVALID, + + // This byte-range has been successfully parsed from the request + PARSED, + + // This is a response to a regular (non-range) request + RESOLVED_FULL_CONTENT, + + // The range request is invalid or unsatisfiable + RESOLVED_UNSATISFIABLE, + + // This is a response to a (satisfiable) range request + RESOLVED_PARTIAL_CONTENT, + }; + + public: // functions + // Constructs a ByteRange object of NONE kind + ByteRange(); + + // Constructs a ByteRange object of the given kind (except NONE) + ByteRange(Kind kind, int64_t first, int64_t last); + + // Constructs a ByteRange object of PARSED kind corresponding to a + // range request of the form "Range: bytes=-suffix_length" + explicit ByteRange(int64_t suffix_length); + + Kind kind() const { return kind_; } + int64_t first() const; + int64_t last() const; + int64_t length() const; + + static ByteRange parse(const std::string& rangeStr); + ByteRange resolve(int64_t contentSize) const; + + private: // data + Kind kind_; + int64_t first_; + int64_t last_; +}; + +} // namespace kiwix + +#endif //KIWIXLIB_SERVER_BYTE_RANGE_H diff --git a/src/server/request_context.cpp b/src/server/request_context.cpp index 37d490bf9..9ed28be84 100644 --- a/src/server/request_context.cpp +++ b/src/server/request_context.cpp @@ -74,8 +74,7 @@ RequestContext::RequestContext(struct MHD_Connection* connection, version(version), requestIndex(s_requestIndex++), acceptEncodingDeflate(false), - accept_range(false), - range_pair(0, -1) + byteRange_() { MHD_get_connection_values(connection, MHD_HEADER_KIND, &RequestContext::fill_header, this); MHD_get_connection_values(connection, MHD_GET_ARGUMENT_KIND, &RequestContext::fill_argument, this); @@ -85,33 +84,14 @@ RequestContext::RequestContext(struct MHD_Connection* connection, (get_header(MHD_HTTP_HEADER_ACCEPT_ENCODING).find("deflate") != std::string::npos); } catch (const std::out_of_range&) {} - /*Check if range is requested. 
*/ try { - auto range = get_header(MHD_HTTP_HEADER_RANGE); - int start = 0; - int end = -1; - std::istringstream iss(range); - char c; - - iss >> start >> c; - if (iss.good() && c=='-') { - iss >> end; - if (iss.fail()) { - // Something went wrong will extracting. - end = -1; - } - if (iss.eof()) { - accept_range = true; - range_pair = std::pair(start, end); - } - } + byteRange_ = ByteRange::parse(get_header(MHD_HTTP_HEADER_RANGE)); } catch (const std::out_of_range&) {} } RequestContext::~RequestContext() {} - int RequestContext::fill_header(void *__this, enum MHD_ValueKind kind, const char *key, const char *value) { @@ -146,7 +126,7 @@ void RequestContext::print_debug_info() const { printf("full_url: %s\n", full_url.c_str()); printf("url : %s\n", url.c_str()); printf("acceptEncodingDeflate : %d\n", acceptEncodingDeflate); - printf("has_range : %d\n", accept_range); + printf("has_range : %d\n", byteRange_.kind() != ByteRange::NONE); printf("is_valid_url : %d\n", is_valid_url()); printf(".............\n"); } @@ -188,12 +168,8 @@ bool RequestContext::is_valid_url() const { return !url.empty(); } -bool RequestContext::has_range() const { - return accept_range; -} - -std::pair RequestContext::get_range() const { - return range_pair; +ByteRange RequestContext::get_range() const { + return byteRange_; } template<> diff --git a/src/server/request_context.h b/src/server/request_context.h index d58040f07..4860fcf6e 100644 --- a/src/server/request_context.h +++ b/src/server/request_context.h @@ -27,6 +27,8 @@ #include #include +#include "byte_range.h" + extern "C" { #include } @@ -51,9 +53,6 @@ class IndexError: public std::runtime_error {}; class RequestContext { - public: // types - typedef std::pair ByteRange; - public: // functions RequestContext(struct MHD_Connection* connection, std::string rootLocation, @@ -81,7 +80,6 @@ class RequestContext { std::string get_url_part(int part) const; std::string get_full_url() const; - bool has_range() const; ByteRange get_range() 
const; bool can_compress() const { return acceptEncodingDeflate; } @@ -95,8 +93,7 @@ class RequestContext { bool acceptEncodingDeflate; - bool accept_range; - ByteRange range_pair; + ByteRange byteRange_; std::map headers; std::map arguments; diff --git a/src/server/response.cpp b/src/server/response.cpp index cdd723a26..2f76ed15e 100644 --- a/src/server/response.cpp +++ b/src/server/response.cpp @@ -39,12 +39,6 @@ bool is_compressible_mime_type(const std::string& mimeType) || mimeType.find("application/json") != string::npos; } -int get_range_len(const kiwix::Entry& entry, RequestContext::ByteRange range) -{ - return range.second == -1 - ? entry.getSize() - range.first - : range.second - range.first; -} } // unnamed namespace @@ -58,9 +52,7 @@ Response::Response(const std::string& root, bool verbose, bool withTaskbar, bool m_withLibraryButton(withLibraryButton), m_blockExternalLinks(blockExternalLinks), m_addTaskbar(false), - m_bookName(""), - m_startRange(0), - m_lenRange(0) + m_bookName("") { } @@ -177,6 +169,20 @@ Response::can_compress(const RequestContext& request) const && (m_content.size() > KIWIX_MIN_CONTENT_SIZE_TO_DEFLATE); } +MHD_Response* +Response::create_error_response(const RequestContext& request) const +{ + MHD_Response* response = MHD_create_response_from_buffer(0, NULL, MHD_RESPMEM_PERSISTENT); + if ( m_returnCode == 416 ) { + std::ostringstream oss; + oss << "bytes */" << m_byteRange.length(); + + MHD_add_response_header(response, + MHD_HTTP_HEADER_CONTENT_RANGE, oss.str().c_str()); + } + return response; +} + MHD_Response* Response::create_raw_content_mhd_response(const RequestContext& request) { @@ -240,23 +246,26 @@ Response::create_redirection_mhd_response() const MHD_Response* Response::create_entry_mhd_response() const { - MHD_Response* response = MHD_create_response_from_callback(m_entry.getSize(), + const auto content_length = m_byteRange.length(); + MHD_Response* response = MHD_create_response_from_callback(content_length, 16384, 
callback_reader_from_entry, - new RunningResponse(m_entry, m_startRange), + new RunningResponse(m_entry, m_byteRange.first()), callback_free_response); MHD_add_response_header(response, MHD_HTTP_HEADER_CONTENT_TYPE, m_mimeType.c_str()); MHD_add_response_header(response, MHD_HTTP_HEADER_ACCEPT_RANGES, "bytes"); - std::ostringstream oss; - oss << "bytes " << m_startRange << "-" << m_startRange + m_lenRange - 1 - << "/" << m_entry.getSize(); + if ( m_byteRange.kind() == ByteRange::RESOLVED_PARTIAL_CONTENT ) { + std::ostringstream oss; + oss << "bytes " << m_byteRange.first() << "-" << m_byteRange.last() + << "/" << m_entry.getSize(); + + MHD_add_response_header(response, + MHD_HTTP_HEADER_CONTENT_RANGE, oss.str().c_str()); + } MHD_add_response_header(response, - MHD_HTTP_HEADER_CONTENT_RANGE, oss.str().c_str()); - - MHD_add_response_header(response, - MHD_HTTP_HEADER_CONTENT_LENGTH, kiwix::to_string(m_lenRange).c_str()); + MHD_HTTP_HEADER_CONTENT_LENGTH, kiwix::to_string(content_length).c_str()); return response; } @@ -264,6 +273,9 @@ MHD_Response* Response::create_mhd_response(const RequestContext& request) { switch (m_mode) { + case ResponseMode::ERROR_RESPONSE: + return create_error_response(request); + case ResponseMode::RAW_CONTENT : return create_raw_content_mhd_response(request); @@ -280,14 +292,16 @@ int Response::send(const RequestContext& request, MHD_Connection* connection) { MHD_Response* response = create_mhd_response(request); - MHD_add_response_header(response, "Access-Control-Allow-Origin", "*"); - MHD_add_response_header(response, MHD_HTTP_HEADER_CACHE_CONTROL, - m_etag.get_option(ETag::CACHEABLE_ENTITY) ? "max-age=2723040, public" : "no-cache, no-store, must-revalidate"); - const std::string etag = m_etag.get_etag(); - if ( ! 
etag.empty() ) - MHD_add_response_header(response, MHD_HTTP_HEADER_ETAG, etag.c_str()); + if ( m_mode != ResponseMode::ERROR_RESPONSE ) { + MHD_add_response_header(response, "Access-Control-Allow-Origin", "*"); + MHD_add_response_header(response, MHD_HTTP_HEADER_CACHE_CONTROL, + m_etag.get_option(ETag::CACHEABLE_ENTITY) ? "max-age=2723040, public" : "no-cache, no-store, must-revalidate"); + const std::string etag = m_etag.get_etag(); + if ( ! etag.empty() ) + MHD_add_response_header(response, MHD_HTTP_HEADER_ETAG, etag.c_str()); + } - if (m_returnCode == MHD_HTTP_OK && request.has_range()) + if (m_returnCode == MHD_HTTP_OK && m_byteRange.kind() == ByteRange::RESOLVED_PARTIAL_CONTENT) m_returnCode = MHD_HTTP_PARTIAL_CONTENT; if (m_verbose) @@ -321,16 +335,18 @@ void Response::set_entry(const Entry& entry, const RequestContext& request) { set_mimeType(mimeType); set_cacheable(); - if ( is_compressible_mime_type(mimeType) ) { + m_byteRange = request.get_range().resolve(entry.getSize()); + const bool noRange = m_byteRange.kind() == ByteRange::RESOLVED_FULL_CONTENT; + if ( noRange && is_compressible_mime_type(mimeType) ) { zim::Blob raw_content = entry.getBlob(); const std::string content = string(raw_content.data(), raw_content.size()); set_content(content); set_compress(true); - } else { - const int range_len = get_range_len(entry, request.get_range()); - set_range_first(request.get_range().first); - set_range_len(range_len); + } else if ( m_byteRange.kind() == ByteRange::RESOLVED_UNSATISFIABLE ) { + set_code(416); + set_content(""); + m_mode = ResponseMode::ERROR_RESPONSE; } } diff --git a/src/server/response.h b/src/server/response.h index bd15f9543..49e346c02 100644 --- a/src/server/response.h +++ b/src/server/response.h @@ -24,6 +24,7 @@ #include #include +#include "byte_range.h" #include "entry.h" #include "etag.h" @@ -34,6 +35,7 @@ extern "C" { namespace kiwix { enum class ResponseMode { + ERROR_RESPONSE, RAW_CONTENT, REDIRECTION, ENTRY @@ -61,8 +63,6 @@ class 
Response { void set_etag(const ETag& etag) { m_etag = etag; } void set_compress(bool compress) { m_compress = compress; } void set_taskbar(const std::string& bookName, const std::string& bookTitle); - void set_range_first(uint64_t start) { m_startRange = start; } - void set_range_len(uint64_t len) { m_lenRange = len; } int getReturnCode() const { return m_returnCode; } std::string get_mimeType() const { return m_mimeType; } @@ -74,6 +74,7 @@ class Response { private: // functions MHD_Response* create_mhd_response(const RequestContext& request); + MHD_Response* create_error_response(const RequestContext& request) const; MHD_Response* create_raw_content_mhd_response(const RequestContext& request); MHD_Response* create_redirection_mhd_response() const; MHD_Response* create_entry_mhd_response() const; @@ -93,8 +94,7 @@ class Response { bool m_addTaskbar; std::string m_bookName; std::string m_bookTitle; - uint64_t m_startRange; - uint64_t m_lenRange; + ByteRange m_byteRange; ETag m_etag; }; diff --git a/test/server.cpp b/test/server.cpp index 6ed18e409..16718b5dc 100644 --- a/test/server.cpp +++ b/test/server.cpp @@ -408,3 +408,89 @@ TEST_F(ServerTest, IfNoneMatchRequestsWithMismatchingETagResultIn200Responses) EXPECT_EQ(200, g2->status); } } + +TEST_F(ServerTest, ValidSingleRangeByteRangeRequestsAreHandledProperly) +{ + const char url[] = "/zimfile/I/m/Ray_Charles_classic_piano_pose.jpg"; + const auto full = zfs1_->GET(url); + EXPECT_FALSE(full->has_header("Content-Range")); + EXPECT_EQ("bytes", full->get_header_value("Accept-Ranges")); + + { + const auto p = zfs1_->GET(url, { {"Range", "bytes=0-100000"} } ); + EXPECT_EQ(206, p->status); + EXPECT_EQ(full->body, p->body); + EXPECT_EQ("bytes 0-20076/20077", p->get_header_value("Content-Range")); + EXPECT_EQ("bytes", p->get_header_value("Accept-Ranges")); + } + + { + const auto p = zfs1_->GET(url, { {"Range", "bytes=0-10"} } ); + EXPECT_EQ(206, p->status); + EXPECT_EQ("bytes 0-10/20077", 
p->get_header_value("Content-Range")); + EXPECT_EQ(11, p->body.size()); + EXPECT_EQ(full->body.substr(0, 11), p->body); + EXPECT_EQ("bytes", p->get_header_value("Accept-Ranges")); + } + + { + const auto p = zfs1_->GET(url, { {"Range", "bytes=123-456"} } ); + EXPECT_EQ(206, p->status); + EXPECT_EQ("bytes 123-456/20077", p->get_header_value("Content-Range")); + EXPECT_EQ(334, p->body.size()); + EXPECT_EQ(full->body.substr(123, 334), p->body); + EXPECT_EQ("bytes", p->get_header_value("Accept-Ranges")); + } + + { + const auto p = zfs1_->GET(url, { {"Range", "bytes=20000-"} } ); + EXPECT_EQ(206, p->status); + EXPECT_EQ(full->body.substr(20000), p->body); + EXPECT_EQ("bytes 20000-20076/20077", p->get_header_value("Content-Range")); + EXPECT_EQ("bytes", p->get_header_value("Accept-Ranges")); + } + + { + const auto p = zfs1_->GET(url, { {"Range", "bytes=-100"} } ); + EXPECT_EQ(206, p->status); + EXPECT_EQ(full->body.substr(19977), p->body); + EXPECT_EQ("bytes 19977-20076/20077", p->get_header_value("Content-Range")); + EXPECT_EQ("bytes", p->get_header_value("Accept-Ranges")); + } +} + +TEST_F(ServerTest, InvalidAndMultiRangeByteRangeRequestsResultIn416Responses) +{ + const char url[] = "/zimfile/I/m/Ray_Charles_classic_piano_pose.jpg"; + + const char* invalidRanges[] = { + "0-10", "bytes=", "bytes=123", "bytes=-10-20", "bytes=10-20xxx", + "bytes=10-0", // reversed range + "bytes=10-20, 30-40", // multi-range + "bytes=1000000-", "bytes=30000-30100" // unsatisfiable ranges + }; + + for( const char* range : invalidRanges ) + { + const TestContext ctx{ {"Range", range} }; + const auto p = zfs1_->GET(url, { {"Range", range } } ); + EXPECT_EQ(416, p->status) << ctx; + EXPECT_TRUE(p->body.empty()) << ctx; + EXPECT_EQ("bytes */20077", p->get_header_value("Content-Range")) << ctx; + } +} + +TEST_F(ServerTest, RangeHasPrecedenceOverCompression) +{ + const char url[] = "/zimfile/I/m/Ray_Charles_classic_piano_pose.jpg"; + + const Headers onlyRange{ {"Range", "bytes=123-456"} }; + 
Headers rangeAndCompression(onlyRange); + rangeAndCompression.insert({"Accept-Encoding", "deflate"}); + + const auto p1 = zfs1_->GET(url, onlyRange); + const auto p2 = zfs1_->GET(url, rangeAndCompression); + EXPECT_EQ(p1->status, p2->status); + EXPECT_EQ(invariantHeaders(p1->headers), invariantHeaders(p2->headers)); + EXPECT_EQ(p1->body, p2->body); +}