Merge pull request #360 from kiwix/http_byte_range

This commit is contained in:
Matthieu Gautier 2020-06-01 17:41:59 +02:00 committed by GitHub
commit 7dcaeed33a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 357 additions and 69 deletions

View File

@ -21,6 +21,7 @@ kiwix_sources = [
'tools/otherTools.cpp',
'kiwixserve.cpp',
'name_mapper.cpp',
'server/byte_range.cpp',
'server/etag.cpp',
'server/request_context.cpp',
'server/response.cpp'

126
src/server/byte_range.cpp Normal file
View File

@ -0,0 +1,126 @@
/*
* Copyright 2020 Veloman Yunkan <veloman.yunkan@gmail.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#include "byte_range.h"
#include "tools/stringTools.h"
#include <cassert>
namespace kiwix {
namespace {
ByteRange parseByteRange(const std::string& rangeStr)
{
std::istringstream iss(rangeStr);
int64_t start, end = INT64_MAX;
if (iss >> start) {
if ( start < 0 ) {
if ( iss.eof() )
return ByteRange(-start);
} else {
char c;
if (iss >> c && c=='-') {
iss >> end; // if this fails, end is not modified, which is OK
if (iss.eof() && start <= end)
return ByteRange(ByteRange::PARSED, start, end);
}
}
}
return ByteRange(ByteRange::INVALID, 0, INT64_MAX);
}
} // unnamed namespace
ByteRange::ByteRange()
: kind_(NONE)
, first_(0)
, last_(INT64_MAX)
{}
ByteRange::ByteRange(Kind kind, int64_t first, int64_t last)
: kind_(kind)
, first_(first)
, last_(last)
{
assert(kind != NONE);
assert(first >= 0);
assert(last >= first);
}
ByteRange::ByteRange(int64_t suffix_length)
: kind_(PARSED)
, first_(-suffix_length)
, last_(INT64_MAX)
{
assert(suffix_length > 0);
}
int64_t ByteRange::first() const
{
assert(kind_ > PARSED);
return first_;
}
int64_t ByteRange::last() const
{
assert(kind_ > PARSED);
return last_;
}
int64_t ByteRange::length() const
{
assert(kind_ > PARSED);
return last_ + 1 - first_;
}
ByteRange ByteRange::parse(const std::string& rangeStr)
{
const std::string byteUnitSpec("bytes=");
if ( ! kiwix::startsWith(rangeStr, byteUnitSpec) )
return ByteRange(INVALID, 0, INT64_MAX);
return parseByteRange(rangeStr.substr(byteUnitSpec.size()));
}
ByteRange ByteRange::resolve(int64_t contentSize) const
{
if ( kind() == NONE )
return ByteRange(RESOLVED_FULL_CONTENT, 0, contentSize-1);
if ( kind() == INVALID )
return ByteRange(RESOLVED_UNSATISFIABLE, 0, contentSize-1);
const int64_t resolved_first = first_ < 0
? std::max(int64_t(0), contentSize + first_)
: first_;
const int64_t resolved_last = std::min(contentSize-1, last_);
if ( resolved_first > resolved_last )
return ByteRange(RESOLVED_UNSATISFIABLE, 0, contentSize-1);
return ByteRange(RESOLVED_PARTIAL_CONTENT, resolved_first, resolved_last);
}
} // namespace kiwix

86
src/server/byte_range.h Normal file
View File

@ -0,0 +1,86 @@
/*
* Copyright 2020 Veloman Yunkan <veloman.yunkan@gmail.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#ifndef KIWIXLIB_SERVER_BYTE_RANGE_H
#define KIWIXLIB_SERVER_BYTE_RANGE_H
#include <cstdint>
#include <string>
namespace kiwix {
class ByteRange
{
public: // types
// ByteRange is parsed in a request, then it must be resolved (taking
// into account the actual size of the requested resource) before
// being applied in the response.
// The Kind enum represents possible states in such a lifecycle.
enum Kind {
// The request is not a range request (no Range header)
NONE,
// The value of the Range header is not a valid continuous
// range. Note that a valid (according to RFC7233) sequence of multiple
// byte ranges is considered invalid in the current implementation
// (i.e. only single-range partial requests are supported).
INVALID,
// This byte-range has been successfully parsed from the request
PARSED,
// This is a response to a regular (non-range) request
RESOLVED_FULL_CONTENT,
// The range request is invalid or unsatisfiable
RESOLVED_UNSATISFIABLE,
// This is a response to a (satisfiable) range request
RESOLVED_PARTIAL_CONTENT,
};
public: // functions
// Constructs a ByteRange object of NONE kind
ByteRange();
// Constructs a ByteRange object of the given kind (except NONE)
ByteRange(Kind kind, int64_t first, int64_t last);
// Constructs a ByteRange object of PARSED kind corresponding to a
// range request of the form "Range: bytes=-suffix_length"
explicit ByteRange(int64_t suffix_length);
Kind kind() const { return kind_; }
int64_t first() const;
int64_t last() const;
int64_t length() const;
static ByteRange parse(const std::string& rangeStr);
ByteRange resolve(int64_t contentSize) const;
private: // data
Kind kind_;
int64_t first_;
int64_t last_;
};
} // namespace kiwix
#endif //KIWIXLIB_SERVER_BYTE_RANGE_H

View File

@ -74,8 +74,7 @@ RequestContext::RequestContext(struct MHD_Connection* connection,
version(version),
requestIndex(s_requestIndex++),
acceptEncodingDeflate(false),
accept_range(false),
range_pair(0, -1)
byteRange_()
{
MHD_get_connection_values(connection, MHD_HEADER_KIND, &RequestContext::fill_header, this);
MHD_get_connection_values(connection, MHD_GET_ARGUMENT_KIND, &RequestContext::fill_argument, this);
@ -85,33 +84,14 @@ RequestContext::RequestContext(struct MHD_Connection* connection,
(get_header(MHD_HTTP_HEADER_ACCEPT_ENCODING).find("deflate") != std::string::npos);
} catch (const std::out_of_range&) {}
/*Check if range is requested. */
try {
auto range = get_header(MHD_HTTP_HEADER_RANGE);
int start = 0;
int end = -1;
std::istringstream iss(range);
char c;
iss >> start >> c;
if (iss.good() && c=='-') {
iss >> end;
if (iss.fail()) {
// Something went wrong will extracting.
end = -1;
}
if (iss.eof()) {
accept_range = true;
range_pair = std::pair<int, int>(start, end);
}
}
byteRange_ = ByteRange::parse(get_header(MHD_HTTP_HEADER_RANGE));
} catch (const std::out_of_range&) {}
}
RequestContext::~RequestContext()
{}
int RequestContext::fill_header(void *__this, enum MHD_ValueKind kind,
const char *key, const char *value)
{
@ -146,7 +126,7 @@ void RequestContext::print_debug_info() const {
printf("full_url: %s\n", full_url.c_str());
printf("url : %s\n", url.c_str());
printf("acceptEncodingDeflate : %d\n", acceptEncodingDeflate);
printf("has_range : %d\n", accept_range);
printf("has_range : %d\n", byteRange_.kind() != ByteRange::NONE);
printf("is_valid_url : %d\n", is_valid_url());
printf(".............\n");
}
@ -188,12 +168,8 @@ bool RequestContext::is_valid_url() const {
return !url.empty();
}
bool RequestContext::has_range() const {
return accept_range;
}
std::pair<int, int> RequestContext::get_range() const {
return range_pair;
ByteRange RequestContext::get_range() const {
return byteRange_;
}
template<>

View File

@ -27,6 +27,8 @@
#include <map>
#include <stdexcept>
#include "byte_range.h"
extern "C" {
#include <microhttpd.h>
}
@ -51,9 +53,6 @@ class IndexError: public std::runtime_error {};
class RequestContext {
public: // types
typedef std::pair<int, int> ByteRange;
public: // functions
RequestContext(struct MHD_Connection* connection,
std::string rootLocation,
@ -81,7 +80,6 @@ class RequestContext {
std::string get_url_part(int part) const;
std::string get_full_url() const;
bool has_range() const;
ByteRange get_range() const;
bool can_compress() const { return acceptEncodingDeflate; }
@ -95,8 +93,7 @@ class RequestContext {
bool acceptEncodingDeflate;
bool accept_range;
ByteRange range_pair;
ByteRange byteRange_;
std::map<std::string, std::string> headers;
std::map<std::string, std::string> arguments;

View File

@ -39,12 +39,6 @@ bool is_compressible_mime_type(const std::string& mimeType)
|| mimeType.find("application/json") != string::npos;
}
int get_range_len(const kiwix::Entry& entry, RequestContext::ByteRange range)
{
return range.second == -1
? entry.getSize() - range.first
: range.second - range.first;
}
} // unnamed namespace
@ -58,9 +52,7 @@ Response::Response(const std::string& root, bool verbose, bool withTaskbar, bool
m_withLibraryButton(withLibraryButton),
m_blockExternalLinks(blockExternalLinks),
m_addTaskbar(false),
m_bookName(""),
m_startRange(0),
m_lenRange(0)
m_bookName("")
{
}
@ -177,6 +169,20 @@ Response::can_compress(const RequestContext& request) const
&& (m_content.size() > KIWIX_MIN_CONTENT_SIZE_TO_DEFLATE);
}
MHD_Response*
Response::create_error_response(const RequestContext& request) const
{
MHD_Response* response = MHD_create_response_from_buffer(0, NULL, MHD_RESPMEM_PERSISTENT);
if ( m_returnCode == 416 ) {
std::ostringstream oss;
oss << "bytes */" << m_byteRange.length();
MHD_add_response_header(response,
MHD_HTTP_HEADER_CONTENT_RANGE, oss.str().c_str());
}
return response;
}
MHD_Response*
Response::create_raw_content_mhd_response(const RequestContext& request)
{
@ -240,23 +246,26 @@ Response::create_redirection_mhd_response() const
MHD_Response*
Response::create_entry_mhd_response() const
{
MHD_Response* response = MHD_create_response_from_callback(m_entry.getSize(),
const auto content_length = m_byteRange.length();
MHD_Response* response = MHD_create_response_from_callback(content_length,
16384,
callback_reader_from_entry,
new RunningResponse(m_entry, m_startRange),
new RunningResponse(m_entry, m_byteRange.first()),
callback_free_response);
MHD_add_response_header(response,
MHD_HTTP_HEADER_CONTENT_TYPE, m_mimeType.c_str());
MHD_add_response_header(response, MHD_HTTP_HEADER_ACCEPT_RANGES, "bytes");
std::ostringstream oss;
oss << "bytes " << m_startRange << "-" << m_startRange + m_lenRange - 1
<< "/" << m_entry.getSize();
if ( m_byteRange.kind() == ByteRange::RESOLVED_PARTIAL_CONTENT ) {
std::ostringstream oss;
oss << "bytes " << m_byteRange.first() << "-" << m_byteRange.last()
<< "/" << m_entry.getSize();
MHD_add_response_header(response,
MHD_HTTP_HEADER_CONTENT_RANGE, oss.str().c_str());
}
MHD_add_response_header(response,
MHD_HTTP_HEADER_CONTENT_RANGE, oss.str().c_str());
MHD_add_response_header(response,
MHD_HTTP_HEADER_CONTENT_LENGTH, kiwix::to_string(m_lenRange).c_str());
MHD_HTTP_HEADER_CONTENT_LENGTH, kiwix::to_string(content_length).c_str());
return response;
}
@ -264,6 +273,9 @@ MHD_Response*
Response::create_mhd_response(const RequestContext& request)
{
switch (m_mode) {
case ResponseMode::ERROR_RESPONSE:
return create_error_response(request);
case ResponseMode::RAW_CONTENT :
return create_raw_content_mhd_response(request);
@ -280,14 +292,16 @@ int Response::send(const RequestContext& request, MHD_Connection* connection)
{
MHD_Response* response = create_mhd_response(request);
MHD_add_response_header(response, "Access-Control-Allow-Origin", "*");
MHD_add_response_header(response, MHD_HTTP_HEADER_CACHE_CONTROL,
m_etag.get_option(ETag::CACHEABLE_ENTITY) ? "max-age=2723040, public" : "no-cache, no-store, must-revalidate");
const std::string etag = m_etag.get_etag();
if ( ! etag.empty() )
MHD_add_response_header(response, MHD_HTTP_HEADER_ETAG, etag.c_str());
if ( m_mode != ResponseMode::ERROR_RESPONSE ) {
MHD_add_response_header(response, "Access-Control-Allow-Origin", "*");
MHD_add_response_header(response, MHD_HTTP_HEADER_CACHE_CONTROL,
m_etag.get_option(ETag::CACHEABLE_ENTITY) ? "max-age=2723040, public" : "no-cache, no-store, must-revalidate");
const std::string etag = m_etag.get_etag();
if ( ! etag.empty() )
MHD_add_response_header(response, MHD_HTTP_HEADER_ETAG, etag.c_str());
}
if (m_returnCode == MHD_HTTP_OK && request.has_range())
if (m_returnCode == MHD_HTTP_OK && m_byteRange.kind() == ByteRange::RESOLVED_PARTIAL_CONTENT)
m_returnCode = MHD_HTTP_PARTIAL_CONTENT;
if (m_verbose)
@ -321,16 +335,18 @@ void Response::set_entry(const Entry& entry, const RequestContext& request) {
set_mimeType(mimeType);
set_cacheable();
if ( is_compressible_mime_type(mimeType) ) {
m_byteRange = request.get_range().resolve(entry.getSize());
const bool noRange = m_byteRange.kind() == ByteRange::RESOLVED_FULL_CONTENT;
if ( noRange && is_compressible_mime_type(mimeType) ) {
zim::Blob raw_content = entry.getBlob();
const std::string content = string(raw_content.data(), raw_content.size());
set_content(content);
set_compress(true);
} else {
const int range_len = get_range_len(entry, request.get_range());
set_range_first(request.get_range().first);
set_range_len(range_len);
} else if ( m_byteRange.kind() == ByteRange::RESOLVED_UNSATISFIABLE ) {
set_code(416);
set_content("");
m_mode = ResponseMode::ERROR_RESPONSE;
}
}

View File

@ -24,6 +24,7 @@
#include <string>
#include <mustache.hpp>
#include "byte_range.h"
#include "entry.h"
#include "etag.h"
@ -34,6 +35,7 @@ extern "C" {
namespace kiwix {
enum class ResponseMode {
ERROR_RESPONSE,
RAW_CONTENT,
REDIRECTION,
ENTRY
@ -61,8 +63,6 @@ class Response {
void set_etag(const ETag& etag) { m_etag = etag; }
void set_compress(bool compress) { m_compress = compress; }
void set_taskbar(const std::string& bookName, const std::string& bookTitle);
void set_range_first(uint64_t start) { m_startRange = start; }
void set_range_len(uint64_t len) { m_lenRange = len; }
int getReturnCode() const { return m_returnCode; }
std::string get_mimeType() const { return m_mimeType; }
@ -74,6 +74,7 @@ class Response {
private: // functions
MHD_Response* create_mhd_response(const RequestContext& request);
MHD_Response* create_error_response(const RequestContext& request) const;
MHD_Response* create_raw_content_mhd_response(const RequestContext& request);
MHD_Response* create_redirection_mhd_response() const;
MHD_Response* create_entry_mhd_response() const;
@ -93,8 +94,7 @@ class Response {
bool m_addTaskbar;
std::string m_bookName;
std::string m_bookTitle;
uint64_t m_startRange;
uint64_t m_lenRange;
ByteRange m_byteRange;
ETag m_etag;
};

View File

@ -408,3 +408,89 @@ TEST_F(ServerTest, IfNoneMatchRequestsWithMismatchingETagResultIn200Responses)
EXPECT_EQ(200, g2->status);
}
}
TEST_F(ServerTest, ValidSingleRangeByteRangeRequestsAreHandledProperly)
{
const char url[] = "/zimfile/I/m/Ray_Charles_classic_piano_pose.jpg";
const auto full = zfs1_->GET(url);
EXPECT_FALSE(full->has_header("Content-Range"));
EXPECT_EQ("bytes", full->get_header_value("Accept-Ranges"));
{
const auto p = zfs1_->GET(url, { {"Range", "bytes=0-100000"} } );
EXPECT_EQ(206, p->status);
EXPECT_EQ(full->body, p->body);
EXPECT_EQ("bytes 0-20076/20077", p->get_header_value("Content-Range"));
EXPECT_EQ("bytes", p->get_header_value("Accept-Ranges"));
}
{
const auto p = zfs1_->GET(url, { {"Range", "bytes=0-10"} } );
EXPECT_EQ(206, p->status);
EXPECT_EQ("bytes 0-10/20077", p->get_header_value("Content-Range"));
EXPECT_EQ(11, p->body.size());
EXPECT_EQ(full->body.substr(0, 11), p->body);
EXPECT_EQ("bytes", p->get_header_value("Accept-Ranges"));
}
{
const auto p = zfs1_->GET(url, { {"Range", "bytes=123-456"} } );
EXPECT_EQ(206, p->status);
EXPECT_EQ("bytes 123-456/20077", p->get_header_value("Content-Range"));
EXPECT_EQ(334, p->body.size());
EXPECT_EQ(full->body.substr(123, 334), p->body);
EXPECT_EQ("bytes", p->get_header_value("Accept-Ranges"));
}
{
const auto p = zfs1_->GET(url, { {"Range", "bytes=20000-"} } );
EXPECT_EQ(206, p->status);
EXPECT_EQ(full->body.substr(20000), p->body);
EXPECT_EQ("bytes 20000-20076/20077", p->get_header_value("Content-Range"));
EXPECT_EQ("bytes", p->get_header_value("Accept-Ranges"));
}
{
const auto p = zfs1_->GET(url, { {"Range", "bytes=-100"} } );
EXPECT_EQ(206, p->status);
EXPECT_EQ(full->body.substr(19977), p->body);
EXPECT_EQ("bytes 19977-20076/20077", p->get_header_value("Content-Range"));
EXPECT_EQ("bytes", p->get_header_value("Accept-Ranges"));
}
}
TEST_F(ServerTest, InvalidAndMultiRangeByteRangeRequestsResultIn416Responses)
{
const char url[] = "/zimfile/I/m/Ray_Charles_classic_piano_pose.jpg";
const char* invalidRanges[] = {
"0-10", "bytes=", "bytes=123", "bytes=-10-20", "bytes=10-20xxx",
"bytes=10-0", // reversed range
"bytes=10-20, 30-40", // multi-range
"bytes=1000000-", "bytes=30000-30100" // unsatisfiable ranges
};
for( const char* range : invalidRanges )
{
const TestContext ctx{ {"Range", range} };
const auto p = zfs1_->GET(url, { {"Range", range } } );
EXPECT_EQ(416, p->status) << ctx;
EXPECT_TRUE(p->body.empty()) << ctx;
EXPECT_EQ("bytes */20077", p->get_header_value("Content-Range")) << ctx;
}
}
TEST_F(ServerTest, RangeHasPrecedenceOverCompression)
{
const char url[] = "/zimfile/I/m/Ray_Charles_classic_piano_pose.jpg";
const Headers onlyRange{ {"Range", "bytes=123-456"} };
Headers rangeAndCompression(onlyRange);
rangeAndCompression.insert({"Accept-Encoding", "deflate"});
const auto p1 = zfs1_->GET(url, onlyRange);
const auto p2 = zfs1_->GET(url, rangeAndCompression);
EXPECT_EQ(p1->status, p2->status);
EXPECT_EQ(invariantHeaders(p1->headers), invariantHeaders(p2->headers));
EXPECT_EQ(p1->body, p2->body);
}