mirror of https://github.com/kiwix/libkiwix.git
Merge pull request #849 from kiwix/backend_userlang_control
This commit is contained in:
commit
a10067e6b6
|
@ -70,6 +70,14 @@ public: // functions
|
|||
return s;
|
||||
}
|
||||
|
||||
size_t getStringCount(const std::string& lang) const {
|
||||
try {
|
||||
return lang2TableMap.at(lang)->entryCount;
|
||||
} catch(const std::out_of_range&) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
private: // functions
|
||||
const I18nStringTable* getStringsFor(const std::string& lang) const {
|
||||
try {
|
||||
|
@ -84,13 +92,17 @@ private: // data
|
|||
const I18nStringTable* enStrings;
|
||||
};
|
||||
|
||||
// Gives access to the string database of this translation unit.
// The database is a function-local static: it is constructed on the first
// call and the same instance is returned by every subsequent call.
const I18nStringDB& getStringDb()
{
  static const I18nStringDB sharedDb;
  return sharedDb;
}
|
||||
|
||||
} // unnamed namespace
|
||||
|
||||
std::string getTranslatedString(const std::string& lang, const std::string& key)
|
||||
{
|
||||
static const I18nStringDB stringDb;
|
||||
|
||||
return stringDb.get(lang, key);
|
||||
return getStringDb().get(lang, key);
|
||||
}
|
||||
|
||||
namespace i18n
|
||||
|
@ -111,4 +123,70 @@ std::string ParameterizedMessage::getText(const std::string& lang) const
|
|||
return i18n::expandParameterizedString(lang, msgId, params);
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
// Parses one element of an Accept-Language style list: a language tag that
// is optionally followed by a weight, i.e. `<lang>` or `<lang>;q=<number>`.
//
// Whitespace before and after the language tag, between ';' and "q=" and
// after the weight is ignored. A missing weight means a preference of 1.
//
// Returns {"", 0} for blank or malformed input. An element that starts with
// ';' yields an empty language name; we don't care about that here - such an
// entry is dismissed by parseUserLanguagePreferences().
LangPreference parseSingleLanguagePreference(const std::string& s)
{
  static const char WHITESPACE[] = " \t\n";

  const size_t langStart = s.find_first_not_of(WHITESPACE);
  if ( langStart == std::string::npos ) {
    return {"", 0};
  }

  const size_t langEnd = s.find(';', langStart);

  // substr() clamps the length, so when there is no ';' (langEnd == npos)
  // the rest of the string is taken.
  std::string lang = s.substr(langStart, langEnd - langStart);

  // Drop whitespace that separates the tag from the ';' (or from the end of
  // the element). find_last_not_of() returns npos when nothing is left and
  // npos + 1 wraps around to 0, which clears the string.
  lang.erase(lang.find_last_not_of(WHITESPACE) + 1);

  if ( langEnd == std::string::npos ) {
    return {lang, 1};
  }

  // Optional whitespace is allowed between ';' and the weight
  const size_t weightStart = s.find_first_not_of(WHITESPACE, langEnd + 1);
  if ( weightStart == std::string::npos ) {
    return {"", 0};
  }

  float q = 1.0;
  int nCharsScanned = 0;
  if ( 1 != sscanf(s.c_str() + weightStart, "q=%f%n", &q, &nCharsScanned)
       || nCharsScanned < 0 ) {
    return {"", 0};
  }

  // Anything but whitespace after the weight makes the element malformed
  const size_t weightEnd = weightStart + static_cast<size_t>(nCharsScanned);
  if ( s.find_first_not_of(WHITESPACE, weightEnd) != std::string::npos ) {
    return {"", 0};
  }

  return {lang, q};
}
|
||||
|
||||
} // unnamed namespace
|
||||
|
||||
// Splits `s` at commas and parses every piece as a single language
// preference. Pieces that yield an empty language name or a preference
// that is not greater than zero are left out of the result; the remaining
// entries keep the order in which they appear in `s`.
UserLangPreferences parseUserLanguagePreferences(const std::string& s)
{
  UserLangPreferences acceptedPrefs;
  std::istringstream input(s);

  for ( std::string item; std::getline(input, item, ','); ) {
    const auto parsed = parseSingleLanguagePreference(item);
    const bool unusable = parsed.lang.empty() || !(parsed.preference > 0);
    if ( unusable ) {
      continue;
    }
    acceptedPrefs.push_back(parsed);
  }

  return acceptedPrefs;
}
|
||||
|
||||
std::string selectMostSuitableLanguage(const UserLangPreferences& prefs)
|
||||
{
|
||||
if ( prefs.empty() ) {
|
||||
return "en";
|
||||
}
|
||||
|
||||
std::string bestLangSoFar("en");
|
||||
float bestScoreSoFar = 0;
|
||||
const auto& stringDb = getStringDb();
|
||||
for ( const auto& entry : prefs ) {
|
||||
const float score = entry.preference * stringDb.getStringCount(entry.lang);
|
||||
if ( score > bestScoreSoFar ) {
|
||||
bestScoreSoFar = score;
|
||||
bestLangSoFar = entry.lang;
|
||||
}
|
||||
}
|
||||
return bestLangSoFar;
|
||||
}
|
||||
|
||||
} // namespace kiwix
|
||||
|
|
|
@ -89,6 +89,18 @@ private: // data
|
|||
const Parameters params;
|
||||
};
|
||||
|
||||
// A single entry of a user's language preference list (for example one
// element of an Accept-Language header).
//
// The members are deliberately not const: const data members would delete
// the assignment operators and make containers of LangPreference impossible
// to sort, erase from or assign. Use `const LangPreference` where
// immutability is wanted.
struct LangPreference
{
  // Language tag, e.g. "fr", "fr-CH" or "*"
  std::string lang;

  // Weight of the language; a higher value means a stronger preference
  float preference;
};
|
||||
|
||||
typedef std::vector<LangPreference> UserLangPreferences;
|
||||
|
||||
UserLangPreferences parseUserLanguagePreferences(const std::string& s);
|
||||
|
||||
std::string selectMostSuitableLanguage(const UserLangPreferences& prefs);
|
||||
|
||||
} // namespace kiwix
|
||||
|
||||
#endif // KIWIX_SERVER_I18N
|
||||
|
|
|
@ -25,8 +25,10 @@
|
|||
#include <sstream>
|
||||
#include <cstdio>
|
||||
#include <atomic>
|
||||
#include <cctype>
|
||||
|
||||
#include "tools/stringTools.h"
|
||||
#include "i18n.h"
|
||||
|
||||
namespace kiwix {
|
||||
|
||||
|
@ -80,6 +82,7 @@ RequestContext::RequestContext(struct MHD_Connection* connection,
|
|||
{
|
||||
MHD_get_connection_values(connection, MHD_HEADER_KIND, &RequestContext::fill_header, this);
|
||||
MHD_get_connection_values(connection, MHD_GET_ARGUMENT_KIND, &RequestContext::fill_argument, this);
|
||||
MHD_get_connection_values(connection, MHD_COOKIE_KIND, &RequestContext::fill_cookie, this);
|
||||
|
||||
try {
|
||||
acceptEncodingGzip =
|
||||
|
@ -89,6 +92,8 @@ RequestContext::RequestContext(struct MHD_Connection* connection,
|
|||
try {
|
||||
byteRange_ = ByteRange::parse(get_header(MHD_HTTP_HEADER_RANGE));
|
||||
} catch (const std::out_of_range&) {}
|
||||
|
||||
userlang = determine_user_language();
|
||||
}
|
||||
|
||||
RequestContext::~RequestContext()
|
||||
|
@ -118,6 +123,14 @@ MHD_Result RequestContext::fill_argument(void *__this, enum MHD_ValueKind kind,
|
|||
return MHD_YES;
|
||||
}
|
||||
|
||||
// Callback for MHD_get_connection_values() that records a single cookie.
//
// `__this` is the RequestContext passed as the callback closure. The cookie
// is stored in its `cookies` map under `key`; a null `value` is stored as an
// empty string and a repeated key overwrites the previously stored value.
// Always returns MHD_YES.
MHD_Result RequestContext::fill_cookie(void *__this, enum MHD_ValueKind kind,
                                       const char *key, const char* value)
{
  auto* const request = static_cast<RequestContext*>(__this);
  const char* const text = (value != nullptr) ? value : "";
  request->cookies[key] = text;
  return MHD_YES;
}
|
||||
|
||||
void RequestContext::print_debug_info() const {
|
||||
printf("method : %s (%d)\n", method==RequestMethod::GET ? "GET" :
|
||||
method==RequestMethod::POST ? "POST" :
|
||||
|
@ -198,13 +211,24 @@ std::string RequestContext::get_header(const std::string& name) const {
|
|||
}
|
||||
|
||||
std::string RequestContext::get_user_language() const
|
||||
{
|
||||
return userlang;
|
||||
}
|
||||
|
||||
std::string RequestContext::determine_user_language() const
|
||||
{
|
||||
try {
|
||||
return get_argument("userlang");
|
||||
} catch(const std::out_of_range&) {}
|
||||
|
||||
try {
|
||||
return get_header("Accept-Language");
|
||||
return cookies.at("userlang");
|
||||
} catch(const std::out_of_range&) {}
|
||||
|
||||
try {
|
||||
const std::string acceptLanguage = get_header("Accept-Language");
|
||||
const auto userLangPrefs = parseUserLanguagePreferences(acceptLanguage);
|
||||
return selectMostSuitableLanguage(userLangPrefs);
|
||||
} catch(const std::out_of_range&) {}
|
||||
|
||||
return "en";
|
||||
|
|
|
@ -130,10 +130,15 @@ class RequestContext {
|
|||
ByteRange byteRange_;
|
||||
std::map<std::string, std::string> headers;
|
||||
std::map<std::string, std::vector<std::string>> arguments;
|
||||
std::map<std::string, std::string> cookies;
|
||||
std::string queryString;
|
||||
std::string userlang;
|
||||
|
||||
private: // functions
|
||||
std::string determine_user_language() const;
|
||||
|
||||
static MHD_Result fill_header(void *, enum MHD_ValueKind, const char*, const char*);
|
||||
static MHD_Result fill_cookie(void *, enum MHD_ValueKind, const char*, const char*);
|
||||
static MHD_Result fill_argument(void *, enum MHD_ValueKind, const char*, const char*);
|
||||
};
|
||||
|
||||
|
|
|
@ -387,6 +387,9 @@ MHD_Result Response::send(const RequestContext& request, MHD_Connection* connect
|
|||
MHD_add_response_header(response, p.first.c_str(), p.second.c_str());
|
||||
}
|
||||
|
||||
const std::string cookie = "userlang=" + request.get_user_language();
|
||||
MHD_add_response_header(response, MHD_HTTP_HEADER_SET_COOKIE, cookie.c_str());
|
||||
|
||||
if (m_returnCode == MHD_HTTP_OK && m_byteRange.kind() == ByteRange::RESOLVED_PARTIAL_CONTENT)
|
||||
m_returnCode = MHD_HTTP_PARTIAL_CONTENT;
|
||||
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
#include "gtest/gtest.h"
|
||||
#include "../src/tools/otherTools.h"
|
||||
#include "zim/suggestion_iterator.h"
|
||||
#include "../src/server/i18n.h"
|
||||
|
||||
#include <regex>
|
||||
|
||||
|
@ -172,3 +173,63 @@ R"EXPECTEDJSON([
|
|||
)EXPECTEDJSON"
|
||||
);
|
||||
}
|
||||
|
||||
// Renders a language preference as "{<lang>, <preference>}" so that it can
// be compared against a string literal in test expectations.
std::string toString(const kiwix::LangPreference& x)
{
  std::ostringstream out;
  out << '{' << x.lang;
  out << ", " << x.preference << '}';
  return out.str();
}
|
||||
|
||||
// Renders a list of language preferences as the concatenation (without any
// separator) of the renderings of its entries; an empty list gives "".
std::string toString(const kiwix::UserLangPreferences& prefs) {
  std::string joined;
  for ( const auto& pref : prefs ) {
    joined += toString(pref);
  }
  return joined;
}
|
||||
|
||||
// Checks kiwix::parseUserLanguagePreferences() on well-formed input as well
// as on entries that must be rejected.
TEST(I18n, parseUserLanguagePreferences)
{
  // Shorthand: parse the input and render the result as a string
  const auto parsed = [](const std::string& input) {
    return toString(kiwix::parseUserLanguagePreferences(input));
  };

  // accepted input
  EXPECT_EQ(parsed(""),                  "");
  EXPECT_EQ(parsed("*"),                 "{*, 1}");
  EXPECT_EQ(parsed("fr"),                "{fr, 1}");
  EXPECT_EQ(parsed("fr-CH"),             "{fr-CH, 1}");
  EXPECT_EQ(parsed("fr, en-US"),         "{fr, 1}{en-US, 1}");
  EXPECT_EQ(parsed("ru;q=0.5"),          "{ru, 0.5}");
  EXPECT_EQ(parsed("fr-CH,ru;q=0.5"),    "{fr-CH, 1}{ru, 0.5}");
  EXPECT_EQ(parsed("ru;q=0.5, *;q=0.1"), "{ru, 0.5}{*, 0.1}");

  // rejected input
  EXPECT_EQ(parsed("ru;"),    "");
  EXPECT_EQ(parsed("ru;q"),   "");
  EXPECT_EQ(parsed("ru;q="),  "");
  EXPECT_EQ(parsed("ru;0.8"), "");

  // a malformed entry doesn't affect its well-formed neighbours
  EXPECT_EQ(parsed("fr,ru;0.8,en;q=0.5"), "{fr, 1}{en, 0.5}");
}
|
||||
|
|
103
test/server.cpp
103
test/server.cpp
|
@ -976,58 +976,145 @@ TEST_F(ServerTest, UserLanguageControl)
|
|||
{
|
||||
struct TestData
|
||||
{
|
||||
const std::string description;
|
||||
const std::string url;
|
||||
const std::string acceptLanguageHeader;
|
||||
const char* const requestCookie; // Cookie: header of the request
|
||||
const char* const responseSetCookie; // Set-Cookie: header of the response
|
||||
const std::string expectedH1;
|
||||
|
||||
operator TestContext() const
|
||||
{
|
||||
return TestContext{
|
||||
TestContext ctx{
|
||||
{"description", description},
|
||||
{"url", url},
|
||||
{"acceptLanguageHeader", acceptLanguageHeader},
|
||||
};
|
||||
|
||||
if ( requestCookie ) {
|
||||
ctx.push_back({"requestCookie", requestCookie});
|
||||
}
|
||||
|
||||
return ctx;
|
||||
}
|
||||
};
|
||||
|
||||
const char* const NO_COOKIE = nullptr;
|
||||
|
||||
const TestData testData[] = {
|
||||
{
|
||||
"Default user language is English",
|
||||
/*url*/ "/ROOT/content/zimfile/invalid-article",
|
||||
/*Accept-Language:*/ "",
|
||||
/*Request Cookie:*/ NO_COOKIE,
|
||||
/*Response Set-Cookie:*/ "userlang=en",
|
||||
/* expected <h1> */ "Not Found"
|
||||
},
|
||||
{
|
||||
"userlang URL query parameter is respected",
|
||||
/*url*/ "/ROOT/content/zimfile/invalid-article?userlang=en",
|
||||
/*Accept-Language:*/ "",
|
||||
/*Request Cookie:*/ NO_COOKIE,
|
||||
/*Response Set-Cookie:*/ "userlang=en",
|
||||
/* expected <h1> */ "Not Found"
|
||||
},
|
||||
{
|
||||
"userlang URL query parameter is respected",
|
||||
/*url*/ "/ROOT/content/zimfile/invalid-article?userlang=test",
|
||||
/*Accept-Language:*/ "",
|
||||
/*Request Cookie:*/ NO_COOKIE,
|
||||
/*Response Set-Cookie:*/ "userlang=test",
|
||||
/* expected <h1> */ "[I18N TESTING] Content not found, but at least the server is alive"
|
||||
},
|
||||
{
|
||||
"'Accept-Language: *' is handled",
|
||||
/*url*/ "/ROOT/content/zimfile/invalid-article",
|
||||
/*Accept-Language:*/ "*",
|
||||
/*Request Cookie:*/ NO_COOKIE,
|
||||
/*Response Set-Cookie:*/ "userlang=en",
|
||||
/* expected <h1> */ "Not Found"
|
||||
},
|
||||
{
|
||||
"Accept-Language: header is respected",
|
||||
/*url*/ "/ROOT/content/zimfile/invalid-article",
|
||||
/*Accept-Language:*/ "test",
|
||||
/*Request Cookie:*/ NO_COOKIE,
|
||||
/*Response Set-Cookie:*/ "userlang=test",
|
||||
/* expected <h1> */ "[I18N TESTING] Content not found, but at least the server is alive"
|
||||
},
|
||||
{
|
||||
// userlang query parameter takes precedence over Accept-Language
|
||||
"userlang cookie is respected",
|
||||
/*url*/ "/ROOT/content/zimfile/invalid-article",
|
||||
/*Accept-Language:*/ "",
|
||||
/*Request Cookie:*/ "userlang=test",
|
||||
/*Response Set-Cookie:*/ "userlang=test",
|
||||
/* expected <h1> */ "[I18N TESTING] Content not found, but at least the server is alive"
|
||||
},
|
||||
{
|
||||
"userlang cookie is correctly parsed",
|
||||
/*url*/ "/ROOT/content/zimfile/invalid-article",
|
||||
/*Accept-Language:*/ "",
|
||||
/*Request Cookie:*/ "anothercookie=123; userlang=test",
|
||||
/*Response Set-Cookie:*/ "userlang=test",
|
||||
/* expected <h1> */ "[I18N TESTING] Content not found, but at least the server is alive"
|
||||
},
|
||||
{
|
||||
"userlang cookie is correctly parsed",
|
||||
/*url*/ "/ROOT/content/zimfile/invalid-article",
|
||||
/*Accept-Language:*/ "",
|
||||
/*Request Cookie:*/ "userlang=test; anothercookie=abc",
|
||||
/*Response Set-Cookie:*/ "userlang=test",
|
||||
/* expected <h1> */ "[I18N TESTING] Content not found, but at least the server is alive"
|
||||
},
|
||||
{
|
||||
"userlang cookie is correctly parsed",
|
||||
/*url*/ "/ROOT/content/zimfile/invalid-article",
|
||||
/*Accept-Language:*/ "",
|
||||
/*Request Cookie:*/ "cookie1=abc; userlang=test; cookie2=xyz",
|
||||
/*Response Set-Cookie:*/ "userlang=test",
|
||||
/* expected <h1> */ "[I18N TESTING] Content not found, but at least the server is alive"
|
||||
},
|
||||
{
|
||||
"Multiple userlang cookies are not a problem",
|
||||
/*url*/ "/ROOT/content/zimfile/invalid-article",
|
||||
/*Accept-Language:*/ "",
|
||||
/*Request Cookie:*/ "cookie1=abc; userlang=en; userlang=test; cookie2=xyz",
|
||||
/*Response Set-Cookie:*/ "userlang=test",
|
||||
/* expected <h1> */ "[I18N TESTING] Content not found, but at least the server is alive"
|
||||
},
|
||||
{
|
||||
"userlang query parameter takes precedence over Accept-Language",
|
||||
/*url*/ "/ROOT/content/zimfile/invalid-article?userlang=en",
|
||||
/*Accept-Language:*/ "test",
|
||||
/*Request Cookie:*/ NO_COOKIE,
|
||||
/*Response Set-Cookie:*/ "userlang=en",
|
||||
/* expected <h1> */ "Not Found"
|
||||
},
|
||||
{
|
||||
// The value of the Accept-Language header is not currently parsed.
|
||||
"userlang query parameter takes precedence over its cookie counterpart",
|
||||
/*url*/ "/ROOT/content/zimfile/invalid-article?userlang=en",
|
||||
/*Accept-Language:*/ "",
|
||||
/*Request Cookie:*/ "userlang=test",
|
||||
/*Response Set-Cookie:*/ "userlang=en",
|
||||
/* expected <h1> */ "Not Found"
|
||||
},
|
||||
{
|
||||
"userlang in cookies takes precedence over Accept-Language",
|
||||
/*url*/ "/ROOT/content/zimfile/invalid-article",
|
||||
/*Accept-Language:*/ "test",
|
||||
/*Request Cookie:*/ "userlang=en",
|
||||
/*Response Set-Cookie:*/ "userlang=en",
|
||||
/* expected <h1> */ "Not Found"
|
||||
},
|
||||
{
|
||||
"The value of the Accept-Language header is not currently parsed.",
|
||||
// In case of a comma separated list of languages (optionally weighted
|
||||
// with quality values) the default (en) language is used instead.
|
||||
/*url*/ "/ROOT/content/zimfile/invalid-article",
|
||||
/*Accept-Language:*/ "test;q=0.9, en;q=0.2",
|
||||
/* expected <h1> */ "Not Found"
|
||||
/*Request Cookie:*/ NO_COOKIE,
|
||||
/*Response Set-Cookie:*/ "userlang=test",
|
||||
/* expected <h1> */ "[I18N TESTING] Content not found, but at least the server is alive"
|
||||
},
|
||||
};
|
||||
|
||||
|
@ -1038,7 +1125,15 @@ TEST_F(ServerTest, UserLanguageControl)
|
|||
if ( !t.acceptLanguageHeader.empty() ) {
|
||||
headers.insert({"Accept-Language", t.acceptLanguageHeader});
|
||||
}
|
||||
if ( t.requestCookie ) {
|
||||
headers.insert({"Cookie", t.requestCookie});
|
||||
}
|
||||
const auto r = zfs1_->GET(t.url.c_str(), headers);
|
||||
if ( t.responseSetCookie ) {
|
||||
EXPECT_EQ(t.responseSetCookie, getHeaderValue(r->headers, "Set-Cookie")) << t;
|
||||
} else {
|
||||
EXPECT_FALSE(r->has_header("Set-Cookie"));
|
||||
}
|
||||
std::regex_search(r->body, h1Match, h1Regex);
|
||||
const std::string h1(h1Match[1]);
|
||||
EXPECT_EQ(h1, t.expectedH1) << t;
|
||||
|
|
Loading…
Reference in New Issue