Merge pull request #849 from kiwix/backend_userlang_control

This commit is contained in:
Matthieu Gautier 2022-12-14 15:39:31 +01:00 committed by GitHub
commit a10067e6b6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 286 additions and 8 deletions

View File

@ -70,6 +70,14 @@ public: // functions
return s;
}
size_t getStringCount(const std::string& lang) const {
try {
return lang2TableMap.at(lang)->entryCount;
} catch(const std::out_of_range&) {
return 0;
}
}
private: // functions
const I18nStringTable* getStringsFor(const std::string& lang) const {
try {
@ -84,13 +92,17 @@ private: // data
const I18nStringTable* enStrings;
};
const I18nStringDB& getStringDb()
{
static const I18nStringDB stringDb;
return stringDb;
}
} // unnamed namespace
std::string getTranslatedString(const std::string& lang, const std::string& key)
{
static const I18nStringDB stringDb;
return stringDb.get(lang, key);
return getStringDb().get(lang, key);
}
namespace i18n
@ -111,4 +123,70 @@ std::string ParameterizedMessage::getText(const std::string& lang) const
return i18n::expandParameterizedString(lang, msgId, params);
}
namespace
{
LangPreference parseSingleLanguagePreference(const std::string& s)
{
const size_t langStart = s.find_first_not_of(" \t\n");
if ( langStart == std::string::npos ) {
return {"", 0};
}
const size_t langEnd = s.find(';', langStart);
if ( langEnd == std::string::npos ) {
return {s.substr(langStart), 1};
}
const std::string lang = s.substr(langStart, langEnd - langStart);
// We don't care about langEnd == langStart which will result in an empty
// language name - it will be dismissed by parseUserLanguagePreferences()
float q = 1.0;
int nCharsScanned;
if ( 1 == sscanf(s.c_str() + langEnd + 1, "q=%f%n", &q, &nCharsScanned)
&& langEnd + 1 + nCharsScanned == s.size() ) {
return {lang, q};
}
return {"", 0};
}
} // unnamed namespace
UserLangPreferences parseUserLanguagePreferences(const std::string& s)
{
UserLangPreferences result;
std::istringstream iss(s);
std::string singleLangPrefStr;
while ( std::getline(iss, singleLangPrefStr, ',') )
{
const auto langPref = parseSingleLanguagePreference(singleLangPrefStr);
if ( !langPref.lang.empty() && langPref.preference > 0 ) {
result.push_back(langPref);
}
}
return result;
}
std::string selectMostSuitableLanguage(const UserLangPreferences& prefs)
{
if ( prefs.empty() ) {
return "en";
}
std::string bestLangSoFar("en");
float bestScoreSoFar = 0;
const auto& stringDb = getStringDb();
for ( const auto& entry : prefs ) {
const float score = entry.preference * stringDb.getStringCount(entry.lang);
if ( score > bestScoreSoFar ) {
bestScoreSoFar = score;
bestLangSoFar = entry.lang;
}
}
return bestLangSoFar;
}
} // namespace kiwix

View File

@ -89,6 +89,18 @@ private: // data
const Parameters params;
};
struct LangPreference
{
const std::string lang;
const float preference;
};
typedef std::vector<LangPreference> UserLangPreferences;
UserLangPreferences parseUserLanguagePreferences(const std::string& s);
std::string selectMostSuitableLanguage(const UserLangPreferences& prefs);
} // namespace kiwix
#endif // KIWIX_SERVER_I18N

View File

@ -25,8 +25,10 @@
#include <sstream>
#include <cstdio>
#include <atomic>
#include <cctype>
#include "tools/stringTools.h"
#include "i18n.h"
namespace kiwix {
@ -80,6 +82,7 @@ RequestContext::RequestContext(struct MHD_Connection* connection,
{
MHD_get_connection_values(connection, MHD_HEADER_KIND, &RequestContext::fill_header, this);
MHD_get_connection_values(connection, MHD_GET_ARGUMENT_KIND, &RequestContext::fill_argument, this);
MHD_get_connection_values(connection, MHD_COOKIE_KIND, &RequestContext::fill_cookie, this);
try {
acceptEncodingGzip =
@ -89,6 +92,8 @@ RequestContext::RequestContext(struct MHD_Connection* connection,
try {
byteRange_ = ByteRange::parse(get_header(MHD_HTTP_HEADER_RANGE));
} catch (const std::out_of_range&) {}
userlang = determine_user_language();
}
RequestContext::~RequestContext()
@ -118,6 +123,14 @@ MHD_Result RequestContext::fill_argument(void *__this, enum MHD_ValueKind kind,
return MHD_YES;
}
MHD_Result RequestContext::fill_cookie(void *__this, enum MHD_ValueKind kind,
const char *key, const char* value)
{
RequestContext *_this = static_cast<RequestContext*>(__this);
_this->cookies[key] = value == nullptr ? "" : value;
return MHD_YES;
}
void RequestContext::print_debug_info() const {
printf("method : %s (%d)\n", method==RequestMethod::GET ? "GET" :
method==RequestMethod::POST ? "POST" :
@ -198,13 +211,24 @@ std::string RequestContext::get_header(const std::string& name) const {
}
std::string RequestContext::get_user_language() const
{
return userlang;
}
std::string RequestContext::determine_user_language() const
{
try {
return get_argument("userlang");
} catch(const std::out_of_range&) {}
try {
return get_header("Accept-Language");
return cookies.at("userlang");
} catch(const std::out_of_range&) {}
try {
const std::string acceptLanguage = get_header("Accept-Language");
const auto userLangPrefs = parseUserLanguagePreferences(acceptLanguage);
return selectMostSuitableLanguage(userLangPrefs);
} catch(const std::out_of_range&) {}
return "en";

View File

@ -130,10 +130,15 @@ class RequestContext {
ByteRange byteRange_;
std::map<std::string, std::string> headers;
std::map<std::string, std::vector<std::string>> arguments;
std::map<std::string, std::string> cookies;
std::string queryString;
std::string userlang;
private: // functions
std::string determine_user_language() const;
static MHD_Result fill_header(void *, enum MHD_ValueKind, const char*, const char*);
static MHD_Result fill_cookie(void *, enum MHD_ValueKind, const char*, const char*);
static MHD_Result fill_argument(void *, enum MHD_ValueKind, const char*, const char*);
};

View File

@ -387,6 +387,9 @@ MHD_Result Response::send(const RequestContext& request, MHD_Connection* connect
MHD_add_response_header(response, p.first.c_str(), p.second.c_str());
}
const std::string cookie = "userlang=" + request.get_user_language();
MHD_add_response_header(response, MHD_HTTP_HEADER_SET_COOKIE, cookie.c_str());
if (m_returnCode == MHD_HTTP_OK && m_byteRange.kind() == ByteRange::RESOLVED_PARTIAL_CONTENT)
m_returnCode = MHD_HTTP_PARTIAL_CONTENT;

View File

@ -20,6 +20,7 @@
#include "gtest/gtest.h"
#include "../src/tools/otherTools.h"
#include "zim/suggestion_iterator.h"
#include "../src/server/i18n.h"
#include <regex>
@ -172,3 +173,63 @@ R"EXPECTEDJSON([
)EXPECTEDJSON"
);
}
std::string toString(const kiwix::LangPreference& x)
{
std::ostringstream oss;
oss << "{" << x.lang << ", " << x.preference << "}";
return oss.str();
}
std::string toString(const kiwix::UserLangPreferences& prefs) {
std::ostringstream oss;
for ( const auto& x : prefs )
oss << toString(x);
return oss.str();
}
TEST(I18n, parseUserLanguagePreferences)
{
EXPECT_EQ(toString(kiwix::parseUserLanguagePreferences("")),
""
);
EXPECT_EQ(toString(kiwix::parseUserLanguagePreferences("*")),
"{*, 1}"
);
EXPECT_EQ(toString(kiwix::parseUserLanguagePreferences("fr")),
"{fr, 1}"
);
EXPECT_EQ(toString(kiwix::parseUserLanguagePreferences("fr-CH")),
"{fr-CH, 1}"
);
EXPECT_EQ(toString(kiwix::parseUserLanguagePreferences("fr, en-US")),
"{fr, 1}{en-US, 1}"
);
EXPECT_EQ(toString(kiwix::parseUserLanguagePreferences("ru;q=0.5")),
"{ru, 0.5}"
);
EXPECT_EQ(toString(kiwix::parseUserLanguagePreferences("fr-CH,ru;q=0.5")),
"{fr-CH, 1}{ru, 0.5}"
);
EXPECT_EQ(toString(kiwix::parseUserLanguagePreferences("ru;q=0.5, *;q=0.1")),
"{ru, 0.5}{*, 0.1}"
);
// rejected input
EXPECT_EQ(toString(kiwix::parseUserLanguagePreferences("ru;")),
""
);
EXPECT_EQ(toString(kiwix::parseUserLanguagePreferences("ru;q")),
""
);
EXPECT_EQ(toString(kiwix::parseUserLanguagePreferences("ru;q=")),
""
);
EXPECT_EQ(toString(kiwix::parseUserLanguagePreferences("ru;0.8")),
""
);
EXPECT_EQ(toString(kiwix::parseUserLanguagePreferences("fr,ru;0.8,en;q=0.5")),
"{fr, 1}{en, 0.5}"
);
}

View File

@ -976,58 +976,145 @@ TEST_F(ServerTest, UserLanguageControl)
{
struct TestData
{
const std::string description;
const std::string url;
const std::string acceptLanguageHeader;
const char* const requestCookie; // Cookie: header of the request
const char* const responseSetCookie; // Set-Cookie: header of the response
const std::string expectedH1;
operator TestContext() const
{
return TestContext{
TestContext ctx{
{"description", description},
{"url", url},
{"acceptLanguageHeader", acceptLanguageHeader},
};
if ( requestCookie ) {
ctx.push_back({"requestCookie", requestCookie});
}
return ctx;
}
};
const char* const NO_COOKIE = nullptr;
const TestData testData[] = {
{
"Default user language is English",
/*url*/ "/ROOT/content/zimfile/invalid-article",
/*Accept-Language:*/ "",
/*Request Cookie:*/ NO_COOKIE,
/*Response Set-Cookie:*/ "userlang=en",
/* expected <h1> */ "Not Found"
},
{
"userlang URL query parameter is respected",
/*url*/ "/ROOT/content/zimfile/invalid-article?userlang=en",
/*Accept-Language:*/ "",
/*Request Cookie:*/ NO_COOKIE,
/*Response Set-Cookie:*/ "userlang=en",
/* expected <h1> */ "Not Found"
},
{
"userlang URL query parameter is respected",
/*url*/ "/ROOT/content/zimfile/invalid-article?userlang=test",
/*Accept-Language:*/ "",
/*Request Cookie:*/ NO_COOKIE,
/*Response Set-Cookie:*/ "userlang=test",
/* expected <h1> */ "[I18N TESTING] Content not found, but at least the server is alive"
},
{
"'Accept-Language: *' is handled",
/*url*/ "/ROOT/content/zimfile/invalid-article",
/*Accept-Language:*/ "*",
/*Request Cookie:*/ NO_COOKIE,
/*Response Set-Cookie:*/ "userlang=en",
/* expected <h1> */ "Not Found"
},
{
"Accept-Language: header is respected",
/*url*/ "/ROOT/content/zimfile/invalid-article",
/*Accept-Language:*/ "test",
/*Request Cookie:*/ NO_COOKIE,
/*Response Set-Cookie:*/ "userlang=test",
/* expected <h1> */ "[I18N TESTING] Content not found, but at least the server is alive"
},
{
// userlang query parameter takes precedence over Accept-Language
"userlang cookie is respected",
/*url*/ "/ROOT/content/zimfile/invalid-article",
/*Accept-Language:*/ "",
/*Request Cookie:*/ "userlang=test",
/*Response Set-Cookie:*/ "userlang=test",
/* expected <h1> */ "[I18N TESTING] Content not found, but at least the server is alive"
},
{
"userlang cookie is correctly parsed",
/*url*/ "/ROOT/content/zimfile/invalid-article",
/*Accept-Language:*/ "",
/*Request Cookie:*/ "anothercookie=123; userlang=test",
/*Response Set-Cookie:*/ "userlang=test",
/* expected <h1> */ "[I18N TESTING] Content not found, but at least the server is alive"
},
{
"userlang cookie is correctly parsed",
/*url*/ "/ROOT/content/zimfile/invalid-article",
/*Accept-Language:*/ "",
/*Request Cookie:*/ "userlang=test; anothercookie=abc",
/*Response Set-Cookie:*/ "userlang=test",
/* expected <h1> */ "[I18N TESTING] Content not found, but at least the server is alive"
},
{
"userlang cookie is correctly parsed",
/*url*/ "/ROOT/content/zimfile/invalid-article",
/*Accept-Language:*/ "",
/*Request Cookie:*/ "cookie1=abc; userlang=test; cookie2=xyz",
/*Response Set-Cookie:*/ "userlang=test",
/* expected <h1> */ "[I18N TESTING] Content not found, but at least the server is alive"
},
{
"Multiple userlang cookies are not a problem",
/*url*/ "/ROOT/content/zimfile/invalid-article",
/*Accept-Language:*/ "",
/*Request Cookie:*/ "cookie1=abc; userlang=en; userlang=test; cookie2=xyz",
/*Response Set-Cookie:*/ "userlang=test",
/* expected <h1> */ "[I18N TESTING] Content not found, but at least the server is alive"
},
{
"userlang query parameter takes precedence over Accept-Language",
/*url*/ "/ROOT/content/zimfile/invalid-article?userlang=en",
/*Accept-Language:*/ "test",
/*Request Cookie:*/ NO_COOKIE,
/*Response Set-Cookie:*/ "userlang=en",
/* expected <h1> */ "Not Found"
},
{
// The value of the Accept-Language header is not currently parsed.
"userlang query parameter takes precedence over its cookie counterpart",
/*url*/ "/ROOT/content/zimfile/invalid-article?userlang=en",
/*Accept-Language:*/ "",
/*Request Cookie:*/ "userlang=test",
/*Response Set-Cookie:*/ "userlang=en",
/* expected <h1> */ "Not Found"
},
{
"userlang in cookies takes precedence over Accept-Language",
/*url*/ "/ROOT/content/zimfile/invalid-article",
/*Accept-Language:*/ "test",
/*Request Cookie:*/ "userlang=en",
/*Response Set-Cookie:*/ "userlang=en",
/* expected <h1> */ "Not Found"
},
{
"The value of the Accept-Language header is not currently parsed.",
// In case of a comma separated list of languages (optionally weighted
// with quality values) the default (en) language is used instead.
/*url*/ "/ROOT/content/zimfile/invalid-article",
/*Accept-Language:*/ "test;q=0.9, en;q=0.2",
/* expected <h1> */ "Not Found"
/*Request Cookie:*/ NO_COOKIE,
/*Response Set-Cookie:*/ "userlang=test",
/* expected <h1> */ "[I18N TESTING] Content not found, but at least the server is alive"
},
};
@ -1038,7 +1125,15 @@ TEST_F(ServerTest, UserLanguageControl)
if ( !t.acceptLanguageHeader.empty() ) {
headers.insert({"Accept-Language", t.acceptLanguageHeader});
}
if ( t.requestCookie ) {
headers.insert({"Cookie", t.requestCookie});
}
const auto r = zfs1_->GET(t.url.c_str(), headers);
if ( t.responseSetCookie ) {
EXPECT_EQ(t.responseSetCookie, getHeaderValue(r->headers, "Set-Cookie")) << t;
} else {
EXPECT_FALSE(r->has_header("Set-Cookie"));
}
std::regex_search(r->body, h1Match, h1Regex);
const std::string h1(h1Match[1]);
EXPECT_EQ(h1, t.expectedH1) << t;