From c0fe6f4aeec21e5f19a6994a7fd91069c2794e52 Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Mon, 28 Nov 2022 11:42:21 +0400 Subject: [PATCH 1/9] Added cookies to ServerTest.UserLanguageControl --- test/server.cpp | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/test/server.cpp b/test/server.cpp index d110707c5..be611ad68 100644 --- a/test/server.cpp +++ b/test/server.cpp @@ -978,47 +978,70 @@ TEST_F(ServerTest, UserLanguageControl) { const std::string url; const std::string acceptLanguageHeader; + const char* const requestCookie; // Cookie: header of the request + const char* const responseSetCookie; // Set-Cookie: header of the response const std::string expectedH1; operator TestContext() const { - return TestContext{ + TestContext ctx{ {"url", url}, {"acceptLanguageHeader", acceptLanguageHeader}, }; + + if ( requestCookie ) { + ctx.push_back({"requestCookie", requestCookie}); + } + + return ctx; } }; + const char* const NO_COOKIE = nullptr; + const char* const NO_SET_COOKIE = nullptr; + const TestData testData[] = { { /*url*/ "/ROOT/content/zimfile/invalid-article", /*Accept-Language:*/ "", + /*Request Cookie:*/ NO_COOKIE, + /*Response Set-Cookie:*/ NO_SET_COOKIE, /* expected

*/ "Not Found" }, { /*url*/ "/ROOT/content/zimfile/invalid-article?userlang=en", /*Accept-Language:*/ "", + /*Request Cookie:*/ NO_COOKIE, + /*Response Set-Cookie:*/ NO_SET_COOKIE, /* expected

*/ "Not Found" }, { /*url*/ "/ROOT/content/zimfile/invalid-article?userlang=test", /*Accept-Language:*/ "", + /*Request Cookie:*/ NO_COOKIE, + /*Response Set-Cookie:*/ NO_SET_COOKIE, /* expected

*/ "[I18N TESTING] Content not found, but at least the server is alive" }, { /*url*/ "/ROOT/content/zimfile/invalid-article", /*Accept-Language:*/ "*", + /*Request Cookie:*/ NO_COOKIE, + /*Response Set-Cookie:*/ NO_SET_COOKIE, /* expected

*/ "Not Found" }, { /*url*/ "/ROOT/content/zimfile/invalid-article", /*Accept-Language:*/ "test", + /*Request Cookie:*/ NO_COOKIE, + /*Response Set-Cookie:*/ NO_SET_COOKIE, /* expected

*/ "[I18N TESTING] Content not found, but at least the server is alive" }, { // userlang query parameter takes precedence over Accept-Language /*url*/ "/ROOT/content/zimfile/invalid-article?userlang=en", /*Accept-Language:*/ "test", + /*Request Cookie:*/ NO_COOKIE, + /*Response Set-Cookie:*/ NO_SET_COOKIE, /* expected

*/ "Not Found" }, { @@ -1027,6 +1050,8 @@ TEST_F(ServerTest, UserLanguageControl) // with quality values) the default (en) language is used instead. /*url*/ "/ROOT/content/zimfile/invalid-article", /*Accept-Language:*/ "test;q=0.9, en;q=0.2", + /*Request Cookie:*/ NO_COOKIE, + /*Response Set-Cookie:*/ NO_SET_COOKIE, /* expected

*/ "Not Found" }, }; @@ -1038,7 +1063,15 @@ TEST_F(ServerTest, UserLanguageControl) if ( !t.acceptLanguageHeader.empty() ) { headers.insert({"Accept-Language", t.acceptLanguageHeader}); } + if ( t.requestCookie ) { + headers.insert({"Cookie", t.requestCookie}); + } const auto r = zfs1_->GET(t.url.c_str(), headers); + if ( t.responseSetCookie ) { + EXPECT_EQ(t.responseSetCookie, getHeaderValue(r->headers, "Set-Cookie")) << t; + } else { + EXPECT_FALSE(r->has_header("Set-Cookie")); + } std::regex_search(r->body, h1Match, h1Regex); const std::string h1(h1Match[1]); EXPECT_EQ(h1, t.expectedH1) << t; From 1d74b5e3115fc71855c9f925f5a52768ff24ae35 Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Mon, 28 Nov 2022 12:08:51 +0400 Subject: [PATCH 2/9] Server sets the userlang cookie on every response --- src/server/request_context.cpp | 19 ++++++++++++++++++- src/server/response.cpp | 3 +++ test/server.cpp | 15 +++++++-------- 3 files changed, 28 insertions(+), 9 deletions(-) diff --git a/src/server/request_context.cpp b/src/server/request_context.cpp index 5e191afa7..db552a920 100644 --- a/src/server/request_context.cpp +++ b/src/server/request_context.cpp @@ -25,6 +25,7 @@ #include #include #include +#include #include "tools/stringTools.h" @@ -63,6 +64,22 @@ fullURL2LocalURL(const std::string& full_url, const std::string& rootLocation) } } +std::string parseAcceptLanguageHeader(const std::string& s) +{ + // TODO: implement properly + + if ( s.empty() ) + return "en"; + + for ( const char c : s ) { + if ( ! std::isalpha(c) ) { + return "en"; + } + } + + return s; +} + } // unnamed namespace RequestContext::RequestContext(struct MHD_Connection* connection, @@ -204,7 +221,7 @@ std::string RequestContext::get_user_language() const } catch(const std::out_of_range&) {} try { - return get_header("Accept-Language"); + return parseAcceptLanguageHeader(get_header("Accept-Language")); } catch(const std::out_of_range&) {} return "en"; diff --git a/src/server/response.cpp b/src/server/response.cpp index 6020f9f78..c0cd8bd5d 100644 --- a/src/server/response.cpp +++ b/src/server/response.cpp @@ -387,6 +387,9 @@ MHD_Result Response::send(const RequestContext& request, MHD_Connection* connect MHD_add_response_header(response, p.first.c_str(), p.second.c_str()); } + const std::string cookie = "userlang=" + request.get_user_language(); + MHD_add_response_header(response, MHD_HTTP_HEADER_SET_COOKIE, cookie.c_str()); + if (m_returnCode == MHD_HTTP_OK && m_byteRange.kind() == ByteRange::RESOLVED_PARTIAL_CONTENT) m_returnCode = MHD_HTTP_PARTIAL_CONTENT; diff --git a/test/server.cpp b/test/server.cpp index be611ad68..5874f8be2 100644 --- a/test/server.cpp +++ b/test/server.cpp @@ -998,42 +998,41 @@ TEST_F(ServerTest, UserLanguageControl) }; const char* const NO_COOKIE = nullptr; - const char* const NO_SET_COOKIE = nullptr; const TestData testData[] = { { /*url*/ "/ROOT/content/zimfile/invalid-article", /*Accept-Language:*/ "", /*Request Cookie:*/ NO_COOKIE, - /*Response Set-Cookie:*/ NO_SET_COOKIE, + /*Response Set-Cookie:*/ "userlang=en", /* expected

*/ "Not Found" }, { /*url*/ "/ROOT/content/zimfile/invalid-article?userlang=en", /*Accept-Language:*/ "", /*Request Cookie:*/ NO_COOKIE, - /*Response Set-Cookie:*/ NO_SET_COOKIE, + /*Response Set-Cookie:*/ "userlang=en", /* expected

*/ "Not Found" }, { /*url*/ "/ROOT/content/zimfile/invalid-article?userlang=test", /*Accept-Language:*/ "", /*Request Cookie:*/ NO_COOKIE, - /*Response Set-Cookie:*/ NO_SET_COOKIE, + /*Response Set-Cookie:*/ "userlang=test", /* expected

*/ "[I18N TESTING] Content not found, but at least the server is alive" }, { /*url*/ "/ROOT/content/zimfile/invalid-article", /*Accept-Language:*/ "*", /*Request Cookie:*/ NO_COOKIE, - /*Response Set-Cookie:*/ NO_SET_COOKIE, + /*Response Set-Cookie:*/ "userlang=en", /* expected

*/ "Not Found" }, { /*url*/ "/ROOT/content/zimfile/invalid-article", /*Accept-Language:*/ "test", /*Request Cookie:*/ NO_COOKIE, - /*Response Set-Cookie:*/ NO_SET_COOKIE, + /*Response Set-Cookie:*/ "userlang=test", /* expected

*/ "[I18N TESTING] Content not found, but at least the server is alive" }, { @@ -1041,7 +1040,7 @@ TEST_F(ServerTest, UserLanguageControl) /*url*/ "/ROOT/content/zimfile/invalid-article?userlang=en", /*Accept-Language:*/ "test", /*Request Cookie:*/ NO_COOKIE, - /*Response Set-Cookie:*/ NO_SET_COOKIE, + /*Response Set-Cookie:*/ "userlang=en", /* expected

*/ "Not Found" }, { @@ -1051,7 +1050,7 @@ TEST_F(ServerTest, UserLanguageControl) /*url*/ "/ROOT/content/zimfile/invalid-article", /*Accept-Language:*/ "test;q=0.9, en;q=0.2", /*Request Cookie:*/ NO_COOKIE, - /*Response Set-Cookie:*/ NO_SET_COOKIE, + /*Response Set-Cookie:*/ "userlang=en", /* expected

*/ "Not Found" }, }; From 600ff079867b474d315ee0910ba0358a6e80769e Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Mon, 28 Nov 2022 12:20:44 +0400 Subject: [PATCH 3/9] Test descriptions in ServerTest.UserLanguageControl --- test/server.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/test/server.cpp b/test/server.cpp index 5874f8be2..09c73863a 100644 --- a/test/server.cpp +++ b/test/server.cpp @@ -976,6 +976,7 @@ TEST_F(ServerTest, UserLanguageControl) { struct TestData { + const std::string description; const std::string url; const std::string acceptLanguageHeader; const char* const requestCookie; // Cookie: header of the request @@ -985,6 +986,7 @@ TEST_F(ServerTest, UserLanguageControl) operator TestContext() const { TestContext ctx{ + {"description", description}, {"url", url}, {"acceptLanguageHeader", acceptLanguageHeader}, }; @@ -1001,6 +1003,7 @@ TEST_F(ServerTest, UserLanguageControl) const TestData testData[] = { { + "Default user language is English", /*url*/ "/ROOT/content/zimfile/invalid-article", /*Accept-Language:*/ "", /*Request Cookie:*/ NO_COOKIE, @@ -1008,6 +1011,7 @@ TEST_F(ServerTest, UserLanguageControl) /* expected

*/ "Not Found" }, { + "userlang URL query parameter is respected", /*url*/ "/ROOT/content/zimfile/invalid-article?userlang=en", /*Accept-Language:*/ "", /*Request Cookie:*/ NO_COOKIE, @@ -1015,6 +1019,7 @@ TEST_F(ServerTest, UserLanguageControl) /* expected

*/ "Not Found" }, { + "userlang URL query parameter is respected", /*url*/ "/ROOT/content/zimfile/invalid-article?userlang=test", /*Accept-Language:*/ "", /*Request Cookie:*/ NO_COOKIE, @@ -1022,6 +1027,7 @@ TEST_F(ServerTest, UserLanguageControl) /* expected

*/ "[I18N TESTING] Content not found, but at least the server is alive" }, { + "'Accept-Language: *' is handled", /*url*/ "/ROOT/content/zimfile/invalid-article", /*Accept-Language:*/ "*", /*Request Cookie:*/ NO_COOKIE, @@ -1029,6 +1035,7 @@ TEST_F(ServerTest, UserLanguageControl) /* expected

*/ "Not Found" }, { + "Accept-Language: header is respected", /*url*/ "/ROOT/content/zimfile/invalid-article", /*Accept-Language:*/ "test", /*Request Cookie:*/ NO_COOKIE, @@ -1036,7 +1043,7 @@ TEST_F(ServerTest, UserLanguageControl) /* expected

*/ "[I18N TESTING] Content not found, but at least the server is alive" }, { - // userlang query parameter takes precedence over Accept-Language + "userlang query parameter takes precedence over Accept-Language", /*url*/ "/ROOT/content/zimfile/invalid-article?userlang=en", /*Accept-Language:*/ "test", /*Request Cookie:*/ NO_COOKIE, @@ -1044,7 +1051,7 @@ TEST_F(ServerTest, UserLanguageControl) /* expected

*/ "Not Found" }, { - // The value of the Accept-Language header is not currently parsed. + "The value of the Accept-Language header is not currently parsed.", // In case of a comma separated list of languages (optionally weighted // with quality values) the default (en) language is used instead. /*url*/ "/ROOT/content/zimfile/invalid-article", From 14f0f7906157f381c8b133cf174a0b75fa4ba68d Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Mon, 28 Nov 2022 13:43:20 +0400 Subject: [PATCH 4/9] User language control via userlang cookie --- src/server/request_context.cpp | 20 ++++++++++++ src/server/request_context.h | 5 +++ test/server.cpp | 56 ++++++++++++++++++++++++++++++++++ 3 files changed, 81 insertions(+) diff --git a/src/server/request_context.cpp b/src/server/request_context.cpp index db552a920..bc99610fd 100644 --- a/src/server/request_context.cpp +++ b/src/server/request_context.cpp @@ -97,6 +97,7 @@ RequestContext::RequestContext(struct MHD_Connection* connection, { MHD_get_connection_values(connection, MHD_HEADER_KIND, &RequestContext::fill_header, this); MHD_get_connection_values(connection, MHD_GET_ARGUMENT_KIND, &RequestContext::fill_argument, this); + MHD_get_connection_values(connection, MHD_COOKIE_KIND, &RequestContext::fill_cookie, this); try { acceptEncodingGzip = @@ -106,6 +107,8 @@ RequestContext::RequestContext(struct MHD_Connection* connection, try { byteRange_ = ByteRange::parse(get_header(MHD_HTTP_HEADER_RANGE)); } catch (const std::out_of_range&) {} + + userlang = determine_user_language(); } RequestContext::~RequestContext() @@ -135,6 +138,14 @@ MHD_Result RequestContext::fill_argument(void *__this, enum MHD_ValueKind kind, return MHD_YES; } +MHD_Result RequestContext::fill_cookie(void *__this, enum MHD_ValueKind kind, + const char *key, const char* value) +{ + RequestContext *_this = static_cast(__this); + _this->cookies[key] = value == nullptr ? "" : value; + return MHD_YES; +} + void RequestContext::print_debug_info() const { printf("method : %s (%d)\n", method==RequestMethod::GET ? "GET" : method==RequestMethod::POST ? "POST" : @@ -215,11 +226,20 @@ std::string RequestContext::get_header(const std::string& name) const { } std::string RequestContext::get_user_language() const +{ + return userlang; +} + +std::string RequestContext::determine_user_language() const { try { return get_argument("userlang"); } catch(const std::out_of_range&) {} + try { + return cookies.at("userlang"); + } catch(const std::out_of_range&) {} + try { return parseAcceptLanguageHeader(get_header("Accept-Language")); } catch(const std::out_of_range&) {} diff --git a/src/server/request_context.h b/src/server/request_context.h index de02d465f..07339324b 100644 --- a/src/server/request_context.h +++ b/src/server/request_context.h @@ -130,10 +130,15 @@ class RequestContext { ByteRange byteRange_; std::map headers; std::map> arguments; + std::map cookies; std::string queryString; + std::string userlang; private: // functions + std::string determine_user_language() const; + static MHD_Result fill_header(void *, enum MHD_ValueKind, const char*, const char*); + static MHD_Result fill_cookie(void *, enum MHD_ValueKind, const char*, const char*); static MHD_Result fill_argument(void *, enum MHD_ValueKind, const char*, const char*); }; diff --git a/test/server.cpp b/test/server.cpp index 09c73863a..7b7548bb4 100644 --- a/test/server.cpp +++ b/test/server.cpp @@ -1042,6 +1042,46 @@ TEST_F(ServerTest, UserLanguageControl) /*Response Set-Cookie:*/ "userlang=test", /* expected

*/ "[I18N TESTING] Content not found, but at least the server is alive" }, + { + "userlang cookie is respected", + /*url*/ "/ROOT/content/zimfile/invalid-article", + /*Accept-Language:*/ "", + /*Request Cookie:*/ "userlang=test", + /*Response Set-Cookie:*/ "userlang=test", + /* expected

*/ "[I18N TESTING] Content not found, but at least the server is alive" + }, + { + "userlang cookie is correctly parsed", + /*url*/ "/ROOT/content/zimfile/invalid-article", + /*Accept-Language:*/ "", + /*Request Cookie:*/ "anothercookie=123; userlang=test", + /*Response Set-Cookie:*/ "userlang=test", + /* expected

*/ "[I18N TESTING] Content not found, but at least the server is alive" + }, + { + "userlang cookie is correctly parsed", + /*url*/ "/ROOT/content/zimfile/invalid-article", + /*Accept-Language:*/ "", + /*Request Cookie:*/ "userlang=test; anothercookie=abc", + /*Response Set-Cookie:*/ "userlang=test", + /* expected

*/ "[I18N TESTING] Content not found, but at least the server is alive" + }, + { + "userlang cookie is correctly parsed", + /*url*/ "/ROOT/content/zimfile/invalid-article", + /*Accept-Language:*/ "", + /*Request Cookie:*/ "cookie1=abc; userlang=test; cookie2=xyz", + /*Response Set-Cookie:*/ "userlang=test", + /* expected

*/ "[I18N TESTING] Content not found, but at least the server is alive" + }, + { + "Multiple userlang cookies are not a problem", + /*url*/ "/ROOT/content/zimfile/invalid-article", + /*Accept-Language:*/ "", + /*Request Cookie:*/ "cookie1=abc; userlang=en; userlang=test; cookie2=xyz", + /*Response Set-Cookie:*/ "userlang=test", + /* expected

*/ "[I18N TESTING] Content not found, but at least the server is alive" + }, { "userlang query parameter takes precedence over Accept-Language", /*url*/ "/ROOT/content/zimfile/invalid-article?userlang=en", @@ -1050,6 +1090,22 @@ TEST_F(ServerTest, UserLanguageControl) /*Response Set-Cookie:*/ "userlang=en", /* expected

*/ "Not Found" }, + { + "userlang query parameter takes precedence over its cookie counterpart", + /*url*/ "/ROOT/content/zimfile/invalid-article?userlang=en", + /*Accept-Language:*/ "", + /*Request Cookie:*/ "userlang=test", + /*Response Set-Cookie:*/ "userlang=en", + /* expected

*/ "Not Found" + }, + { + "userlang in cookies takes precedence over Accept-Language", + /*url*/ "/ROOT/content/zimfile/invalid-article", + /*Accept-Language:*/ "test", + /*Request Cookie:*/ "userlang=en", + /*Response Set-Cookie:*/ "userlang=en", + /* expected

*/ "Not Found" + }, { "The value of the Accept-Language header is not currently parsed.", // In case of a comma separated list of languages (optionally weighted From 669d8898ac2f2c129481c9fc63d702427e314817 Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Tue, 29 Nov 2022 12:37:08 +0400 Subject: [PATCH 5/9] Enter UserLangPreferences --- src/server/request_context.cpp | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/src/server/request_context.cpp b/src/server/request_context.cpp index bc99610fd..bf14ee783 100644 --- a/src/server/request_context.cpp +++ b/src/server/request_context.cpp @@ -64,20 +64,29 @@ fullURL2LocalURL(const std::string& full_url, const std::string& rootLocation) } } -std::string parseAcceptLanguageHeader(const std::string& s) +struct LangPreference +{ + const std::string lang; + const float preference; +}; + +typedef std::vector UserLangPreferences; + +UserLangPreferences parseUserLanguagePreferences(const std::string& s) { // TODO: implement properly + const UserLangPreferences defaultPref{{"en", 1}}; if ( s.empty() ) - return "en"; + return defaultPref; for ( const char c : s ) { if ( ! std::isalpha(c) ) { - return "en"; + return defaultPref; } } - return s; + return {{s, 1}}; } } // unnamed namespace @@ -241,7 +250,8 @@ std::string RequestContext::determine_user_language() const } catch(const std::out_of_range&) {} try { - return parseAcceptLanguageHeader(get_header("Accept-Language")); + const std::string acceptLanguage = get_header("Accept-Language"); + return parseUserLanguagePreferences(acceptLanguage)[0].lang; } catch(const std::out_of_range&) {} return "en"; From 69b3e1f8a770cfaa590fa63504f7fad8c90c791d Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Tue, 29 Nov 2022 12:42:54 +0400 Subject: [PATCH 6/9] Moved user language preferences into i18n.{h,cpp} --- src/server/i18n.cpp | 17 +++++++++++++++++ src/server/i18n.h | 10 ++++++++++ src/server/request_context.cpp | 26 +------------------------- 3 files changed, 28 insertions(+), 25 deletions(-) diff --git a/src/server/i18n.cpp b/src/server/i18n.cpp index 2aecc724d..acde36993 100644 --- a/src/server/i18n.cpp +++ b/src/server/i18n.cpp @@ -111,4 +111,21 @@ std::string ParameterizedMessage::getText(const std::string& lang) const return i18n::expandParameterizedString(lang, msgId, params); } +UserLangPreferences parseUserLanguagePreferences(const std::string& s) +{ + // TODO: implement properly + const UserLangPreferences defaultPref{{"en", 1}}; + + if ( s.empty() ) + return defaultPref; + + for ( const char c : s ) { + if ( ! std::isalpha(c) ) { + return defaultPref; + } + } + + return {{s, 1}}; +} + } // namespace kiwix diff --git a/src/server/i18n.h b/src/server/i18n.h index d4b084d3e..c3d648f0c 100644 --- a/src/server/i18n.h +++ b/src/server/i18n.h @@ -89,6 +89,16 @@ private: // data const Parameters params; }; +struct LangPreference +{ + const std::string lang; + const float preference; +}; + +typedef std::vector UserLangPreferences; + +UserLangPreferences parseUserLanguagePreferences(const std::string& s); + } // namespace kiwix #endif // KIWIX_SERVER_I18N diff --git a/src/server/request_context.cpp b/src/server/request_context.cpp index bf14ee783..d2898ed47 100644 --- a/src/server/request_context.cpp +++ b/src/server/request_context.cpp @@ -28,6 +28,7 @@ #include #include "tools/stringTools.h" +#include "i18n.h" namespace kiwix { @@ -64,31 +65,6 @@ fullURL2LocalURL(const std::string& full_url, const std::string& rootLocation) } } -struct LangPreference -{ - const std::string lang; - const float preference; -}; - -typedef std::vector UserLangPreferences; - -UserLangPreferences parseUserLanguagePreferences(const std::string& s) -{ - // TODO: implement properly - const UserLangPreferences defaultPref{{"en", 1}}; - - if ( s.empty() ) - return defaultPref; - - for ( const char c : s ) { - if ( ! std::isalpha(c) ) { - return defaultPref; - } - } - - return {{s, 1}}; -} - } // unnamed namespace RequestContext::RequestContext(struct MHD_Connection* connection, From 88597e1834a3005ad39ce3e0023b1ca09f933ae3 Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Tue, 29 Nov 2022 12:54:45 +0400 Subject: [PATCH 7/9] Enter selectMostSuitableLanguage() --- src/server/i18n.cpp | 6 ++++++ src/server/i18n.h | 2 ++ src/server/request_context.cpp | 3 ++- 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/server/i18n.cpp b/src/server/i18n.cpp index acde36993..faf14d5cd 100644 --- a/src/server/i18n.cpp +++ b/src/server/i18n.cpp @@ -128,4 +128,10 @@ UserLangPreferences parseUserLanguagePreferences(const std::string& s) return {{s, 1}}; } +std::string selectMostSuitableLanguage(const UserLangPreferences& prefs) +{ + // TOOD: implement properly + return prefs[0].lang; +} + } // namespace kiwix diff --git a/src/server/i18n.h b/src/server/i18n.h index c3d648f0c..23236074a 100644 --- a/src/server/i18n.h +++ b/src/server/i18n.h @@ -99,6 +99,8 @@ typedef std::vector UserLangPreferences; UserLangPreferences parseUserLanguagePreferences(const std::string& s); +std::string selectMostSuitableLanguage(const UserLangPreferences& prefs); + } // namespace kiwix #endif // KIWIX_SERVER_I18N diff --git a/src/server/request_context.cpp b/src/server/request_context.cpp index d2898ed47..4eac4ad33 100644 --- a/src/server/request_context.cpp +++ b/src/server/request_context.cpp @@ -227,7 +227,8 @@ std::string RequestContext::determine_user_language() const try { const std::string acceptLanguage = get_header("Accept-Language"); - return parseUserLanguagePreferences(acceptLanguage)[0].lang; + const auto userLangPrefs = parseUserLanguagePreferences(acceptLanguage); + return selectMostSuitableLanguage(userLangPrefs); } catch(const std::out_of_range&) {} return "en"; From 634f3fcf14f514cef3bf8386b00d1aa026a2cd70 Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Tue, 29 Nov 2022 13:04:17 +0400 Subject: [PATCH 8/9] Properly implemented selectMostSuitableLanguage() --- src/server/i18n.cpp | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/src/server/i18n.cpp b/src/server/i18n.cpp index faf14d5cd..2d9f3bb56 100644 --- a/src/server/i18n.cpp +++ b/src/server/i18n.cpp @@ -70,6 +70,14 @@ public: // functions return s; } + size_t getStringCount(const std::string& lang) const { + try { + return lang2TableMap.at(lang)->entryCount; + } catch(const std::out_of_range&) { + return 0; + } + } + private: // functions const I18nStringTable* getStringsFor(const std::string& lang) const { try { @@ -84,13 +92,17 @@ private: // data const I18nStringTable* enStrings; }; +const I18nStringDB& getStringDb() +{ + static const I18nStringDB stringDb; + return stringDb; +} + } // unnamed namespace std::string getTranslatedString(const std::string& lang, const std::string& key) { - static const I18nStringDB stringDb; - - return stringDb.get(lang, key); + return getStringDb().get(lang, key); } namespace i18n @@ -130,8 +142,17 @@ UserLangPreferences parseUserLanguagePreferences(const std::string& s) std::string selectMostSuitableLanguage(const UserLangPreferences& prefs) { - // TOOD: implement properly - return prefs[0].lang; + std::string bestLangSoFar("en"); + float bestScoreSoFar = 0; + const auto& stringDb = getStringDb(); + for ( const auto& entry : prefs ) { + const float score = entry.preference * stringDb.getStringCount(entry.lang); + if ( score > bestScoreSoFar ) { + bestScoreSoFar = score; + bestLangSoFar = entry.lang; + } + } + return bestLangSoFar; } } // namespace kiwix From 28e9fb48b6f3c49775d1820a487a36bf3a3e4f30 Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Tue, 29 Nov 2022 14:13:29 +0400 Subject: [PATCH 9/9] Properly implemented parseUserLanguagePreferences() --- src/server/i18n.cpp | 54 +++++++++++++++++++++++++++++++-------- test/otherTools.cpp | 61 +++++++++++++++++++++++++++++++++++++++++++++ test/server.cpp | 4 +-- 3 files changed, 107 insertions(+), 12 deletions(-) diff --git a/src/server/i18n.cpp b/src/server/i18n.cpp index 2d9f3bb56..0a2cd8c73 100644 --- a/src/server/i18n.cpp +++ b/src/server/i18n.cpp @@ -123,25 +123,59 @@ std::string ParameterizedMessage::getText(const std::string& lang) const return i18n::expandParameterizedString(lang, msgId, params); } +namespace +{ + +LangPreference parseSingleLanguagePreference(const std::string& s) +{ + const size_t langStart = s.find_first_not_of(" \t\n"); + if ( langStart == std::string::npos ) { + return {"", 0}; + } + + const size_t langEnd = s.find(';', langStart); + if ( langEnd == std::string::npos ) { + return {s.substr(langStart), 1}; + } + + const std::string lang = s.substr(langStart, langEnd - langStart); + // We don't care about langEnd == langStart which will result in an empty + // language name - it will be dismissed by parseUserLanguagePreferences() + + float q = 1.0; + int nCharsScanned; + if ( 1 == sscanf(s.c_str() + langEnd + 1, "q=%f%n", &q, &nCharsScanned) + && langEnd + 1 + nCharsScanned == s.size() ) { + return {lang, q}; + } + + return {"", 0}; +} + +} // unnamed namespace + UserLangPreferences parseUserLanguagePreferences(const std::string& s) { - // TODO: implement properly - const UserLangPreferences defaultPref{{"en", 1}}; - - if ( s.empty() ) - return defaultPref; - - for ( const char c : s ) { - if ( ! std::isalpha(c) ) { - return defaultPref; + UserLangPreferences result; + std::istringstream iss(s); + std::string singleLangPrefStr; + while ( std::getline(iss, singleLangPrefStr, ',') ) + { + const auto langPref = parseSingleLanguagePreference(singleLangPrefStr); + if ( !langPref.lang.empty() && langPref.preference > 0 ) { + result.push_back(langPref); } } - return {{s, 1}}; + return result; } std::string selectMostSuitableLanguage(const UserLangPreferences& prefs) { + if ( prefs.empty() ) { + return "en"; + } + std::string bestLangSoFar("en"); float bestScoreSoFar = 0; const auto& stringDb = getStringDb(); diff --git a/test/otherTools.cpp b/test/otherTools.cpp index 221c2ac30..9b6ce1fac 100644 --- a/test/otherTools.cpp +++ b/test/otherTools.cpp @@ -20,6 +20,7 @@ #include "gtest/gtest.h" #include "../src/tools/otherTools.h" #include "zim/suggestion_iterator.h" +#include "../src/server/i18n.h" #include @@ -172,3 +173,63 @@ R"EXPECTEDJSON([ )EXPECTEDJSON" ); } + +std::string toString(const kiwix::LangPreference& x) +{ + std::ostringstream oss; + oss << "{" << x.lang << ", " << x.preference << "}"; + return oss.str(); +} + +std::string toString(const kiwix::UserLangPreferences& prefs) { + std::ostringstream oss; + for ( const auto& x : prefs ) + oss << toString(x); + return oss.str(); +} + +TEST(I18n, parseUserLanguagePreferences) +{ + EXPECT_EQ(toString(kiwix::parseUserLanguagePreferences("")), + "" + ); + EXPECT_EQ(toString(kiwix::parseUserLanguagePreferences("*")), + "{*, 1}" + ); + EXPECT_EQ(toString(kiwix::parseUserLanguagePreferences("fr")), + "{fr, 1}" + ); + EXPECT_EQ(toString(kiwix::parseUserLanguagePreferences("fr-CH")), + "{fr-CH, 1}" + ); + EXPECT_EQ(toString(kiwix::parseUserLanguagePreferences("fr, en-US")), + "{fr, 1}{en-US, 1}" + ); + EXPECT_EQ(toString(kiwix::parseUserLanguagePreferences("ru;q=0.5")), + "{ru, 0.5}" + ); + EXPECT_EQ(toString(kiwix::parseUserLanguagePreferences("fr-CH,ru;q=0.5")), + "{fr-CH, 1}{ru, 0.5}" + ); + EXPECT_EQ(toString(kiwix::parseUserLanguagePreferences("ru;q=0.5, *;q=0.1")), + "{ru, 0.5}{*, 0.1}" + ); + + // rejected input + EXPECT_EQ(toString(kiwix::parseUserLanguagePreferences("ru;")), + "" + ); + EXPECT_EQ(toString(kiwix::parseUserLanguagePreferences("ru;q")), + "" + ); + EXPECT_EQ(toString(kiwix::parseUserLanguagePreferences("ru;q=")), + "" + ); + EXPECT_EQ(toString(kiwix::parseUserLanguagePreferences("ru;0.8")), + "" + ); + + EXPECT_EQ(toString(kiwix::parseUserLanguagePreferences("fr,ru;0.8,en;q=0.5")), + "{fr, 1}{en, 0.5}" + ); +} diff --git a/test/server.cpp b/test/server.cpp index 7b7548bb4..2c6156c23 100644 --- a/test/server.cpp +++ b/test/server.cpp @@ -1113,8 +1113,8 @@ TEST_F(ServerTest, UserLanguageControl) /*url*/ "/ROOT/content/zimfile/invalid-article", /*Accept-Language:*/ "test;q=0.9, en;q=0.2", /*Request Cookie:*/ NO_COOKIE, - /*Response Set-Cookie:*/ "userlang=en", - /* expected

*/ "Not Found" + /*Response Set-Cookie:*/ "userlang=test", + /* expected

*/ "[I18N TESTING] Content not found, but at least the server is alive" }, };