Preventing confusion of tongues in multizim search

Multizim search requires that all selected books be in the same
language.

No new URL query parameter was introduced for specifying the intended
search language - `books.filter.lang` can be used for that purpose.

The server_search unit-test was updated to use a slightly cheating
library xml file where the language of example.zim was tweaked from "en"
to "eng" in order to match that of zimfile.zim. Note that this change
also removes two other unusual ZIM files, corner_cases.zim and poor.zim,
from the server under test; both were, and still are, served by ServerTest.
This commit is contained in:
Veloman Yunkan 2022-10-21 18:24:27 +04:00
parent cd62b5dd91
commit 9409e8bd91
5 changed files with 42 additions and 5 deletions

View File

@ -211,6 +211,16 @@ void checkBookNumber(const Library::BookIdSet& bookIds, size_t limit) {
}
}
// A set of distinct language values, as returned by getLanguage() on
// library books.
using Languages = std::set<std::string>;

// Returns the distinct languages of the books identified by `bookIds`.
// Each id is looked up in `lib` via getBookById(); whatever that lookup
// does for an unknown id (e.g. throwing) propagates to the caller.
Languages getLanguages(const Library& lib, const Library::BookIdSet& bookIds) {
  Languages result;
  for ( const auto& id : bookIds ) {
    // Bind the book first so that its language is read from an object
    // that is guaranteed to be alive for the duration of the insert.
    const auto& book = lib.getBookById(id);
    result.insert(book.getLanguage());
  }
  return result;
}
struct CustomizedResourceData
{
std::string mimeType;
@ -306,6 +316,10 @@ SearchInfo InternalServer::getSearchInfo(const RequestContext& request) const
{
auto bookIds = selectBooks(request);
checkBookNumber(bookIds.second, m_multizimSearchLimit);
if ( getLanguages(*mp_library, bookIds.second).size() != 1 ) {
throw Error(nonParameterizedMessage("confusion-of-tongues"));
}
auto pattern = request.get_optional_param<std::string>("pattern", "");
GeoQuery geoQuery;

View File

@ -27,4 +27,5 @@
, "home-button-text": "Go to the main page of '{{BOOK_TITLE}}'"
, "random-page-button-text": "Go to a randomly selected page"
, "searchbox-tooltip": "Search '{{BOOK_TITLE}}'"
, "confusion-of-tongues": "Two or more books in different languages would participate in search, which may lead to confusing results."
}

View File

@ -0,0 +1,4 @@
<!--
  Library used by the server_search unit-test.

  NOTE: the language of example.zim is deliberately recorded here as "eng"
  (tweaked from "en") so that it matches the language of zimfile.zim;
  multizim search requires all selected books to be in the same language.
-->
<library version="20110515">
<book id="5dc0b3af-5df2-0925-f0ca-d2bf75e78af6" path="example.zim" title="Wikibooks" description="testZim" language="eng" creator="test" publisher="test" tags="_ftindex:yes;_ftindex:yes;_pictures:yes;_videos:yes;_details:yes" date="2021-04-17" mediaCount="22" size="253" />
<book id="6f1d19d0-633f-087b-fb55-7ac324ff9baf" path="zimfile.zim" title="Ray Charles" description="Wikipedia articles about Ray Charles" language="eng" creator="Wikipedia" publisher="Kiwix" name="wikipedia_en_ray_charles" flavour="_mini" tags="wikipedia;_category:wikipedia;_pictures:no;_videos:no;_details:no;_ftindex:yes" date="2020-03-31" articleCount="129" mediaCount="45" size="555" />
</library>

View File

@ -37,6 +37,7 @@ if gtest_dep.found() and not meson.is_cross_build()
'corner_cases.zim',
'poor.zim',
'library.xml',
'lib_for_server_search_test.xml',
'customized_resources.txt',
'helloworld.txt',
'welcome.html',

View File

@ -6,6 +6,17 @@
#define SERVER_PORT 8101
#include "server_testing_tools.h"
// Fixture for the search test-cases. It differs from ServerTest only in
// the library file from which the ZimFileServer under test is created.
class ServerSearchTest : public ServerTest
{
  // Points zfs1_ at a new ZimFileServer created on SERVER_PORT with the
  // default options and the search-specific library XML file.
  void SetUp() override {
    const char* const libraryXmlPath = "./test/lib_for_server_search_test.xml";
    zfs1_.reset(new ZimFileServer(SERVER_PORT,
                                  ZimFileServer::DEFAULT_OPTIONS,
                                  libraryXmlPath));
  }
};
std::string makeSearchResultsHtml(const std::string& pattern,
const std::string& header,
const std::string& results,
@ -555,7 +566,7 @@ const std::vector<SearchResult> LARGE_SEARCH_RESULTS = {
//
// In order to be able to share the same expected output data
// LARGE_SEARCH_RESULTS between multiple build platforms and test-points
// of the ServerTest.searchResults test-case
// of the ServerSearchTest.searchResults test-case
//
// 1. Snippets are excluded from the plain-text comparison of actual and
// expected HTML strings. This is done with the help of the
@ -916,7 +927,7 @@ struct TestData
}
};
TEST_F(ServerTest, searchResults)
TEST_F(ServerSearchTest, searchResults)
{
const TestData testData[] = {
{
@ -1340,14 +1351,12 @@ TEST_F(ServerTest, searchResults)
/* pagination */ {}
},
// Only RayCharles is in English.
// [TODO] We should extend our test data to have another zim file in english returning results.
{
/* query */ "pattern=travel"
"&books.filter.lang=eng",
/* start */ 0,
/* resultsPerPage */ 10,
/* totalResultCount */ 1,
/* totalResultCount */ 2,
/* firstResultIndex */ 1,
/* results */ {
SEARCH_RESULT(
@ -1357,6 +1366,14 @@ TEST_F(ServerTest, searchResults)
/*bookTitle*/ "Ray Charles",
/*wordCount*/ "204"
),
SEARCH_RESULT(
/*link*/ "/ROOT/content/example/Wikibooks.html",
/*title*/ "Wikibooks",
/*snippet*/ R"SNIPPET(...<b>Travel</b> guide Wikidata Knowledge database Commons Media repository Meta Coordination MediaWiki MediaWiki software Phabricator MediaWiki bug tracker Wikimedia Labs MediaWiki development The Wikimedia Foundation is a non-profit organization that depends on your voluntarism and donations to operate. If you find Wikibooks or other projects hosted by the Wikimedia Foundation useful, please volunteer or make a donation. Your donations primarily helps to purchase server equipment, launch new projects......)SNIPPET",
/*bookTitle*/ "Wikibooks",
/*wordCount*/ "538"
)
},
/* pagination */ {}
},