Support for multilang ZIMs

This commit is contained in:
Veloman Yunkan 2023-02-27 17:47:37 +04:00 committed by Matthieu Gautier
parent 30725136c8
commit 5bda7fd45c
4 changed files with 39 additions and 19 deletions

View File

@ -373,12 +373,28 @@ std::vector<std::string> Library::getBookPropValueSet(BookStrPropMemFn p) const
// NOTE(review): this hunk is shown as a side-by-side diff. On lines that carry
// two columns the left one appears to be the pre-commit text and the right one
// the post-commit text; single-column lines exist only in the post-commit version.
//
// Pre-commit:  returned getBookPropValueSet(&Book::getLanguage) directly.
// Post-commit: returns the key (`.first`) of every entry produced by
//              getBooksLanguagesWithCounts(), in that container's iteration
//              order, so this list and the per-language counts come from the
//              same computation.
std::vector<std::string> Library::getBooksLanguages() const std::vector<std::string> Library::getBooksLanguages() const
{ {
return getBookPropValueSet(&Book::getLanguage); std::vector<std::string> langs;
// Keep only the language code of each (language, count) entry.
for ( const auto& langAndCount : getBooksLanguagesWithCounts() ) {
langs.push_back(langAndCount.first);
}
return langs;
} }
// NOTE(review): this hunk is shown as a side-by-side diff. On lines that carry
// two columns the left one appears to be the pre-commit text and the right one
// the post-commit text; single-column lines exist only in the post-commit version.
//
// Pre-commit:  returned getBookAttributeCounts(&Book::getLanguage) directly.
// Post-commit: builds the counts here. Each book's getLanguage() value is
//              treated as a comma-separated list, and the book is counted once
//              for every element of that list.
Library::AttributeCounts Library::getBooksLanguagesWithCounts() const Library::AttributeCounts Library::getBooksLanguagesWithCounts() const
{ {
// The post-commit version holds m_mutex until the function returns.
return getBookAttributeCounts(&Book::getLanguage); std::lock_guard<std::mutex> lock(m_mutex);
AttributeCounts langsWithCounts;
for (const auto& pair: mp_impl->m_books) {
const auto& book = pair.second;
// Only books whose getOrigId() is empty contribute to the counts.
if (book.getOrigId().empty()) {
const std::string commaSeparatedLangList = book.getLanguage();
// NOTE(review): how kiwix::split treats empty tokens or surrounding
// whitespace is not visible here - confirm before relying on it.
for ( const auto& lang : kiwix::split(commaSeparatedLangList, ",") ) {
// Presumably AttributeCounts is a map-like type whose operator[] starts a
// missing key at zero - TODO confirm against its declaration.
++langsWithCounts[lang];
}
}
}
return langsWithCounts;
} }
std::vector<std::string> Library::getBooksCategories() const std::vector<std::string> Library::getBooksCategories() const

View File

@ -23,7 +23,7 @@
url="https://github.com/kiwix/libkiwix/raw/master/test/data/zimfile.zim" url="https://github.com/kiwix/libkiwix/raw/master/test/data/zimfile.zim"
title="Ray (uncategorized) Charles" title="Ray (uncategorized) Charles"
description="No category is assigned to this library entry." description="No category is assigned to this library entry."
language="rus" language="rus,eng"
creator="Wikipedia" creator="Wikipedia"
publisher="Kiwix" publisher="Kiwix"
date="2020-03-31" date="2020-03-31"

View File

@ -69,7 +69,7 @@ const char * sampleOpdsStream = R"(
<id>urn:uuid:0ea1cde6-441d-6c58-f2c7-21c2838e659f</id> <id>urn:uuid:0ea1cde6-441d-6c58-f2c7-21c2838e659f</id>
<icon>/meta?name=favicon&amp;content=wikiquote_fr_all_nopic_2019-06</icon> <icon>/meta?name=favicon&amp;content=wikiquote_fr_all_nopic_2019-06</icon>
<updated>2019-06-05T00:00::00:Z</updated> <updated>2019-06-05T00:00::00:Z</updated>
<language>fra</language> <language>fra,ita</language>
<summary>Une page de Wikiquote, le recueil des citations libres.</summary> <summary>Une page de Wikiquote, le recueil des citations libres.</summary>
<category>category_defined_via_category_element_only</category> <category>category_defined_via_category_element_only</category>
<tags>wikiquote;nopic</tags> <tags>wikiquote;nopic</tags>
@ -199,7 +199,7 @@ const char sampleLibraryXML[] = R"(
url="https://github.com/kiwix/libkiwix/raw/master/test/data/zimfile.zim" url="https://github.com/kiwix/libkiwix/raw/master/test/data/zimfile.zim"
title="Ray Charles" title="Ray Charles"
description="Wikipedia articles about Ray Charles" description="Wikipedia articles about Ray Charles"
language="eng" language="eng,spa"
creator="Wikipedia" creator="Wikipedia"
publisher="Kiwix" publisher="Kiwix"
date="2020-03-31" date="2020-03-31"
@ -344,7 +344,7 @@ TEST_F(LibraryTest, sanityCheck)
{ {
EXPECT_EQ(lib.getBookCount(true, true), 12U); EXPECT_EQ(lib.getBookCount(true, true), 12U);
EXPECT_EQ(lib.getBooksLanguages(), EXPECT_EQ(lib.getBooksLanguages(),
std::vector<std::string>({"deu", "eng", "fra"}) std::vector<std::string>({"deu", "eng", "fra", "ita", "spa"})
); );
EXPECT_EQ(lib.getBooksCreators(), std::vector<std::string>({ EXPECT_EQ(lib.getBooksCreators(), std::vector<std::string>({
"Islam Stack Exchange", "Islam Stack Exchange",

View File

@ -140,7 +140,7 @@ std::string maskVariableOPDSFeedData(std::string s)
"raycharles_uncategorized",\ "raycharles_uncategorized",\
"Ray (uncategorized) Charles",\ "Ray (uncategorized) Charles",\
"No category is assigned to this library entry.",\ "No category is assigned to this library entry.",\
"rus",\ "rus,eng",\
"wikipedia_ru_ray_charles",\ "wikipedia_ru_ray_charles",\
"",\ "",\
"public_tag_with_a_value:value_of_a_public_tag;_private_tag_with_a_value:value_of_a_private_tag;wikipedia;_pictures:no;_videos:no;_details:no",\ "public_tag_with_a_value:value_of_a_public_tag;_private_tag_with_a_value:value_of_a_private_tag;wikipedia;_pictures:no;_videos:no;_details:no",\
@ -327,10 +327,11 @@ TEST_F(LibraryServerTest, catalog_search_by_language)
" <id>12345678-90ab-cdef-1234-567890abcdef</id>\n" " <id>12345678-90ab-cdef-1234-567890abcdef</id>\n"
" <title>Filtered zims (lang=eng)</title>\n" " <title>Filtered zims (lang=eng)</title>\n"
" <updated>YYYY-MM-DDThh:mm:ssZ</updated>\n" " <updated>YYYY-MM-DDThh:mm:ssZ</updated>\n"
" <totalResults>1</totalResults>\n" " <totalResults>2</totalResults>\n"
" <startIndex>0</startIndex>\n" " <startIndex>0</startIndex>\n"
" <itemsPerPage>1</itemsPerPage>\n" " <itemsPerPage>2</itemsPerPage>\n"
CATALOG_LINK_TAGS CATALOG_LINK_TAGS
UNCATEGORIZED_RAY_CHARLES_CATALOG_ENTRY
RAY_CHARLES_CATALOG_ENTRY RAY_CHARLES_CATALOG_ENTRY
"</feed>\n" "</feed>\n"
); );
@ -344,12 +345,13 @@ TEST_F(LibraryServerTest, catalog_search_by_language)
" <id>12345678-90ab-cdef-1234-567890abcdef</id>\n" " <id>12345678-90ab-cdef-1234-567890abcdef</id>\n"
" <title>Filtered zims (lang=eng%2Cfra)</title>\n" " <title>Filtered zims (lang=eng%2Cfra)</title>\n"
" <updated>YYYY-MM-DDThh:mm:ssZ</updated>\n" " <updated>YYYY-MM-DDThh:mm:ssZ</updated>\n"
" <totalResults>2</totalResults>\n" " <totalResults>3</totalResults>\n"
" <startIndex>0</startIndex>\n" " <startIndex>0</startIndex>\n"
" <itemsPerPage>2</itemsPerPage>\n" " <itemsPerPage>3</itemsPerPage>\n"
CATALOG_LINK_TAGS CATALOG_LINK_TAGS
RAY_CHARLES_CATALOG_ENTRY
CHARLES_RAY_CATALOG_ENTRY CHARLES_RAY_CATALOG_ENTRY
UNCATEGORIZED_RAY_CHARLES_CATALOG_ENTRY
RAY_CHARLES_CATALOG_ENTRY
"</feed>\n" "</feed>\n"
); );
} }
@ -582,7 +584,7 @@ TEST_F(LibraryServerTest, catalog_v2_languages)
<entry> <entry>
<title>English</title> <title>English</title>
<dc:language>eng</dc:language> <dc:language>eng</dc:language>
<thr:count>1</thr:count> <thr:count>2</thr:count>
<link rel="subsection" <link rel="subsection"
href="/ROOT%23%3F/catalog/v2/entries?lang=eng" href="/ROOT%23%3F/catalog/v2/entries?lang=eng"
type="application/atom+xml;profile=opds-catalog;kind=acquisition"/> type="application/atom+xml;profile=opds-catalog;kind=acquisition"/>
@ -764,9 +766,10 @@ TEST_F(LibraryServerTest, catalog_v2_entries_filtered_by_language)
CATALOG_V2_ENTRIES_PREAMBLE("?lang=eng") CATALOG_V2_ENTRIES_PREAMBLE("?lang=eng")
" <title>Filtered Entries (lang=eng)</title>\n" " <title>Filtered Entries (lang=eng)</title>\n"
" <updated>YYYY-MM-DDThh:mm:ssZ</updated>\n" " <updated>YYYY-MM-DDThh:mm:ssZ</updated>\n"
" <totalResults>1</totalResults>\n" " <totalResults>2</totalResults>\n"
" <startIndex>0</startIndex>\n" " <startIndex>0</startIndex>\n"
" <itemsPerPage>1</itemsPerPage>\n" " <itemsPerPage>2</itemsPerPage>\n"
UNCATEGORIZED_RAY_CHARLES_CATALOG_ENTRY
RAY_CHARLES_CATALOG_ENTRY RAY_CHARLES_CATALOG_ENTRY
"</feed>\n" "</feed>\n"
); );
@ -779,11 +782,12 @@ TEST_F(LibraryServerTest, catalog_v2_entries_filtered_by_language)
CATALOG_V2_ENTRIES_PREAMBLE("?lang=eng%2Cfra") CATALOG_V2_ENTRIES_PREAMBLE("?lang=eng%2Cfra")
" <title>Filtered Entries (lang=eng%2Cfra)</title>\n" " <title>Filtered Entries (lang=eng%2Cfra)</title>\n"
" <updated>YYYY-MM-DDThh:mm:ssZ</updated>\n" " <updated>YYYY-MM-DDThh:mm:ssZ</updated>\n"
" <totalResults>2</totalResults>\n" " <totalResults>3</totalResults>\n"
" <startIndex>0</startIndex>\n" " <startIndex>0</startIndex>\n"
" <itemsPerPage>2</itemsPerPage>\n" " <itemsPerPage>3</itemsPerPage>\n"
RAY_CHARLES_CATALOG_ENTRY
CHARLES_RAY_CATALOG_ENTRY CHARLES_RAY_CATALOG_ENTRY
UNCATEGORIZED_RAY_CHARLES_CATALOG_ENTRY
RAY_CHARLES_CATALOG_ENTRY
"</feed>\n" "</feed>\n"
); );
} }
@ -874,8 +878,8 @@ TEST_F(LibraryServerTest, catalog_search_includes_public_tags)
// prefix search works on tag names // prefix search works on tag names
EXPECT_SEARCH_RESULTS("public_tag", EXPECT_SEARCH_RESULTS("public_tag",
2, 2,
RAY_CHARLES_CATALOG_ENTRY
UNCATEGORIZED_RAY_CHARLES_CATALOG_ENTRY UNCATEGORIZED_RAY_CHARLES_CATALOG_ENTRY
RAY_CHARLES_CATALOG_ENTRY
); );
EXPECT_SEARCH_RESULTS("value_of_a_public_tag", EXPECT_SEARCH_RESULTS("value_of_a_public_tag",