mirror of https://github.com/kiwix/libkiwix.git
Merge pull request #904 from kiwix/support_for_multilang_zims
This commit is contained in:
commit
88de978a9c
|
@ -79,7 +79,9 @@ class Book
|
|||
// Inline read accessors of Book; each one returns the member named in its body.
// Returns the m_pathValid flag.
bool isPathValid() const { return m_pathValid; }
|
||||
// Returns the stored title (m_title).
const std::string& getTitle() const { return m_title; }
|
||||
// Returns the stored description (m_description).
const std::string& getDescription() const { return m_description; }
|
||||
// NOTE(review): this text is a rendered diff without +/- markers; the plain
// getLanguage() line below looks like the pre-change declaration and the
// DEPRECATED one after it like its replacement — confirm against the repository.
const std::string& getLanguage() const { return m_language; }
|
||||
// Same value as getCommaSeparatedLanguages(); marked with the DEPRECATED macro.
DEPRECATED const std::string& getLanguage() const { return m_language; }
|
||||
// Returns the raw language string (m_language) unchanged. Book::getLanguages()
// splits this same string on ",", so it may hold several language codes.
const std::string& getCommaSeparatedLanguages() const { return m_language; }
|
||||
// Returns the languages as a vector; only declared here, defined out of line.
const std::vector<std::string> getLanguages() const;
|
||||
// Returns the stored creator (m_creator).
const std::string& getCreator() const { return m_creator; }
|
||||
// Returns the stored publisher (m_publisher).
const std::string& getPublisher() const { return m_publisher; }
|
||||
// Returns the stored date string (m_date).
const std::string& getDate() const { return m_date; }
|
||||
|
|
|
@ -286,4 +286,9 @@ std::string Book::getCategoryFromTags() const
|
|||
}
|
||||
}
|
||||
|
||||
// Returns the book's languages as a vector, obtained by passing the stored
// language string (m_language) and the separator "," to kiwix::split.
// A new vector is built on every call.
const std::vector<std::string> Book::getLanguages() const
|
||||
{
|
||||
return kiwix::split(m_language, ",");
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -373,12 +373,27 @@ std::vector<std::string> Library::getBookPropValueSet(BookStrPropMemFn p) const
|
|||
|
||||
// Returns the language codes present in the library, taken from the keys of
// getBooksLanguagesWithCounts().
// NOTE(review): this text is a rendered diff without +/- markers; the first
// return statement below looks like the removed pre-change body and the loop
// after it like the added replacement — confirm against the repository.
std::vector<std::string> Library::getBooksLanguages() const
|
||||
{
|
||||
return getBookPropValueSet(&Book::getLanguage);
|
||||
std::vector<std::string> langs;
|
||||
// Keep only the language key of each (language, count) entry.
for ( const auto& langAndCount : getBooksLanguagesWithCounts() ) {
|
||||
langs.push_back(langAndCount.first);
|
||||
}
|
||||
return langs;
|
||||
}
|
||||
|
||||
// Returns, for each language, the number of books that list it.
// Only books whose getOrigId() is empty are counted, and a book listing
// several languages adds one to each of them.
// NOTE(review): this text is a rendered diff without +/- markers; the first
// return statement below looks like the removed pre-change body and the code
// after it like the added replacement — confirm against the repository.
Library::AttributeCounts Library::getBooksLanguagesWithCounts() const
|
||||
{
|
||||
return getBookAttributeCounts(&Book::getLanguage);
|
||||
// Hold m_mutex while iterating over mp_impl->m_books.
std::lock_guard<std::mutex> lock(m_mutex);
|
||||
AttributeCounts langsWithCounts;
|
||||
|
||||
for (const auto& pair: mp_impl->m_books) {
|
||||
const auto& book = pair.second;
|
||||
// Skip books that carry a non-empty original id.
if (book.getOrigId().empty()) {
|
||||
for ( const auto& lang : book.getLanguages() ) {
|
||||
++langsWithCounts[lang];
|
||||
}
|
||||
}
|
||||
}
|
||||
return langsWithCounts;
|
||||
}
|
||||
|
||||
std::vector<std::string> Library::getBooksCategories() const
|
||||
|
@ -440,12 +455,14 @@ void Library::updateBookDB(const Book& book)
|
|||
{
|
||||
Xapian::Stem stemmer;
|
||||
Xapian::TermGenerator indexer;
|
||||
const std::string lang = book.getLanguage();
|
||||
const auto langs = book.getLanguages();
|
||||
if ( langs.size() == 1 ) {
|
||||
try {
|
||||
stemmer = Xapian::Stem(iso639_3ToXapian(lang));
|
||||
stemmer = Xapian::Stem(iso639_3ToXapian(langs[0]));
|
||||
indexer.set_stemmer(stemmer);
|
||||
indexer.set_stemming_strategy(Xapian::TermGenerator::STEM_SOME);
|
||||
} catch (...) {}
|
||||
}
|
||||
Xapian::Document doc;
|
||||
indexer.set_document(doc);
|
||||
|
||||
|
@ -460,7 +477,9 @@ void Library::updateBookDB(const Book& book)
|
|||
// Index all fields for field-based search
|
||||
indexer.index_text(title, 1, "S");
|
||||
indexer.index_text(desc, 1, "XD");
|
||||
for ( const auto& lang : langs ) {
|
||||
indexer.index_text(lang, 1, "L");
|
||||
}
|
||||
indexer.index_text(normalizeText(book.getCreator()), 1, "A");
|
||||
indexer.index_text(normalizeText(book.getPublisher()), 1, "XP");
|
||||
indexer.index_text(normalizeText(book.getName()), 1, "XN");
|
||||
|
|
|
@ -54,7 +54,7 @@ void LibXMLDumper::handleBook(Book book, pugi::xml_node root_node) {
|
|||
if (book.getOrigId().empty()) {
|
||||
ADD_ATTR_NOT_EMPTY(entry_node, "title", book.getTitle());
|
||||
ADD_ATTR_NOT_EMPTY(entry_node, "description", book.getDescription());
|
||||
ADD_ATTR_NOT_EMPTY(entry_node, "language", book.getLanguage());
|
||||
ADD_ATTR_NOT_EMPTY(entry_node, "language", book.getCommaSeparatedLanguages());
|
||||
ADD_ATTR_NOT_EMPTY(entry_node, "creator", book.getCreator());
|
||||
ADD_ATTR_NOT_EMPTY(entry_node, "publisher", book.getPublisher());
|
||||
ADD_ATTR_NOT_EMPTY(entry_node, "name", book.getName());
|
||||
|
@ -97,7 +97,7 @@ void LibXMLDumper::handleBookmark(Bookmark bookmark, pugi::xml_node root_node) {
|
|||
auto book = library->getBookByIdThreadSafe(bookmark.getBookId());
|
||||
ADD_TEXT_ENTRY(book_node, "id", book.getId());
|
||||
ADD_TEXT_ENTRY(book_node, "title", book.getTitle());
|
||||
ADD_TEXT_ENTRY(book_node, "language", book.getLanguage());
|
||||
ADD_TEXT_ENTRY(book_node, "language", book.getCommaSeparatedLanguages());
|
||||
ADD_TEXT_ENTRY(book_node, "date", book.getDate());
|
||||
} catch (...) {
|
||||
ADD_TEXT_ENTRY(book_node, "id", bookmark.getBookId());
|
||||
|
|
|
@ -238,7 +238,7 @@ std::string Manager::addBookFromPathAndGetId(const std::string& pathToOpen,
|
|||
}
|
||||
|
||||
if (!checkMetaData
|
||||
|| (checkMetaData && !book.getTitle().empty() && !book.getLanguage().empty()
|
||||
|| (!book.getTitle().empty() && !book.getLanguages().empty()
|
||||
&& !book.getDate().empty())) {
|
||||
book.setUrl(url);
|
||||
manipulator->addBookToLibrary(book);
|
||||
|
|
|
@ -81,7 +81,7 @@ std::string fullEntryXML(const Book& book, const std::string& rootLocation, cons
|
|||
{"name", book.getName()},
|
||||
{"title", book.getTitle()},
|
||||
{"description", book.getDescription()},
|
||||
{"language", book.getLanguage()},
|
||||
{"language", book.getCommaSeparatedLanguages()},
|
||||
{"content_id", urlEncode(contentId)},
|
||||
{"updated", bookDate}, // XXX: this should be the entry update datetime
|
||||
{"book_date", bookDate},
|
||||
|
|
|
@ -223,7 +223,8 @@ typedef std::set<std::string> Languages;
|
|||
// Collects the languages of the books identified by bookIds into a Languages
// set (a std::set<std::string>), so each language appears at most once.
// NOTE(review): this text is a rendered diff without +/- markers; the
// single-value insert of getLanguage() looks like the removed line and the
// two lines after it like the added replacement — confirm against the repository.
Languages getLanguages(const Library& lib, const Library::BookIdSet& bookIds) {
|
||||
Languages langs;
|
||||
for ( const auto& b : bookIds ) {
|
||||
langs.insert(lib.getBookById(b).getLanguage());
|
||||
// Insert every language the book lists, not just one string.
const auto bookLangs = lib.getBookById(b).getLanguages();
|
||||
langs.insert(bookLangs.begin(), bookLangs.end());
|
||||
}
|
||||
return langs;
|
||||
}
|
||||
|
|
|
@ -58,60 +58,53 @@ TEST(BookTest, updateFromXMLTest)
|
|||
EXPECT_EQ(defaultIllustration->url, "http://who.org/zara.fav");
|
||||
}
|
||||
|
||||
// File-local helpers for the Book tests.
namespace
|
||||
{
|
||||
|
||||
// Builds a kiwix::Book from a string of XML attributes.
// attr    - inserted verbatim into "<book " + attr + "></book>".
// baseDir - forwarded as the second argument of Book::updateFromXml;
//           defaults to the empty string.
// Returns the book populated by updateFromXml.
kiwix::Book makeBook(const std::string& attr, const std::string& baseDir="")
|
||||
{
|
||||
const XMLDoc xml("<book " + attr + "></book>");
|
||||
kiwix::Book book;
|
||||
book.updateFromXml(xml.child("book"), baseDir);
|
||||
return book;
|
||||
}
|
||||
|
||||
} // unnamed namespace
|
||||
|
||||
// Checks which value Book::getCategory() reports after updateFromXml when the
// category comes from a "_category:" tag, from the "category" attribute, or
// from both. The expectations show that the attribute wins when both are
// present, whichever of the two is written first.
// NOTE(review): this text is a rendered diff without +/- markers; in each case
// the XMLDoc / updateFromXml lines look like the removed pre-change code and
// the makeBook(...) call like the added replacement — confirm against the repository.
TEST(BookTest, updateFromXMLCategoryHandlingTest)
|
||||
{
|
||||
// Case: category supplied only through a "_category:" tag.
{
|
||||
const XMLDoc xml(R"(
|
||||
<book id="abcd"
|
||||
const kiwix::Book book = makeBook(R"(
|
||||
id="abcd"
|
||||
tags="_category:category_defined_via_tags_only"
|
||||
>
|
||||
</book>
|
||||
)");
|
||||
|
||||
kiwix::Book book;
|
||||
book.updateFromXml(xml.child("book"), "");
|
||||
|
||||
EXPECT_EQ(book.getCategory(), "category_defined_via_tags_only");
|
||||
}
|
||||
// Case: category supplied only through the "category" attribute.
{
|
||||
const XMLDoc xml(R"(
|
||||
<book id="abcd"
|
||||
const kiwix::Book book = makeBook(R"(
|
||||
id="abcd"
|
||||
category="category_defined_via_attribute_only"
|
||||
>
|
||||
</book>
|
||||
)");
|
||||
|
||||
kiwix::Book book;
|
||||
book.updateFromXml(xml.child("book"), "");
|
||||
|
||||
EXPECT_EQ(book.getCategory(), "category_defined_via_attribute_only");
|
||||
}
|
||||
// Case: both present, "category" attribute written before "tags".
{
|
||||
const XMLDoc xml(R"(
|
||||
<book id="abcd"
|
||||
const kiwix::Book book = makeBook(R"(
|
||||
id="abcd"
|
||||
category="category_attribute_overrides_tags"
|
||||
tags="_category:tags_override_category_attribute"
|
||||
>
|
||||
</book>
|
||||
)");
|
||||
|
||||
kiwix::Book book;
|
||||
book.updateFromXml(xml.child("book"), "");
|
||||
|
||||
EXPECT_EQ(book.getCategory(), "category_attribute_overrides_tags");
|
||||
}
|
||||
// Case: both present, "tags" written before the "category" attribute.
{
|
||||
const XMLDoc xml(R"(
|
||||
<book id="abcd"
|
||||
const kiwix::Book book = makeBook(R"(
|
||||
id="abcd"
|
||||
tags="_category:tags_override_category_attribute"
|
||||
category="category_attribute_overrides_tags"
|
||||
>
|
||||
</book>
|
||||
)");
|
||||
|
||||
kiwix::Book book;
|
||||
book.updateFromXml(xml.child("book"), "");
|
||||
|
||||
EXPECT_EQ(book.getCategory(), "category_attribute_overrides_tags");
|
||||
}
|
||||
}
|
||||
|
@ -126,10 +119,7 @@ TEST(BookTest, setTagsDoesntAffectCategory)
|
|||
|
||||
TEST(BookTest, updateCopiesCategory)
|
||||
{
|
||||
const XMLDoc xml(R"(<book id="abcd" category="ted"></book>)");
|
||||
|
||||
kiwix::Book book;
|
||||
book.updateFromXml(xml.child("book"), "");
|
||||
const kiwix::Book book = makeBook(R"(id="abcd" category="ted")");
|
||||
|
||||
kiwix::Book newBook;
|
||||
newBook.setId("abcd");
|
||||
|
@ -140,20 +130,15 @@ TEST(BookTest, updateCopiesCategory)
|
|||
|
||||
TEST(BookTest, updateTest)
|
||||
{
|
||||
const XMLDoc xml(R"(
|
||||
<book id="xyz"
|
||||
kiwix::Book book = makeBook(R"(
|
||||
id="xyz"
|
||||
path="/home/user/Downloads/skin-of-color-society_en_all_2019-11.zim"
|
||||
url="book-url"
|
||||
name="skin-of-color-society_en_all"
|
||||
tags="youtube;_videos:yes;_ftindex:yes;_ftindex:yes;_pictures:yes;_details:yes"
|
||||
favicon="Ym9vay1mYXZpY29u"
|
||||
faviconMimeType="book-favicon-mimetype"
|
||||
>
|
||||
</book>
|
||||
)");
|
||||
|
||||
kiwix::Book book;
|
||||
book.updateFromXml(xml.child("book"), "/data/zim");
|
||||
)", "/data/zim");
|
||||
|
||||
book.setReadOnly(false);
|
||||
book.setPathValid(true);
|
||||
|
@ -210,3 +195,22 @@ TEST(BookTest, getHumanReadableIdFromPath)
|
|||
#endif
|
||||
EXPECT_EQ("3plus2", path2HumanReadableId("3+2.zim"));
|
||||
}
|
||||
|
||||
// Checks that a book built from a "language" XML attribute reports it both as
// the original comma-separated string and as a vector of individual codes.
TEST(BookTest, getLanguages)
|
||||
{
|
||||
typedef std::vector<std::string> Langs;
|
||||
|
||||
// Single language: the vector holds exactly that one code.
{
|
||||
const kiwix::Book book = makeBook(R"(id="abcd" language="fra")");
|
||||
|
||||
EXPECT_EQ(book.getCommaSeparatedLanguages(), "fra");
|
||||
EXPECT_EQ(book.getLanguages(), Langs{ "fra" });
|
||||
}
|
||||
|
||||
// Several languages: the vector keeps them in the order they were written.
{
|
||||
const kiwix::Book book = makeBook(R"(id="abcd" language="eng,ong,ing")");
|
||||
|
||||
EXPECT_EQ(book.getCommaSeparatedLanguages(), "eng,ong,ing");
|
||||
EXPECT_EQ(book.getLanguages(), Langs({ "eng", "ong", "ing" }));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -23,7 +23,7 @@
|
|||
url="https://github.com/kiwix/libkiwix/raw/master/test/data/zimfile.zim"
|
||||
title="Ray (uncategorized) Charles"
|
||||
description="No category is assigned to this library entry."
|
||||
language="rus"
|
||||
language="rus,eng"
|
||||
creator="Wikipedia"
|
||||
publisher="Kiwix"
|
||||
date="2020-03-31"
|
||||
|
|
|
@ -69,7 +69,7 @@ const char * sampleOpdsStream = R"(
|
|||
<id>urn:uuid:0ea1cde6-441d-6c58-f2c7-21c2838e659f</id>
|
||||
<icon>/meta?name=favicon&content=wikiquote_fr_all_nopic_2019-06</icon>
|
||||
<updated>2019-06-05T00:00::00:Z</updated>
|
||||
<language>fra</language>
|
||||
<language>fra,ita</language>
|
||||
<summary>Une page de Wikiquote, le recueil des citations libres.</summary>
|
||||
<category>category_defined_via_category_element_only</category>
|
||||
<tags>wikiquote;nopic</tags>
|
||||
|
@ -199,7 +199,7 @@ const char sampleLibraryXML[] = R"(
|
|||
url="https://github.com/kiwix/libkiwix/raw/master/test/data/zimfile.zim"
|
||||
title="Ray Charles"
|
||||
description="Wikipedia articles about Ray Charles"
|
||||
language="eng"
|
||||
language="eng,spa"
|
||||
creator="Wikipedia"
|
||||
publisher="Kiwix"
|
||||
date="2020-03-31"
|
||||
|
@ -234,6 +234,8 @@ const char sampleLibraryXML[] = R"(
|
|||
namespace
|
||||
{
|
||||
|
||||
typedef std::vector<std::string> Langs;
|
||||
|
||||
TEST(LibraryOpdsImportTest, allInOne)
|
||||
{
|
||||
kiwix::Library lib;
|
||||
|
@ -248,7 +250,8 @@ TEST(LibraryOpdsImportTest, allInOne)
|
|||
EXPECT_EQ(book1.getTitle(), "Encyclopédie de la Tunisie");
|
||||
EXPECT_EQ(book1.getName(), "wikipedia_fr_tunisie_novid_2018-10");
|
||||
EXPECT_EQ(book1.getFlavour(), "unforgettable");
|
||||
EXPECT_EQ(book1.getLanguage(), "fra");
|
||||
EXPECT_EQ(book1.getLanguages(), Langs{ "fra" });
|
||||
EXPECT_EQ(book1.getCommaSeparatedLanguages(), "fra");
|
||||
EXPECT_EQ(book1.getDate(), "8 Oct 2018");
|
||||
EXPECT_EQ(book1.getDescription(), "Le meilleur de Wikipédia sur la Tunisie");
|
||||
EXPECT_EQ(book1.getCreator(), "Wikipedia");
|
||||
|
@ -272,7 +275,8 @@ TEST(LibraryOpdsImportTest, allInOne)
|
|||
EXPECT_EQ(book2.getTitle(), "TED talks - Business");
|
||||
EXPECT_EQ(book2.getName(), "");
|
||||
EXPECT_EQ(book2.getFlavour(), "");
|
||||
EXPECT_EQ(book2.getLanguage(), "eng");
|
||||
EXPECT_EQ(book2.getLanguages(), Langs{ "eng" });
|
||||
EXPECT_EQ(book2.getCommaSeparatedLanguages(), "eng");
|
||||
EXPECT_EQ(book2.getDate(), "2018-07-23");
|
||||
EXPECT_EQ(book2.getDescription(), "Ideas worth spreading");
|
||||
EXPECT_EQ(book2.getCreator(), "TED");
|
||||
|
@ -344,7 +348,7 @@ TEST_F(LibraryTest, sanityCheck)
|
|||
{
|
||||
EXPECT_EQ(lib.getBookCount(true, true), 12U);
|
||||
EXPECT_EQ(lib.getBooksLanguages(),
|
||||
std::vector<std::string>({"deu", "eng", "fra"})
|
||||
std::vector<std::string>({"deu", "eng", "fra", "ita", "spa"})
|
||||
);
|
||||
EXPECT_EQ(lib.getBooksCreators(), std::vector<std::string>({
|
||||
"Islam Stack Exchange",
|
||||
|
|
|
@ -140,7 +140,7 @@ std::string maskVariableOPDSFeedData(std::string s)
|
|||
"raycharles_uncategorized",\
|
||||
"Ray (uncategorized) Charles",\
|
||||
"No category is assigned to this library entry.",\
|
||||
"rus",\
|
||||
"rus,eng",\
|
||||
"wikipedia_ru_ray_charles",\
|
||||
"",\
|
||||
"public_tag_with_a_value:value_of_a_public_tag;_private_tag_with_a_value:value_of_a_private_tag;wikipedia;_pictures:no;_videos:no;_details:no",\
|
||||
|
@ -327,10 +327,11 @@ TEST_F(LibraryServerTest, catalog_search_by_language)
|
|||
" <id>12345678-90ab-cdef-1234-567890abcdef</id>\n"
|
||||
" <title>Filtered zims (lang=eng)</title>\n"
|
||||
" <updated>YYYY-MM-DDThh:mm:ssZ</updated>\n"
|
||||
" <totalResults>1</totalResults>\n"
|
||||
" <totalResults>2</totalResults>\n"
|
||||
" <startIndex>0</startIndex>\n"
|
||||
" <itemsPerPage>1</itemsPerPage>\n"
|
||||
" <itemsPerPage>2</itemsPerPage>\n"
|
||||
CATALOG_LINK_TAGS
|
||||
UNCATEGORIZED_RAY_CHARLES_CATALOG_ENTRY
|
||||
RAY_CHARLES_CATALOG_ENTRY
|
||||
"</feed>\n"
|
||||
);
|
||||
|
@ -344,12 +345,13 @@ TEST_F(LibraryServerTest, catalog_search_by_language)
|
|||
" <id>12345678-90ab-cdef-1234-567890abcdef</id>\n"
|
||||
" <title>Filtered zims (lang=eng%2Cfra)</title>\n"
|
||||
" <updated>YYYY-MM-DDThh:mm:ssZ</updated>\n"
|
||||
" <totalResults>2</totalResults>\n"
|
||||
" <totalResults>3</totalResults>\n"
|
||||
" <startIndex>0</startIndex>\n"
|
||||
" <itemsPerPage>2</itemsPerPage>\n"
|
||||
" <itemsPerPage>3</itemsPerPage>\n"
|
||||
CATALOG_LINK_TAGS
|
||||
RAY_CHARLES_CATALOG_ENTRY
|
||||
CHARLES_RAY_CATALOG_ENTRY
|
||||
UNCATEGORIZED_RAY_CHARLES_CATALOG_ENTRY
|
||||
RAY_CHARLES_CATALOG_ENTRY
|
||||
"</feed>\n"
|
||||
);
|
||||
}
|
||||
|
@ -582,7 +584,7 @@ TEST_F(LibraryServerTest, catalog_v2_languages)
|
|||
<entry>
|
||||
<title>English</title>
|
||||
<dc:language>eng</dc:language>
|
||||
<thr:count>1</thr:count>
|
||||
<thr:count>2</thr:count>
|
||||
<link rel="subsection"
|
||||
href="/ROOT%23%3F/catalog/v2/entries?lang=eng"
|
||||
type="application/atom+xml;profile=opds-catalog;kind=acquisition"/>
|
||||
|
@ -764,9 +766,10 @@ TEST_F(LibraryServerTest, catalog_v2_entries_filtered_by_language)
|
|||
CATALOG_V2_ENTRIES_PREAMBLE("?lang=eng")
|
||||
" <title>Filtered Entries (lang=eng)</title>\n"
|
||||
" <updated>YYYY-MM-DDThh:mm:ssZ</updated>\n"
|
||||
" <totalResults>1</totalResults>\n"
|
||||
" <totalResults>2</totalResults>\n"
|
||||
" <startIndex>0</startIndex>\n"
|
||||
" <itemsPerPage>1</itemsPerPage>\n"
|
||||
" <itemsPerPage>2</itemsPerPage>\n"
|
||||
UNCATEGORIZED_RAY_CHARLES_CATALOG_ENTRY
|
||||
RAY_CHARLES_CATALOG_ENTRY
|
||||
"</feed>\n"
|
||||
);
|
||||
|
@ -779,11 +782,12 @@ TEST_F(LibraryServerTest, catalog_v2_entries_filtered_by_language)
|
|||
CATALOG_V2_ENTRIES_PREAMBLE("?lang=eng%2Cfra")
|
||||
" <title>Filtered Entries (lang=eng%2Cfra)</title>\n"
|
||||
" <updated>YYYY-MM-DDThh:mm:ssZ</updated>\n"
|
||||
" <totalResults>2</totalResults>\n"
|
||||
" <totalResults>3</totalResults>\n"
|
||||
" <startIndex>0</startIndex>\n"
|
||||
" <itemsPerPage>2</itemsPerPage>\n"
|
||||
RAY_CHARLES_CATALOG_ENTRY
|
||||
" <itemsPerPage>3</itemsPerPage>\n"
|
||||
CHARLES_RAY_CATALOG_ENTRY
|
||||
UNCATEGORIZED_RAY_CHARLES_CATALOG_ENTRY
|
||||
RAY_CHARLES_CATALOG_ENTRY
|
||||
"</feed>\n"
|
||||
);
|
||||
}
|
||||
|
@ -874,8 +878,8 @@ TEST_F(LibraryServerTest, catalog_search_includes_public_tags)
|
|||
// prefix search works on tag names
|
||||
EXPECT_SEARCH_RESULTS("public_tag",
|
||||
2,
|
||||
RAY_CHARLES_CATALOG_ENTRY
|
||||
UNCATEGORIZED_RAY_CHARLES_CATALOG_ENTRY
|
||||
RAY_CHARLES_CATALOG_ENTRY
|
||||
);
|
||||
|
||||
EXPECT_SEARCH_RESULTS("value_of_a_public_tag",
|
||||
|
|
|
@ -57,7 +57,7 @@ TEST(ManagerTest, readXml)
|
|||
EXPECT_EQ("https://example.com/zimfiles/unittest.zim", book.getUrl());
|
||||
EXPECT_EQ("Unit Test", book.getTitle());
|
||||
EXPECT_EQ("Wikipedia articles about unit testing", book.getDescription());
|
||||
EXPECT_EQ("eng", book.getLanguage());
|
||||
EXPECT_EQ("eng", book.getCommaSeparatedLanguages());
|
||||
EXPECT_EQ("Wikipedia", book.getCreator());
|
||||
EXPECT_EQ("Kiwix", book.getPublisher());
|
||||
EXPECT_EQ("2020-03-31", book.getDate());
|
||||
|
|
Loading…
Reference in New Issue