Merge pull request #553 from kiwix/catalog_languages_endpoint

This commit is contained in:
Matthieu Gautier 2021-08-03 11:41:31 +02:00 committed by GitHub
commit b4f7dfa5a2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 233 additions and 60 deletions

View File

@ -154,6 +154,7 @@ class Library
public: public:
typedef std::vector<std::string> BookIdCollection; typedef std::vector<std::string> BookIdCollection;
typedef std::map<std::string, int> AttributeCounts;
public: public:
Library(); Library();
@ -242,6 +243,13 @@ class Library
*/ */
std::vector<std::string> getBooksLanguages() const; std::vector<std::string> getBooksLanguages() const;
/**
* Get all languages of the books in the library with counts.
*
* @return A list of languages with the count of books in each language.
*/
AttributeCounts getBooksLanguagesWithCounts() const;
/** /**
* Get all categories of the books in the library. * Get all categories of the books in the library.
* *
@ -341,7 +349,12 @@ class Library
friend class OPDSDumper; friend class OPDSDumper;
friend class libXMLDumper; friend class libXMLDumper;
private: // types
typedef const std::string& (Book::*BookStrPropMemFn)() const;
private: // functions private: // functions
AttributeCounts getBookAttributeCounts(BookStrPropMemFn p) const;
std::vector<std::string> getBookPropValueSet(BookStrPropMemFn p) const;
BookIdCollection filterViaBookDB(const Filter& filter) const; BookIdCollection filterViaBookDB(const Filter& filter) const;
void updateBookDB(const Book& book); void updateBookDB(const Book& book);
}; };

View File

@ -66,10 +66,16 @@ class OPDSDumper
/** /**
* Dump the categories OPDS feed. * Dump the categories OPDS feed.
* *
* @param categories list of category names
* @return The OPDS feed. * @return The OPDS feed.
*/ */
std::string categoriesOPDSFeed(const std::vector<std::string>& categories) const; std::string categoriesOPDSFeed() const;
/**
* Dump the languages OPDS feed.
*
* @return The OPDS feed.
*/
std::string languagesOPDSFeed() const;
/** /**
* Set the id of the library. * Set the id of the library.

View File

@ -208,23 +208,36 @@ bool Library::writeBookmarksToFile(const std::string& path) const
return writeTextFile(path, dumper.dumpLibXMLBookmark()); return writeTextFile(path, dumper.dumpLibXMLBookmark());
} }
// Tallies how often each value of one string property occurs among the
// books of the library. Books whose origin id is non-empty are left out.
//
// @param p Pointer to the const Book getter that yields the property.
// @return Map from property value to the number of counted books having it.
Library::AttributeCounts Library::getBookAttributeCounts(BookStrPropMemFn p) const
{
  AttributeCounts counts;
  for (const auto& idAndBook : m_books) {
    const auto& entry = idAndBook.second;
    if (!entry.getOrigId().empty()) {
      continue;  // only books with an empty origin id take part in the tally
    }
    ++counts[(entry.*p)()];
  }
  return counts;
}
// Lists the distinct values of one string property across the books that
// getBookAttributeCounts() takes into account.
//
// @param p Pointer to the const Book getter that yields the property.
// @return The distinct values, in the iteration order of the counts map.
std::vector<std::string> Library::getBookPropValueSet(BookStrPropMemFn p) const
{
  const AttributeCounts valueCounts = getBookAttributeCounts(p);
  std::vector<std::string> values;
  for (auto it = valueCounts.begin(); it != valueCounts.end(); ++it) {
    // Only the keys matter here; the per-value counts are dropped.
    values.push_back(it->first);
  }
  return values;
}
std::vector<std::string> Library::getBooksLanguages() const std::vector<std::string> Library::getBooksLanguages() const
{ {
std::vector<std::string> booksLanguages; return getBookPropValueSet(&Book::getLanguage);
std::map<std::string, bool> booksLanguagesMap; }
for (auto& pair: m_books) { Library::AttributeCounts Library::getBooksLanguagesWithCounts() const
auto& book = pair.second; {
auto& language = book.getLanguage(); return getBookAttributeCounts(&Book::getLanguage);
if (booksLanguagesMap.find(language) == booksLanguagesMap.end()) {
if (book.getOrigId().empty()) {
booksLanguagesMap[language] = true;
booksLanguages.push_back(language);
}
}
}
return booksLanguages;
} }
std::vector<std::string> Library::getBooksCategories() const std::vector<std::string> Library::getBooksCategories() const
@ -244,40 +257,12 @@ std::vector<std::string> Library::getBooksCategories() const
std::vector<std::string> Library::getBooksCreators() const std::vector<std::string> Library::getBooksCreators() const
{ {
std::vector<std::string> booksCreators; return getBookPropValueSet(&Book::getCreator);
std::map<std::string, bool> booksCreatorsMap;
for (auto& pair: m_books) {
auto& book = pair.second;
auto& creator = book.getCreator();
if (booksCreatorsMap.find(creator) == booksCreatorsMap.end()) {
if (book.getOrigId().empty()) {
booksCreatorsMap[creator] = true;
booksCreators.push_back(creator);
}
}
}
return booksCreators;
} }
std::vector<std::string> Library::getBooksPublishers() const std::vector<std::string> Library::getBooksPublishers() const
{ {
std::vector<std::string> booksPublishers; return getBookPropValueSet(&Book::getPublisher);
std::map<std::string, bool> booksPublishersMap;
for (auto& pair:m_books) {
auto& book = pair.second;
auto& publisher = book.getPublisher();
if (booksPublishersMap.find(publisher) == booksPublishersMap.end()) {
if (book.getOrigId().empty()) {
booksPublishersMap[publisher] = true;
booksPublishers.push_back(publisher);
}
}
}
return booksPublishers;
} }
const std::vector<kiwix::Bookmark> Library::getBookmarks(bool onlyValidBookmarks) const const std::vector<kiwix::Bookmark> Library::getBookmarks(bool onlyValidBookmarks) const

View File

@ -22,6 +22,7 @@
#include "kiwixlib-resources.h" #include "kiwixlib-resources.h"
#include <mustache.hpp> #include <mustache.hpp>
#include <unicode/locid.h>
#include "tools/stringTools.h" #include "tools/stringTools.h"
#include "tools/otherTools.h" #include "tools/otherTools.h"
@ -83,6 +84,15 @@ BookData getBookData(const Library* library, const std::vector<std::string>& boo
return bookData; return bookData;
} }
// Returns the display name of a language written in that language itself,
// encoded as UTF-8.
//
// @param lang Language code used to construct the ICU locale.
// @return The language's self name as produced by ICU.
std::string getLanguageSelfName(const std::string& lang) {
  const icu::Locale locale(lang.c_str());
  icu::UnicodeString ustring;
  // The locale is passed as its own display locale, so the name comes back
  // in the language it designates rather than in the process locale.
  locale.getDisplayLanguage(locale, ustring);
  std::string result;
  ustring.toUTF8String(result);
  return result;
}
} // unnamed namespace } // unnamed namespace
string OPDSDumper::dumpOPDSFeed(const std::vector<std::string>& bookIds, const std::string& query) const string OPDSDumper::dumpOPDSFeed(const std::vector<std::string>& bookIds, const std::string& query) const
@ -121,11 +131,11 @@ string OPDSDumper::dumpOPDSFeedV2(const std::vector<std::string>& bookIds, const
return render_template(RESOURCE::templates::catalog_v2_entries_xml, template_data); return render_template(RESOURCE::templates::catalog_v2_entries_xml, template_data);
} }
std::string OPDSDumper::categoriesOPDSFeed(const std::vector<std::string>& categories) const std::string OPDSDumper::categoriesOPDSFeed() const
{ {
const auto now = gen_date_str(); const auto now = gen_date_str();
kainjow::mustache::list categoryData; kainjow::mustache::list categoryData;
for ( const auto& category : categories ) { for ( const auto& category : library->getBooksCategories() ) {
const auto urlencodedCategoryName = urlEncode(category); const auto urlencodedCategoryName = urlEncode(category);
categoryData.push_back(kainjow::mustache::object{ categoryData.push_back(kainjow::mustache::object{
{"name", category}, {"name", category},
@ -146,4 +156,32 @@ std::string OPDSDumper::categoriesOPDSFeed(const std::vector<std::string>& categ
); );
} }
std::string OPDSDumper::languagesOPDSFeed() const
{
const auto now = gen_date_str();
kainjow::mustache::list languageData;
for ( const auto& langAndBookCount : library->getBooksLanguagesWithCounts() ) {
const std::string languageCode = langAndBookCount.first;
const int bookCount = langAndBookCount.second;
const auto languageSelfName = getLanguageSelfName(languageCode);
languageData.push_back(kainjow::mustache::object{
{"lang_code", languageCode},
{"lang_self_name", languageSelfName},
{"book_count", to_string(bookCount)},
{"updated", now},
{"id", gen_uuid(libraryId + "/languages/" + languageCode)}
});
}
return render_template(
RESOURCE::templates::catalog_v2_languages_xml,
kainjow::mustache::object{
{"date", now},
{"endpoint_root", rootLocation + "/catalog/v2"},
{"feed_id", gen_uuid(libraryId + "/languages")},
{"languages", languageData }
}
);
}
} }

View File

@ -77,6 +77,7 @@ class InternalServer {
std::unique_ptr<Response> handle_catalog_v2_root(const RequestContext& request); std::unique_ptr<Response> handle_catalog_v2_root(const RequestContext& request);
std::unique_ptr<Response> handle_catalog_v2_entries(const RequestContext& request); std::unique_ptr<Response> handle_catalog_v2_entries(const RequestContext& request);
std::unique_ptr<Response> handle_catalog_v2_categories(const RequestContext& request); std::unique_ptr<Response> handle_catalog_v2_categories(const RequestContext& request);
std::unique_ptr<Response> handle_catalog_v2_languages(const RequestContext& request);
std::unique_ptr<Response> handle_meta(const RequestContext& request); std::unique_ptr<Response> handle_meta(const RequestContext& request);
std::unique_ptr<Response> handle_search(const RequestContext& request); std::unique_ptr<Response> handle_search(const RequestContext& request);
std::unique_ptr<Response> handle_suggest(const RequestContext& request); std::unique_ptr<Response> handle_suggest(const RequestContext& request);

View File

@ -59,6 +59,8 @@ std::unique_ptr<Response> InternalServer::handle_catalog_v2(const RequestContext
return handle_catalog_v2_entries(request); return handle_catalog_v2_entries(request);
} else if (url == "categories") { } else if (url == "categories") {
return handle_catalog_v2_categories(request); return handle_catalog_v2_categories(request);
} else if (url == "languages") {
return handle_catalog_v2_languages(request);
} else { } else {
return Response::build_404(*this, request, "", ""); return Response::build_404(*this, request, "", "");
} }
@ -74,7 +76,8 @@ std::unique_ptr<Response> InternalServer::handle_catalog_v2_root(const RequestCo
{"endpoint_root", m_root + "/catalog/v2"}, {"endpoint_root", m_root + "/catalog/v2"},
{"feed_id", gen_uuid(m_library_id)}, {"feed_id", gen_uuid(m_library_id)},
{"all_entries_feed_id", gen_uuid(m_library_id + "/entries")}, {"all_entries_feed_id", gen_uuid(m_library_id + "/entries")},
{"category_list_feed_id", gen_uuid(m_library_id + "/categories")} {"category_list_feed_id", gen_uuid(m_library_id + "/categories")},
{"language_list_feed_id", gen_uuid(m_library_id + "/languages")}
}, },
"application/atom+xml;profile=opds-catalog;kind=navigation" "application/atom+xml;profile=opds-catalog;kind=navigation"
); );
@ -101,7 +104,19 @@ std::unique_ptr<Response> InternalServer::handle_catalog_v2_categories(const Req
opdsDumper.setLibraryId(m_library_id); opdsDumper.setLibraryId(m_library_id);
return ContentResponse::build( return ContentResponse::build(
*this, *this,
opdsDumper.categoriesOPDSFeed(mp_library->getBooksCategories()), opdsDumper.categoriesOPDSFeed(),
"application/atom+xml;profile=opds-catalog;kind=navigation"
);
}
// Serves the catalog v2 "languages" endpoint.
//
// Builds an OPDSDumper over mp_library, configures it with the server root
// and the library id, and wraps the output of languagesOPDSFeed() in a
// ContentResponse with the OPDS navigation MIME type.
// The `request` argument is not read by this handler.
std::unique_ptr<Response> InternalServer::handle_catalog_v2_languages(const RequestContext& request)
{
OPDSDumper opdsDumper(mp_library);
opdsDumper.setRootLocation(m_root);
opdsDumper.setLibraryId(m_library_id);
return ContentResponse::build(
*this,
opdsDumper.languagesOPDSFeed(),
"application/atom+xml;profile=opds-catalog;kind=navigation" "application/atom+xml;profile=opds-catalog;kind=navigation"
); );
} }

View File

@ -48,5 +48,6 @@ templates/catalog_entries.xml
templates/catalog_v2_root.xml templates/catalog_v2_root.xml
templates/catalog_v2_entries.xml templates/catalog_v2_entries.xml
templates/catalog_v2_categories.xml templates/catalog_v2_categories.xml
templates/catalog_v2_languages.xml
opensearchdescription.xml opensearchdescription.xml
catalog_v2_searchdescription.xml catalog_v2_searchdescription.xml

View File

@ -0,0 +1,27 @@
<?xml version="1.0" encoding="UTF-8"?>
<feed xmlns="http://www.w3.org/2005/Atom"
xmlns:dc="http://purl.org/dc/terms/"
xmlns:opds="https://specs.opds.io/opds-1.2"
xmlns:thr="http://purl.org/syndication/thread/1.0">
<id>{{feed_id}}</id>
<link rel="self"
href="{{endpoint_root}}/languages"
type="application/atom+xml;profile=opds-catalog;kind=navigation"/>
<link rel="start"
href="{{endpoint_root}}/root.xml"
type="application/atom+xml;profile=opds-catalog;kind=navigation"/>
<title>List of languages</title>
<updated>{{date}}</updated>
{{#languages}}
<entry>
<title>{{lang_self_name}}</title>
<dc:language>{{{lang_code}}}</dc:language>
<thr:count>{{book_count}}</thr:count>
<link rel="subsection"
href="{{endpoint_root}}/entries?lang={{{lang_code}}}"
type="application/atom+xml;profile=opds-catalog;kind=acquisition"/>
<updated>{{updated}}</updated>
<id>{{id}}</id>
</entry>
{{/languages}}
</feed>

View File

@ -32,4 +32,13 @@
<id>{{category_list_feed_id}}</id> <id>{{category_list_feed_id}}</id>
<content type="text">List of all categories in this catalog.</content> <content type="text">List of all categories in this catalog.</content>
</entry> </entry>
<entry>
<title>List of languages</title>
<link rel="subsection"
href="{{endpoint_root}}/languages"
type="application/atom+xml;profile=opds-catalog;kind=navigation"/>
<updated>{{date}}</updated>
<id>{{language_list_feed_id}}</id>
<content type="text">List of all languages in this catalog.</content>
</entry>
</feed> </feed>

View File

@ -21,11 +21,11 @@
url="https://github.com/kiwix/libkiwix/raw/master/test/data/zimfile.zim" url="https://github.com/kiwix/libkiwix/raw/master/test/data/zimfile.zim"
title="Ray (uncategorized) Charles" title="Ray (uncategorized) Charles"
description="No category is assigned to this library entry." description="No category is assigned to this library entry."
language="eng" language="rus"
creator="Wikipedia" creator="Wikipedia"
publisher="Kiwix" publisher="Kiwix"
date="2020-03-31" date="2020-03-31"
name="wikipedia_en_ray_charles" name="wikipedia_ru_ray_charles"
tags="unittest;wikipedia;_pictures:no;_videos:no;_details:no" tags="unittest;wikipedia;_pictures:no;_videos:no;_details:no"
articleCount="284" articleCount="284"
mediaCount="2" mediaCount="2"
@ -37,11 +37,11 @@
url="https://github.com/kiwix/libkiwix/raw/master/test/data/zimfile.zim" url="https://github.com/kiwix/libkiwix/raw/master/test/data/zimfile.zim"
title="Charles, Ray" title="Charles, Ray"
description="Wikipedia articles about Ray Charles" description="Wikipedia articles about Ray Charles"
language="eng" language="fra"
creator="Wikipedia" creator="Wikipedia"
publisher="Kiwix" publisher="Kiwix"
date="2020-03-31" date="2020-03-31"
name="wikipedia_en_ray_charles" name="wikipedia_fr_ray_charles"
tags="unittest;wikipedia;_category:jazz;_pictures:no;_videos:no;_details:no;_ftindex:yes" tags="unittest;wikipedia;_category:jazz;_pictures:no;_videos:no;_details:no;_ftindex:yes"
articleCount="284" articleCount="284"
mediaCount="2" mediaCount="2"

View File

@ -275,9 +275,25 @@ TEST_F(LibraryTest, getBookMarksTest)
TEST_F(LibraryTest, sanityCheck) TEST_F(LibraryTest, sanityCheck)
{ {
EXPECT_EQ(lib.getBookCount(true, true), 12U); EXPECT_EQ(lib.getBookCount(true, true), 12U);
EXPECT_EQ(lib.getBooksLanguages().size(), 3U); EXPECT_EQ(lib.getBooksLanguages(),
EXPECT_EQ(lib.getBooksCreators().size(), 9U); std::vector<std::string>({"deu", "eng", "fra"})
EXPECT_EQ(lib.getBooksPublishers().size(), 3U); );
EXPECT_EQ(lib.getBooksCreators(), std::vector<std::string>({
"Islam Stack Exchange",
"Movies & TV Stack Exchange",
"Mythology & Folklore Stack Exchange",
"TED",
"Tania Louis",
"Wiki",
"Wikibooks",
"Wikipedia",
"Wikiquote"
}));
EXPECT_EQ(lib.getBooksPublishers(), std::vector<std::string>({
"",
"Kiwix",
"Kiwix & Some Enthusiasts"
}));
} }
TEST_F(LibraryTest, categoryHandling) TEST_F(LibraryTest, categoryHandling)

View File

@ -618,9 +618,9 @@ std::string maskVariableOPDSFeedData(std::string s)
" <id>urn:uuid:charlesray</id>\n" \ " <id>urn:uuid:charlesray</id>\n" \
" <title>Charles, Ray</title>\n" \ " <title>Charles, Ray</title>\n" \
" <summary>Wikipedia articles about Ray Charles</summary>\n" \ " <summary>Wikipedia articles about Ray Charles</summary>\n" \
" <language>eng</language>\n" \ " <language>fra</language>\n" \
" <updated>YYYY-MM-DDThh:mm:ssZ</updated>\n" \ " <updated>YYYY-MM-DDThh:mm:ssZ</updated>\n" \
" <name>wikipedia_en_ray_charles</name>\n" \ " <name>wikipedia_fr_ray_charles</name>\n" \
" <flavour></flavour>\n" \ " <flavour></flavour>\n" \
" <category>jazz</category>\n" \ " <category>jazz</category>\n" \
" <tags>unittest;wikipedia;_category:jazz;_pictures:no;_videos:no;_details:no;_ftindex:yes</tags>\n" \ " <tags>unittest;wikipedia;_category:jazz;_pictures:no;_videos:no;_details:no;_ftindex:yes</tags>\n" \
@ -666,9 +666,9 @@ std::string maskVariableOPDSFeedData(std::string s)
" <id>urn:uuid:raycharles_uncategorized</id>\n" \ " <id>urn:uuid:raycharles_uncategorized</id>\n" \
" <title>Ray (uncategorized) Charles</title>\n" \ " <title>Ray (uncategorized) Charles</title>\n" \
" <summary>No category is assigned to this library entry.</summary>\n" \ " <summary>No category is assigned to this library entry.</summary>\n" \
" <language>eng</language>\n" \ " <language>rus</language>\n" \
" <updated>YYYY-MM-DDThh:mm:ssZ</updated>\n" \ " <updated>YYYY-MM-DDThh:mm:ssZ</updated>\n" \
" <name>wikipedia_en_ray_charles</name>\n" \ " <name>wikipedia_ru_ray_charles</name>\n" \
" <flavour></flavour>\n" \ " <flavour></flavour>\n" \
" <category></category>\n" \ " <category></category>\n" \
" <tags>unittest;wikipedia;_pictures:no;_videos:no;_details:no</tags>\n" \ " <tags>unittest;wikipedia;_pictures:no;_videos:no;_details:no</tags>\n" \
@ -942,6 +942,15 @@ TEST_F(LibraryServerTest, catalog_v2_root)
<id>12345678-90ab-cdef-1234-567890abcdef</id> <id>12345678-90ab-cdef-1234-567890abcdef</id>
<content type="text">List of all categories in this catalog.</content> <content type="text">List of all categories in this catalog.</content>
</entry> </entry>
<entry>
<title>List of languages</title>
<link rel="subsection"
href="/catalog/v2/languages"
type="application/atom+xml;profile=opds-catalog;kind=navigation"/>
<updated>YYYY-MM-DDThh:mm:ssZ</updated>
<id>12345678-90ab-cdef-1234-567890abcdef</id>
<content type="text">List of all languages in this catalog.</content>
</entry>
</feed> </feed>
)"; )";
EXPECT_EQ(maskVariableOPDSFeedData(r->body), expected_output); EXPECT_EQ(maskVariableOPDSFeedData(r->body), expected_output);
@ -1005,6 +1014,59 @@ TEST_F(LibraryServerTest, catalog_v2_categories)
EXPECT_EQ(maskVariableOPDSFeedData(r->body), expected_output); EXPECT_EQ(maskVariableOPDSFeedData(r->body), expected_output);
} }
// Checks that GET /catalog/v2/languages answers with status 200 and that the
// body, once passed through maskVariableOPDSFeedData(), equals the feed
// below: one entry per language (eng, fra, rus), each with a book count of 1
// and a title holding the language's own name.
// NOTE(review): maskVariableOPDSFeedData() presumably replaces timestamps and
// uuids with the fixed placeholders seen in the literal - confirm.
// NOTE(review): the expected <feed> element declares only the Atom, dc and
// opds namespaces although every entry uses the thr: prefix; keep this
// literal in sync with templates/catalog_v2_languages.xml.
TEST_F(LibraryServerTest, catalog_v2_languages)
{
const auto r = zfs1_->GET("/catalog/v2/languages");
EXPECT_EQ(r->status, 200);
// Byte-exact expected body; whitespace inside the raw string is significant.
const char expected_output[] = R"(<?xml version="1.0" encoding="UTF-8"?>
<feed xmlns="http://www.w3.org/2005/Atom"
xmlns:dc="http://purl.org/dc/terms/"
xmlns:opds="https://specs.opds.io/opds-1.2">
<id>12345678-90ab-cdef-1234-567890abcdef</id>
<link rel="self"
href="/catalog/v2/languages"
type="application/atom+xml;profile=opds-catalog;kind=navigation"/>
<link rel="start"
href="/catalog/v2/root.xml"
type="application/atom+xml;profile=opds-catalog;kind=navigation"/>
<title>List of languages</title>
<updated>YYYY-MM-DDThh:mm:ssZ</updated>
<entry>
<title>English</title>
<dc:language>eng</dc:language>
<thr:count>1</thr:count>
<link rel="subsection"
href="/catalog/v2/entries?lang=eng"
type="application/atom+xml;profile=opds-catalog;kind=acquisition"/>
<updated>YYYY-MM-DDThh:mm:ssZ</updated>
<id>12345678-90ab-cdef-1234-567890abcdef</id>
</entry>
<entry>
<title>français</title>
<dc:language>fra</dc:language>
<thr:count>1</thr:count>
<link rel="subsection"
href="/catalog/v2/entries?lang=fra"
type="application/atom+xml;profile=opds-catalog;kind=acquisition"/>
<updated>YYYY-MM-DDThh:mm:ssZ</updated>
<id>12345678-90ab-cdef-1234-567890abcdef</id>
</entry>
<entry>
<title>русский</title>
<dc:language>rus</dc:language>
<thr:count>1</thr:count>
<link rel="subsection"
href="/catalog/v2/entries?lang=rus"
type="application/atom+xml;profile=opds-catalog;kind=acquisition"/>
<updated>YYYY-MM-DDThh:mm:ssZ</updated>
<id>12345678-90ab-cdef-1234-567890abcdef</id>
</entry>
</feed>
)";
EXPECT_EQ(maskVariableOPDSFeedData(r->body), expected_output);
}
#define CATALOG_V2_ENTRIES_PREAMBLE(q) \ #define CATALOG_V2_ENTRIES_PREAMBLE(q) \
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" \ "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" \
"<feed xmlns=\"http://www.w3.org/2005/Atom\"\n" \ "<feed xmlns=\"http://www.w3.org/2005/Atom\"\n" \