Merge pull request #488 from kiwix/fully_xapian_powered_catalog_search

This commit is contained in:
Matthieu Gautier 2021-04-27 15:10:40 +02:00 committed by GitHub
commit 7336dcab1d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 570 additions and 106 deletions

View File

@ -35,6 +35,7 @@ namespace kiwix
{
class OPDSDumper;
class Library;
enum supportedListSortBy { UNSORTED, TITLE, SIZE, DATE, CREATOR, PUBLISHER };
enum supportedListMode {
@ -48,10 +49,13 @@ enum supportedListMode {
};
class Filter {
private:
public: // types
using Tags = std::vector<std::string>;
private: // data
uint64_t activeFilters;
std::vector<std::string> _acceptTags;
std::vector<std::string> _rejectTags;
Tags _acceptTags;
Tags _rejectTags;
std::string _category;
std::string _lang;
std::string _publisher;
@ -61,7 +65,7 @@ class Filter {
bool _queryIsPartial;
std::string _name;
public:
public: // functions
Filter();
~Filter() = default;
@ -95,8 +99,8 @@ class Filter {
/**
* Set the filter to only accept book with corresponding tag.
*/
Filter& acceptTags(std::vector<std::string> tags);
Filter& rejectTags(std::vector<std::string> tags);
Filter& acceptTags(const Tags& tags);
Filter& rejectTags(const Tags& tags);
Filter& category(std::string category);
Filter& lang(std::string lang);
@ -110,9 +114,28 @@ class Filter {
const std::string& getQuery() const { return _query; }
bool queryIsPartial() const { return _queryIsPartial; }
bool hasName() const;
const std::string& getName() const { return _name; }
bool hasCategory() const;
const std::string& getCategory() const { return _category; }
bool hasLang() const;
const std::string& getLang() const { return _lang; }
bool hasPublisher() const;
const std::string& getPublisher() const { return _publisher; }
bool hasCreator() const;
const std::string& getCreator() const { return _creator; }
const Tags& getAcceptTags() const { return _acceptTags; }
const Tags& getRejectTags() const { return _rejectTags; }
private: // functions
friend class Library;
bool accept(const Book& book) const;
bool acceptByQueryOnly(const Book& book) const;
bool acceptByNonQueryCriteria(const Book& book) const;
};
@ -307,7 +330,7 @@ class Library
friend class libXMLDumper;
private: // functions
BookIdCollection getBooksByTitleOrDescription(const Filter& filter);
BookIdCollection filterViaBookDB(const Filter& filter);
void updateBookDB(const Book& book);
};

View File

@ -43,7 +43,7 @@ std::string iso639_3ToXapian(const std::string& lang) {
return icu::Locale(lang.c_str()).getLanguage();
};
std::string normalizeText(const std::string& text, const std::string& language)
// Produces the normalized form of `text` that is used on both the indexing
// and the querying side of the book database. The normalization itself is
// entirely delegated to removeAccents().
std::string normalizeText(const std::string& text)
{
  std::string normalized = removeAccents(text);
  return normalized;
}
@ -276,35 +276,61 @@ void Library::updateBookDB(const Book& book)
Xapian::Document doc;
indexer.set_document(doc);
const std::string title = normalizeText(book.getTitle(), lang);
const std::string desc = normalizeText(book.getDescription(), lang);
doc.add_value(0, title);
doc.add_value(1, desc);
doc.set_data(book.getId());
const std::string title = normalizeText(book.getTitle());
const std::string desc = normalizeText(book.getDescription());
indexer.index_text(title, 1, "S");
indexer.index_text(desc, 1, "XD");
// Index fields without prefixes for general search
// Index title and description without prefixes for general search
indexer.index_text(title);
indexer.increase_termpos();
indexer.index_text(desc);
// Index all fields for field-based search
indexer.index_text(title, 1, "S");
indexer.index_text(desc, 1, "XD");
indexer.index_text(lang, 1, "L");
indexer.index_text(normalizeText(book.getCreator()), 1, "A");
indexer.index_text(normalizeText(book.getPublisher()), 1, "XP");
indexer.index_text(normalizeText(book.getName()), 1, "XN");
indexer.index_text(normalizeText(book.getCategory()), 1, "XC");
for ( const auto& tag : split(normalizeText(book.getTags()), ";") )
doc.add_boolean_term("XT" + tag);
const std::string idterm = "Q" + book.getId();
doc.add_boolean_term(idterm);
doc.set_data(book.getId());
m_bookDB->replace_document(idterm, doc);
}
Library::BookIdCollection Library::getBooksByTitleOrDescription(const Filter& filter)
namespace
{
if ( !filter.hasQuery() )
return getBooksIds();
BookIdCollection bookIds;
// Tells whether `query` is a match-all leaf query, in which case running it
// against the database is unnecessary since every document is selected.
bool willSelectEverything(const Xapian::Query& query)
{
  const auto queryType = query.get_type();
  return Xapian::Query::LEAF_MATCH_ALL == queryType;
}
Xapian::Query buildXapianQueryFromFilterQuery(const Filter& filter)
{
if ( !filter.hasQuery() ) {
// This is a thread-safe way to construct an equivalent of
// a Xapian::Query::MatchAll query
return Xapian::Query(std::string());
}
Xapian::QueryParser queryParser;
queryParser.set_default_op(Xapian::Query::OP_AND);
queryParser.add_prefix("title", "S");
queryParser.add_prefix("description", "XD");
queryParser.add_prefix("name", "XN");
queryParser.add_prefix("category", "XC");
queryParser.add_prefix("lang", "L");
queryParser.add_prefix("publisher", "XP");
queryParser.add_prefix("creator", "A");
queryParser.add_prefix("tag", "XT");
const auto partialQueryFlag = filter.queryIsPartial()
? Xapian::QueryParser::FLAG_PARTIAL
: 0;
@ -314,10 +340,99 @@ Library::BookIdCollection Library::getBooksByTitleOrDescription(const Filter& fi
//queryParser.set_stemming_strategy(Xapian::QueryParser::STEM_SOME);
const auto flags = Xapian::QueryParser::FLAG_PHRASE
| Xapian::QueryParser::FLAG_BOOLEAN
| Xapian::QueryParser::FLAG_BOOLEAN_ANY_CASE
| Xapian::QueryParser::FLAG_LOVEHATE
| Xapian::QueryParser::FLAG_WILDCARD
| partialQueryFlag;
const auto query = queryParser.parse_query(filter.getQuery(), flags);
return queryParser.parse_query(normalizeText(filter.getQuery()), flags);
}
// Builds a single-term query for the book name: the normalized name
// prefixed with "XN", the prefix under which updateBookDB() indexes names.
// NOTE(review): updateBookDB() indexes the name via index_text(), whereas
// this builds one raw term from the whole string - presumably names are
// always a single token; confirm.
Xapian::Query nameQuery(const std::string& name)
{
  const std::string term = "XN" + normalizeText(name);
  return Xapian::Query(term);
}
// Builds a single-term query for the book category: the normalized category
// prefixed with "XC", the prefix under which updateBookDB() indexes
// categories.
// NOTE(review): updateBookDB() indexes the category via index_text(),
// whereas this builds one raw term from the whole string - presumably
// categories are always a single token; confirm.
Xapian::Query categoryQuery(const std::string& category)
{
  const std::string term = "XC" + normalizeText(category);
  return Xapian::Query(term);
}
// Builds a single-term query for the book language: the normalized language
// code prefixed with "L", the prefix under which updateBookDB() indexes the
// language.
Xapian::Query langQuery(const std::string& lang)
{
  const std::string term = "L" + normalizeText(lang);
  return Xapian::Query(term);
}
// Builds a phrase query out of the terms of `text` under the term prefix
// `prefix`.
//
// The text is normalized and passed through a QueryParser (stemming
// disabled, no parser flags) only in order to split it into prefixed terms.
// The terms of the parsed query are then recombined with OP_PHRASE, using
// the length of the parsed query as the phrase window.
Xapian::Query phraseQueryWithPrefix(const std::string& text, const std::string& prefix)
{
  Xapian::QueryParser queryParser;
  queryParser.set_default_op(Xapian::Query::OP_OR);
  queryParser.set_stemming_strategy(Xapian::QueryParser::STEM_NONE);
  const auto flags = 0;
  const auto parsed = queryParser.parse_query(normalizeText(text), flags, prefix);
  return Xapian::Query(Xapian::Query::OP_PHRASE,
                       parsed.get_terms_begin(),
                       parsed.get_terms_end(),
                       parsed.get_length());
}

// Phrase query on the publisher field. "XP" is the prefix under which
// updateBookDB() indexes the publisher.
Xapian::Query publisherQuery(const std::string& publisher)
{
  return phraseQueryWithPrefix(publisher, "XP");
}

// Phrase query on the creator field. "A" is the prefix under which
// updateBookDB() indexes the creator.
Xapian::Query creatorQuery(const std::string& creator)
{
  return phraseQueryWithPrefix(creator, "A");
}
// Builds the tag part of a catalog query.
//
// The result starts as a match-all query (constructed from an empty string),
// is ANDed with one boolean "XT"-prefixed term per accepted tag, and then
// has one "XT"-prefixed term per rejected tag excluded via OP_AND_NOT.
// Tags are normalized the same way as on the indexing side.
Xapian::Query tagsQuery(const Filter::Tags& acceptTags, const Filter::Tags& rejectTags)
{
  auto combined = Xapian::Query(std::string());

  for ( const auto& accepted : acceptTags ) {
    combined &= Xapian::Query("XT" + normalizeText(accepted));
  }

  for ( const auto& rejected : rejectTags ) {
    const Xapian::Query rejectedTerm("XT" + normalizeText(rejected));
    combined = Xapian::Query(Xapian::Query::OP_AND_NOT, combined, rejectedTerm);
  }

  return combined;
}
// Translates a Filter into a single Xapian query.
//
// Starts from the free-text query of the filter (a match-all query when the
// filter has none) and ANDs in one sub-query per field criterion that is set
// on the filter: name, category, language, publisher, creator and
// accepted/rejected tags. When none of them is set the initial query is
// returned untouched, so that willSelectEverything() can still recognize a
// match-all query.
Xapian::Query buildXapianQuery(const Filter& filter)
{
  auto q = buildXapianQueryFromFilterQuery(filter);

  // Narrows the accumulated query by one more criterion.
  const auto restrictBy = [&q](const Xapian::Query& criterion) {
    q = Xapian::Query(Xapian::Query::OP_AND, q, criterion);
  };

  if ( filter.hasName() ) {
    restrictBy(nameQuery(filter.getName()));
  }
  if ( filter.hasCategory() ) {
    restrictBy(categoryQuery(filter.getCategory()));
  }
  if ( filter.hasLang() ) {
    restrictBy(langQuery(filter.getLang()));
  }
  if ( filter.hasPublisher() ) {
    restrictBy(publisherQuery(filter.getPublisher()));
  }
  if ( filter.hasCreator() ) {
    restrictBy(creatorQuery(filter.getCreator()));
  }
  // Tags are checked for emptiness rather than via a has*() accessor.
  if ( !filter.getAcceptTags().empty() || !filter.getRejectTags().empty() ) {
    restrictBy(tagsQuery(filter.getAcceptTags(), filter.getRejectTags()));
  }
  return q;
}
} // unnamed namespace
Library::BookIdCollection Library::filterViaBookDB(const Filter& filter)
{
const auto query = buildXapianQuery(filter);
if ( willSelectEverything(query) )
return getBooksIds();
BookIdCollection bookIds;
Xapian::Enquire enquire(*m_bookDB);
enquire.set_query(query);
const auto results = enquire.get_mset(0, m_books.size());
@ -331,8 +446,8 @@ Library::BookIdCollection Library::getBooksByTitleOrDescription(const Filter& fi
Library::BookIdCollection Library::filter(const Filter& filter)
{
BookIdCollection result;
for(auto id : getBooksByTitleOrDescription(filter)) {
if(filter.acceptByNonQueryCriteria(m_books.at(id))) {
for(auto id : filterViaBookDB(filter)) {
if(filter.accept(m_books.at(id))) {
result.push_back(id);
}
}
@ -525,14 +640,14 @@ Filter& Filter::valid(bool accept)
return *this;
}
Filter& Filter::acceptTags(std::vector<std::string> tags)
Filter& Filter::acceptTags(const Tags& tags)
{
_acceptTags = tags;
activeFilters |= ACCEPTTAGS;
return *this;
}
Filter& Filter::rejectTags(std::vector<std::string> tags)
Filter& Filter::rejectTags(const Tags& tags)
{
_rejectTags = tags;
activeFilters |= REJECTTAGS;
@ -596,12 +711,32 @@ bool Filter::hasQuery() const
return ACTIVE(QUERY);
}
bool Filter::accept(const Book& book) const
bool Filter::hasName() const
{
return acceptByNonQueryCriteria(book) && acceptByQueryOnly(book);
return ACTIVE(NAME);
}
bool Filter::acceptByNonQueryCriteria(const Book& book) const
// Tells whether the CATEGORY criterion is enabled on this filter, as
// reported by the ACTIVE macro (defined outside this excerpt).
bool Filter::hasCategory() const
{
return ACTIVE(CATEGORY);
}
// Tells whether the LANG criterion is enabled on this filter, as reported
// by the ACTIVE macro (defined outside this excerpt).
bool Filter::hasLang() const
{
return ACTIVE(LANG);
}
// Tells whether the _PUBLISHER criterion is enabled on this filter, as
// reported by the ACTIVE macro (defined outside this excerpt).
bool Filter::hasPublisher() const
{
return ACTIVE(_PUBLISHER);
}
// Tells whether the _CREATOR criterion is enabled on this filter, as
// reported by the ACTIVE macro (defined outside this excerpt).
bool Filter::hasCreator() const
{
return ACTIVE(_CREATOR);
}
bool Filter::accept(const Book& book) const
{
auto local = !book.getPath().empty();
FILTER(_LOCAL, local)
@ -616,46 +751,8 @@ bool Filter::acceptByNonQueryCriteria(const Book& book) const
FILTER(_NOREMOTE, !remote)
FILTER(MAXSIZE, book.getSize() <= _maxSize)
FILTER(CATEGORY, book.getCategory() == _category)
FILTER(LANG, book.getLanguage() == _lang)
FILTER(_PUBLISHER, book.getPublisher() == _publisher)
FILTER(_CREATOR, book.getCreator() == _creator)
FILTER(NAME, book.getName() == _name)
if (ACTIVE(ACCEPTTAGS)) {
if (!_acceptTags.empty()) {
auto vBookTags = split(book.getTags(), ";");
std::set<std::string> sBookTags(vBookTags.begin(), vBookTags.end());
for (auto& t: _acceptTags) {
if (sBookTags.find(t) == sBookTags.end()) {
return false;
}
}
}
}
if (ACTIVE(REJECTTAGS)) {
if (!_rejectTags.empty()) {
auto vBookTags = split(book.getTags(), ";");
std::set<std::string> sBookTags(vBookTags.begin(), vBookTags.end());
for (auto& t: _rejectTags) {
if (sBookTags.find(t) != sBookTags.end()) {
return false;
}
}
}
}
return true;
}
bool Filter::acceptByQueryOnly(const Book& book) const
{
if ( ACTIVE(QUERY)
&& !(matchRegex(book.getTitle(), "\\Q" + _query + "\\E")
|| matchRegex(book.getDescription(), "\\Q" + _query + "\\E")))
return false;
return true;
}
}

View File

@ -181,6 +181,42 @@ const char * sampleOpdsStream = R"(
)";
// Library XML fixture describing two books that both have a path
// ("raycharles" and "example"). The ampersand in the publisher attribute of
// the second book is written as &amp; because a bare '&' is not well-formed
// inside an XML attribute value.
const char sampleLibraryXML[] = R"(
<library version="1.0">
<book
id="raycharles"
path="./zimfile.zim"
url="https://github.com/kiwix/kiwix-lib/raw/master/test/data/zimfile.zim"
title="Ray Charles"
description="Wikipedia articles about Ray Charles"
language="eng"
creator="Wikipedia"
publisher="Kiwix"
date="2020-03-31"
name="wikipedia_en_ray_charles"
tags="wikipedia;_category:wikipedia;_pictures:no"
articleCount="284"
mediaCount="2"
size="556"
></book>
<book
id="example"
path="./example.zim"
title="An example ZIM archive"
description="An eXaMpLe book added to the catalog via XML"
language="deu"
creator="Wikibooks"
publisher="Kiwix &amp; Some Enthusiasts"
date="2021-04-11"
name="wikibooks_de"
tags="unittest;wikibooks;_category:wikibooks"
articleCount="12"
mediaCount="0"
size="126"
></book>
</library>
)";
#include "../include/library.h"
#include "../include/manager.h"
#include "../include/bookmark.h"
@ -190,9 +226,13 @@ namespace
class LibraryTest : public ::testing::Test {
protected:
typedef kiwix::Library::BookIdCollection BookIdCollection;
typedef std::vector<std::string> TitleCollection;
// Fixture set-up: fills `lib` through a Manager from two sources, the OPDS
// sample stream and the inline library XML sample.
void SetUp() override {
kiwix::Manager manager(&lib);
manager.readOpds(sampleOpdsStream, "foo.urlHost");
// NOTE(review): the meaning of the two boolean arguments and of the path
// argument is defined by Manager::readXml, which is not visible here - confirm.
manager.readXml(sampleLibraryXML, true, "./test/library.xml", true);
}
kiwix::Bookmark createBookmark(const std::string &id) {
@ -201,6 +241,15 @@ class LibraryTest : public ::testing::Test {
return bookmark;
};
// Maps each book id to the title of the corresponding book in `lib` and
// returns the titles in sorted order, which makes the result independent of
// the order of the ids.
TitleCollection ids2Titles(const BookIdCollection& ids) {
  TitleCollection sortedTitles;
  for ( const auto& id : ids ) {
    const auto& book = lib.getBookById(id);
    sortedTitles.push_back(book.getTitle());
  }
  std::sort(sortedTitles.begin(), sortedTitles.end());
  return sortedTitles;
}
kiwix::Library lib;
};
@ -225,10 +274,10 @@ TEST_F(LibraryTest, getBookMarksTest)
TEST_F(LibraryTest, sanityCheck)
{
EXPECT_EQ(lib.getBookCount(true, true), 10U);
EXPECT_EQ(lib.getBooksLanguages().size(), 2U);
EXPECT_EQ(lib.getBooksCreators().size(), 8U);
EXPECT_EQ(lib.getBooksPublishers().size(), 1U);
EXPECT_EQ(lib.getBookCount(true, true), 12U);
EXPECT_EQ(lib.getBooksLanguages().size(), 3U);
EXPECT_EQ(lib.getBooksCreators().size(), 9U);
EXPECT_EQ(lib.getBooksPublishers().size(), 3U);
}
TEST_F(LibraryTest, categoryHandling)
@ -240,35 +289,339 @@ TEST_F(LibraryTest, categoryHandling)
EXPECT_EQ("category_element_overrides_tags", lib.getBookById("14829621-c490-c376-0792-9de558b57efa").getCategory());
}
TEST_F(LibraryTest, filterCheck)
TEST_F(LibraryTest, emptyFilter)
{
auto bookIds = lib.filter(kiwix::Filter());
const auto bookIds = lib.filter(kiwix::Filter());
EXPECT_EQ(bookIds, lib.getBooksIds());
}
bookIds = lib.filter(kiwix::Filter().lang("eng"));
EXPECT_EQ(bookIds.size(), 5U);
// Expects that filtering the fixture library with filter `f` yields exactly
// the books with the listed titles. The actual titles come from ids2Titles(),
// which sorts them, so the expected titles must be listed in sorted order.
// Passing no titles expects an empty result.
#define EXPECT_FILTER_RESULTS(f, ...) \
EXPECT_EQ( \
ids2Titles(lib.filter(f)), \
TitleCollection({ __VA_ARGS__ }) \
)
bookIds = lib.filter(kiwix::Filter().acceptTags({"stackexchange"}));
EXPECT_EQ(bookIds.size(), 3U);
TEST_F(LibraryTest, filterLocal)
{
EXPECT_FILTER_RESULTS(kiwix::Filter().local(true),
"An example ZIM archive",
"Ray Charles"
);
bookIds = lib.filter(kiwix::Filter().acceptTags({"wikipedia"}));
EXPECT_EQ(bookIds.size(), 3U);
EXPECT_FILTER_RESULTS(kiwix::Filter().local(false),
"Encyclopédie de la Tunisie",
"Granblue Fantasy Wiki",
"Géographie par Wikipédia",
"Islam Stack Exchange",
"Mathématiques",
"Movies & TV Stack Exchange",
"Mythology & Folklore Stack Exchange",
"TED talks - Business",
"Tania Louis",
"Wikiquote"
);
}
bookIds = lib.filter(kiwix::Filter().acceptTags({"wikipedia", "nopic"}));
EXPECT_EQ(bookIds.size(), 2U);
TEST_F(LibraryTest, filterRemote)
{
EXPECT_FILTER_RESULTS(kiwix::Filter().remote(true),
"Encyclopédie de la Tunisie",
"Granblue Fantasy Wiki",
"Géographie par Wikipédia",
"Islam Stack Exchange",
"Mathématiques",
"Movies & TV Stack Exchange",
"Mythology & Folklore Stack Exchange",
"Ray Charles",
"TED talks - Business",
"Tania Louis",
"Wikiquote"
);
bookIds = lib.filter(kiwix::Filter().acceptTags({"wikipedia"}).rejectTags({"nopic"}));
EXPECT_EQ(bookIds.size(), 1U);
EXPECT_FILTER_RESULTS(kiwix::Filter().remote(false),
"An example ZIM archive"
);
}
bookIds = lib.filter(kiwix::Filter().query("folklore"));
EXPECT_EQ(bookIds.size(), 1U);
TEST_F(LibraryTest, filterByLanguage)
{
EXPECT_FILTER_RESULTS(kiwix::Filter().lang("eng"),
"Granblue Fantasy Wiki",
"Islam Stack Exchange",
"Movies & TV Stack Exchange",
"Mythology & Folklore Stack Exchange",
"Ray Charles",
"TED talks - Business"
);
bookIds = lib.filter(kiwix::Filter().query("Wiki"));
EXPECT_EQ(bookIds.size(), 4U);
EXPECT_FILTER_RESULTS(kiwix::Filter().query("lang:eng"),
"Granblue Fantasy Wiki",
"Islam Stack Exchange",
"Movies & TV Stack Exchange",
"Mythology & Folklore Stack Exchange",
"Ray Charles",
"TED talks - Business"
);
bookIds = lib.filter(kiwix::Filter().query("Wiki").creator("Wiki"));
EXPECT_EQ(bookIds.size(), 1U);
EXPECT_FILTER_RESULTS(kiwix::Filter().query("eng"),
/* no results */
);
}
// Exercises tag-based filtering: whole-tag matching, case and diacritics
// insensitivity, tags carrying a value, and the combination of accepted and
// rejected tags.
TEST_F(LibraryTest, filterByTags)
{
EXPECT_FILTER_RESULTS(kiwix::Filter().acceptTags({"stackexchange"}),
"Islam Stack Exchange",
"Movies & TV Stack Exchange",
"Mythology & Folklore Stack Exchange"
);
// filtering by tags is case and diacritics insensitive
EXPECT_FILTER_RESULTS(kiwix::Filter().acceptTags({"ståckEXÇhange"}),
"Islam Stack Exchange",
"Movies & TV Stack Exchange",
"Mythology & Folklore Stack Exchange"
);
// filtering by tags requires full match of the search term
EXPECT_FILTER_RESULTS(kiwix::Filter().acceptTags({"stackexch"}),
/* no results */
);
// in tags with values (tag:value form) the value is an inseparable
// part of the tag
EXPECT_FILTER_RESULTS(kiwix::Filter().acceptTags({"_category"}),
/* no results */
);
EXPECT_FILTER_RESULTS(kiwix::Filter().acceptTags({"_category:category_defined_via_tags_only"}),
"Tania Louis"
);
EXPECT_FILTER_RESULTS(kiwix::Filter().acceptTags({"wikipedia"}),
"Encyclopédie de la Tunisie",
"Géographie par Wikipédia",
"Mathématiques",
"Ray Charles"
);
// when several tags are accepted, a book must carry all of them
EXPECT_FILTER_RESULTS(kiwix::Filter().acceptTags({"wikipedia", "nopic"}),
"Géographie par Wikipédia",
"Mathématiques"
);
// a rejected tag excludes the books carrying it
EXPECT_FILTER_RESULTS(kiwix::Filter().acceptTags({"wikipedia"}).rejectTags({"nopic"}),
"Encyclopédie de la Tunisie",
"Ray Charles"
);
}
// Exercises free-text query filtering: the query is matched against titles
// and descriptions, ignores case and diacritics, and is treated as a partial
// (prefix) query unless that is explicitly disabled.
TEST_F(LibraryTest, filterByQuery)
{
// filtering by query checks the title
EXPECT_FILTER_RESULTS(kiwix::Filter().query("Exchange"),
"Islam Stack Exchange",
"Movies & TV Stack Exchange",
"Mythology & Folklore Stack Exchange"
);
// filtering by query checks the description/summary
EXPECT_FILTER_RESULTS(kiwix::Filter().query("enthusiasts"),
"Movies & TV Stack Exchange",
"Mythology & Folklore Stack Exchange"
);
// filtering by query is case insensitive on titles
EXPECT_FILTER_RESULTS(kiwix::Filter().query("ExcHANge"),
"Islam Stack Exchange",
"Movies & TV Stack Exchange",
"Mythology & Folklore Stack Exchange"
);
// filtering by query is diacritics insensitive on titles
EXPECT_FILTER_RESULTS(kiwix::Filter().query("mathematiques"),
"Mathématiques"
);
EXPECT_FILTER_RESULTS(kiwix::Filter().query("èxchângé"),
"Islam Stack Exchange",
"Movies & TV Stack Exchange",
"Mythology & Folklore Stack Exchange"
);
// filtering by query is case insensitive on description/summary
EXPECT_FILTER_RESULTS(kiwix::Filter().query("enTHUSiaSTS"),
"Movies & TV Stack Exchange",
"Mythology & Folklore Stack Exchange"
);
// filtering by query is diacritics insensitive on description/summary
EXPECT_FILTER_RESULTS(kiwix::Filter().query("selection"),
"Géographie par Wikipédia"
);
EXPECT_FILTER_RESULTS(kiwix::Filter().query("enthúsïåsts"),
"Movies & TV Stack Exchange",
"Mythology & Folklore Stack Exchange"
);
// by default, filtering by query assumes partial query
EXPECT_FILTER_RESULTS(kiwix::Filter().query("Wiki"),
"Encyclopédie de la Tunisie",
"Granblue Fantasy Wiki",
"Géographie par Wikipédia",
"Ray Charles",
"Wikiquote"
);
// partial query can be disabled
EXPECT_FILTER_RESULTS(kiwix::Filter().query("Wiki", false),
"Granblue Fantasy Wiki"
);
}
// Exercises filtering by creator: whole-word matching, case and diacritics
// insensitivity, phrase (ordered) matching, and the equivalent "creator:"
// prefix in a free-text query.
TEST_F(LibraryTest, filterByCreator)
{
EXPECT_FILTER_RESULTS(kiwix::Filter().creator("Wikipedia"),
"Encyclopédie de la Tunisie",
"Géographie par Wikipédia",
"Mathématiques",
"Ray Charles"
);
// filtering by creator requires full match of the search term
EXPECT_FILTER_RESULTS(kiwix::Filter().creator("Wiki"),
"Granblue Fantasy Wiki"
);
// filtering by creator is case and diacritics insensitive
EXPECT_FILTER_RESULTS(kiwix::Filter().creator("wIkï"),
"Granblue Fantasy Wiki"
);
// filtering by creator doesn't require a full match of the full creator name
EXPECT_FILTER_RESULTS(kiwix::Filter().creator("Stack"),
"Islam Stack Exchange",
"Movies & TV Stack Exchange",
"Mythology & Folklore Stack Exchange"
);
// filtering by creator requires a full phrase match (ignoring some non-word terms)
EXPECT_FILTER_RESULTS(kiwix::Filter().creator("Movies & TV Stack Exchange"),
"Movies & TV Stack Exchange"
);
EXPECT_FILTER_RESULTS(kiwix::Filter().creator("Movies & TV"),
"Movies & TV Stack Exchange"
);
EXPECT_FILTER_RESULTS(kiwix::Filter().creator("Movies TV"),
"Movies & TV Stack Exchange"
);
EXPECT_FILTER_RESULTS(kiwix::Filter().creator("TV & Movies"),
/* no results */
);
EXPECT_FILTER_RESULTS(kiwix::Filter().creator("TV Movies"),
/* no results */
);
EXPECT_FILTER_RESULTS(kiwix::Filter().query("creator:Wikipedia"),
"Encyclopédie de la Tunisie",
"Géographie par Wikipédia",
"Mathématiques",
"Ray Charles"
);
}
// Exercises filtering by publisher: whole-word and phrase matching, case and
// diacritics insensitivity, and the "publisher:" prefix in a free-text query.
TEST_F(LibraryTest, filterByPublisher)
{
EXPECT_FILTER_RESULTS(kiwix::Filter().publisher("Kiwix"),
"An example ZIM archive",
"Ray Charles"
);
// filtering by publisher requires full match of the search term
EXPECT_FILTER_RESULTS(kiwix::Filter().publisher("Kiwi"),
/* no results */
);
// filtering by publisher requires a full phrase match
EXPECT_FILTER_RESULTS(kiwix::Filter().publisher("Kiwix & Some Enthusiasts"),
"An example ZIM archive"
);
EXPECT_FILTER_RESULTS(kiwix::Filter().publisher("Some Enthusiasts & Kiwix"),
/* no results */
);
// filtering by publisher is case and diacritics insensitive
EXPECT_FILTER_RESULTS(kiwix::Filter().publisher("kîWIx"),
"An example ZIM archive",
"Ray Charles"
);
EXPECT_FILTER_RESULTS(kiwix::Filter().query("publisher:kiwix"),
"An example ZIM archive",
"Ray Charles"
);
// an unprefixed query term is not matched against the publisher
EXPECT_FILTER_RESULTS(kiwix::Filter().query("kiwix"),
/* no results */
);
}
// Exercises filtering by book name, both through Filter::name() and through
// the "name:" prefix in a free-text query.
TEST_F(LibraryTest, filterByName)
{
EXPECT_FILTER_RESULTS(kiwix::Filter().name("wikibooks_de"),
"An example ZIM archive"
);
EXPECT_FILTER_RESULTS(kiwix::Filter().query("name:wikibooks_de"),
"An example ZIM archive"
);
// an unprefixed query term is not matched against the name
EXPECT_FILTER_RESULTS(kiwix::Filter().query("wikibooks_de"),
/* no results */
);
}
// Exercises filtering by category, both through Filter::category() and
// through the "category:" prefix in a free-text query.
TEST_F(LibraryTest, filterByCategory)
{
EXPECT_FILTER_RESULTS(kiwix::Filter().category("category_element_overrides_tags"),
"Géographie par Wikipédia",
"Mathématiques"
);
EXPECT_FILTER_RESULTS(kiwix::Filter().query("category:category_element_overrides_tags"),
"Géographie par Wikipédia",
"Mathématiques"
);
// an unprefixed query term is not matched against the category
EXPECT_FILTER_RESULTS(kiwix::Filter().query("category_element_overrides_tags"),
/* no results */
);
}
// Exercises the maximum-size criterion.
// NOTE(review): the unit of the maxSize() argument relative to the `size`
// attribute of the XML fixture is not visible here - confirm.
TEST_F(LibraryTest, filterByMaxSize)
{
EXPECT_FILTER_RESULTS(kiwix::Filter().maxSize(200000),
"An example ZIM archive"
);
}
// Checks that several criteria set on the same filter are combined
// conjunctively: each additional criterion narrows the previous result.
TEST_F(LibraryTest, filterByMultipleCriteria)
{
EXPECT_FILTER_RESULTS(kiwix::Filter().query("Wiki").creator("Wikipedia"),
"Encyclopédie de la Tunisie",
"Géographie par Wikipédia",
"Ray Charles"
);
EXPECT_FILTER_RESULTS(kiwix::Filter().query("Wiki").creator("Wikipedia").maxSize(100000000UL),
"Encyclopédie de la Tunisie",
"Ray Charles"
);
EXPECT_FILTER_RESULTS(kiwix::Filter().query("Wiki").creator("Wikipedia").maxSize(100000000UL).local(false),
"Encyclopédie de la Tunisie"
);
}
TEST_F(LibraryTest, getBookByPath)
@ -284,33 +637,24 @@ TEST_F(LibraryTest, getBookByPath)
EXPECT_THROW(lib.getBookByPath("non/existant/path.zim"), std::out_of_range);
}
class XmlLibraryTest : public ::testing::Test {
protected:
void SetUp() override {
kiwix::Manager manager(&lib);
manager.readFile( "./test/library.xml", true, true);
}
kiwix::Library lib;
};
TEST_F(XmlLibraryTest, removeBookByIdRemovesTheBook)
TEST_F(LibraryTest, removeBookByIdRemovesTheBook)
{
EXPECT_EQ(3U, lib.getBookCount(true, true));
const auto initialBookCount = lib.getBookCount(true, true);
ASSERT_GT(initialBookCount, 0U);
EXPECT_NO_THROW(lib.getBookById("raycharles"));
lib.removeBookById("raycharles");
EXPECT_EQ(2U, lib.getBookCount(true, true));
EXPECT_EQ(initialBookCount - 1, lib.getBookCount(true, true));
EXPECT_THROW(lib.getBookById("raycharles"), std::out_of_range);
};
TEST_F(XmlLibraryTest, removeBookByIdDropsTheReader)
TEST_F(LibraryTest, removeBookByIdDropsTheReader)
{
EXPECT_NE(nullptr, lib.getReaderById("raycharles"));
lib.removeBookById("raycharles");
EXPECT_THROW(lib.getReaderById("raycharles"), std::out_of_range);
};
TEST_F(XmlLibraryTest, removeBookByIdUpdatesTheSearchDB)
TEST_F(LibraryTest, removeBookByIdUpdatesTheSearchDB)
{
kiwix::Filter f;
f.local(true).valid(true).query(R"(title:"ray charles")", false);