Catalog filtering by publisher works via Xapian

This commit is contained in:
Veloman Yunkan 2021-04-12 13:05:44 +04:00
parent 7ccd9ffcce
commit a759ab989f
3 changed files with 56 additions and 6 deletions

View File

@ -120,6 +120,9 @@ class Filter {
bool hasLang() const; bool hasLang() const;
const std::string& getLang() const { return _lang; } const std::string& getLang() const { return _lang; }
bool hasPublisher() const;
const std::string& getPublisher() const { return _publisher; }
private: private:
friend class Library; friend class Library;

View File

@ -43,7 +43,7 @@ std::string iso639_3ToXapian(const std::string& lang) {
return icu::Locale(lang.c_str()).getLanguage(); return icu::Locale(lang.c_str()).getLanguage();
}; };
std::string normalizeText(const std::string& text, const std::string& language) std::string normalizeText(const std::string& text)
{ {
return removeAccents(text); return removeAccents(text);
} }
@ -276,15 +276,17 @@ void Library::updateBookDB(const Book& book)
Xapian::Document doc; Xapian::Document doc;
indexer.set_document(doc); indexer.set_document(doc);
const std::string title = normalizeText(book.getTitle(), lang); const std::string title = normalizeText(book.getTitle());
const std::string desc = normalizeText(book.getDescription(), lang); const std::string desc = normalizeText(book.getDescription());
const std::string name = book.getName(); // this is supposed to be normalized const std::string name = book.getName(); // this is supposed to be normalized
const std::string category = book.getCategory(); // this is supposed to be normalized const std::string category = book.getCategory(); // this is supposed to be normalized
const std::string publisher = normalizeText(book.getPublisher());
doc.add_value(0, title); doc.add_value(0, title);
doc.add_value(1, desc); doc.add_value(1, desc);
doc.add_value(2, name); doc.add_value(2, name);
doc.add_value(3, category); doc.add_value(3, category);
doc.add_value(4, lang); doc.add_value(4, lang);
doc.add_value(5, publisher);
doc.set_data(book.getId()); doc.set_data(book.getId());
indexer.index_text(title, 1, "S"); indexer.index_text(title, 1, "S");
@ -292,6 +294,7 @@ void Library::updateBookDB(const Book& book)
indexer.index_text(name, 1, "XN"); indexer.index_text(name, 1, "XN");
indexer.index_text(category, 1, "XC"); indexer.index_text(category, 1, "XC");
indexer.index_text(lang, 1, "L"); indexer.index_text(lang, 1, "L");
indexer.index_text(publisher, 1, "XP");
// Index fields without prefixes for general search // Index fields without prefixes for general search
indexer.index_text(title); indexer.index_text(title);
@ -327,6 +330,7 @@ Xapian::Query buildXapianQueryFromFilterQuery(const Filter& filter)
queryParser.add_prefix("name", "XN"); queryParser.add_prefix("name", "XN");
queryParser.add_prefix("category", "XC"); queryParser.add_prefix("category", "XC");
queryParser.add_prefix("lang", "L"); queryParser.add_prefix("lang", "L");
queryParser.add_prefix("publisher", "XP");
const auto partialQueryFlag = filter.queryIsPartial() const auto partialQueryFlag = filter.queryIsPartial()
? Xapian::QueryParser::FLAG_PARTIAL ? Xapian::QueryParser::FLAG_PARTIAL
: 0; : 0;
@ -357,6 +361,14 @@ Xapian::Query langQuery(const std::string& lang)
return Xapian::Query("L" + lang); return Xapian::Query("L" + lang);
} }
// Builds the Xapian query used to filter the catalog by publisher.
//
// The publisher text is first passed through normalizeText(). It is then
// parsed with OP_PHRASE as the default operator and with no query parser
// flags enabled, using "XP" as the default term prefix.
Xapian::Query publisherQuery(const std::string& publisher)
{
  Xapian::QueryParser parser;
  parser.set_default_op(Xapian::Query::OP_PHRASE);

  const std::string normalizedPublisher = normalizeText(publisher);
  const unsigned noParserFlags = 0;
  return parser.parse_query(normalizedPublisher, noParserFlags, "XP");
}
Xapian::Query buildXapianQuery(const Filter& filter) Xapian::Query buildXapianQuery(const Filter& filter)
{ {
auto q = buildXapianQueryFromFilterQuery(filter); auto q = buildXapianQueryFromFilterQuery(filter);
@ -369,6 +381,9 @@ Xapian::Query buildXapianQuery(const Filter& filter)
if ( filter.hasLang() ) { if ( filter.hasLang() ) {
q = Xapian::Query(Xapian::Query::OP_AND, q, langQuery(filter.getLang())); q = Xapian::Query(Xapian::Query::OP_AND, q, langQuery(filter.getLang()));
} }
if ( filter.hasPublisher() ) {
q = Xapian::Query(Xapian::Query::OP_AND, q, publisherQuery(filter.getPublisher()));
}
return q; return q;
} }
@ -676,6 +691,11 @@ bool Filter::hasLang() const
return ACTIVE(LANG); return ACTIVE(LANG);
} }
// Tells whether the _PUBLISHER filter flag is active for this filter,
// as reported by the ACTIVE() macro.
bool Filter::hasPublisher() const
{
  const bool publisherFilterActive = ACTIVE(_PUBLISHER);
  return publisherFilterActive;
}
bool Filter::accept(const Book& book) const bool Filter::accept(const Book& book) const
{ {
auto local = !book.getPath().empty(); auto local = !book.getPath().empty();
@ -691,7 +711,6 @@ bool Filter::accept(const Book& book) const
FILTER(_NOREMOTE, !remote) FILTER(_NOREMOTE, !remote)
FILTER(MAXSIZE, book.getSize() <= _maxSize) FILTER(MAXSIZE, book.getSize() <= _maxSize)
FILTER(_PUBLISHER, book.getPublisher() == _publisher)
FILTER(_CREATOR, book.getCreator() == _creator) FILTER(_CREATOR, book.getCreator() == _creator)
if (ACTIVE(ACCEPTTAGS)) { if (ACTIVE(ACCEPTTAGS)) {

View File

@ -206,7 +206,7 @@ const char sampleLibraryXML[] = R"(
description="An eXaMpLe book added to the catalog via XML" description="An eXaMpLe book added to the catalog via XML"
language="deu" language="deu"
creator="Wikibooks" creator="Wikibooks"
publisher="Kiwix" publisher="Kiwix Enthusiasts"
date="2021-04-11" date="2021-04-11"
name="wikibooks_de" name="wikibooks_de"
tags="unittest;wikibooks;_category:wikibooks" tags="unittest;wikibooks;_category:wikibooks"
@ -277,7 +277,7 @@ TEST_F(LibraryTest, sanityCheck)
EXPECT_EQ(lib.getBookCount(true, true), 12U); EXPECT_EQ(lib.getBookCount(true, true), 12U);
EXPECT_EQ(lib.getBooksLanguages().size(), 3U); EXPECT_EQ(lib.getBooksLanguages().size(), 3U);
EXPECT_EQ(lib.getBooksCreators().size(), 9U); EXPECT_EQ(lib.getBooksCreators().size(), 9U);
EXPECT_EQ(lib.getBooksPublishers().size(), 2U); EXPECT_EQ(lib.getBooksPublishers().size(), 3U);
} }
TEST_F(LibraryTest, categoryHandling) TEST_F(LibraryTest, categoryHandling)
@ -492,6 +492,34 @@ TEST_F(LibraryTest, filterByPublisher)
"An example ZIM archive", "An example ZIM archive",
"Ray Charles" "Ray Charles"
); );
// filtering by publisher requires full match of the search term
EXPECT_FILTER_RESULTS(kiwix::Filter().publisher("Kiwi"),
/* no results */
);
// filtering by publisher requires a full phrase match
EXPECT_FILTER_RESULTS(kiwix::Filter().publisher("Kiwix Enthusiasts"),
"An example ZIM archive"
);
EXPECT_FILTER_RESULTS(kiwix::Filter().publisher("Enthusiasts Kiwix"),
/* no results */
);
// filtering by publisher is case and diacritics insensitive
EXPECT_FILTER_RESULTS(kiwix::Filter().publisher("kîWIx"),
"An example ZIM archive",
"Ray Charles"
);
EXPECT_FILTER_RESULTS(kiwix::Filter().query("publisher:kiwix"),
"An example ZIM archive",
"Ray Charles"
);
EXPECT_FILTER_RESULTS(kiwix::Filter().query("kiwix"),
/* no results */
);
} }
TEST_F(LibraryTest, filterByName) TEST_F(LibraryTest, filterByName)