From a759ab989fed1b65603ab515a40abce429e9dac2 Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Mon, 12 Apr 2021 13:05:44 +0400 Subject: [PATCH] Catalog filtering by publisher works via Xapian --- include/library.h | 3 +++ src/library.cpp | 27 +++++++++++++++++++++++---- test/library.cpp | 32 ++++++++++++++++++++++++++++++-- 3 files changed, 56 insertions(+), 6 deletions(-) diff --git a/include/library.h b/include/library.h index 6a2c79d23..66e8b7d2c 100644 --- a/include/library.h +++ b/include/library.h @@ -120,6 +120,9 @@ class Filter { bool hasLang() const; const std::string& getLang() const { return _lang; } + bool hasPublisher() const; + const std::string& getPublisher() const { return _publisher; } + private: friend class Library; diff --git a/src/library.cpp b/src/library.cpp index 230206674..cff560fe2 100644 --- a/src/library.cpp +++ b/src/library.cpp @@ -43,7 +43,7 @@ std::string iso639_3ToXapian(const std::string& lang) { return icu::Locale(lang.c_str()).getLanguage(); }; -std::string normalizeText(const std::string& text, const std::string& language) +std::string normalizeText(const std::string& text) { return removeAccents(text); } @@ -276,15 +276,17 @@ void Library::updateBookDB(const Book& book) Xapian::Document doc; indexer.set_document(doc); - const std::string title = normalizeText(book.getTitle(), lang); - const std::string desc = normalizeText(book.getDescription(), lang); + const std::string title = normalizeText(book.getTitle()); + const std::string desc = normalizeText(book.getDescription()); const std::string name = book.getName(); // this is supposed to be normalized const std::string category = book.getCategory(); // this is supposed to be normalized + const std::string publisher = normalizeText(book.getPublisher()); doc.add_value(0, title); doc.add_value(1, desc); doc.add_value(2, name); doc.add_value(3, category); doc.add_value(4, lang); + doc.add_value(5, publisher); doc.set_data(book.getId()); indexer.index_text(title, 1, "S"); @@ -292,6 +294,7 @@ void Library::updateBookDB(const Book& book) indexer.index_text(name, 1, "XN"); indexer.index_text(category, 1, "XC"); indexer.index_text(lang, 1, "L"); + indexer.index_text(publisher, 1, "XP"); // Index fields without prefixes for general search indexer.index_text(title); @@ -327,6 +330,7 @@ Xapian::Query buildXapianQueryFromFilterQuery(const Filter& filter) queryParser.add_prefix("name", "XN"); queryParser.add_prefix("category", "XC"); queryParser.add_prefix("lang", "L"); + queryParser.add_prefix("publisher", "XP"); const auto partialQueryFlag = filter.queryIsPartial() ? Xapian::QueryParser::FLAG_PARTIAL : 0; @@ -357,6 +361,14 @@ Xapian::Query langQuery(const std::string& lang) return Xapian::Query("L" + lang); } +Xapian::Query publisherQuery(const std::string& publisher) +{ + Xapian::QueryParser queryParser; + queryParser.set_default_op(Xapian::Query::OP_PHRASE); + const auto flags = 0; + return queryParser.parse_query(normalizeText(publisher), flags, "XP"); +} + Xapian::Query buildXapianQuery(const Filter& filter) { auto q = buildXapianQueryFromFilterQuery(filter); @@ -369,6 +381,9 @@ Xapian::Query buildXapianQuery(const Filter& filter) if ( filter.hasLang() ) { q = Xapian::Query(Xapian::Query::OP_AND, q, langQuery(filter.getLang())); } + if ( filter.hasPublisher() ) { + q = Xapian::Query(Xapian::Query::OP_AND, q, publisherQuery(filter.getPublisher())); + } return q; } @@ -676,6 +691,11 @@ bool Filter::hasLang() const return ACTIVE(LANG); } +bool Filter::hasPublisher() const +{ + return ACTIVE(_PUBLISHER); +} + bool Filter::accept(const Book& book) const { auto local = !book.getPath().empty(); @@ -691,7 +711,6 @@ bool Filter::accept(const Book& book) const FILTER(_NOREMOTE, !remote) FILTER(MAXSIZE, book.getSize() <= _maxSize) - FILTER(_PUBLISHER, book.getPublisher() == _publisher) FILTER(_CREATOR, book.getCreator() == _creator) if (ACTIVE(ACCEPTTAGS)) { diff --git a/test/library.cpp b/test/library.cpp index 02314699e..d46891335 100644 --- a/test/library.cpp +++ b/test/library.cpp @@ -206,7 +206,7 @@ const char sampleLibraryXML[] = R"( description="An eXaMpLe book added to the catalog via XML" language="deu" creator="Wikibooks" - publisher="Kiwix" + publisher="Kiwix Enthusiasts" date="2021-04-11" name="wikibooks_de" tags="unittest;wikibooks;_category:wikibooks" @@ -277,7 +277,7 @@ TEST_F(LibraryTest, sanityCheck) EXPECT_EQ(lib.getBookCount(true, true), 12U); EXPECT_EQ(lib.getBooksLanguages().size(), 3U); EXPECT_EQ(lib.getBooksCreators().size(), 9U); - EXPECT_EQ(lib.getBooksPublishers().size(), 2U); + EXPECT_EQ(lib.getBooksPublishers().size(), 3U); } TEST_F(LibraryTest, categoryHandling) @@ -492,6 +492,34 @@ TEST_F(LibraryTest, filterByPublisher) "An example ZIM archive", "Ray Charles" ); + + // filtering by publisher requires full match of the search term + EXPECT_FILTER_RESULTS(kiwix::Filter().publisher("Kiwi"), + /* no results */ + ); + + // filtering by publisher requires a full phrase match + EXPECT_FILTER_RESULTS(kiwix::Filter().publisher("Kiwix Enthusiasts"), + "An example ZIM archive" + ); + EXPECT_FILTER_RESULTS(kiwix::Filter().publisher("Enthusiasts Kiwix"), + /* no results */ + ); + + // filtering by publisher is case and diacritics insensitive + EXPECT_FILTER_RESULTS(kiwix::Filter().publisher("kîWIx"), + "An example ZIM archive", + "Ray Charles" + ); + + EXPECT_FILTER_RESULTS(kiwix::Filter().query("publisher:kiwix"), + "An example ZIM archive", + "Ray Charles" + ); + + EXPECT_FILTER_RESULTS(kiwix::Filter().query("kiwix"), + /* no results */ + ); } TEST_F(LibraryTest, filterByName)