From db3e0d7f72d770472dc35eb8e9389c8f671bd7c0 Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Fri, 9 Apr 2021 23:20:18 +0400 Subject: [PATCH 01/32] Enhanced the LibraryTest.filterCheck unit-test Now the LibraryTest.filterCheck unit-test validates the actual entries returned by `Library::filter` (previously only the count of the results was checked). --- test/library.cpp | 74 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 66 insertions(+), 8 deletions(-) diff --git a/test/library.cpp b/test/library.cpp index c19514163..9dcaf98fe 100644 --- a/test/library.cpp +++ b/test/library.cpp @@ -190,6 +190,9 @@ namespace class LibraryTest : public ::testing::Test { protected: + typedef kiwix::Library::BookIdCollection BookIdCollection; + typedef std::vector TitleCollection; + void SetUp() override { kiwix::Manager manager(&lib); manager.readOpds(sampleOpdsStream, "foo.urlHost"); @@ -201,6 +204,15 @@ class LibraryTest : public ::testing::Test { return bookmark; }; + TitleCollection ids2Titles(const BookIdCollection& ids) { + TitleCollection titles; + for ( const auto& bookId : ids ) { + titles.push_back(lib.getBookById(bookId).getTitle()); + } + std::sort(titles.begin(), titles.end()); + return titles; + } + kiwix::Library lib; }; @@ -246,28 +258,74 @@ TEST_F(LibraryTest, filterCheck) EXPECT_EQ(bookIds, lib.getBooksIds()); bookIds = lib.filter(kiwix::Filter().lang("eng")); - EXPECT_EQ(bookIds.size(), 5U); + EXPECT_EQ(ids2Titles(bookIds), + TitleCollection({ + "Granblue Fantasy Wiki", + "Islam Stack Exchange", + "Movies & TV Stack Exchange", + "Mythology & Folklore Stack Exchange", + "TED talks - Business" + }) + ); bookIds = lib.filter(kiwix::Filter().acceptTags({"stackexchange"})); - EXPECT_EQ(bookIds.size(), 3U); + EXPECT_EQ(ids2Titles(bookIds), + TitleCollection({ + "Islam Stack Exchange", + "Movies & TV Stack Exchange", + "Mythology & Folklore Stack Exchange" + }) + ); bookIds = lib.filter(kiwix::Filter().acceptTags({"wikipedia"})); - EXPECT_EQ(bookIds.size(), 3U); + EXPECT_EQ(ids2Titles(bookIds), + TitleCollection({ + "Encyclopédie de la Tunisie", + "Géographie par Wikipédia", + "Mathématiques" + }) + ); bookIds = lib.filter(kiwix::Filter().acceptTags({"wikipedia", "nopic"})); - EXPECT_EQ(bookIds.size(), 2U); + EXPECT_EQ(ids2Titles(bookIds), + TitleCollection({ + "Géographie par Wikipédia", + "Mathématiques" + }) + ); bookIds = lib.filter(kiwix::Filter().acceptTags({"wikipedia"}).rejectTags({"nopic"})); - EXPECT_EQ(bookIds.size(), 1U); + EXPECT_EQ(ids2Titles(bookIds), + TitleCollection({ + "Encyclopédie de la Tunisie" + }) + ); bookIds = lib.filter(kiwix::Filter().query("folklore")); - EXPECT_EQ(bookIds.size(), 1U); + EXPECT_EQ(ids2Titles(bookIds), + TitleCollection({ + "Mythology & Folklore Stack Exchange" + }) + ); + bookIds = lib.filter(kiwix::Filter().query("Wiki")); - EXPECT_EQ(bookIds.size(), 4U); + EXPECT_EQ(ids2Titles(bookIds), + TitleCollection({ + "Encyclopédie de la Tunisie", + "Granblue Fantasy Wiki", + "Géographie par Wikipédia", + "Wikiquote" + }) + ); + bookIds = lib.filter(kiwix::Filter().query("Wiki").creator("Wiki")); - EXPECT_EQ(bookIds.size(), 1U); + EXPECT_EQ(ids2Titles(bookIds), + TitleCollection({ + "Granblue Fantasy Wiki" + }) + ); } From 8c18a379618986878712a20beb670570e54776f0 Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Sat, 10 Apr 2021 23:54:41 +0400 Subject: [PATCH 02/32] Split LibraryTest.filterCheck into several tests --- test/library.cpp | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/test/library.cpp b/test/library.cpp index 9dcaf98fe..d9d9e6f9f 100644 --- a/test/library.cpp +++ b/test/library.cpp @@ -252,12 +252,15 @@ TEST_F(LibraryTest, categoryHandling) EXPECT_EQ("category_element_overrides_tags", lib.getBookById("14829621-c490-c376-0792-9de558b57efa").getCategory()); } -TEST_F(LibraryTest, filterCheck) +TEST_F(LibraryTest, emptyFilter) { - auto bookIds = lib.filter(kiwix::Filter()); + const auto bookIds = lib.filter(kiwix::Filter()); EXPECT_EQ(bookIds, lib.getBooksIds()); +} - bookIds = lib.filter(kiwix::Filter().lang("eng")); +TEST_F(LibraryTest, filterByLanguage) +{ + const auto bookIds = lib.filter(kiwix::Filter().lang("eng")); EXPECT_EQ(ids2Titles(bookIds), TitleCollection({ "Granblue Fantasy Wiki", @@ -267,8 +270,11 @@ TEST_F(LibraryTest, filterCheck) "TED talks - Business" }) ); +} - bookIds = lib.filter(kiwix::Filter().acceptTags({"stackexchange"})); +TEST_F(LibraryTest, filterByTags) +{ + auto bookIds = lib.filter(kiwix::Filter().acceptTags({"stackexchange"})); EXPECT_EQ(ids2Titles(bookIds), TitleCollection({ "Islam Stack Exchange", @@ -300,8 +306,12 @@ TEST_F(LibraryTest, filterCheck) "Encyclopédie de la Tunisie" }) ); +} - bookIds = lib.filter(kiwix::Filter().query("folklore")); + +TEST_F(LibraryTest, filterByQuery) +{ + auto bookIds = lib.filter(kiwix::Filter().query("folklore")); EXPECT_EQ(ids2Titles(bookIds), TitleCollection({ "Mythology & Folklore Stack Exchange" @@ -318,9 +328,12 @@ TEST_F(LibraryTest, filterCheck) "Wikiquote" }) ); +} - bookIds = lib.filter(kiwix::Filter().query("Wiki").creator("Wiki")); +TEST_F(LibraryTest, filterByMultipleCriteria) +{ + auto bookIds = lib.filter(kiwix::Filter().query("Wiki").creator("Wiki")); EXPECT_EQ(ids2Titles(bookIds), TitleCollection({ "Granblue Fantasy Wiki" From 8c810d2d2fb4b56f38c14c685cbeb7051811961c Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Sat, 10 Apr 2021 23:56:43 +0400 Subject: [PATCH 03/32] Enhanced the LibraryTest.filterByQuery unit-test --- test/library.cpp | 41 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/test/library.cpp b/test/library.cpp index d9d9e6f9f..e6f72e48a 100644 --- a/test/library.cpp +++ b/test/library.cpp @@ -311,14 +311,45 @@ TEST_F(LibraryTest, filterByTags) TEST_F(LibraryTest, filterByQuery) { - auto bookIds = lib.filter(kiwix::Filter().query("folklore")); + // filtering by query checks the title + auto bookIds = lib.filter(kiwix::Filter().query("Exchange")); EXPECT_EQ(ids2Titles(bookIds), TitleCollection({ + "Islam Stack Exchange", + "Movies & TV Stack Exchange", "Mythology & Folklore Stack Exchange" }) ); + // filtering by query checks the description/summary + bookIds = lib.filter(kiwix::Filter().query("enthusiasts")); + EXPECT_EQ(ids2Titles(bookIds), + TitleCollection({ + "Movies & TV Stack Exchange", + "Mythology & Folklore Stack Exchange" + }) + ); + // filtering by query is case insensitive on titles + bookIds = lib.filter(kiwix::Filter().query("ExcHANge")); + EXPECT_EQ(ids2Titles(bookIds), + TitleCollection({ + "Islam Stack Exchange", + "Movies & TV Stack Exchange", + "Mythology & Folklore Stack Exchange" + }) + ); + + // filtering by query is case insensitive on description/summary + bookIds = lib.filter(kiwix::Filter().query("enTHUSiaSTS")); + EXPECT_EQ(ids2Titles(bookIds), + TitleCollection({ + "Movies & TV Stack Exchange", + "Mythology & Folklore Stack Exchange" + }) + ); + + // by default, filtering by query assumes partial query bookIds = lib.filter(kiwix::Filter().query("Wiki")); EXPECT_EQ(ids2Titles(bookIds), TitleCollection({ @@ -328,6 +359,14 @@ TEST_F(LibraryTest, filterByQuery) "Wikiquote" }) ); + + // partial query can be disabled + bookIds = lib.filter(kiwix::Filter().query("Wiki", false)); + EXPECT_EQ(ids2Titles(bookIds), + TitleCollection({ + "Granblue Fantasy Wiki" + }) + ); } From 068f7e5e956463ba5994cd56c324f4cbeaf88c21 Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Sun, 11 Apr 2021 00:21:28 +0400 Subject: [PATCH 04/32] New unit-test LibraryTest.filterByCreator --- test/library.cpp | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/test/library.cpp b/test/library.cpp index e6f72e48a..f7b9b385c 100644 --- a/test/library.cpp +++ b/test/library.cpp @@ -370,6 +370,37 @@ TEST_F(LibraryTest, filterByQuery) } +TEST_F(LibraryTest, filterByCreator) +{ + auto bookIds = lib.filter(kiwix::Filter().creator("Wikipedia")); + EXPECT_EQ(ids2Titles(bookIds), + TitleCollection({ + "Encyclopédie de la Tunisie", + "Géographie par Wikipédia", + "Mathématiques" + }) + ); + + // filtering by creator requires full match of the search term + EXPECT_EQ(ids2Titles(lib.filter(kiwix::Filter().creator("Wiki"))), + TitleCollection({"Granblue Fantasy Wiki"}) + ); + + // filtering by creator is case sensitive + EXPECT_EQ(ids2Titles(lib.filter(kiwix::Filter().creator("wiki"))), + TitleCollection({}) + ); + + // filtering by creator requires full match of the full author/creator name + EXPECT_EQ(ids2Titles(lib.filter(kiwix::Filter().creator("Stack"))), + TitleCollection({}) + ); + EXPECT_EQ(ids2Titles(lib.filter(kiwix::Filter().creator("Movies & TV Stack Exchange"))), + TitleCollection({"Movies & TV Stack Exchange"}) + ); +} + + TEST_F(LibraryTest, filterByMultipleCriteria) { auto bookIds = lib.filter(kiwix::Filter().query("Wiki").creator("Wiki")); From 0f277ffa34981814f4f8dab0cfafea0bf8f2ffdc Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Sun, 11 Apr 2021 00:54:48 +0400 Subject: [PATCH 05/32] Enhanced the LibraryTest.filterByTags unit-test --- test/library.cpp | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/test/library.cpp b/test/library.cpp index f7b9b385c..55be4931c 100644 --- a/test/library.cpp +++ b/test/library.cpp @@ -283,6 +283,27 @@ TEST_F(LibraryTest, filterByTags) }) ); + // filtering by tags is case sensitive + bookIds = lib.filter(kiwix::Filter().acceptTags({"stackEXChange"})); + EXPECT_EQ(ids2Titles(bookIds), + TitleCollection({}) + ); + + // filtering by tags requires full match of the search term + bookIds = lib.filter(kiwix::Filter().acceptTags({"stackexch"})); + EXPECT_EQ(ids2Titles(bookIds), + TitleCollection({}) + ); + + // in tags with values (tag:value form) the value is an inseparable + // part of the tag + EXPECT_EQ(ids2Titles(lib.filter(kiwix::Filter().acceptTags({"_category"}))), + TitleCollection({}) + ); + EXPECT_EQ(ids2Titles(lib.filter(kiwix::Filter().acceptTags({"_category:category_defined_via_tags_only"}))), + TitleCollection({"Tania Louis"}) + ); + bookIds = lib.filter(kiwix::Filter().acceptTags({"wikipedia"})); EXPECT_EQ(ids2Titles(bookIds), TitleCollection({ From 22b8625033397fb9c57743e837bf395ebd61a6bd Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Sun, 11 Apr 2021 13:52:19 +0400 Subject: [PATCH 06/32] Enter EXPECT_FILTER_RESULTS() This diff is easier to view if whitespace change is ignored. --- test/library.cpp | 171 +++++++++++++++++++---------------------------- 1 file changed, 68 insertions(+), 103 deletions(-) diff --git a/test/library.cpp b/test/library.cpp index 55be4931c..ad0bca605 100644 --- a/test/library.cpp +++ b/test/library.cpp @@ -258,74 +258,63 @@ TEST_F(LibraryTest, emptyFilter) EXPECT_EQ(bookIds, lib.getBooksIds()); } +#define EXPECT_FILTER_RESULTS(f, ...) \ + EXPECT_EQ( \ + ids2Titles(lib.filter(f)), \ + TitleCollection({ __VA_ARGS__ }) \ + ) + TEST_F(LibraryTest, filterByLanguage) { - const auto bookIds = lib.filter(kiwix::Filter().lang("eng")); - EXPECT_EQ(ids2Titles(bookIds), - TitleCollection({ - "Granblue Fantasy Wiki", - "Islam Stack Exchange", - "Movies & TV Stack Exchange", - "Mythology & Folklore Stack Exchange", - "TED talks - Business" - }) + EXPECT_FILTER_RESULTS(kiwix::Filter().lang("eng"), + "Granblue Fantasy Wiki", + "Islam Stack Exchange", + "Movies & TV Stack Exchange", + "Mythology & Folklore Stack Exchange", + "TED talks - Business" ); } TEST_F(LibraryTest, filterByTags) { - auto bookIds = lib.filter(kiwix::Filter().acceptTags({"stackexchange"})); - EXPECT_EQ(ids2Titles(bookIds), - TitleCollection({ - "Islam Stack Exchange", - "Movies & TV Stack Exchange", - "Mythology & Folklore Stack Exchange" - }) + EXPECT_FILTER_RESULTS(kiwix::Filter().acceptTags({"stackexchange"}), + "Islam Stack Exchange", + "Movies & TV Stack Exchange", + "Mythology & Folklore Stack Exchange" ); // filtering by tags is case sensitive - bookIds = lib.filter(kiwix::Filter().acceptTags({"stackEXChange"})); - EXPECT_EQ(ids2Titles(bookIds), - TitleCollection({}) + EXPECT_FILTER_RESULTS(kiwix::Filter().acceptTags({"stackEXChange"}), + /* no results */ ); // filtering by tags requires full match of the search term - bookIds = lib.filter(kiwix::Filter().acceptTags({"stackexch"})); - EXPECT_EQ(ids2Titles(bookIds), - TitleCollection({}) + EXPECT_FILTER_RESULTS(kiwix::Filter().acceptTags({"stackexch"}), + /* no results */ ); // in tags with values (tag:value form) the value is an inseparable // part of the tag - EXPECT_EQ(ids2Titles(lib.filter(kiwix::Filter().acceptTags({"_category"}))), - TitleCollection({}) + EXPECT_FILTER_RESULTS(kiwix::Filter().acceptTags({"_category"}), + /* no results */ ); - EXPECT_EQ(ids2Titles(lib.filter(kiwix::Filter().acceptTags({"_category:category_defined_via_tags_only"}))), - TitleCollection({"Tania Louis"}) + EXPECT_FILTER_RESULTS(kiwix::Filter().acceptTags({"_category:category_defined_via_tags_only"}), + "Tania Louis" ); - bookIds = lib.filter(kiwix::Filter().acceptTags({"wikipedia"})); - EXPECT_EQ(ids2Titles(bookIds), - TitleCollection({ - "Encyclopédie de la Tunisie", - "Géographie par Wikipédia", - "Mathématiques" - }) + EXPECT_FILTER_RESULTS(kiwix::Filter().acceptTags({"wikipedia"}), + "Encyclopédie de la Tunisie", + "Géographie par Wikipédia", + "Mathématiques" ); - bookIds = lib.filter(kiwix::Filter().acceptTags({"wikipedia", "nopic"})); - EXPECT_EQ(ids2Titles(bookIds), - TitleCollection({ - "Géographie par Wikipédia", - "Mathématiques" - }) + EXPECT_FILTER_RESULTS(kiwix::Filter().acceptTags({"wikipedia", "nopic"}), + "Géographie par Wikipédia", + "Mathématiques" ); - bookIds = lib.filter(kiwix::Filter().acceptTags({"wikipedia"}).rejectTags({"nopic"})); - EXPECT_EQ(ids2Titles(bookIds), - TitleCollection({ - "Encyclopédie de la Tunisie" - }) + EXPECT_FILTER_RESULTS(kiwix::Filter().acceptTags({"wikipedia"}).rejectTags({"nopic"}), + "Encyclopédie de la Tunisie" ); } @@ -333,102 +322,78 @@ TEST_F(LibraryTest, filterByTags) TEST_F(LibraryTest, filterByQuery) { // filtering by query checks the title - auto bookIds = lib.filter(kiwix::Filter().query("Exchange")); - EXPECT_EQ(ids2Titles(bookIds), - TitleCollection({ - "Islam Stack Exchange", - "Movies & TV Stack Exchange", - "Mythology & Folklore Stack Exchange" - }) + EXPECT_FILTER_RESULTS(kiwix::Filter().query("Exchange"), + "Islam Stack Exchange", + "Movies & TV Stack Exchange", + "Mythology & Folklore Stack Exchange" ); // filtering by query checks the description/summary - bookIds = lib.filter(kiwix::Filter().query("enthusiasts")); - EXPECT_EQ(ids2Titles(bookIds), - TitleCollection({ - "Movies & TV Stack Exchange", - "Mythology & Folklore Stack Exchange" - }) + EXPECT_FILTER_RESULTS(kiwix::Filter().query("enthusiasts"), + "Movies & TV Stack Exchange", + "Mythology & Folklore Stack Exchange" ); // filtering by query is case insensitive on titles - bookIds = lib.filter(kiwix::Filter().query("ExcHANge")); - EXPECT_EQ(ids2Titles(bookIds), - TitleCollection({ - "Islam Stack Exchange", - "Movies & TV Stack Exchange", - "Mythology & Folklore Stack Exchange" - }) + EXPECT_FILTER_RESULTS(kiwix::Filter().query("ExcHANge"), + "Islam Stack Exchange", + "Movies & TV Stack Exchange", + "Mythology & Folklore Stack Exchange" ); // filtering by query is case insensitive on description/summary - bookIds = lib.filter(kiwix::Filter().query("enTHUSiaSTS")); - EXPECT_EQ(ids2Titles(bookIds), - TitleCollection({ - "Movies & TV Stack Exchange", - "Mythology & Folklore Stack Exchange" - }) + EXPECT_FILTER_RESULTS(kiwix::Filter().query("enTHUSiaSTS"), + "Movies & TV Stack Exchange", + "Mythology & Folklore Stack Exchange" ); // by default, filtering by query assumes partial query - bookIds = lib.filter(kiwix::Filter().query("Wiki")); - EXPECT_EQ(ids2Titles(bookIds), - TitleCollection({ - "Encyclopédie de la Tunisie", - "Granblue Fantasy Wiki", - "Géographie par Wikipédia", - "Wikiquote" - }) + EXPECT_FILTER_RESULTS(kiwix::Filter().query("Wiki"), + "Encyclopédie de la Tunisie", + "Granblue Fantasy Wiki", + "Géographie par Wikipédia", + "Wikiquote" ); // partial query can be disabled - bookIds = lib.filter(kiwix::Filter().query("Wiki", false)); - EXPECT_EQ(ids2Titles(bookIds), - TitleCollection({ - "Granblue Fantasy Wiki" - }) + EXPECT_FILTER_RESULTS(kiwix::Filter().query("Wiki", false), + "Granblue Fantasy Wiki" ); } TEST_F(LibraryTest, filterByCreator) { - auto bookIds = lib.filter(kiwix::Filter().creator("Wikipedia")); - EXPECT_EQ(ids2Titles(bookIds), - TitleCollection({ - "Encyclopédie de la Tunisie", - "Géographie par Wikipédia", - "Mathématiques" - }) + EXPECT_FILTER_RESULTS(kiwix::Filter().creator("Wikipedia"), + "Encyclopédie de la Tunisie", + "Géographie par Wikipédia", + "Mathématiques" ); // filtering by creator requires full match of the search term - EXPECT_EQ(ids2Titles(lib.filter(kiwix::Filter().creator("Wiki"))), - TitleCollection({"Granblue Fantasy Wiki"}) + EXPECT_FILTER_RESULTS(kiwix::Filter().creator("Wiki"), + "Granblue Fantasy Wiki" ); // filtering by creator is case sensitive - EXPECT_EQ(ids2Titles(lib.filter(kiwix::Filter().creator("wiki"))), - TitleCollection({}) + EXPECT_FILTER_RESULTS(kiwix::Filter().creator("wiki"), + /* no results */ ); // filtering by creator requires full match of the full author/creator name - EXPECT_EQ(ids2Titles(lib.filter(kiwix::Filter().creator("Stack"))), - TitleCollection({}) + EXPECT_FILTER_RESULTS(kiwix::Filter().creator("Stack"), + /* no results */ ); - EXPECT_EQ(ids2Titles(lib.filter(kiwix::Filter().creator("Movies & TV Stack Exchange"))), - TitleCollection({"Movies & TV Stack Exchange"}) + EXPECT_FILTER_RESULTS(kiwix::Filter().creator("Movies & TV Stack Exchange"), + "Movies & TV Stack Exchange" ); } TEST_F(LibraryTest, filterByMultipleCriteria) { - auto bookIds = lib.filter(kiwix::Filter().query("Wiki").creator("Wiki")); - EXPECT_EQ(ids2Titles(bookIds), - TitleCollection({ - "Granblue Fantasy Wiki" - }) + EXPECT_FILTER_RESULTS(kiwix::Filter().query("Wiki").creator("Wiki"), + "Granblue Fantasy Wiki" ); } From d8fe593f59e3c38087bca172eec4030a5260b285 Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Sun, 11 Apr 2021 15:32:16 +0400 Subject: [PATCH 07/32] Extended the unit-test library with 2 XML entries --- test/library.cpp | 57 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 50 insertions(+), 7 deletions(-) diff --git a/test/library.cpp b/test/library.cpp index ad0bca605..1040f25f9 100644 --- a/test/library.cpp +++ b/test/library.cpp @@ -181,6 +181,43 @@ const char * sampleOpdsStream = R"( )"; +const char sampleLibraryXML[] = R"( + + + + +)"; + #include "../include/library.h" #include "../include/manager.h" #include "../include/bookmark.h" @@ -196,6 +233,7 @@ class LibraryTest : public ::testing::Test { void SetUp() override { kiwix::Manager manager(&lib); manager.readOpds(sampleOpdsStream, "foo.urlHost"); + manager.readXml(sampleLibraryXML, true, "/data/library.xml", true); } kiwix::Bookmark createBookmark(const std::string &id) { @@ -237,10 +275,10 @@ TEST_F(LibraryTest, getBookMarksTest) TEST_F(LibraryTest, sanityCheck) { - EXPECT_EQ(lib.getBookCount(true, true), 10U); - EXPECT_EQ(lib.getBooksLanguages().size(), 2U); - EXPECT_EQ(lib.getBooksCreators().size(), 8U); - EXPECT_EQ(lib.getBooksPublishers().size(), 1U); + EXPECT_EQ(lib.getBookCount(true, true), 12U); + EXPECT_EQ(lib.getBooksLanguages().size(), 3U); + EXPECT_EQ(lib.getBooksCreators().size(), 9U); + EXPECT_EQ(lib.getBooksPublishers().size(), 2U); } TEST_F(LibraryTest, categoryHandling) @@ -271,6 +309,7 @@ TEST_F(LibraryTest, filterByLanguage) "Islam Stack Exchange", "Movies & TV Stack Exchange", "Mythology & Folklore Stack Exchange", + "Ray Charles", "TED talks - Business" ); } @@ -305,7 +344,8 @@ TEST_F(LibraryTest, filterByTags) EXPECT_FILTER_RESULTS(kiwix::Filter().acceptTags({"wikipedia"}), "Encyclopédie de la Tunisie", "Géographie par Wikipédia", - "Mathématiques" + "Mathématiques", + "Ray Charles" ); EXPECT_FILTER_RESULTS(kiwix::Filter().acceptTags({"wikipedia", "nopic"}), @@ -314,7 +354,8 @@ TEST_F(LibraryTest, filterByTags) ); EXPECT_FILTER_RESULTS(kiwix::Filter().acceptTags({"wikipedia"}).rejectTags({"nopic"}), - "Encyclopédie de la Tunisie" + "Encyclopédie de la Tunisie", + "Ray Charles" ); } @@ -352,6 +393,7 @@ TEST_F(LibraryTest, filterByQuery) "Encyclopédie de la Tunisie", "Granblue Fantasy Wiki", "Géographie par Wikipédia", + "Ray Charles", "Wikiquote" ); @@ -367,7 +409,8 @@ TEST_F(LibraryTest, filterByCreator) EXPECT_FILTER_RESULTS(kiwix::Filter().creator("Wikipedia"), "Encyclopédie de la Tunisie", "Géographie par Wikipédia", - "Mathématiques" + "Mathématiques", + "Ray Charles" ); // filtering by creator requires full match of the search term From f063d350c692fa35ddbb88834851fe2e5ef77227 Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Sun, 11 Apr 2021 15:40:14 +0400 Subject: [PATCH 08/32] LibraryTest.{filterLocal,filterRemote} --- test/library.cpp | 43 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/test/library.cpp b/test/library.cpp index 1040f25f9..c6b04d3a7 100644 --- a/test/library.cpp +++ b/test/library.cpp @@ -202,7 +202,6 @@ const char sampleLibraryXML[] = R"( Date: Sun, 11 Apr 2021 15:45:21 +0400 Subject: [PATCH 09/32] LibraryTest.filterByPublisher --- test/library.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/test/library.cpp b/test/library.cpp index c6b04d3a7..296c2c9b9 100644 --- a/test/library.cpp +++ b/test/library.cpp @@ -473,6 +473,13 @@ TEST_F(LibraryTest, filterByCreator) ); } +TEST_F(LibraryTest, filterByPublisher) +{ + EXPECT_FILTER_RESULTS(kiwix::Filter().publisher("Kiwix"), + "An example ZIM archive", + "Ray Charles" + ); +} TEST_F(LibraryTest, filterByMultipleCriteria) { From cdd272fc5a985a4b09ded07ba6570d70cc183fa8 Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Sun, 11 Apr 2021 15:45:56 +0400 Subject: [PATCH 10/32] LibraryTest.filterByName --- test/library.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/test/library.cpp b/test/library.cpp index 296c2c9b9..556131e90 100644 --- a/test/library.cpp +++ b/test/library.cpp @@ -481,6 +481,13 @@ TEST_F(LibraryTest, filterByPublisher) ); } +TEST_F(LibraryTest, filterByName) +{ + EXPECT_FILTER_RESULTS(kiwix::Filter().name("wikibooks_de"), + "An example ZIM archive" + ); +} + TEST_F(LibraryTest, filterByMultipleCriteria) { EXPECT_FILTER_RESULTS(kiwix::Filter().query("Wiki").creator("Wiki"), From 95c354a5fa9098fac7117998df59b3cabe35a8b9 Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Sun, 11 Apr 2021 23:45:40 +0400 Subject: [PATCH 11/32] LibraryTest.filterByCategory --- test/library.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/test/library.cpp b/test/library.cpp index 556131e90..a84f51448 100644 --- a/test/library.cpp +++ b/test/library.cpp @@ -488,6 +488,14 @@ TEST_F(LibraryTest, filterByName) ); } +TEST_F(LibraryTest, filterByCategory) +{ + EXPECT_FILTER_RESULTS(kiwix::Filter().category("category_element_overrides_tags"), + "Géographie par Wikipédia", + "Mathématiques" + ); +} + TEST_F(LibraryTest, filterByMultipleCriteria) { EXPECT_FILTER_RESULTS(kiwix::Filter().query("Wiki").creator("Wiki"), From b9be7420857e4a6c985c155551dd86de89d2313f Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Sun, 11 Apr 2021 16:01:09 +0400 Subject: [PATCH 12/32] LibraryTest.filterByMaxSize --- test/library.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/test/library.cpp b/test/library.cpp index a84f51448..e7c31151c 100644 --- a/test/library.cpp +++ b/test/library.cpp @@ -496,6 +496,13 @@ TEST_F(LibraryTest, filterByCategory) ); } +TEST_F(LibraryTest, filterByMaxSize) +{ + EXPECT_FILTER_RESULTS(kiwix::Filter().maxSize(200000), + "An example ZIM archive" + ); +} + TEST_F(LibraryTest, filterByMultipleCriteria) { EXPECT_FILTER_RESULTS(kiwix::Filter().query("Wiki").creator("Wiki"), From 2f3f1a485917da999a7ef5a3ee23881b7c241175 Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Sun, 11 Apr 2021 19:22:31 +0400 Subject: [PATCH 13/32] Improved LibraryTest.filterByMultipleCriteria --- test/library.cpp | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/test/library.cpp b/test/library.cpp index e7c31151c..0bbbe1816 100644 --- a/test/library.cpp +++ b/test/library.cpp @@ -505,10 +505,20 @@ TEST_F(LibraryTest, filterByMaxSize) TEST_F(LibraryTest, filterByMultipleCriteria) { - EXPECT_FILTER_RESULTS(kiwix::Filter().query("Wiki").creator("Wiki"), - "Granblue Fantasy Wiki" + EXPECT_FILTER_RESULTS(kiwix::Filter().query("Wiki").creator("Wikipedia"), + "Encyclopédie de la Tunisie", + "Géographie par Wikipédia", + "Ray Charles" ); + EXPECT_FILTER_RESULTS(kiwix::Filter().query("Wiki").creator("Wikipedia").maxSize(100000000UL), + "Encyclopédie de la Tunisie", + "Ray Charles" + ); + + EXPECT_FILTER_RESULTS(kiwix::Filter().query("Wiki").creator("Wikipedia").maxSize(100000000UL).local(false), + "Encyclopédie de la Tunisie" + ); } TEST_F(LibraryTest, getBookByPath) From 29a6a34ecf161dd87161beffcef8cc4713b61a44 Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Sun, 11 Apr 2021 19:27:26 +0400 Subject: [PATCH 14/32] Delimited kiwix::Filter's public and private APIs --- include/library.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/library.h b/include/library.h index 7c806ed6d..fd5816882 100644 --- a/include/library.h +++ b/include/library.h @@ -35,6 +35,7 @@ namespace kiwix { class OPDSDumper; +class Library; enum supportedListSortBy { UNSORTED, TITLE, SIZE, DATE, CREATOR, PUBLISHER }; enum supportedListMode { @@ -110,6 +111,9 @@ class Filter { const std::string& getQuery() const { return _query; } bool queryIsPartial() const { return _queryIsPartial; } +private: + friend class Library; + bool accept(const Book& book) const; bool acceptByQueryOnly(const Book& book) const; bool acceptByNonQueryCriteria(const Book& book) const; From 2d76f8395e688084140c49f17a4a72beef4b3825 Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Sun, 11 Apr 2021 19:54:14 +0400 Subject: [PATCH 15/32] Dropped unused functions from Filter's private API This should have been done back in PR #460 --- include/library.h | 2 -- src/library.cpp | 16 ---------------- 2 files changed, 18 deletions(-) diff --git a/include/library.h b/include/library.h index fd5816882..f3b494363 100644 --- a/include/library.h +++ b/include/library.h @@ -114,8 +114,6 @@ class Filter { private: friend class Library; - bool accept(const Book& book) const; - bool acceptByQueryOnly(const Book& book) const; bool acceptByNonQueryCriteria(const Book& book) const; }; diff --git a/src/library.cpp b/src/library.cpp index 6a2bc5db8..c12a0a526 100644 --- a/src/library.cpp +++ b/src/library.cpp @@ -596,11 +596,6 @@ bool Filter::hasQuery() const return ACTIVE(QUERY); } -bool Filter::accept(const Book& book) const -{ - return acceptByNonQueryCriteria(book) && acceptByQueryOnly(book); -} - bool Filter::acceptByNonQueryCriteria(const Book& book) const { auto local = !book.getPath().empty(); @@ -647,15 +642,4 @@ bool Filter::acceptByNonQueryCriteria(const Book& book) const return true; } -bool Filter::acceptByQueryOnly(const Book& book) const -{ - if ( ACTIVE(QUERY) - && !(matchRegex(book.getTitle(), "\\Q" + _query + "\\E") - || matchRegex(book.getDescription(), "\\Q" + _query + "\\E"))) - return false; - - return true; - -} - } From 80cd1fc9891672b082504f19ebd8667d01506cff Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Sun, 11 Apr 2021 19:54:39 +0400 Subject: [PATCH 16/32] Renamed 2 functions in Filter and Library --- include/library.h | 4 ++-- src/library.cpp | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/library.h b/include/library.h index f3b494363..3e8c9fb2a 100644 --- a/include/library.h +++ b/include/library.h @@ -114,7 +114,7 @@ class Filter { private: friend class Library; - bool acceptByNonQueryCriteria(const Book& book) const; + bool accept(const Book& book) const; }; @@ -309,7 +309,7 @@ class Library friend class libXMLDumper; private: // functions - BookIdCollection getBooksByTitleOrDescription(const Filter& filter); + BookIdCollection filterViaBookDB(const Filter& filter); void updateBookDB(const Book& book); }; diff --git a/src/library.cpp b/src/library.cpp index c12a0a526..34f7dc9c6 100644 --- a/src/library.cpp +++ b/src/library.cpp @@ -295,7 +295,7 @@ void Library::updateBookDB(const Book& book) m_bookDB->replace_document(idterm, doc); } -Library::BookIdCollection Library::getBooksByTitleOrDescription(const Filter& filter) +Library::BookIdCollection Library::filterViaBookDB(const Filter& filter) { if ( !filter.hasQuery() ) return getBooksIds(); @@ -331,8 +331,8 @@ Library::BookIdCollection Library::getBooksByTitleOrDescription(const Filter& fi Library::BookIdCollection Library::filter(const Filter& filter) { BookIdCollection result; - for(auto id : getBooksByTitleOrDescription(filter)) { - if(filter.acceptByNonQueryCriteria(m_books.at(id))) { + for(auto id : filterViaBookDB(filter)) { + if(filter.accept(m_books.at(id))) { result.push_back(id); } } @@ -596,7 +596,7 @@ bool Filter::hasQuery() const return ACTIVE(QUERY); } -bool Filter::acceptByNonQueryCriteria(const Book& book) const +bool Filter::accept(const Book& book) const { auto local = !book.getPath().empty(); FILTER(_LOCAL, local) From ea779ac2001456826dc7a2a5bf8f1abeb0324cb2 Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Sun, 11 Apr 2021 19:51:32 +0400 Subject: [PATCH 17/32] Extracted buildXapianQuery() --- src/library.cpp | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/src/library.cpp b/src/library.cpp index 34f7dc9c6..3dd2b162e 100644 --- a/src/library.cpp +++ b/src/library.cpp @@ -295,12 +295,11 @@ void Library::updateBookDB(const Book& book) m_bookDB->replace_document(idterm, doc); } -Library::BookIdCollection Library::filterViaBookDB(const Filter& filter) +namespace { - if ( !filter.hasQuery() ) - return getBooksIds(); - BookIdCollection bookIds; +Xapian::Query buildXapianQuery(const Filter& filter) +{ Xapian::QueryParser queryParser; queryParser.set_default_op(Xapian::Query::OP_AND); queryParser.add_prefix("title", "S"); @@ -317,7 +316,19 @@ Library::BookIdCollection Library::filterViaBookDB(const Filter& filter) | Xapian::QueryParser::FLAG_LOVEHATE | Xapian::QueryParser::FLAG_WILDCARD | partialQueryFlag; - const auto query = queryParser.parse_query(filter.getQuery(), flags); + return queryParser.parse_query(filter.getQuery(), flags); +} + +} // unnamed namespace + +Library::BookIdCollection Library::filterViaBookDB(const Filter& filter) +{ + if ( !filter.hasQuery() ) + return getBooksIds(); + + BookIdCollection bookIds; + + const auto query = buildXapianQuery(filter); Xapian::Enquire enquire(*m_bookDB); enquire.set_query(query); const auto results = enquire.get_mset(0, m_books.size()); From 8287f351e74925a3a6d7a70de2a4f72c53c82f9b Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Sun, 11 Apr 2021 20:17:37 +0400 Subject: [PATCH 18/32] Final logic of Library::filterViaBookDB() Moved the `filter.hasQuery()` check inside `buildXapianQuery()`. `Library::filterViaBookDB()` only cares if the query that is going to be run on the book DB would match all documents. The rest of changes related to enhancing the usage of Xapian for the catalog search will happen inside `buildXapianQuery()` and `updateBookDB()`. --- src/library.cpp | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/library.cpp b/src/library.cpp index 3dd2b162e..5b56ce5e0 100644 --- a/src/library.cpp +++ b/src/library.cpp @@ -298,8 +298,19 @@ void Library::updateBookDB(const Book& book) namespace { +bool willSelectEverything(const Xapian::Query& query) +{ + return query.get_type() == Xapian::Query::LEAF_MATCH_ALL; +} + Xapian::Query buildXapianQuery(const Filter& filter) { + if ( !filter.hasQuery() ) { + // This is a thread-safe way to construct an equivalent of + // a Xapian::Query::MatchAll query + return Xapian::Query(std::string()); + } + Xapian::QueryParser queryParser; queryParser.set_default_op(Xapian::Query::OP_AND); queryParser.add_prefix("title", "S"); @@ -323,12 +334,13 @@ Xapian::Query buildXapianQuery(const Filter& filter) Library::BookIdCollection Library::filterViaBookDB(const Filter& filter) { - if ( !filter.hasQuery() ) + const auto query = buildXapianQuery(filter); + + if ( willSelectEverything(query) ) return getBooksIds(); BookIdCollection bookIds; - const auto query = buildXapianQuery(filter); Xapian::Enquire enquire(*m_bookDB); enquire.set_query(query); const auto results = enquire.get_mset(0, m_books.size()); From 415c65cf03063bc83f801ae350fb8e0b8cb11019 Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Sun, 11 Apr 2021 23:35:21 +0400 Subject: [PATCH 19/32] Catalog filtering by book name works via Xapian --- include/library.h | 3 +++ src/library.cpp | 27 +++++++++++++++++++++++++-- test/library.cpp | 8 ++++++++ 3 files changed, 36 insertions(+), 2 deletions(-) diff --git a/include/library.h b/include/library.h index 3e8c9fb2a..b2d090e8e 100644 --- a/include/library.h +++ b/include/library.h @@ -111,6 +111,9 @@ class Filter { const std::string& getQuery() const { return _query; } bool queryIsPartial() const { return _queryIsPartial; } + bool hasName() const; + const std::string& getName() const { return _name; } + private: friend class Library; diff --git a/src/library.cpp b/src/library.cpp index 5b56ce5e0..9fe688ac1 100644 --- a/src/library.cpp +++ b/src/library.cpp @@ -278,12 +278,15 @@ void Library::updateBookDB(const Book& book) const std::string title = normalizeText(book.getTitle(), lang); const std::string desc = normalizeText(book.getDescription(), lang); + const std::string name = book.getName(); // this is supposed to be normalized doc.add_value(0, title); doc.add_value(1, desc); + doc.add_value(2, name); doc.set_data(book.getId()); indexer.index_text(title, 1, "S"); indexer.index_text(desc, 1, "XD"); + indexer.index_text(name, 1, "XN"); // Index fields without prefixes for general search indexer.index_text(title); @@ -303,7 +306,8 @@ bool willSelectEverything(const Xapian::Query& query) return query.get_type() == Xapian::Query::LEAF_MATCH_ALL; } -Xapian::Query buildXapianQuery(const Filter& filter) + +Xapian::Query buildXapianQueryFromFilterQuery(const Filter& filter) { if ( !filter.hasQuery() ) { // This is a thread-safe way to construct an equivalent of @@ -315,6 +319,7 @@ Xapian::Query buildXapianQuery(const Filter& filter) queryParser.set_default_op(Xapian::Query::OP_AND); queryParser.add_prefix("title", "S"); queryParser.add_prefix("description", "XD"); + queryParser.add_prefix("name", "XN"); const auto partialQueryFlag = filter.queryIsPartial() ? Xapian::QueryParser::FLAG_PARTIAL : 0; @@ -330,6 +335,20 @@ Xapian::Query buildXapianQuery(const Filter& filter) return queryParser.parse_query(filter.getQuery(), flags); } +Xapian::Query nameQuery(const std::string& name) +{ + return Xapian::Query("XN" + name); +} + +Xapian::Query buildXapianQuery(const Filter& filter) +{ + auto q = buildXapianQueryFromFilterQuery(filter); + if ( filter.hasName() ) { + q = Xapian::Query(Xapian::Query::OP_AND, q, nameQuery(filter.getName())); + } + return q; +} + } // unnamed namespace Library::BookIdCollection Library::filterViaBookDB(const Filter& filter) @@ -619,6 +638,11 @@ bool Filter::hasQuery() const return ACTIVE(QUERY); } +bool Filter::hasName() const +{ + return ACTIVE(NAME); +} + bool Filter::accept(const Book& book) const { auto local = !book.getPath().empty(); @@ -638,7 +662,6 @@ bool Filter::accept(const Book& book) const FILTER(LANG, book.getLanguage() == _lang) FILTER(_PUBLISHER, book.getPublisher() == _publisher) FILTER(_CREATOR, book.getCreator() == _creator) - FILTER(NAME, book.getName() == _name) if (ACTIVE(ACCEPTTAGS)) { if (!_acceptTags.empty()) { diff --git a/test/library.cpp b/test/library.cpp index 0bbbe1816..c2c09e0b0 100644 --- a/test/library.cpp +++ b/test/library.cpp @@ -486,6 +486,14 @@ TEST_F(LibraryTest, filterByName) EXPECT_FILTER_RESULTS(kiwix::Filter().name("wikibooks_de"), "An example ZIM archive" ); + + EXPECT_FILTER_RESULTS(kiwix::Filter().query("name:wikibooks_de"), + "An example ZIM archive" + ); + + EXPECT_FILTER_RESULTS(kiwix::Filter().query("wikibooks_de"), + /* no results */ + ); } TEST_F(LibraryTest, filterByCategory) From 0c0a37073bce2eaa37688041a50973f4558b5cd9 Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Sun, 11 Apr 2021 23:51:49 +0400 Subject: [PATCH 20/32] Catalog filtering by category works via Xapian --- include/library.h | 3 +++ src/library.cpp | 18 +++++++++++++++++- test/library.cpp | 9 +++++++++ 3 files changed, 29 insertions(+), 1 deletion(-) diff --git a/include/library.h b/include/library.h index b2d090e8e..f49e7dfd1 100644 --- a/include/library.h +++ b/include/library.h @@ -114,6 +114,9 @@ class Filter { bool hasName() const; const std::string& getName() const { return _name; } + bool hasCategory() const; + const std::string& getCategory() const { return _category; } + private: friend class Library; diff --git a/src/library.cpp b/src/library.cpp index 9fe688ac1..89a2eb31e 100644 --- a/src/library.cpp +++ b/src/library.cpp @@ -279,14 +279,17 @@ void Library::updateBookDB(const Book& book) const std::string title = normalizeText(book.getTitle(), lang); const std::string desc = normalizeText(book.getDescription(), lang); const std::string name = book.getName(); // this is supposed to be normalized + const std::string category = book.getCategory(); // this is supposed to be normalized doc.add_value(0, title); doc.add_value(1, desc); doc.add_value(2, name); + doc.add_value(3, category); doc.set_data(book.getId()); indexer.index_text(title, 1, "S"); indexer.index_text(desc, 1, "XD"); indexer.index_text(name, 1, "XN"); + indexer.index_text(category, 1, "XC"); // Index fields without prefixes for general search indexer.index_text(title); @@ -320,6 +323,7 @@ Xapian::Query buildXapianQueryFromFilterQuery(const Filter& filter) queryParser.add_prefix("title", "S"); queryParser.add_prefix("description", "XD"); queryParser.add_prefix("name", "XN"); + queryParser.add_prefix("category", "XC"); const auto partialQueryFlag = filter.queryIsPartial() ? Xapian::QueryParser::FLAG_PARTIAL : 0; @@ -340,12 +344,20 @@ Xapian::Query nameQuery(const std::string& name) return Xapian::Query("XN" + name); } +Xapian::Query categoryQuery(const std::string& category) +{ + return Xapian::Query("XC" + category); +} + Xapian::Query buildXapianQuery(const Filter& filter) { auto q = buildXapianQueryFromFilterQuery(filter); if ( filter.hasName() ) { q = Xapian::Query(Xapian::Query::OP_AND, q, nameQuery(filter.getName())); } + if ( filter.hasCategory() ) { + q = Xapian::Query(Xapian::Query::OP_AND, q, categoryQuery(filter.getCategory())); + } return q; } @@ -643,6 +655,11 @@ bool Filter::hasName() const return ACTIVE(NAME); } +bool Filter::hasCategory() const +{ + return ACTIVE(CATEGORY); +} + bool Filter::accept(const Book& book) const { auto local = !book.getPath().empty(); @@ -658,7 +675,6 @@ bool Filter::accept(const Book& book) const FILTER(_NOREMOTE, !remote) FILTER(MAXSIZE, book.getSize() <= _maxSize) - FILTER(CATEGORY, book.getCategory() == _category) FILTER(LANG, book.getLanguage() == _lang) FILTER(_PUBLISHER, book.getPublisher() == _publisher) FILTER(_CREATOR, book.getCreator() == _creator) diff --git a/test/library.cpp b/test/library.cpp index c2c09e0b0..3dc84a3c7 100644 --- a/test/library.cpp +++ b/test/library.cpp @@ -502,6 +502,15 @@ TEST_F(LibraryTest, filterByCategory) "Géographie par Wikipédia", "Mathématiques" ); + + EXPECT_FILTER_RESULTS(kiwix::Filter().query("category:category_element_overrides_tags"), + "Géographie par Wikipédia", + "Mathématiques" + ); + + EXPECT_FILTER_RESULTS(kiwix::Filter().query("category_element_overrides_tags"), + /* no results */ + ); } TEST_F(LibraryTest, filterByMaxSize) From 7ccd9ffcce93cdaafeca723e6c6dc36e65c3e025 Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Mon, 12 Apr 2021 12:29:06 +0400 Subject: [PATCH 21/32] Catalog filtering by language works via Xapian --- include/library.h | 3 +++ src/library.cpp | 17 ++++++++++++++++- test/library.cpp | 13 +++++++++++++ 3 files changed, 32 insertions(+), 1 deletion(-) diff --git a/include/library.h b/include/library.h index f49e7dfd1..6a2c79d23 100644 --- a/include/library.h +++ b/include/library.h @@ -117,6 +117,9 @@ class Filter { bool hasCategory() const; const std::string& getCategory() const { return _category; } + bool hasLang() const; + const std::string& getLang() const { return _lang; } + private: friend class Library; diff --git a/src/library.cpp b/src/library.cpp index 89a2eb31e..230206674 100644 --- a/src/library.cpp +++ b/src/library.cpp @@ -284,12 +284,14 @@ void Library::updateBookDB(const Book& book) doc.add_value(1, desc); doc.add_value(2, name); doc.add_value(3, category); + doc.add_value(4, lang); doc.set_data(book.getId()); indexer.index_text(title, 1, "S"); indexer.index_text(desc, 1, "XD"); indexer.index_text(name, 1, "XN"); indexer.index_text(category, 1, "XC"); + indexer.index_text(lang, 1, "L"); // Index fields without prefixes for general search indexer.index_text(title); @@ -324,6 +326,7 @@ Xapian::Query buildXapianQueryFromFilterQuery(const Filter& filter) queryParser.add_prefix("description", "XD"); queryParser.add_prefix("name", "XN"); queryParser.add_prefix("category", "XC"); + queryParser.add_prefix("lang", "L"); const auto partialQueryFlag = filter.queryIsPartial() ? Xapian::QueryParser::FLAG_PARTIAL : 0; @@ -349,6 +352,11 @@ Xapian::Query categoryQuery(const std::string& category) return Xapian::Query("XC" + category); } +Xapian::Query langQuery(const std::string& lang) +{ + return Xapian::Query("L" + lang); +} + Xapian::Query buildXapianQuery(const Filter& filter) { auto q = buildXapianQueryFromFilterQuery(filter); @@ -358,6 +366,9 @@ Xapian::Query buildXapianQuery(const Filter& filter) if ( filter.hasCategory() ) { q = Xapian::Query(Xapian::Query::OP_AND, q, categoryQuery(filter.getCategory())); } + if ( filter.hasLang() ) { + q = Xapian::Query(Xapian::Query::OP_AND, q, langQuery(filter.getLang())); + } return q; } @@ -660,6 +671,11 @@ bool Filter::hasCategory() const return ACTIVE(CATEGORY); } +bool Filter::hasLang() const +{ + return ACTIVE(LANG); +} + bool Filter::accept(const Book& book) const { auto local = !book.getPath().empty(); @@ -675,7 +691,6 @@ bool Filter::accept(const Book& book) const FILTER(_NOREMOTE, !remote) FILTER(MAXSIZE, book.getSize() <= _maxSize) - FILTER(LANG, book.getLanguage() == _lang) FILTER(_PUBLISHER, book.getPublisher() == _publisher) FILTER(_CREATOR, book.getCreator() == _creator) diff --git a/test/library.cpp b/test/library.cpp index 3dc84a3c7..02314699e 100644 --- a/test/library.cpp +++ b/test/library.cpp @@ -353,6 +353,19 @@ TEST_F(LibraryTest, filterByLanguage) "Ray Charles", "TED talks - Business" ); + + EXPECT_FILTER_RESULTS(kiwix::Filter().query("lang:eng"), + "Granblue Fantasy Wiki", + "Islam Stack Exchange", + "Movies & TV Stack Exchange", + "Mythology & Folklore Stack Exchange", + "Ray Charles", + "TED talks - Business" + ); + + EXPECT_FILTER_RESULTS(kiwix::Filter().query("eng"), + /* no results */ + ); } TEST_F(LibraryTest, filterByTags) From a759ab989fed1b65603ab515a40abce429e9dac2 Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Mon, 12 Apr 2021 13:05:44 +0400 Subject: [PATCH 22/32] Catalog filtering by publisher works via Xapian --- include/library.h | 3 +++ src/library.cpp | 27 +++++++++++++++++++++++---- test/library.cpp | 32 ++++++++++++++++++++++++++++++-- 3 files changed, 56 insertions(+), 6 deletions(-) diff --git a/include/library.h b/include/library.h index 6a2c79d23..66e8b7d2c 100644 --- a/include/library.h +++ b/include/library.h @@ -120,6 +120,9 @@ class Filter { bool hasLang() const; const std::string& getLang() const { return _lang; } + bool hasPublisher() const; + const std::string& getPublisher() const { return _publisher; } + private: friend class Library; diff --git a/src/library.cpp b/src/library.cpp index 230206674..cff560fe2 100644 --- a/src/library.cpp +++ b/src/library.cpp @@ -43,7 +43,7 @@ std::string iso639_3ToXapian(const std::string& lang) { return icu::Locale(lang.c_str()).getLanguage(); }; -std::string normalizeText(const std::string& text, const std::string& language) +std::string normalizeText(const std::string& text) { return removeAccents(text); } @@ -276,15 +276,17 @@ void Library::updateBookDB(const Book& book) Xapian::Document doc; indexer.set_document(doc); - const std::string title = normalizeText(book.getTitle(), lang); - const std::string desc = normalizeText(book.getDescription(), lang); + const std::string title = normalizeText(book.getTitle()); + const std::string desc = normalizeText(book.getDescription()); const std::string name = book.getName(); // this is supposed to be normalized const std::string category = book.getCategory(); // this is supposed to be normalized + const std::string publisher = normalizeText(book.getPublisher()); doc.add_value(0, title); doc.add_value(1, desc); doc.add_value(2, name); doc.add_value(3, category); doc.add_value(4, lang); + doc.add_value(5, publisher); doc.set_data(book.getId()); indexer.index_text(title, 1, "S"); @@ -292,6 +294,7 @@ void Library::updateBookDB(const Book& book) indexer.index_text(name, 1, "XN"); indexer.index_text(category, 1, "XC"); indexer.index_text(lang, 1, "L"); + indexer.index_text(publisher, 1, "XP"); // Index fields without prefixes for general search indexer.index_text(title); @@ -327,6 +330,7 @@ Xapian::Query buildXapianQueryFromFilterQuery(const Filter& filter) queryParser.add_prefix("name", "XN"); queryParser.add_prefix("category", "XC"); queryParser.add_prefix("lang", "L"); + queryParser.add_prefix("publisher", "XP"); const auto partialQueryFlag = filter.queryIsPartial() ? Xapian::QueryParser::FLAG_PARTIAL : 0; @@ -357,6 +361,14 @@ Xapian::Query langQuery(const std::string& lang) return Xapian::Query("L" + lang); } +Xapian::Query publisherQuery(const std::string& publisher) +{ + Xapian::QueryParser queryParser; + queryParser.set_default_op(Xapian::Query::OP_PHRASE); + const auto flags = 0; + return queryParser.parse_query(normalizeText(publisher), flags, "XP"); +} + Xapian::Query buildXapianQuery(const Filter& filter) { auto q = buildXapianQueryFromFilterQuery(filter); @@ -369,6 +381,9 @@ Xapian::Query buildXapianQuery(const Filter& filter) if ( filter.hasLang() ) { q = Xapian::Query(Xapian::Query::OP_AND, q, langQuery(filter.getLang())); } + if ( filter.hasPublisher() ) { + q = Xapian::Query(Xapian::Query::OP_AND, q, publisherQuery(filter.getPublisher())); + } return q; } @@ -676,6 +691,11 @@ bool Filter::hasLang() const return ACTIVE(LANG); } +bool Filter::hasPublisher() const +{ + return ACTIVE(_PUBLISHER); +} + bool Filter::accept(const Book& book) const { auto local = !book.getPath().empty(); @@ -691,7 +711,6 @@ bool Filter::accept(const Book& book) const FILTER(_NOREMOTE, !remote) FILTER(MAXSIZE, book.getSize() <= _maxSize) - FILTER(_PUBLISHER, book.getPublisher() == _publisher) FILTER(_CREATOR, book.getCreator() == _creator) if (ACTIVE(ACCEPTTAGS)) { diff --git a/test/library.cpp b/test/library.cpp index 02314699e..d46891335 100644 --- a/test/library.cpp +++ b/test/library.cpp @@ -206,7 +206,7 @@ const char sampleLibraryXML[] = R"( description="An eXaMpLe book added to the catalog via XML" language="deu" creator="Wikibooks" - publisher="Kiwix" + publisher="Kiwix Enthusiasts" date="2021-04-11" name="wikibooks_de" tags="unittest;wikibooks;_category:wikibooks" @@ -277,7 +277,7 @@ TEST_F(LibraryTest, sanityCheck) EXPECT_EQ(lib.getBookCount(true, true), 12U); EXPECT_EQ(lib.getBooksLanguages().size(), 3U); EXPECT_EQ(lib.getBooksCreators().size(), 9U); - EXPECT_EQ(lib.getBooksPublishers().size(), 2U); + EXPECT_EQ(lib.getBooksPublishers().size(), 3U); } TEST_F(LibraryTest, categoryHandling) @@ -492,6 +492,34 @@ TEST_F(LibraryTest, filterByPublisher) "An example ZIM archive", "Ray Charles" ); + + // filtering by publisher requires full match of the search term + EXPECT_FILTER_RESULTS(kiwix::Filter().publisher("Kiwi"), + /* no results */ + ); + + // filtering by publisher requires a full phrase match + EXPECT_FILTER_RESULTS(kiwix::Filter().publisher("Kiwix Enthusiasts"), + "An example ZIM archive" + ); + EXPECT_FILTER_RESULTS(kiwix::Filter().publisher("Enthusiasts Kiwix"), + /* no results */ + ); + + // filtering by publisher is case and diacritics insensitive + EXPECT_FILTER_RESULTS(kiwix::Filter().publisher("kîWIx"), + "An example ZIM archive", + "Ray Charles" + ); + + EXPECT_FILTER_RESULTS(kiwix::Filter().query("publisher:kiwix"), + "An example ZIM archive", + "Ray Charles" + ); + + EXPECT_FILTER_RESULTS(kiwix::Filter().query("kiwix"), + /* no results */ + ); } TEST_F(LibraryTest, filterByName) From e805f689944edbe231e16fef28db8ea6fd6518c4 Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Fri, 16 Apr 2021 12:22:08 +0400 Subject: [PATCH 23/32] Enhanced & broke LibraryTest.filterByPublisher --- test/library.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/library.cpp b/test/library.cpp index d46891335..40da24942 100644 --- a/test/library.cpp +++ b/test/library.cpp @@ -206,7 +206,7 @@ const char sampleLibraryXML[] = R"( description="An eXaMpLe book added to the catalog via XML" language="deu" creator="Wikibooks" - publisher="Kiwix Enthusiasts" + publisher="Kiwix & Some Enthusiasts" date="2021-04-11" name="wikibooks_de" tags="unittest;wikibooks;_category:wikibooks" @@ -499,10 +499,10 @@ TEST_F(LibraryTest, filterByPublisher) ); // filtering by publisher requires a full phrase match - EXPECT_FILTER_RESULTS(kiwix::Filter().publisher("Kiwix Enthusiasts"), + EXPECT_FILTER_RESULTS(kiwix::Filter().publisher("Kiwix & Some Enthusiasts"), "An example ZIM archive" ); - EXPECT_FILTER_RESULTS(kiwix::Filter().publisher("Enthusiasts Kiwix"), + EXPECT_FILTER_RESULTS(kiwix::Filter().publisher("Some Enthusiasts & Kiwix"), /* no results */ ); From d3d5abe14d1dcf5f6323bb84d9ba19c1ee3460ba Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Fri, 16 Apr 2021 12:27:14 +0400 Subject: [PATCH 24/32] Handling of non-words in publisher query This change fixes the failure of the LibraryTest.filterByPublisher unit-test broken by the previous commit. The previous approach used in `publisherQuery()` for building a phrase query enforcing the specified prefix for all terms fails if 1. the input phrase contains a non-word term that Xapian's query parser doesn't like (e.g. a standalone ampersand character, 1/2, a#1, etc); 2. the input phrase contains at least three terms that Xapian's query parser has no issue with. Using the `quest` tool (coming with xapian-tools under Ubuntu) the issue can be demonstrated as follows: ``` $ quest -o phrase -d some_xapian_db "Energy & security" Parsed Query: Query((energy@1 PHRASE 11 Zsecur@2)) Exactly 0 matches MSet: $ quest -o phrase -d some_xapian_db "Energy & security act" UnimplementedError: OP_NEAR and OP_PHRASE only currently support leaf subqueries $ quest -o phrase -d some_xapian_db 'Energy 1/2 security act' UnimplementedError: OP_NEAR and OP_PHRASE only currently support leaf subqueries $ quest -o phrase -d some_xapian_db "Energy a#1 security act" UnimplementedError: OP_NEAR and OP_PHRASE only currently support leaf subqueries ``` The problem comes from parsing the query with the default operation set to `OP_PHRASE` (exemplified by the `-o phrase` option in above invocations of `quest`). A workaround is to parse the phrase with a default operation of `OP_OR` and then combine all the terms with `OP_PHRASE`. Besides stemming should be disabled in order to target an exact phrase match (save for the non-word terms, if any, that are ignored by the query parser). --- src/library.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/library.cpp b/src/library.cpp index cff560fe2..72a947068 100644 --- a/src/library.cpp +++ b/src/library.cpp @@ -364,9 +364,11 @@ Xapian::Query langQuery(const std::string& lang) Xapian::Query publisherQuery(const std::string& publisher) { Xapian::QueryParser queryParser; - queryParser.set_default_op(Xapian::Query::OP_PHRASE); + queryParser.set_default_op(Xapian::Query::OP_OR); + queryParser.set_stemming_strategy(Xapian::QueryParser::STEM_NONE); const auto flags = 0; - return queryParser.parse_query(normalizeText(publisher), flags, "XP"); + const auto q = queryParser.parse_query(normalizeText(publisher), flags, "XP"); + return Xapian::Query(Xapian::Query::OP_PHRASE, q.get_terms_begin(), q.get_terms_end(), q.get_length()); } Xapian::Query buildXapianQuery(const Filter& filter) From 3d5fd8f585c0000f875c3db5652b62389c7c5eb4 Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Tue, 13 Apr 2021 00:51:53 +0400 Subject: [PATCH 25/32] Catalog filtering by creator works via Xapian --- include/library.h | 3 +++ src/library.cpp | 23 ++++++++++++++++++++++- test/library.cpp | 34 +++++++++++++++++++++++++++++----- 3 files changed, 54 insertions(+), 6 deletions(-) diff --git a/include/library.h b/include/library.h index 66e8b7d2c..adb47498d 100644 --- a/include/library.h +++ b/include/library.h @@ -123,6 +123,9 @@ class Filter { bool hasPublisher() const; const std::string& getPublisher() const { return _publisher; } + bool hasCreator() const; + const std::string& getCreator() const { return _creator; } + private: friend class Library; diff --git a/src/library.cpp b/src/library.cpp index 72a947068..0f945dc54 100644 --- a/src/library.cpp +++ b/src/library.cpp @@ -281,12 +281,14 @@ void Library::updateBookDB(const Book& book) const std::string name = book.getName(); // this is supposed to be normalized const std::string category = book.getCategory(); // this is supposed to be normalized const std::string publisher = normalizeText(book.getPublisher()); + const std::string creator = normalizeText(book.getCreator()); doc.add_value(0, title); doc.add_value(1, desc); doc.add_value(2, name); doc.add_value(3, category); doc.add_value(4, lang); doc.add_value(5, publisher); + doc.add_value(6, creator); doc.set_data(book.getId()); indexer.index_text(title, 1, "S"); @@ -295,6 +297,7 @@ void Library::updateBookDB(const Book& book) indexer.index_text(category, 1, "XC"); indexer.index_text(lang, 1, "L"); indexer.index_text(publisher, 1, "XP"); + indexer.index_text(creator, 1, "A"); // Index fields without prefixes for general search indexer.index_text(title); @@ -331,6 +334,7 @@ Xapian::Query buildXapianQueryFromFilterQuery(const Filter& filter) queryParser.add_prefix("category", "XC"); queryParser.add_prefix("lang", "L"); queryParser.add_prefix("publisher", "XP"); + queryParser.add_prefix("creator", "A"); const auto partialQueryFlag = filter.queryIsPartial() ? Xapian::QueryParser::FLAG_PARTIAL : 0; @@ -371,6 +375,16 @@ Xapian::Query publisherQuery(const std::string& publisher) return Xapian::Query(Xapian::Query::OP_PHRASE, q.get_terms_begin(), q.get_terms_end(), q.get_length()); } +Xapian::Query creatorQuery(const std::string& creator) +{ + Xapian::QueryParser queryParser; + queryParser.set_default_op(Xapian::Query::OP_OR); + queryParser.set_stemming_strategy(Xapian::QueryParser::STEM_NONE); + const auto flags = 0; + const auto q = queryParser.parse_query(normalizeText(creator), flags, "A"); + return Xapian::Query(Xapian::Query::OP_PHRASE, q.get_terms_begin(), q.get_terms_end(), q.get_length()); +} + Xapian::Query buildXapianQuery(const Filter& filter) { auto q = buildXapianQueryFromFilterQuery(filter); @@ -386,6 +400,9 @@ Xapian::Query buildXapianQuery(const Filter& filter) if ( filter.hasPublisher() ) { q = Xapian::Query(Xapian::Query::OP_AND, q, publisherQuery(filter.getPublisher())); } + if ( filter.hasCreator() ) { + q = Xapian::Query(Xapian::Query::OP_AND, q, creatorQuery(filter.getCreator())); + } return q; } @@ -698,6 +715,11 @@ bool Filter::hasPublisher() const return ACTIVE(_PUBLISHER); } +bool Filter::hasCreator() const +{ + return ACTIVE(_CREATOR); +} + bool Filter::accept(const Book& book) const { auto local = !book.getPath().empty(); @@ -713,7 +735,6 @@ bool Filter::accept(const Book& book) const FILTER(_NOREMOTE, !remote) FILTER(MAXSIZE, book.getSize() <= _maxSize) - FILTER(_CREATOR, book.getCreator() == _creator) if (ACTIVE(ACCEPTTAGS)) { if (!_acceptTags.empty()) { diff --git a/test/library.cpp b/test/library.cpp index 40da24942..e48564b32 100644 --- a/test/library.cpp +++ b/test/library.cpp @@ -472,18 +472,42 @@ TEST_F(LibraryTest, filterByCreator) "Granblue Fantasy Wiki" ); - // filtering by creator is case sensitive - EXPECT_FILTER_RESULTS(kiwix::Filter().creator("wiki"), - /* no results */ + // filtering by creator is case and diacritics insensitive + EXPECT_FILTER_RESULTS(kiwix::Filter().creator("wIkï"), + "Granblue Fantasy Wiki" ); - // filtering by creator requires full match of the full author/creator name + // filtering by creator doesn't requires full match of the full creator name EXPECT_FILTER_RESULTS(kiwix::Filter().creator("Stack"), - /* no results */ + "Islam Stack Exchange", + "Movies & TV Stack Exchange", + "Mythology & Folklore Stack Exchange" ); + + // filtering by creator requires a full phrase match (ignoring some non-word terms) EXPECT_FILTER_RESULTS(kiwix::Filter().creator("Movies & TV Stack Exchange"), "Movies & TV Stack Exchange" ); + EXPECT_FILTER_RESULTS(kiwix::Filter().creator("Movies & TV"), + "Movies & TV Stack Exchange" + ); + EXPECT_FILTER_RESULTS(kiwix::Filter().creator("Movies TV"), + "Movies & TV Stack Exchange" + ); + EXPECT_FILTER_RESULTS(kiwix::Filter().creator("TV & Movies"), + /* no results */ + ); + EXPECT_FILTER_RESULTS(kiwix::Filter().creator("TV Movies"), + /* no results */ + ); + + EXPECT_FILTER_RESULTS(kiwix::Filter().query("creator:Wikipedia"), + "Encyclopédie de la Tunisie", + "Géographie par Wikipédia", + "Mathématiques", + "Ray Charles" + ); + } TEST_F(LibraryTest, filterByPublisher) From 19e195cb7d0fcfd08ac3a7fe58b04879797e9332 Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Tue, 13 Apr 2021 11:56:19 +0400 Subject: [PATCH 26/32] Filter::Tags typedef --- include/library.h | 17 ++++++++++------- src/library.cpp | 4 ++-- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/include/library.h b/include/library.h index adb47498d..08598b913 100644 --- a/include/library.h +++ b/include/library.h @@ -49,10 +49,13 @@ enum supportedListMode { }; class Filter { - private: + public: // types + using Tags = std::vector; + + private: // data uint64_t activeFilters; - std::vector _acceptTags; - std::vector _rejectTags; + Tags _acceptTags; + Tags _rejectTags; std::string _category; std::string _lang; std::string _publisher; @@ -62,7 +65,7 @@ class Filter { bool _queryIsPartial; std::string _name; - public: + public: // functions Filter(); ~Filter() = default; @@ -96,8 +99,8 @@ class Filter { /** * Set the filter to only accept book with corresponding tag. */ - Filter& acceptTags(std::vector tags); - Filter& rejectTags(std::vector tags); + Filter& acceptTags(const Tags& tags); + Filter& rejectTags(const Tags& tags); Filter& category(std::string category); Filter& lang(std::string lang); @@ -126,7 +129,7 @@ class Filter { bool hasCreator() const; const std::string& getCreator() const { return _creator; } -private: +private: // functions friend class Library; bool accept(const Book& book) const; diff --git a/src/library.cpp b/src/library.cpp index 0f945dc54..b1ad3b27b 100644 --- a/src/library.cpp +++ b/src/library.cpp @@ -624,14 +624,14 @@ Filter& Filter::valid(bool accept) return *this; } -Filter& Filter::acceptTags(std::vector tags) +Filter& Filter::acceptTags(const Tags& tags) { _acceptTags = tags; activeFilters |= ACCEPTTAGS; return *this; } -Filter& Filter::rejectTags(std::vector tags) +Filter& Filter::rejectTags(const Tags& tags) { _rejectTags = tags; activeFilters |= REJECTTAGS; From 9c7366890ddcd04f7d918bb47c145ba895e6541b Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Tue, 13 Apr 2021 12:35:08 +0400 Subject: [PATCH 27/32] Catalog filtering by tags works via Xapian --- include/library.h | 3 +++ src/library.cpp | 47 +++++++++++++++++++++++++---------------------- 2 files changed, 28 insertions(+), 22 deletions(-) diff --git a/include/library.h b/include/library.h index 08598b913..d91e3c7da 100644 --- a/include/library.h +++ b/include/library.h @@ -129,6 +129,9 @@ class Filter { bool hasCreator() const; const std::string& getCreator() const { return _creator; } + const Tags& getAcceptTags() const { return _acceptTags; } + const Tags& getRejectTags() const { return _rejectTags; } + private: // functions friend class Library; diff --git a/src/library.cpp b/src/library.cpp index b1ad3b27b..25d08a858 100644 --- a/src/library.cpp +++ b/src/library.cpp @@ -282,6 +282,7 @@ void Library::updateBookDB(const Book& book) const std::string category = book.getCategory(); // this is supposed to be normalized const std::string publisher = normalizeText(book.getPublisher()); const std::string creator = normalizeText(book.getCreator()); + const std::string tags = book.getTags(); // normalization not needed doc.add_value(0, title); doc.add_value(1, desc); doc.add_value(2, name); @@ -289,6 +290,7 @@ void Library::updateBookDB(const Book& book) doc.add_value(4, lang); doc.add_value(5, publisher); doc.add_value(6, creator); + doc.add_value(7, tags); doc.set_data(book.getId()); indexer.index_text(title, 1, "S"); @@ -299,6 +301,9 @@ void Library::updateBookDB(const Book& book) indexer.index_text(publisher, 1, "XP"); indexer.index_text(creator, 1, "A"); + for ( const auto& tag : split(tags, ";") ) + doc.add_boolean_term("XT" + tag); + // Index fields without prefixes for general search indexer.index_text(title); indexer.increase_termpos(); @@ -335,6 +340,7 @@ Xapian::Query buildXapianQueryFromFilterQuery(const Filter& filter) queryParser.add_prefix("lang", "L"); queryParser.add_prefix("publisher", "XP"); queryParser.add_prefix("creator", "A"); + queryParser.add_prefix("tag", "XT"); const auto partialQueryFlag = filter.queryIsPartial() ? Xapian::QueryParser::FLAG_PARTIAL : 0; @@ -385,6 +391,21 @@ Xapian::Query creatorQuery(const std::string& creator) return Xapian::Query(Xapian::Query::OP_PHRASE, q.get_terms_begin(), q.get_terms_end(), q.get_length()); } +Xapian::Query tagsQuery(const Filter::Tags& acceptTags, const Filter::Tags& rejectTags) +{ + Xapian::Query q = Xapian::Query(std::string()); + if (!acceptTags.empty()) { + for ( const auto& tag : acceptTags ) + q &= Xapian::Query("XT" + tag); + } + + if (!rejectTags.empty()) { + for ( const auto& tag : rejectTags ) + q = Xapian::Query(Xapian::Query::OP_AND_NOT, q, "XT" + tag); + } + return q; +} + Xapian::Query buildXapianQuery(const Filter& filter) { auto q = buildXapianQueryFromFilterQuery(filter); @@ -403,6 +424,10 @@ Xapian::Query buildXapianQuery(const Filter& filter) if ( filter.hasCreator() ) { q = Xapian::Query(Xapian::Query::OP_AND, q, creatorQuery(filter.getCreator())); } + if ( !filter.getAcceptTags().empty() || !filter.getRejectTags().empty() ) { + const auto tq = tagsQuery(filter.getAcceptTags(), filter.getRejectTags()); + q = Xapian::Query(Xapian::Query::OP_AND, q, tq);; + } return q; } @@ -736,28 +761,6 @@ bool Filter::accept(const Book& book) const FILTER(MAXSIZE, book.getSize() <= _maxSize) - if (ACTIVE(ACCEPTTAGS)) { - if (!_acceptTags.empty()) { - auto vBookTags = split(book.getTags(), ";"); - std::set sBookTags(vBookTags.begin(), vBookTags.end()); - for (auto& t: _acceptTags) { - if (sBookTags.find(t) == sBookTags.end()) { - return false; - } - } - } - } - if (ACTIVE(REJECTTAGS)) { - if (!_rejectTags.empty()) { - auto vBookTags = split(book.getTags(), ";"); - std::set sBookTags(vBookTags.begin(), vBookTags.end()); - for (auto& t: _rejectTags) { - if (sBookTags.find(t) != sBookTags.end()) { - return false; - } - } - } - } return true; } From 87dc9d2723492481711e158c5d0d82198b53ffd1 Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Tue, 13 Apr 2021 23:04:35 +0400 Subject: [PATCH 28/32] Made catalog filtering by query diacritics insensitive Catalog filtering by titles/description was sensitive to diacritics present in the query string. Fixed that. Also enhanced the unit test to validate the insensitivity to diacritics present in either the title/description or the query string. --- src/library.cpp | 3 ++- test/library.cpp | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/src/library.cpp b/src/library.cpp index 25d08a858..4f3b58754 100644 --- a/src/library.cpp +++ b/src/library.cpp @@ -350,10 +350,11 @@ Xapian::Query buildXapianQueryFromFilterQuery(const Filter& filter) //queryParser.set_stemming_strategy(Xapian::QueryParser::STEM_SOME); const auto flags = Xapian::QueryParser::FLAG_PHRASE | Xapian::QueryParser::FLAG_BOOLEAN + | Xapian::QueryParser::FLAG_BOOLEAN_ANY_CASE | Xapian::QueryParser::FLAG_LOVEHATE | Xapian::QueryParser::FLAG_WILDCARD | partialQueryFlag; - return queryParser.parse_query(filter.getQuery(), flags); + return queryParser.parse_query(normalizeText(filter.getQuery()), flags); } Xapian::Query nameQuery(const std::string& name) diff --git a/test/library.cpp b/test/library.cpp index e48564b32..209be4a87 100644 --- a/test/library.cpp +++ b/test/library.cpp @@ -436,12 +436,31 @@ TEST_F(LibraryTest, filterByQuery) "Mythology & Folklore Stack Exchange" ); + // filtering by query is diacritics insensitive on titles + EXPECT_FILTER_RESULTS(kiwix::Filter().query("mathematiques"), + "Mathématiques", + ); + EXPECT_FILTER_RESULTS(kiwix::Filter().query("èxchângé"), + "Islam Stack Exchange", + "Movies & TV Stack Exchange", + "Mythology & Folklore Stack Exchange" + ); + // filtering by query is case insensitive on description/summary EXPECT_FILTER_RESULTS(kiwix::Filter().query("enTHUSiaSTS"), "Movies & TV Stack Exchange", "Mythology & Folklore Stack Exchange" ); + // filtering by query is diacritics insensitive on description/summary + EXPECT_FILTER_RESULTS(kiwix::Filter().query("selection"), + "Géographie par Wikipédia" + ); + EXPECT_FILTER_RESULTS(kiwix::Filter().query("enthúsïåsts"), + "Movies & TV Stack Exchange", + "Mythology & Folklore Stack Exchange" + ); + // by default, filtering by query assumes partial query EXPECT_FILTER_RESULTS(kiwix::Filter().query("Wiki"), "Encyclopédie de la Tunisie", From f751aff2fb3c94953375854b4e683ac407a3cc58 Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Tue, 13 Apr 2021 23:23:40 +0400 Subject: [PATCH 29/32] Full case/diacritics insensitivity in catalog filtering Catalog filtering should now be case/diacritics insensitive for all fields. However it is not validated for language, name and category fields, and is validated for tags, creator & publisher only for text supplied in the filter (but not for values read from the book). --- src/library.cpp | 16 ++++++++-------- test/library.cpp | 8 +++++--- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/src/library.cpp b/src/library.cpp index 4f3b58754..20a3e00a9 100644 --- a/src/library.cpp +++ b/src/library.cpp @@ -278,11 +278,11 @@ void Library::updateBookDB(const Book& book) const std::string title = normalizeText(book.getTitle()); const std::string desc = normalizeText(book.getDescription()); - const std::string name = book.getName(); // this is supposed to be normalized - const std::string category = book.getCategory(); // this is supposed to be normalized + const std::string name = normalizeText(book.getName()); + const std::string category = normalizeText(book.getCategory()); const std::string publisher = normalizeText(book.getPublisher()); const std::string creator = normalizeText(book.getCreator()); - const std::string tags = book.getTags(); // normalization not needed + const std::string tags = normalizeText(book.getTags()); doc.add_value(0, title); doc.add_value(1, desc); doc.add_value(2, name); @@ -359,17 +359,17 @@ Xapian::Query buildXapianQueryFromFilterQuery(const Filter& filter) Xapian::Query nameQuery(const std::string& name) { - return Xapian::Query("XN" + name); + return Xapian::Query("XN" + normalizeText(name)); } Xapian::Query categoryQuery(const std::string& category) { - return Xapian::Query("XC" + category); + return Xapian::Query("XC" + normalizeText(category)); } Xapian::Query langQuery(const std::string& lang) { - return Xapian::Query("L" + lang); + return Xapian::Query("L" + normalizeText(lang)); } Xapian::Query publisherQuery(const std::string& publisher) @@ -397,12 +397,12 @@ Xapian::Query tagsQuery(const Filter::Tags& acceptTags, const Filter::Tags& reje Xapian::Query q = Xapian::Query(std::string()); if (!acceptTags.empty()) { for ( const auto& tag : acceptTags ) - q &= Xapian::Query("XT" + tag); + q &= Xapian::Query("XT" + normalizeText(tag)); } if (!rejectTags.empty()) { for ( const auto& tag : rejectTags ) - q = Xapian::Query(Xapian::Query::OP_AND_NOT, q, "XT" + tag); + q = Xapian::Query(Xapian::Query::OP_AND_NOT, q, "XT" + normalizeText(tag)); } return q; } diff --git a/test/library.cpp b/test/library.cpp index 209be4a87..5c831e367 100644 --- a/test/library.cpp +++ b/test/library.cpp @@ -376,9 +376,11 @@ TEST_F(LibraryTest, filterByTags) "Mythology & Folklore Stack Exchange" ); - // filtering by tags is case sensitive - EXPECT_FILTER_RESULTS(kiwix::Filter().acceptTags({"stackEXChange"}), - /* no results */ + // filtering by tags is case and diacritics insensitive + EXPECT_FILTER_RESULTS(kiwix::Filter().acceptTags({"ståckEXÇhange"}), + "Islam Stack Exchange", + "Movies & TV Stack Exchange", + "Mythology & Folklore Stack Exchange" ); // filtering by tags requires full match of the search term From 59e9a0cd77d4d93855fafd6647bf193e6e1a0c47 Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Tue, 13 Apr 2021 23:48:22 +0400 Subject: [PATCH 30/32] Merged XmlLibraryTest with LibraryTest The library set up by LibraryTest now contains two valid books initialized via XML. Therefore XmlLibraryTest is not needed as a separate test suite. --- test/library.cpp | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/test/library.cpp b/test/library.cpp index 5c831e367..be2df95c9 100644 --- a/test/library.cpp +++ b/test/library.cpp @@ -232,7 +232,7 @@ class LibraryTest : public ::testing::Test { void SetUp() override { kiwix::Manager manager(&lib); manager.readOpds(sampleOpdsStream, "foo.urlHost"); - manager.readXml(sampleLibraryXML, true, "/data/library.xml", true); + manager.readXml(sampleLibraryXML, true, "./test/library.xml", true); } kiwix::Bookmark createBookmark(const std::string &id) { @@ -637,33 +637,24 @@ TEST_F(LibraryTest, getBookByPath) EXPECT_THROW(lib.getBookByPath("non/existant/path.zim"), std::out_of_range); } -class XmlLibraryTest : public ::testing::Test { - protected: - void SetUp() override { - kiwix::Manager manager(&lib); - manager.readFile( "./test/library.xml", true, true); - } - - kiwix::Library lib; -}; - -TEST_F(XmlLibraryTest, removeBookByIdRemovesTheBook) +TEST_F(LibraryTest, removeBookByIdRemovesTheBook) { - EXPECT_EQ(3U, lib.getBookCount(true, true)); + const auto initialBookCount = lib.getBookCount(true, true); + ASSERT_GT(initialBookCount, 0U); EXPECT_NO_THROW(lib.getBookById("raycharles")); lib.removeBookById("raycharles"); - EXPECT_EQ(2U, lib.getBookCount(true, true)); + EXPECT_EQ(initialBookCount - 1, lib.getBookCount(true, true)); EXPECT_THROW(lib.getBookById("raycharles"), std::out_of_range); }; -TEST_F(XmlLibraryTest, removeBookByIdDropsTheReader) +TEST_F(LibraryTest, removeBookByIdDropsTheReader) { EXPECT_NE(nullptr, lib.getReaderById("raycharles")); lib.removeBookById("raycharles"); EXPECT_THROW(lib.getReaderById("raycharles"), std::out_of_range); }; -TEST_F(XmlLibraryTest, removeBookByIdUpdatesTheSearchDB) +TEST_F(LibraryTest, removeBookByIdUpdatesTheSearchDB) { kiwix::Filter f; f.local(true).valid(true).query(R"(title:"ray charles")", false); From 4178c169ddc4ddd86a734131719986281b500d7b Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Tue, 27 Apr 2021 16:33:04 +0400 Subject: [PATCH 31/32] Xapian documents in book DB store only the book id --- src/library.cpp | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/library.cpp b/src/library.cpp index 20a3e00a9..66a457ff6 100644 --- a/src/library.cpp +++ b/src/library.cpp @@ -283,14 +283,6 @@ void Library::updateBookDB(const Book& book) const std::string publisher = normalizeText(book.getPublisher()); const std::string creator = normalizeText(book.getCreator()); const std::string tags = normalizeText(book.getTags()); - doc.add_value(0, title); - doc.add_value(1, desc); - doc.add_value(2, name); - doc.add_value(3, category); - doc.add_value(4, lang); - doc.add_value(5, publisher); - doc.add_value(6, creator); - doc.add_value(7, tags); doc.set_data(book.getId()); indexer.index_text(title, 1, "S"); From 63e9a09259cec6f67d4e3b0db19b523f1526bdc3 Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Tue, 27 Apr 2021 16:42:20 +0400 Subject: [PATCH 32/32] Cleaned up/beautified Library::updateBookDB() --- src/library.cpp | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/src/library.cpp b/src/library.cpp index 66a457ff6..bc74c488f 100644 --- a/src/library.cpp +++ b/src/library.cpp @@ -278,31 +278,29 @@ void Library::updateBookDB(const Book& book) const std::string title = normalizeText(book.getTitle()); const std::string desc = normalizeText(book.getDescription()); - const std::string name = normalizeText(book.getName()); - const std::string category = normalizeText(book.getCategory()); - const std::string publisher = normalizeText(book.getPublisher()); - const std::string creator = normalizeText(book.getCreator()); - const std::string tags = normalizeText(book.getTags()); - doc.set_data(book.getId()); - indexer.index_text(title, 1, "S"); - indexer.index_text(desc, 1, "XD"); - indexer.index_text(name, 1, "XN"); - indexer.index_text(category, 1, "XC"); - indexer.index_text(lang, 1, "L"); - indexer.index_text(publisher, 1, "XP"); - indexer.index_text(creator, 1, "A"); - - for ( const auto& tag : split(tags, ";") ) - doc.add_boolean_term("XT" + tag); - - // Index fields without prefixes for general search + // Index title and description without prefixes for general search indexer.index_text(title); indexer.increase_termpos(); indexer.index_text(desc); + // Index all fields for field-based search + indexer.index_text(title, 1, "S"); + indexer.index_text(desc, 1, "XD"); + indexer.index_text(lang, 1, "L"); + indexer.index_text(normalizeText(book.getCreator()), 1, "A"); + indexer.index_text(normalizeText(book.getPublisher()), 1, "XP"); + indexer.index_text(normalizeText(book.getName()), 1, "XN"); + indexer.index_text(normalizeText(book.getCategory()), 1, "XC"); + + for ( const auto& tag : split(normalizeText(book.getTags()), ";") ) + doc.add_boolean_term("XT" + tag); + const std::string idterm = "Q" + book.getId(); doc.add_boolean_term(idterm); + + doc.set_data(book.getId()); + m_bookDB->replace_document(idterm, doc); }