diff --git a/include/library.h b/include/library.h
index c09a77551..fb2d8aad4 100644
--- a/include/library.h
+++ b/include/library.h
@@ -28,6 +28,7 @@
 #include "book.h"
 #include "bookmark.h"
 #include "common.h"
+#include <xapian.h>
 
 #define KIWIX_LIBRARY_VERSION "20110515"
 
@@ -106,6 +107,8 @@ class Filter {
   Filter& name(std::string name);
 
   bool hasQuery() const;
+  // Returns the query string stored in this filter.
+  const std::string& getQuery() const { return _query; }
   bool accept(const Book& book) const;
   bool acceptByQueryOnly(const Book& book) const;
 
@@ -121,6 +124,8 @@ class Library
   std::map<std::string, kiwix::Book> m_books;
   std::map<std::string, std::shared_ptr<Reader>> m_readers;
   std::vector<kiwix::Bookmark> m_bookmarks;
+  // In-memory Xapian index of the titles and descriptions of the books.
+  Xapian::WritableDatabase m_bookDB;
 
  public:
   typedef std::vector<std::string> BookIdCollection;
@@ -295,6 +300,8 @@ class Library
 
 private: // functions
   BookIdCollection getBooksByTitleOrDescription(const Filter& filter);
+  // Adds the book to the search index or replaces its existing entry.
+  void updateBookDB(const Book& book);
 };
 
 }
diff --git a/src/library.cpp b/src/library.cpp
index 7d428f570..7cbfd08a1 100644
--- a/src/library.cpp
+++ b/src/library.cpp
@@ -34,10 +34,37 @@
 
 namespace kiwix
 {
+namespace
+{
+
+// Maps the language code of a book (ISO 639-3, as the name of the table
+// says) to the name of the matching Xapian stemmer. Books whose language
+// is missing from this table are indexed without stemming.
+const std::map<std::string, std::string> iso639_3ToXapian {
+  {"deu", "german"  },
+  {"eng", "english" },
+  {"fra", "french"  },
+  {"hye", "armenian"},
+  {"rus", "russian" },
+  {"spa", "spanish" },
+};
+
+// Normalizes text before it is indexed or used as a search query.
+// The language parameter is accepted so that callers need not change if
+// language-specific normalization is added; it is currently unused.
+std::string normalizeText(const std::string& text, const std::string& /*language*/)
+{
+  return removeAccents(text);
+}
+
+} // unnamed namespace
+
 /* Constructor */
 Library::Library()
+  : m_bookDB("", Xapian::DB_BACKEND_INMEMORY)
 {
 }
+
 /* Destructor */
 Library::~Library()
 {
@@ -47,6 +74,8 @@ Library::~Library()
 bool Library::addBook(const Book& book)
 {
-  /* Try to find it */
+  // Index the book for searching before it is stored in m_books.
+  updateBookDB(book);
+  /* Try to find it */
   try {
     auto& oldbook = m_books.at(book.getId());
     oldbook.update(book);
@@ -232,14 +261,84 @@ Library::BookIdCollection Library::filter(const std::string& search)
 }
 
 
+// Adds the given book to the search index or, if a document with the same
+// book ID is already indexed, replaces that document.
+// The title and the description are indexed both under their own prefixes
+// ("S" and "XD") and without any prefix, so that they can be matched by
+// field-specific queries as well as by general ones.
+void Library::updateBookDB(const Book& book)
+{
+  Xapian::TermGenerator indexer;
+  const std::string lang = book.getLanguage();
+  const auto stemmerName = iso639_3ToXapian.find(lang);
+  if ( stemmerName != iso639_3ToXapian.end() ) {
+    try {
+      indexer.set_stemmer(Xapian::Stem(stemmerName->second));
+      indexer.set_stemming_strategy(Xapian::TermGenerator::STEM_SOME);
+    } catch (const Xapian::InvalidArgumentError&) {
+      // The Xapian library in use does not know the requested stemmer:
+      // fall back to indexing without stemming.
+    }
+  }
+  Xapian::Document doc;
+  indexer.set_document(doc);
+
+  const std::string title = normalizeText(book.getTitle(), lang);
+  const std::string desc = normalizeText(book.getDescription(), lang);
+  doc.add_value(0, title);
+  doc.add_value(1, desc);
+  doc.set_data(book.getId());
+
+  indexer.index_text(title, 1, "S");
+  indexer.index_text(desc,  1, "XD");
+
+  // Index fields without prefixes for general search
+  indexer.index_text(title);
+  indexer.increase_termpos();
+  indexer.index_text(desc);
+
+  // The unique ID term is what replace_document() uses to find
+  // a previously indexed version of the same book.
+  const std::string idterm = "Q" + book.getId();
+  doc.add_boolean_term(idterm);
+  m_bookDB.replace_document(idterm, doc);
+}
+
+// Returns the IDs of the books matching the query of the filter,
+// or the IDs of all books if the filter has no query.
 Library::BookIdCollection Library::getBooksByTitleOrDescription(const Filter& filter)
 {
+  if ( !filter.hasQuery() )
+    return getBooksIds();
+
   BookIdCollection bookIds;
-  for(auto& pair:m_books) {
-    if(filter.acceptByQueryOnly(pair.second)) {
-      bookIds.push_back(pair.first);
-    }
+  Xapian::QueryParser queryParser;
+  queryParser.set_default_op(Xapian::Query::OP_AND);
+  queryParser.add_prefix("title", "S");
+  queryParser.add_prefix("description", "XD");
+  // Language assumed for the query is not known for sure so stemming
+  // is not applied
+  //queryParser.set_stemmer(Xapian::Stem(iso639_3ToXapian.at(???)));
+  //queryParser.set_stemming_strategy(Xapian::QueryParser::STEM_SOME);
+  const auto flags = Xapian::QueryParser::FLAG_PHRASE
+                   | Xapian::QueryParser::FLAG_BOOLEAN
+                   | Xapian::QueryParser::FLAG_BOOLEAN_ANY_CASE
+                   | Xapian::QueryParser::FLAG_LOVEHATE
+                   | Xapian::QueryParser::FLAG_WILDCARD;
+  // The indexed text went through normalizeText(), so the query has to be
+  // normalized the same way or accented input would never match.
+  // NOTE(review): removeAccents() presumably also folds letter case, which
+  // would turn AND/OR/NOT into plain words; FLAG_BOOLEAN_ANY_CASE keeps
+  // them working as operators in that case - confirm.
+  const auto query = queryParser.parse_query(
+      normalizeText(filter.getQuery(), ""), flags);
+  Xapian::Enquire enquire(m_bookDB);
+  enquire.set_query(query);
+  const auto results = enquire.get_mset(0, m_books.size());
+  for ( auto it = results.begin(); it != results.end(); ++it ) {
+    bookIds.push_back(it.get_document().get_data());
   }
+
   return bookIds;
 }
 
diff --git a/test/library.cpp b/test/library.cpp
index 7f34e008b..0501124df 100644
--- a/test/library.cpp
+++ b/test/library.cpp
@@ -263,7 +263,7 @@ TEST_F(LibraryTest, filterCheck)
   bookIds = lib.filter(kiwix::Filter().query("folklore"));
   EXPECT_EQ(bookIds.size(), 1U);
 
-  bookIds = lib.filter(kiwix::Filter().query("Wiki"));
+  bookIds = lib.filter(kiwix::Filter().query("Wiki*"));
   EXPECT_EQ(bookIds.size(), 4U);
 
   bookIds = lib.filter(kiwix::Filter().query("Wiki").creator("Wiki"));
diff --git a/test/server.cpp b/test/server.cpp
index d4bd59de1..5fd583f26 100644
--- a/test/server.cpp
+++ b/test/server.cpp
@@ -733,7 +733,26 @@ TEST_F(LibraryServerTest, catalog_searchdescription_xml)
   );
 }
 
-TEST_F(LibraryServerTest, catalog_search_by_text)
+TEST_F(LibraryServerTest, catalog_search_by_phrase)
+{
+  const auto r = zfs1_->GET("/catalog/search?q=\"ray%20charles\"");
+  EXPECT_EQ(r->status, 200);
+  EXPECT_EQ(maskVariableOPDSFeedData(r->body),
+    OPDS_FEED_TAG
+    "  <id>12345678-90ab-cdef-1234-567890abcdef</id>\n"
+    "  <title>Search result for \"ray charles\"</title>\n"
+    "  <updated>YYYY-MM-DDThh:mm:ssZ</updated>\n"
+    "  <totalResults>2</totalResults>\n"
+    "  <startIndex>0</startIndex>\n"
+    "  <itemsPerPage>2</itemsPerPage>\n"
+    CATALOG_LINK_TAGS
+    RAY_CHARLES_CATALOG_ENTRY
+    UNCATEGORIZED_RAY_CHARLES_CATALOG_ENTRY
+    "</feed>\n"
+  );
+}
+
+TEST_F(LibraryServerTest, catalog_search_by_words)
 {
   const auto r = zfs1_->GET("/catalog/search?q=ray%20charles");
   EXPECT_EQ(r->status, 200);
@@ -742,12 +761,13 @@ TEST_F(LibraryServerTest, catalog_search_by_text)
     "  <id>12345678-90ab-cdef-1234-567890abcdef</id>\n"
     "  <title>Search result for ray charles</title>\n"
     "  <updated>YYYY-MM-DDThh:mm:ssZ</updated>\n"
-    "  <totalResults>2</totalResults>\n"
+    "  <totalResults>3</totalResults>\n"
     "  <startIndex>0</startIndex>\n"
-    "  <itemsPerPage>2</itemsPerPage>\n"
+    "  <itemsPerPage>3</itemsPerPage>\n"
     CATALOG_LINK_TAGS
     RAY_CHARLES_CATALOG_ENTRY
     UNCATEGORIZED_RAY_CHARLES_CATALOG_ENTRY
+    CHARLES_RAY_CATALOG_ENTRY
     "</feed>\n"
   );
 }