mirror of https://github.com/kiwix/libkiwix.git
Initial version of Xapian-based catalog search
This commit is contained in:
parent
a17fc0ef2d
commit
a599fb3892
|
@ -28,6 +28,7 @@
|
|||
#include "book.h"
|
||||
#include "bookmark.h"
|
||||
#include "common.h"
|
||||
#include <xapian.h>
|
||||
|
||||
#define KIWIX_LIBRARY_VERSION "20110515"
|
||||
|
||||
|
@ -106,6 +107,7 @@ class Filter {
|
|||
Filter& name(std::string name);
|
||||
|
||||
bool hasQuery() const;
|
||||
// Gives read-only access to the stored query string (_query).
const std::string& getQuery() const
{
  return _query;
}
|
||||
|
||||
bool accept(const Book& book) const;
|
||||
bool acceptByQueryOnly(const Book& book) const;
|
||||
|
@ -121,6 +123,7 @@ class Library
|
|||
std::map<std::string, kiwix::Book> m_books;
|
||||
std::map<std::string, std::shared_ptr<Reader>> m_readers;
|
||||
std::vector<kiwix::Bookmark> m_bookmarks;
|
||||
Xapian::WritableDatabase m_bookDB;
|
||||
|
||||
public:
|
||||
typedef std::vector<std::string> BookIdCollection;
|
||||
|
@ -295,6 +298,7 @@ class Library
|
|||
|
||||
private: // functions
|
||||
BookIdCollection getBooksByTitleOrDescription(const Filter& filter);
|
||||
void updateBookDB(const Book& book);
|
||||
};
|
||||
|
||||
}
|
||||
|
|
|
@ -34,10 +34,31 @@
|
|||
namespace kiwix
|
||||
{
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
// Lookup table from ISO 639-3 language codes to the language names handed to
// Xapian::Stem. Only the languages listed here get a stemmer when a book is
// indexed.
const std::map<std::string, std::string> iso639_3ToXapian = {
  { "deu", "german"   },
  { "eng", "english"  },
  { "fra", "french"   },
  { "hye", "armenian" },
  { "rus", "russian"  },
  { "spa", "spanish"  },
};
|
||||
|
||||
std::string normalizeText(const std::string& text, const std::string& language)
|
||||
{
|
||||
return removeAccents(text);
|
||||
}
|
||||
|
||||
} // unnamed namespace
|
||||
|
||||
/* Constructor */
|
||||
// Builds an empty library. The book search index (m_bookDB) is opened with an
// empty path and the Xapian::DB_BACKEND_INMEMORY flag.
Library::Library()
  : m_bookDB("", Xapian::DB_BACKEND_INMEMORY)
{}
|
||||
|
||||
/* Destructor */
|
||||
Library::~Library()
|
||||
{
|
||||
|
@ -47,6 +68,7 @@ Library::~Library()
|
|||
bool Library::addBook(const Book& book)
|
||||
{
|
||||
/* Try to find it */
|
||||
updateBookDB(book);
|
||||
try {
|
||||
auto& oldbook = m_books.at(book.getId());
|
||||
oldbook.update(book);
|
||||
|
@ -232,14 +254,64 @@ Library::BookIdCollection Library::filter(const std::string& search)
|
|||
}
|
||||
|
||||
|
||||
void Library::updateBookDB(const Book& book)
|
||||
{
|
||||
Xapian::Stem stemmer;
|
||||
Xapian::TermGenerator indexer;
|
||||
const std::string lang = book.getLanguage();
|
||||
try {
|
||||
stemmer = Xapian::Stem(iso639_3ToXapian.at(lang));
|
||||
indexer.set_stemmer(stemmer);
|
||||
indexer.set_stemming_strategy(Xapian::TermGenerator::STEM_SOME);
|
||||
} catch (...) {}
|
||||
Xapian::Document doc;
|
||||
indexer.set_document(doc);
|
||||
|
||||
const std::string title = normalizeText(book.getTitle(), lang);
|
||||
const std::string desc = normalizeText(book.getDescription(), lang);
|
||||
doc.add_value(0, title);
|
||||
doc.add_value(1, desc);
|
||||
doc.set_data(book.getId());
|
||||
|
||||
indexer.index_text(title, 1, "S");
|
||||
indexer.index_text(desc, 1, "XD");
|
||||
|
||||
// Index fields without prefixes for general search
|
||||
indexer.index_text(title);
|
||||
indexer.increase_termpos();
|
||||
indexer.index_text(desc);
|
||||
|
||||
const std::string idterm = "Q" + book.getId();
|
||||
doc.add_boolean_term(idterm);
|
||||
m_bookDB.replace_document(idterm, doc);
|
||||
}
|
||||
|
||||
/**
 * Returns the ids of the books matching the text query of `filter`.
 *
 * When the filter carries no query, the ids of all books are returned.
 * Otherwise the query is parsed by Xapian (terms are AND-ed by default; the
 * "title:" and "description:" field prefixes, phrases, boolean operators,
 * +/- terms and wildcards are enabled) and run against m_bookDB. At most
 * m_books.size() results are requested.
 *
 * @param filter the filter providing the query text
 * @return the ids (document data) of the matching books, in the order of the
 *         Xapian result set
 */
Library::BookIdCollection Library::getBooksByTitleOrDescription(const Filter& filter)
{
  if ( !filter.hasQuery() )
    return getBooksIds();

  Xapian::QueryParser queryParser;
  queryParser.set_default_op(Xapian::Query::OP_AND);
  queryParser.add_prefix("title", "S");
  queryParser.add_prefix("description", "XD");
  // Language assumed for the query is not known for sure so stemming
  // is not applied
  //queryParser.set_stemmer(Xapian::Stem(iso639_3ToXapian.at(???)));
  //queryParser.set_stemming_strategy(Xapian::QueryParser::STEM_SOME);
  const auto flags = Xapian::QueryParser::FLAG_PHRASE
                   | Xapian::QueryParser::FLAG_BOOLEAN
                   | Xapian::QueryParser::FLAG_LOVEHATE
                   | Xapian::QueryParser::FLAG_WILDCARD;
  // NOTE(review): indexed titles/descriptions go through normalizeText()
  // whereas the query text is parsed as is - confirm that this asymmetry
  // is intended.
  const auto query = queryParser.parse_query(filter.getQuery(), flags);

  Xapian::Enquire enquire(m_bookDB);
  enquire.set_query(query);
  const auto results = enquire.get_mset(0, m_books.size());

  BookIdCollection bookIds;
  bookIds.reserve(results.size());
  for ( auto it = results.begin(); it != results.end(); ++it ) {
    // Each indexed document stores the id of its book as its data
    bookIds.push_back(it.get_document().get_data());
  }

  return bookIds;
}
|
||||
|
||||
|
|
|
@ -263,7 +263,7 @@ TEST_F(LibraryTest, filterCheck)
|
|||
bookIds = lib.filter(kiwix::Filter().query("folklore"));
|
||||
EXPECT_EQ(bookIds.size(), 1U);
|
||||
|
||||
bookIds = lib.filter(kiwix::Filter().query("Wiki"));
|
||||
bookIds = lib.filter(kiwix::Filter().query("Wiki*"));
|
||||
EXPECT_EQ(bookIds.size(), 4U);
|
||||
|
||||
bookIds = lib.filter(kiwix::Filter().query("Wiki").creator("Wiki"));
|
||||
|
|
|
@ -733,7 +733,26 @@ TEST_F(LibraryServerTest, catalog_searchdescription_xml)
|
|||
);
|
||||
}
|
||||
|
||||
// A quoted query must be treated as a phrase search: the expected feed only
// contains the two "ray charles" catalog entries.
TEST_F(LibraryServerTest, catalog_search_by_phrase)
{
  const auto r = zfs1_->GET("/catalog/search?q=\"ray%20charles\"");
  EXPECT_EQ(r->status, 200);
  EXPECT_EQ(maskVariableOPDSFeedData(r->body),
    OPDS_FEED_TAG
    " <id>12345678-90ab-cdef-1234-567890abcdef</id>\n"
    " <title>Search result for \"ray charles\"</title>\n"
    " <updated>YYYY-MM-DDThh:mm:ssZ</updated>\n"
    " <totalResults>2</totalResults>\n"
    " <startIndex>0</startIndex>\n"
    " <itemsPerPage>2</itemsPerPage>\n"
    CATALOG_LINK_TAGS
    RAY_CHARLES_CATALOG_ENTRY
    UNCATEGORIZED_RAY_CHARLES_CATALOG_ENTRY
    "</feed>\n"
  );
}
|
||||
|
||||
TEST_F(LibraryServerTest, catalog_search_by_words)
|
||||
{
|
||||
const auto r = zfs1_->GET("/catalog/search?q=ray%20charles");
|
||||
EXPECT_EQ(r->status, 200);
|
||||
|
@ -742,12 +761,13 @@ TEST_F(LibraryServerTest, catalog_search_by_text)
|
|||
" <id>12345678-90ab-cdef-1234-567890abcdef</id>\n"
|
||||
" <title>Search result for ray charles</title>\n"
|
||||
" <updated>YYYY-MM-DDThh:mm:ssZ</updated>\n"
|
||||
" <totalResults>2</totalResults>\n"
|
||||
" <totalResults>3</totalResults>\n"
|
||||
" <startIndex>0</startIndex>\n"
|
||||
" <itemsPerPage>2</itemsPerPage>\n"
|
||||
" <itemsPerPage>3</itemsPerPage>\n"
|
||||
CATALOG_LINK_TAGS
|
||||
RAY_CHARLES_CATALOG_ENTRY
|
||||
UNCATEGORIZED_RAY_CHARLES_CATALOG_ENTRY
|
||||
CHARLES_RAY_CATALOG_ENTRY
|
||||
"</feed>\n"
|
||||
);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue