From 074c1bcffa7942f924b88a2ae63719181a199da7 Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Tue, 21 Mar 2017 16:28:03 +0100 Subject: [PATCH] Try to generate the snippet if it is not present in the database. We generate the snippet from the content of the article in the zim so we need to have access to the reader. --- include/xapianSearcher.h | 5 ++++- src/android/kiwix.cpp | 2 +- src/xapianSearcher.cpp | 29 ++++++++++++++++++++++++----- 3 files changed, 29 insertions(+), 7 deletions(-) diff --git a/include/xapianSearcher.h b/include/xapianSearcher.h index b44cb2e58..8b27eb229 100644 --- a/include/xapianSearcher.h +++ b/include/xapianSearcher.h @@ -22,6 +22,8 @@ #include #include "searcher.h" +#include "reader.h" + #include #include @@ -58,7 +60,7 @@ namespace kiwix { class XapianSearcher : public Searcher { friend class XapianResult; public: - XapianSearcher(const string &xapianDirectoryPath); + XapianSearcher(const string &xapianDirectoryPath, Reader* reader); virtual ~XapianSearcher() {}; void searchInIndex(string &search, const unsigned int resultStart, const unsigned int resultEnd, const bool verbose=false); @@ -69,6 +71,7 @@ namespace kiwix { void closeIndex(); void openIndex(const string &xapianDirectoryPath); + Reader* reader; Xapian::Database readableDatabase; Xapian::Stem stemmer; Xapian::MSet results; diff --git a/src/android/kiwix.cpp b/src/android/kiwix.cpp index 4d573e700..88076a9ff 100644 --- a/src/android/kiwix.cpp +++ b/src/android/kiwix.cpp @@ -445,7 +445,7 @@ JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_loadFulltextIndex(JN searcher = NULL; try { if (searcher != NULL) delete searcher; - searcher = new kiwix::XapianSearcher(cPath); + searcher = new kiwix::XapianSearcher(cPath, NULL); } catch (...) 
{ searcher = NULL; retVal = JNI_FALSE; diff --git a/src/xapianSearcher.cpp b/src/xapianSearcher.cpp index 3f051f273..4de9d4834 100644 --- a/src/xapianSearcher.cpp +++ b/src/xapianSearcher.cpp @@ -18,6 +18,7 @@ */ #include "xapianSearcher.h" +#include "xapian/myhtmlparse.h" #include #include #include @@ -41,8 +42,9 @@ std::map read_valuesmap(const std::string &s) { } /* Constructor */ - XapianSearcher::XapianSearcher(const string &xapianDirectoryPath) + XapianSearcher::XapianSearcher(const string &xapianDirectoryPath, Reader* reader) : Searcher(), + reader(reader), stemmer(Xapian::Stem("english")) { this->openIndex(xapianDirectoryPath); } @@ -134,14 +136,31 @@ std::map read_valuesmap(const std::string &s) { std::string XapianResult::get_snippet() { if ( searcher->valuesmap.empty() ) { - /* This is the old legacy version. Guess and try */ - return document.get_value(1); + /* This is the old legacy version. Guess and try */ + std::string stored_snippet = document.get_value(1); + if ( ! stored_snippet.empty() ) + return stored_snippet; + /* Let's continue here, and see if we can generate one */ } else if ( searcher->valuesmap.find("snippet") != searcher->valuesmap.end() ) { - return document.get_value(searcher->valuesmap["snippet"]); + return document.get_value(searcher->valuesmap["snippet"]); } - return ""; + /* No reader, no snippet */ + if ( ! searcher->reader ) + return ""; + /* Get the content of the article to generate a snippet. + We parse it and use the html dump to avoid html tags in the + content and to be able to cut the text nicely at any place. */ + MyHtmlParser htmlParser; + std::string content; + unsigned int contentLength; + std::string contentType; + searcher->reader->getContentByUrl(get_url(), content, contentLength, contentType); + try { + htmlParser.parse_html(content, "UTF-8", true); + } catch (...) {} + return searcher->results.snippet(htmlParser.dump, 500); } int XapianResult::get_size() {