Try to generate the snippet if it is not present in the database.

We generate the snippet from the content of the article in the zim file, so
we need to have access to the reader.
This commit is contained in:
Matthieu Gautier 2017-03-21 16:28:03 +01:00
parent 9be2abedf3
commit 074c1bcffa
3 changed files with 29 additions and 7 deletions

View File

@ -22,6 +22,8 @@
#include <xapian.h> #include <xapian.h>
#include "searcher.h" #include "searcher.h"
#include "reader.h"
#include <map> #include <map>
#include <string> #include <string>
@ -58,7 +60,7 @@ namespace kiwix {
class XapianSearcher : public Searcher { class XapianSearcher : public Searcher {
friend class XapianResult; friend class XapianResult;
public: public:
XapianSearcher(const string &xapianDirectoryPath); XapianSearcher(const string &xapianDirectoryPath, Reader* reader);
virtual ~XapianSearcher() {}; virtual ~XapianSearcher() {};
void searchInIndex(string &search, const unsigned int resultStart, const unsigned int resultEnd, void searchInIndex(string &search, const unsigned int resultStart, const unsigned int resultEnd,
const bool verbose=false); const bool verbose=false);
@ -69,6 +71,7 @@ namespace kiwix {
void closeIndex(); void closeIndex();
void openIndex(const string &xapianDirectoryPath); void openIndex(const string &xapianDirectoryPath);
Reader* reader;
Xapian::Database readableDatabase; Xapian::Database readableDatabase;
Xapian::Stem stemmer; Xapian::Stem stemmer;
Xapian::MSet results; Xapian::MSet results;

View File

@ -445,7 +445,7 @@ JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_loadFulltextIndex(JN
searcher = NULL; searcher = NULL;
try { try {
if (searcher != NULL) delete searcher; if (searcher != NULL) delete searcher;
searcher = new kiwix::XapianSearcher(cPath); searcher = new kiwix::XapianSearcher(cPath, NULL);
} catch (...) { } catch (...) {
searcher = NULL; searcher = NULL;
retVal = JNI_FALSE; retVal = JNI_FALSE;

View File

@ -18,6 +18,7 @@
*/ */
#include "xapianSearcher.h" #include "xapianSearcher.h"
#include "xapian/myhtmlparse.h"
#include <zim/zim.h> #include <zim/zim.h>
#include <zim/file.h> #include <zim/file.h>
#include <zim/article.h> #include <zim/article.h>
@ -41,8 +42,9 @@ std::map<std::string, int> read_valuesmap(const std::string &s) {
} }
/* Constructor */ /* Constructor */
XapianSearcher::XapianSearcher(const string &xapianDirectoryPath) XapianSearcher::XapianSearcher(const string &xapianDirectoryPath, Reader* reader)
: Searcher(), : Searcher(),
reader(reader),
stemmer(Xapian::Stem("english")) { stemmer(Xapian::Stem("english")) {
this->openIndex(xapianDirectoryPath); this->openIndex(xapianDirectoryPath);
} }
@ -134,14 +136,31 @@ std::map<std::string, int> read_valuesmap(const std::string &s) {
std::string XapianResult::get_snippet() { std::string XapianResult::get_snippet() {
if ( searcher->valuesmap.empty() ) if ( searcher->valuesmap.empty() )
{ {
/* This is the old legacy version. Guess and try */ /* This is the old legacy version. Guess and try */
return document.get_value(1); std::string stored_snippet = document.get_value(1);
if ( ! stored_snippet.empty() )
return stored_snippet;
/* Let's continue here, and see if we can generate one */
} }
else if ( searcher->valuesmap.find("snippet") != searcher->valuesmap.end() ) else if ( searcher->valuesmap.find("snippet") != searcher->valuesmap.end() )
{ {
return document.get_value(searcher->valuesmap["snippet"]); return document.get_value(searcher->valuesmap["snippet"]);
} }
return ""; /* No reader, no snippet */
if ( ! searcher->reader )
return "";
/* Get the content of the article to generate a snippet.
We parse it and use the html dump to avoid having html tags in the
content and to be able to nicely cut the text at an arbitrary place. */
MyHtmlParser htmlParser;
std::string content;
unsigned int contentLength;
std::string contentType;
searcher->reader->getContentByUrl(get_url(), content, contentLength, contentType);
try {
htmlParser.parse_html(content, "UTF-8", true);
} catch (...) {}
return searcher->results.snippet(htmlParser.dump, 500);
} }
int XapianResult::get_size() { int XapianResult::get_size() {