Try to generate the snippet if it is not present in the database.

We generate the snippet from the content of the article in the zim so
we need to have a access to the reader.
This commit is contained in:
Matthieu Gautier 2017-03-21 16:28:03 +01:00
parent 9be2abedf3
commit 074c1bcffa
3 changed files with 29 additions and 7 deletions

View File

@ -22,6 +22,8 @@
#include <xapian.h>
#include "searcher.h"
#include "reader.h"
#include <map>
#include <string>
@ -58,7 +60,7 @@ namespace kiwix {
class XapianSearcher : public Searcher {
friend class XapianResult;
public:
XapianSearcher(const string &xapianDirectoryPath);
XapianSearcher(const string &xapianDirectoryPath, Reader* reader);
virtual ~XapianSearcher() {};
void searchInIndex(string &search, const unsigned int resultStart, const unsigned int resultEnd,
const bool verbose=false);
@ -69,6 +71,7 @@ namespace kiwix {
void closeIndex();
void openIndex(const string &xapianDirectoryPath);
Reader* reader;
Xapian::Database readableDatabase;
Xapian::Stem stemmer;
Xapian::MSet results;

View File

@ -445,7 +445,7 @@ JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_loadFulltextIndex(JN
searcher = NULL;
try {
if (searcher != NULL) delete searcher;
searcher = new kiwix::XapianSearcher(cPath);
searcher = new kiwix::XapianSearcher(cPath, NULL);
} catch (...) {
searcher = NULL;
retVal = JNI_FALSE;

View File

@ -18,6 +18,7 @@
*/
#include "xapianSearcher.h"
#include "xapian/myhtmlparse.h"
#include <zim/zim.h>
#include <zim/file.h>
#include <zim/article.h>
@ -41,8 +42,9 @@ std::map<std::string, int> read_valuesmap(const std::string &s) {
}
/* Constructor */
XapianSearcher::XapianSearcher(const string &xapianDirectoryPath)
XapianSearcher::XapianSearcher(const string &xapianDirectoryPath, Reader* reader)
: Searcher(),
reader(reader),
stemmer(Xapian::Stem("english")) {
this->openIndex(xapianDirectoryPath);
}
@ -134,14 +136,31 @@ std::map<std::string, int> read_valuesmap(const std::string &s) {
std::string XapianResult::get_snippet() {
if ( searcher->valuesmap.empty() )
{
/* This is the old legacy version. Guess and try */
return document.get_value(1);
/* This is the old legacy version. Guess and try */
std::string stored_snippet = document.get_value(1);
if ( ! stored_snippet.empty() )
return stored_snippet;
/* Let's continue here, and see if we can genenate one */
}
else if ( searcher->valuesmap.find("snippet") != searcher->valuesmap.end() )
{
return document.get_value(searcher->valuesmap["snippet"]);
return document.get_value(searcher->valuesmap["snippet"]);
}
return "";
/* No reader, no snippet */
if ( ! searcher->reader )
return "";
/* Get the content of the article to generate a snippet.
We parse it and use the html dump to avoid remove html tags in the
content and be able to nicely cut the text at random place. */
MyHtmlParser htmlParser;
std::string content;
unsigned int contentLength;
std::string contentType;
searcher->reader->getContentByUrl(get_url(), content, contentLength, contentType);
try {
htmlParser.parse_html(content, "UTF-8", true);
} catch (...) {}
return searcher->results.snippet(htmlParser.dump, 500);
}
int XapianResult::get_size() {