mirror of https://github.com/kiwix/libkiwix.git
Try to generate the snippet if it is not present in the database.
We generate the snippet from the content of the article in the ZIM file, so we need access to the reader.
This commit is contained in:
parent
9be2abedf3
commit
074c1bcffa
|
@ -22,6 +22,8 @@
|
||||||
|
|
||||||
#include <xapian.h>
|
#include <xapian.h>
|
||||||
#include "searcher.h"
|
#include "searcher.h"
|
||||||
|
#include "reader.h"
|
||||||
|
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
|
@ -58,7 +60,7 @@ namespace kiwix {
|
||||||
class XapianSearcher : public Searcher {
|
class XapianSearcher : public Searcher {
|
||||||
friend class XapianResult;
|
friend class XapianResult;
|
||||||
public:
|
public:
|
||||||
XapianSearcher(const string &xapianDirectoryPath);
|
XapianSearcher(const string &xapianDirectoryPath, Reader* reader);
|
||||||
virtual ~XapianSearcher() {};
|
virtual ~XapianSearcher() {};
|
||||||
void searchInIndex(string &search, const unsigned int resultStart, const unsigned int resultEnd,
|
void searchInIndex(string &search, const unsigned int resultStart, const unsigned int resultEnd,
|
||||||
const bool verbose=false);
|
const bool verbose=false);
|
||||||
|
@ -69,6 +71,7 @@ namespace kiwix {
|
||||||
void closeIndex();
|
void closeIndex();
|
||||||
void openIndex(const string &xapianDirectoryPath);
|
void openIndex(const string &xapianDirectoryPath);
|
||||||
|
|
||||||
|
Reader* reader;
|
||||||
Xapian::Database readableDatabase;
|
Xapian::Database readableDatabase;
|
||||||
Xapian::Stem stemmer;
|
Xapian::Stem stemmer;
|
||||||
Xapian::MSet results;
|
Xapian::MSet results;
|
||||||
|
|
|
@ -445,7 +445,7 @@ JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_loadFulltextIndex(JN
|
||||||
searcher = NULL;
|
searcher = NULL;
|
||||||
try {
|
try {
|
||||||
if (searcher != NULL) delete searcher;
|
if (searcher != NULL) delete searcher;
|
||||||
searcher = new kiwix::XapianSearcher(cPath);
|
searcher = new kiwix::XapianSearcher(cPath, NULL);
|
||||||
} catch (...) {
|
} catch (...) {
|
||||||
searcher = NULL;
|
searcher = NULL;
|
||||||
retVal = JNI_FALSE;
|
retVal = JNI_FALSE;
|
||||||
|
|
|
@ -18,6 +18,7 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "xapianSearcher.h"
|
#include "xapianSearcher.h"
|
||||||
|
#include "xapian/myhtmlparse.h"
|
||||||
#include <zim/zim.h>
|
#include <zim/zim.h>
|
||||||
#include <zim/file.h>
|
#include <zim/file.h>
|
||||||
#include <zim/article.h>
|
#include <zim/article.h>
|
||||||
|
@ -41,8 +42,9 @@ std::map<std::string, int> read_valuesmap(const std::string &s) {
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Constructor */
|
/* Constructor */
|
||||||
XapianSearcher::XapianSearcher(const string &xapianDirectoryPath)
|
XapianSearcher::XapianSearcher(const string &xapianDirectoryPath, Reader* reader)
|
||||||
: Searcher(),
|
: Searcher(),
|
||||||
|
reader(reader),
|
||||||
stemmer(Xapian::Stem("english")) {
|
stemmer(Xapian::Stem("english")) {
|
||||||
this->openIndex(xapianDirectoryPath);
|
this->openIndex(xapianDirectoryPath);
|
||||||
}
|
}
|
||||||
|
@ -134,14 +136,31 @@ std::map<std::string, int> read_valuesmap(const std::string &s) {
|
||||||
std::string XapianResult::get_snippet() {
|
std::string XapianResult::get_snippet() {
|
||||||
if ( searcher->valuesmap.empty() )
|
if ( searcher->valuesmap.empty() )
|
||||||
{
|
{
|
||||||
/* This is the old legacy version. Guess and try */
|
/* This is the old legacy version. Guess and try */
|
||||||
return document.get_value(1);
|
std::string stored_snippet = document.get_value(1);
|
||||||
|
if ( ! stored_snippet.empty() )
|
||||||
|
return stored_snippet;
|
||||||
|
/* Let's continue here, and see if we can generate one */
|
||||||
}
|
}
|
||||||
else if ( searcher->valuesmap.find("snippet") != searcher->valuesmap.end() )
|
else if ( searcher->valuesmap.find("snippet") != searcher->valuesmap.end() )
|
||||||
{
|
{
|
||||||
return document.get_value(searcher->valuesmap["snippet"]);
|
return document.get_value(searcher->valuesmap["snippet"]);
|
||||||
}
|
}
|
||||||
return "";
|
/* No reader, no snippet */
|
||||||
|
if ( ! searcher->reader )
|
||||||
|
return "";
|
||||||
|
/* Get the content of the article to generate a snippet.
|
||||||
|
We parse it and use the html dump to avoid html tags in the
|
||||||
|
content and be able to nicely cut the text at an arbitrary place. */
|
||||||
|
MyHtmlParser htmlParser;
|
||||||
|
std::string content;
|
||||||
|
unsigned int contentLength;
|
||||||
|
std::string contentType;
|
||||||
|
searcher->reader->getContentByUrl(get_url(), content, contentLength, contentType);
|
||||||
|
try {
|
||||||
|
htmlParser.parse_html(content, "UTF-8", true);
|
||||||
|
} catch (...) {}
|
||||||
|
return searcher->results.snippet(htmlParser.dump, 500);
|
||||||
}
|
}
|
||||||
|
|
||||||
int XapianResult::get_size() {
|
int XapianResult::get_size() {
|
||||||
|
|
Loading…
Reference in New Issue