Search result objects now have a get_content method.

This was not necessary when searching in only one zim file as `url` was
enough to get the article (and so the content).

If we want to search in several zim files at the same time, we need a way to
get the content directly from the result itself.
This commit is contained in:
Matthieu Gautier 2017-07-05 11:28:08 +02:00
parent 4a51dd9e00
commit 8d39b0b343
4 changed files with 27 additions and 5 deletions

View File

@ -46,6 +46,7 @@ class Result
virtual std::string get_title() = 0;
virtual int get_score() = 0;
virtual std::string get_snippet() = 0;
virtual std::string get_content() = 0;
virtual int get_wordCount() = 0;
virtual int get_size() = 0;
};

View File

@ -43,6 +43,7 @@ class XapianResult : public Result
virtual std::string get_title();
virtual int get_score();
virtual std::string get_snippet();
virtual std::string get_content();
virtual int get_wordCount();
virtual int get_size();

View File

@ -45,6 +45,7 @@ class _Result : public Result
virtual std::string get_title();
virtual int get_score();
virtual std::string get_snippet();
virtual std::string get_content();
virtual int get_wordCount();
virtual int get_size();
@ -241,6 +242,13 @@ std::string _Result::get_snippet()
{
return iterator.get_snippet();
}
/**
 * Return the content attached to this search result.
 *
 * The data is read through the underlying search iterator. When the
 * iterator's target does not report itself as good(), an empty string is
 * returned instead.
 */
std::string _Result::get_content()
{
    // Bail out early when there is nothing valid to read from.
    if (!iterator->good()) {
        return "";
    }
    // The value returned by getData() is converted to std::string on return.
    return iterator->getData();
}
int _Result::get_size()
{
return iterator.get_size();

View File

@ -177,11 +177,10 @@ std::string XapianResult::get_snippet()
We parse it and use the html dump to avoid remove html tags in the
content and be able to nicely cut the text at random place. */
MyHtmlParser htmlParser;
std::string content;
unsigned int contentLength;
std::string contentType;
searcher->reader->getContentByUrl(
get_url(), content, contentLength, contentType);
std::string content = get_content();
if (content.empty()) {
return content;
}
try {
htmlParser.parse_html(content, "UTF-8", true);
} catch (...) {
@ -189,6 +188,19 @@ std::string XapianResult::get_snippet()
return searcher->results.snippet(htmlParser.dump, 500);
}
/**
 * Return the content of the article this result refers to.
 *
 * The content is looked up by the result's url through the searcher's
 * reader. When the searcher has no reader, an empty string is returned.
 */
std::string XapianResult::get_content()
{
    std::string body;
    if (searcher->reader) {
        // Only the content is of interest here; the length and the type are
        // required by the call but are discarded afterwards.
        unsigned int bodyLength;
        std::string mimeType;
        searcher->reader->getContentByUrl(
            get_url(), body, bodyLength, mimeType);
    }
    return body;
}
int XapianResult::get_size()
{
if (searcher->valuesmap.empty()) {