Search result objects now have a get_content method.

This was not necessary when searching in only one zim file as `url` was
enough to get the article (and so the content).

If we want to search in several zim files at the same time, we need a way
to get the content directly.
This commit is contained in:
Matthieu Gautier 2017-07-05 11:28:08 +02:00
parent 4a51dd9e00
commit 8d39b0b343
4 changed files with 27 additions and 5 deletions

View File

@ -46,6 +46,7 @@ class Result
virtual std::string get_title() = 0; virtual std::string get_title() = 0;
virtual int get_score() = 0; virtual int get_score() = 0;
virtual std::string get_snippet() = 0; virtual std::string get_snippet() = 0;
virtual std::string get_content() = 0;
virtual int get_wordCount() = 0; virtual int get_wordCount() = 0;
virtual int get_size() = 0; virtual int get_size() = 0;
}; };

View File

@ -43,6 +43,7 @@ class XapianResult : public Result
virtual std::string get_title(); virtual std::string get_title();
virtual int get_score(); virtual int get_score();
virtual std::string get_snippet(); virtual std::string get_snippet();
virtual std::string get_content();
virtual int get_wordCount(); virtual int get_wordCount();
virtual int get_size(); virtual int get_size();

View File

@ -45,6 +45,7 @@ class _Result : public Result
virtual std::string get_title(); virtual std::string get_title();
virtual int get_score(); virtual int get_score();
virtual std::string get_snippet(); virtual std::string get_snippet();
virtual std::string get_content();
virtual int get_wordCount(); virtual int get_wordCount();
virtual int get_size(); virtual int get_size();
@ -241,6 +242,13 @@ std::string _Result::get_snippet()
{ {
return iterator.get_snippet(); return iterator.get_snippet();
} }
/* Return the raw content of the article this search result points to.
   Returns an empty string when the underlying article is not valid. */
std::string _Result::get_content()
{
// operator-> dereferences the search iterator to the underlying article;
// good() checks that the article is valid before reading its data.
// NOTE(review): elsewhere this file calls iterator.get_snippet() with dot
// (an iterator method) — the dot/arrow split looks intentional, but
// confirm against the zim search-iterator API.
if (iterator->good()) {
return iterator->getData();
}
// Invalid article: fall back to an empty payload rather than throwing.
return "";
}
int _Result::get_size() int _Result::get_size()
{ {
return iterator.get_size(); return iterator.get_size();

View File

@ -177,11 +177,10 @@ std::string XapianResult::get_snippet()
We parse it and use the html dump to avoid remove html tags in the We parse it and use the html dump to avoid remove html tags in the
content and be able to nicely cut the text at random place. */ content and be able to nicely cut the text at random place. */
MyHtmlParser htmlParser; MyHtmlParser htmlParser;
std::string content; std::string content = get_content();
unsigned int contentLength; if (content.empty()) {
std::string contentType; return content;
searcher->reader->getContentByUrl( }
get_url(), content, contentLength, contentType);
try { try {
htmlParser.parse_html(content, "UTF-8", true); htmlParser.parse_html(content, "UTF-8", true);
} catch (...) { } catch (...) {
@ -189,6 +188,19 @@ std::string XapianResult::get_snippet()
return searcher->results.snippet(htmlParser.dump, 500); return searcher->results.snippet(htmlParser.dump, 500);
} }
/* Return the raw content of the article this search result points to,
   fetched from the reader by the result's URL.
   Returns an empty string when no reader is attached to the searcher. */
std::string XapianResult::get_content()
{
// Without a reader there is no backing zim file to fetch from.
if (!searcher->reader) {
return "";
}
std::string content;
// Out-parameters required by getContentByUrl; only `content` is used here,
// length and mime type are discarded.
unsigned int contentLength;
std::string contentType;
searcher->reader->getContentByUrl(
get_url(), content, contentLength, contentType);
return content;
}
int XapianResult::get_size() int XapianResult::get_size()
{ {
if (searcher->valuesmap.empty()) { if (searcher->valuesmap.empty()) {