diff --git a/include/searcher.h b/include/searcher.h index e5549778f..af28e521f 100644 --- a/include/searcher.h +++ b/include/searcher.h @@ -46,6 +46,7 @@ class Result virtual std::string get_title() = 0; virtual int get_score() = 0; virtual std::string get_snippet() = 0; + virtual std::string get_content() = 0; /* Pure virtual content accessor: every concrete Result must implement it. */ virtual int get_wordCount() = 0; virtual int get_size() = 0; }; diff --git a/include/xapianSearcher.h b/include/xapianSearcher.h index 907ca733e..dcbe5647a 100644 --- a/include/xapianSearcher.h +++ b/include/xapianSearcher.h @@ -43,6 +43,7 @@ class XapianResult : public Result virtual std::string get_title(); virtual int get_score(); virtual std::string get_snippet(); + virtual std::string get_content(); /* XapianResult's implementation of Result::get_content(). */ virtual int get_wordCount(); virtual int get_size(); diff --git a/src/searcher.cpp b/src/searcher.cpp index 4bfeab355..d83adc7fb 100644 --- a/src/searcher.cpp +++ b/src/searcher.cpp @@ -45,6 +45,7 @@ class _Result : public Result virtual std::string get_title(); virtual int get_score(); virtual std::string get_snippet(); + virtual std::string get_content(); /* _Result's implementation of Result::get_content(). */ virtual int get_wordCount(); virtual int get_size(); @@ -241,6 +242,13 @@ std::string _Result::get_snippet() { return iterator.get_snippet(); } +std::string _Result::get_content() /* Returns iterator->getData() when iterator->good() is true, otherwise an empty string. */ +{ + if (iterator->good()) { /* NOTE(review): neighbouring methods call iterator.xxx() directly while this one goes through iterator-> ; presumably operator-> exposes the underlying entry, confirm against the iterator type. */ + return iterator->getData(); + } + return ""; /* fallback taken when good() reported false */ +} int _Result::get_size() { return iterator.get_size(); diff --git a/src/xapianSearcher.cpp b/src/xapianSearcher.cpp index aa0223d99..206888155 100644 --- a/src/xapianSearcher.cpp +++ b/src/xapianSearcher.cpp @@ -177,11 +177,10 @@ std::string XapianResult::get_snippet() We parse it and use the html dump to avoid remove html tags in the content and be able to nicely cut the text at random place. 
*/ MyHtmlParser htmlParser; - std::string content; - unsigned int contentLength; - std::string contentType; - searcher->reader->getContentByUrl( - get_url(), content, contentLength, contentType); + std::string content = get_content(); /* content now comes from get_content() instead of an inline reader call */ + if (content.empty()) { + return content; /* nothing to parse: the empty string is returned as the snippet */ + } try { htmlParser.parse_html(content, "UTF-8", true); } catch (...) { @@ -189,6 +188,19 @@ std::string XapianResult::get_snippet() return searcher->results.snippet(htmlParser.dump, 500); } +std::string XapianResult::get_content() /* Returns the content string filled in by searcher->reader->getContentByUrl() for get_url(), or an empty string when searcher->reader is null. */ +{ + if (!searcher->reader) { + return ""; /* no reader available to query */ + } + std::string content; + unsigned int contentLength; + std::string contentType; + searcher->reader->getContentByUrl( + get_url(), content, contentLength, contentType); /* contentLength and contentType are received as out-values but not used afterwards; NOTE(review): whether getContentByUrl signals failure is not visible here, so content is returned as-is. */ + return content; +} + int XapianResult::get_size() { if (searcher->valuesmap.empty()) {