mirror of https://github.com/kiwix/libkiwix.git
commit
473b62c9b8
|
@ -46,17 +46,21 @@ class Result
|
|||
virtual std::string get_title() = 0;
|
||||
virtual int get_score() = 0;
|
||||
virtual std::string get_snippet() = 0;
|
||||
virtual std::string get_content() = 0;
|
||||
virtual int get_wordCount() = 0;
|
||||
virtual int get_size() = 0;
|
||||
virtual int get_readerIndex() = 0;
|
||||
};
|
||||
|
||||
struct SearcherInternal;
|
||||
class Searcher
|
||||
{
|
||||
public:
|
||||
Searcher();
|
||||
Searcher(const string& xapianDirectoryPath, Reader* reader);
|
||||
~Searcher();
|
||||
|
||||
void add_reader(Reader* reader, const std::string& humanReaderName);
|
||||
void search(std::string& search,
|
||||
unsigned int resultStart,
|
||||
unsigned int resultEnd,
|
||||
|
@ -82,7 +86,8 @@ class Searcher
|
|||
const unsigned int resultEnd,
|
||||
const bool verbose = false);
|
||||
|
||||
Reader* reader;
|
||||
std::vector<Reader*> readers;
|
||||
std::vector<std::string> humanReaderNames;
|
||||
SearcherInternal* internal;
|
||||
std::string searchPattern;
|
||||
std::string protocolPrefix;
|
||||
|
|
|
@ -43,8 +43,10 @@ class XapianResult : public Result
|
|||
virtual std::string get_title();
|
||||
virtual int get_score();
|
||||
virtual std::string get_snippet();
|
||||
virtual std::string get_content();
|
||||
virtual int get_wordCount();
|
||||
virtual int get_size();
|
||||
virtual int get_readerIndex() { return 0; };
|
||||
|
||||
private:
|
||||
XapianSearcher* searcher;
|
||||
|
|
|
@ -486,12 +486,19 @@ JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_loadFulltextIndex(
|
|||
std::string cPath = jni2c(path, env);
|
||||
|
||||
pthread_mutex_lock(&searcherLock);
|
||||
searcher = NULL;
|
||||
try {
|
||||
if (searcher != NULL) {
|
||||
delete searcher;
|
||||
}
|
||||
searcher = new kiwix::Searcher(cPath, reader);
|
||||
if (!reader || !reader->hasFulltextIndex()) {
|
||||
// Use old API (no embedded full text index).
|
||||
searcher = new kiwix::Searcher(cPath, reader);
|
||||
} else {
|
||||
// Use the new API. We don't care about the human readable name as
|
||||
// we don't use it (in android).
|
||||
searcher = new kiwix::Searcher();
|
||||
searcher->add_reader(reader, "");
|
||||
}
|
||||
} catch (...) {
|
||||
searcher = NULL;
|
||||
retVal = JNI_FALSE;
|
||||
|
|
|
@ -45,8 +45,10 @@ class _Result : public Result
|
|||
virtual std::string get_title();
|
||||
virtual int get_score();
|
||||
virtual std::string get_snippet();
|
||||
virtual std::string get_content();
|
||||
virtual int get_wordCount();
|
||||
virtual int get_size();
|
||||
virtual int get_readerIndex();
|
||||
|
||||
private:
|
||||
Searcher* searcher;
|
||||
|
@ -72,8 +74,7 @@ struct SearcherInternal {
|
|||
|
||||
/* Constructor */
|
||||
Searcher::Searcher(const string& xapianDirectoryPath, Reader* reader)
|
||||
: reader(reader),
|
||||
internal(new SearcherInternal()),
|
||||
: internal(new SearcherInternal()),
|
||||
searchPattern(""),
|
||||
protocolPrefix("zim://"),
|
||||
searchProtocolPrefix("search://?"),
|
||||
|
@ -89,11 +90,32 @@ Searcher::Searcher(const string& xapianDirectoryPath, Reader* reader)
|
|||
}
|
||||
}
|
||||
|
||||
Searcher::Searcher()
|
||||
: internal(new SearcherInternal()),
|
||||
searchPattern(""),
|
||||
protocolPrefix("zim://"),
|
||||
searchProtocolPrefix("search://?"),
|
||||
resultCountPerPage(0),
|
||||
estimatedResultCount(0),
|
||||
resultStart(0),
|
||||
resultEnd(0)
|
||||
{
|
||||
template_ct2 = RESOURCE::results_ct2;
|
||||
loadICUExternalTables();
|
||||
}
|
||||
|
||||
/* Destructor */
|
||||
Searcher::~Searcher()
|
||||
{
|
||||
delete internal;
|
||||
}
|
||||
|
||||
void Searcher::add_reader(Reader* reader, const std::string& humanReadableName)
|
||||
{
|
||||
this->readers.push_back(reader);
|
||||
this->humanReaderNames.push_back(humanReadableName);
|
||||
}
|
||||
|
||||
/* Search strings in the database */
|
||||
void Searcher::search(std::string& search,
|
||||
unsigned int resultStart,
|
||||
|
@ -133,8 +155,15 @@ void Searcher::search(std::string& search,
|
|||
this->estimatedResultCount
|
||||
= internal->_xapianSearcher->results.get_matches_estimated();
|
||||
} else {
|
||||
internal->_search = this->reader->getZimFileHandler()->search(
|
||||
unaccentedSearch, resultStart, resultEnd);
|
||||
std::vector<const zim::File*> zims;
|
||||
for (auto current = this->readers.begin(); current != this->readers.end();
|
||||
current++) {
|
||||
zims.push_back((*current)->getZimFileHandler());
|
||||
}
|
||||
zim::Search* search = new zim::Search(zims);
|
||||
search->set_query(unaccentedSearch);
|
||||
search->set_range(resultStart, resultEnd);
|
||||
internal->_search = search;
|
||||
internal->current_iterator = internal->_search->begin();
|
||||
this->estimatedResultCount = internal->_search->get_matches_estimated();
|
||||
}
|
||||
|
@ -190,8 +219,16 @@ void Searcher::suggestions(std::string& search, const bool verbose)
|
|||
* We do not support that. */
|
||||
this->estimatedResultCount = 0;
|
||||
} else {
|
||||
internal->_search = this->reader->getZimFileHandler()->suggestions(
|
||||
unaccentedSearch, resultStart, resultEnd);
|
||||
std::vector<const zim::File*> zims;
|
||||
for (auto current = this->readers.begin(); current != this->readers.end();
|
||||
current++) {
|
||||
zims.push_back((*current)->getZimFileHandler());
|
||||
}
|
||||
zim::Search* search = new zim::Search(zims);
|
||||
search->set_query(unaccentedSearch);
|
||||
search->set_range(resultStart, resultEnd);
|
||||
search->set_suggestion_mode(true);
|
||||
internal->_search = search;
|
||||
internal->current_iterator = internal->_search->begin();
|
||||
this->estimatedResultCount = internal->_search->get_matches_estimated();
|
||||
}
|
||||
|
@ -241,6 +278,13 @@ std::string _Result::get_snippet()
|
|||
{
|
||||
return iterator.get_snippet();
|
||||
}
|
||||
std::string _Result::get_content()
|
||||
{
|
||||
if (iterator->good()) {
|
||||
return iterator->getData();
|
||||
}
|
||||
return "";
|
||||
}
|
||||
int _Result::get_size()
|
||||
{
|
||||
return iterator.get_size();
|
||||
|
@ -249,6 +293,10 @@ int _Result::get_wordCount()
|
|||
{
|
||||
return iterator.get_wordCount();
|
||||
}
|
||||
int _Result::get_readerIndex()
|
||||
{
|
||||
return iterator.get_fileIndex();
|
||||
}
|
||||
#ifdef ENABLE_CTPP2
|
||||
|
||||
string Searcher::getHtml()
|
||||
|
@ -266,6 +314,7 @@ string Searcher::getHtml()
|
|||
result["title"] = p_result->get_title();
|
||||
result["url"] = p_result->get_url();
|
||||
result["snippet"] = p_result->get_snippet();
|
||||
result["contentId"] = humanReaderNames[p_result->get_readerIndex()];
|
||||
|
||||
if (p_result->get_size() >= 0) {
|
||||
result["size"] = kiwix::beautifyInteger(p_result->get_size());
|
||||
|
|
|
@ -177,11 +177,10 @@ std::string XapianResult::get_snippet()
|
|||
We parse it and use the html dump to avoid remove html tags in the
|
||||
content and be able to nicely cut the text at random place. */
|
||||
MyHtmlParser htmlParser;
|
||||
std::string content;
|
||||
unsigned int contentLength;
|
||||
std::string contentType;
|
||||
searcher->reader->getContentByUrl(
|
||||
get_url(), content, contentLength, contentType);
|
||||
std::string content = get_content();
|
||||
if (content.empty()) {
|
||||
return content;
|
||||
}
|
||||
try {
|
||||
htmlParser.parse_html(content, "UTF-8", true);
|
||||
} catch (...) {
|
||||
|
@ -189,6 +188,19 @@ std::string XapianResult::get_snippet()
|
|||
return searcher->results.snippet(htmlParser.dump, 500);
|
||||
}
|
||||
|
||||
std::string XapianResult::get_content()
|
||||
{
|
||||
if (!searcher->reader) {
|
||||
return "";
|
||||
}
|
||||
std::string content;
|
||||
unsigned int contentLength;
|
||||
std::string contentType;
|
||||
searcher->reader->getContentByUrl(
|
||||
get_url(), content, contentLength, contentType);
|
||||
return content;
|
||||
}
|
||||
|
||||
int XapianResult::get_size()
|
||||
{
|
||||
if (searcher->valuesmap.empty()) {
|
||||
|
|
|
@ -92,36 +92,68 @@
|
|||
|
||||
</style>
|
||||
<title>Search: <TMPL_var searchPattern></title>
|
||||
</head>
|
||||
<body bgcolor="white">
|
||||
</head>
|
||||
<body bgcolor="white">
|
||||
<div class="header">
|
||||
<TMPL_if results>Results <b><TMPL_var resultStart>-<TMPL_var resultEnd></b> of <b><TMPL_var count></b> for <b><TMPL_var searchPattern></b><TMPL_else>No result were found for <b><TMPL_var searchPattern></b></TMPL_if>
|
||||
<TMPL_if results>
|
||||
Results
|
||||
<b>
|
||||
<TMPL_var resultStart>-<TMPL_var resultEnd>
|
||||
</b> of <b>
|
||||
<TMPL_var count>
|
||||
</b> for <b>
|
||||
<TMPL_var searchPattern>
|
||||
</b>
|
||||
<TMPL_else>
|
||||
No result were found for <b><TMPL_var searchPattern></b>
|
||||
</TMPL_if>
|
||||
</div>
|
||||
|
||||
<div class="results">
|
||||
<ul>
|
||||
<ul>
|
||||
<TMPL_foreach results as result>
|
||||
<li><a href="<TMPL_var protocolPrefix><TMPL_var contentId>/<TMPL_var result.url>"><TMPL_var result.title></a>
|
||||
<cite><TMPL_if result.snippet><TMPL_var result.snippet>...</TMPL_if></cite>
|
||||
<TMPL_if wordCount><div class="informations"><TMPL_var wordCount> words</div></TMPL_if>
|
||||
</li>
|
||||
<li>
|
||||
<a href="<TMPL_var protocolPrefix><TMPL_var result.contentId>/<TMPL_var result.url>">
|
||||
<TMPL_var result.title>
|
||||
</a>
|
||||
<cite>
|
||||
<TMPL_if result.snippet>
|
||||
<TMPL_var result.snippet>...
|
||||
</TMPL_if>
|
||||
</cite>
|
||||
<TMPL_if wordCount>
|
||||
<div class="informations"><TMPL_var wordCount> words</div>
|
||||
</TMPL_if>
|
||||
</li>
|
||||
</TMPL_foreach>
|
||||
</ul>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div class="footer">
|
||||
<ul>
|
||||
<TMPL_if (resultLastPageStart>0)>
|
||||
<li><a href="<TMPL_var searchProtocolPrefix>content=<TMPL_var contentId>&pattern=<TMPL_var searchPatternEncoded>&start=0&end=<TMPL_var resultRange>">◀</a></li>
|
||||
</TMPL_if>
|
||||
<TMPL_foreach pages as page>
|
||||
<li><a <TMPL_if page.selected>class="selected"</TMPL_if> href="<TMPL_var searchProtocolPrefix>content=<TMPL_var contentId>&pattern=<TMPL_var searchPatternEncoded>&start=<TMPL_var page.start>&end=<TMPL_var page.end>"><TMPL_var page.label></a></li>
|
||||
</TMPL_foreach>
|
||||
<TMPL_if (resultLastPageStart>0)>
|
||||
<li><a href="<TMPL_var searchProtocolPrefix>content=<TMPL_var contentId>&pattern=<TMPL_var searchPatternEncoded>&start=<TMPL_var resultLastPageStart>&end=<TMPL_var (resultLastPageStart+resultRange)>">▶</a></li>
|
||||
</TMPL_if>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
</body>
|
||||
<ul>
|
||||
<TMPL_if (resultLastPageStart>0)>
|
||||
<li>
|
||||
<a href="<TMPL_var searchProtocolPrefix>pattern=<TMPL_var searchPatternEncoded><TMPL_if contentId>&content=<TMPL_var contentId></TMPL_if>&start=0&end=<TMPL_var resultRange>">
|
||||
◀
|
||||
</a>
|
||||
</li>
|
||||
</TMPL_if>
|
||||
<TMPL_foreach pages as page>
|
||||
<li>
|
||||
<a <TMPL_if page.selected>class="selected"</TMPL_if>
|
||||
href="<TMPL_var searchProtocolPrefix>pattern=<TMPL_var searchPatternEncoded><TMPL_if contentId>&content=<TMPL_var contentId></TMPL_if>&start=<TMPL_var page.start>&end=<TMPL_var page.end>">
|
||||
<TMPL_var page.label>
|
||||
</a>
|
||||
</li>
|
||||
</TMPL_foreach>
|
||||
<TMPL_if (resultLastPageStart>0)>
|
||||
<li>
|
||||
<a href="<TMPL_var searchProtocolPrefix>pattern=<TMPL_var searchPatternEncoded><TMPL_if contentId>&content=<TMPL_var contentId></TMPL_if>&start=<TMPL_var resultLastPageStart>&end=<TMPL_var (resultLastPageStart+resultRange)>">
|
||||
▶
|
||||
</a>
|
||||
</li>
|
||||
</TMPL_if>
|
||||
</ul>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
|
|
Loading…
Reference in New Issue