mirror of https://github.com/kiwix/libkiwix.git
commit
473b62c9b8
|
@ -46,17 +46,21 @@ class Result
|
||||||
virtual std::string get_title() = 0;
|
virtual std::string get_title() = 0;
|
||||||
virtual int get_score() = 0;
|
virtual int get_score() = 0;
|
||||||
virtual std::string get_snippet() = 0;
|
virtual std::string get_snippet() = 0;
|
||||||
|
virtual std::string get_content() = 0;
|
||||||
virtual int get_wordCount() = 0;
|
virtual int get_wordCount() = 0;
|
||||||
virtual int get_size() = 0;
|
virtual int get_size() = 0;
|
||||||
|
virtual int get_readerIndex() = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct SearcherInternal;
|
struct SearcherInternal;
|
||||||
class Searcher
|
class Searcher
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
Searcher();
|
||||||
Searcher(const string& xapianDirectoryPath, Reader* reader);
|
Searcher(const string& xapianDirectoryPath, Reader* reader);
|
||||||
~Searcher();
|
~Searcher();
|
||||||
|
|
||||||
|
void add_reader(Reader* reader, const std::string& humanReaderName);
|
||||||
void search(std::string& search,
|
void search(std::string& search,
|
||||||
unsigned int resultStart,
|
unsigned int resultStart,
|
||||||
unsigned int resultEnd,
|
unsigned int resultEnd,
|
||||||
|
@ -82,7 +86,8 @@ class Searcher
|
||||||
const unsigned int resultEnd,
|
const unsigned int resultEnd,
|
||||||
const bool verbose = false);
|
const bool verbose = false);
|
||||||
|
|
||||||
Reader* reader;
|
std::vector<Reader*> readers;
|
||||||
|
std::vector<std::string> humanReaderNames;
|
||||||
SearcherInternal* internal;
|
SearcherInternal* internal;
|
||||||
std::string searchPattern;
|
std::string searchPattern;
|
||||||
std::string protocolPrefix;
|
std::string protocolPrefix;
|
||||||
|
|
|
@ -43,8 +43,10 @@ class XapianResult : public Result
|
||||||
virtual std::string get_title();
|
virtual std::string get_title();
|
||||||
virtual int get_score();
|
virtual int get_score();
|
||||||
virtual std::string get_snippet();
|
virtual std::string get_snippet();
|
||||||
|
virtual std::string get_content();
|
||||||
virtual int get_wordCount();
|
virtual int get_wordCount();
|
||||||
virtual int get_size();
|
virtual int get_size();
|
||||||
|
virtual int get_readerIndex() { return 0; };
|
||||||
|
|
||||||
private:
|
private:
|
||||||
XapianSearcher* searcher;
|
XapianSearcher* searcher;
|
||||||
|
|
|
@ -486,12 +486,19 @@ JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_loadFulltextIndex(
|
||||||
std::string cPath = jni2c(path, env);
|
std::string cPath = jni2c(path, env);
|
||||||
|
|
||||||
pthread_mutex_lock(&searcherLock);
|
pthread_mutex_lock(&searcherLock);
|
||||||
searcher = NULL;
|
|
||||||
try {
|
try {
|
||||||
if (searcher != NULL) {
|
if (searcher != NULL) {
|
||||||
delete searcher;
|
delete searcher;
|
||||||
}
|
}
|
||||||
searcher = new kiwix::Searcher(cPath, reader);
|
if (!reader || !reader->hasFulltextIndex()) {
|
||||||
|
// Use old API (no embedded full text index).
|
||||||
|
searcher = new kiwix::Searcher(cPath, reader);
|
||||||
|
} else {
|
||||||
|
// Use the new API. We don't care about the human readable name as
|
||||||
|
// we don't use it (in android).
|
||||||
|
searcher = new kiwix::Searcher();
|
||||||
|
searcher->add_reader(reader, "");
|
||||||
|
}
|
||||||
} catch (...) {
|
} catch (...) {
|
||||||
searcher = NULL;
|
searcher = NULL;
|
||||||
retVal = JNI_FALSE;
|
retVal = JNI_FALSE;
|
||||||
|
|
|
@ -45,8 +45,10 @@ class _Result : public Result
|
||||||
virtual std::string get_title();
|
virtual std::string get_title();
|
||||||
virtual int get_score();
|
virtual int get_score();
|
||||||
virtual std::string get_snippet();
|
virtual std::string get_snippet();
|
||||||
|
virtual std::string get_content();
|
||||||
virtual int get_wordCount();
|
virtual int get_wordCount();
|
||||||
virtual int get_size();
|
virtual int get_size();
|
||||||
|
virtual int get_readerIndex();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
Searcher* searcher;
|
Searcher* searcher;
|
||||||
|
@ -72,8 +74,7 @@ struct SearcherInternal {
|
||||||
|
|
||||||
/* Constructor */
|
/* Constructor */
|
||||||
Searcher::Searcher(const string& xapianDirectoryPath, Reader* reader)
|
Searcher::Searcher(const string& xapianDirectoryPath, Reader* reader)
|
||||||
: reader(reader),
|
: internal(new SearcherInternal()),
|
||||||
internal(new SearcherInternal()),
|
|
||||||
searchPattern(""),
|
searchPattern(""),
|
||||||
protocolPrefix("zim://"),
|
protocolPrefix("zim://"),
|
||||||
searchProtocolPrefix("search://?"),
|
searchProtocolPrefix("search://?"),
|
||||||
|
@ -89,11 +90,32 @@ Searcher::Searcher(const string& xapianDirectoryPath, Reader* reader)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Searcher::Searcher()
|
||||||
|
: internal(new SearcherInternal()),
|
||||||
|
searchPattern(""),
|
||||||
|
protocolPrefix("zim://"),
|
||||||
|
searchProtocolPrefix("search://?"),
|
||||||
|
resultCountPerPage(0),
|
||||||
|
estimatedResultCount(0),
|
||||||
|
resultStart(0),
|
||||||
|
resultEnd(0)
|
||||||
|
{
|
||||||
|
template_ct2 = RESOURCE::results_ct2;
|
||||||
|
loadICUExternalTables();
|
||||||
|
}
|
||||||
|
|
||||||
/* Destructor */
|
/* Destructor */
|
||||||
Searcher::~Searcher()
|
Searcher::~Searcher()
|
||||||
{
|
{
|
||||||
delete internal;
|
delete internal;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Searcher::add_reader(Reader* reader, const std::string& humanReadableName)
|
||||||
|
{
|
||||||
|
this->readers.push_back(reader);
|
||||||
|
this->humanReaderNames.push_back(humanReadableName);
|
||||||
|
}
|
||||||
|
|
||||||
/* Search strings in the database */
|
/* Search strings in the database */
|
||||||
void Searcher::search(std::string& search,
|
void Searcher::search(std::string& search,
|
||||||
unsigned int resultStart,
|
unsigned int resultStart,
|
||||||
|
@ -133,8 +155,15 @@ void Searcher::search(std::string& search,
|
||||||
this->estimatedResultCount
|
this->estimatedResultCount
|
||||||
= internal->_xapianSearcher->results.get_matches_estimated();
|
= internal->_xapianSearcher->results.get_matches_estimated();
|
||||||
} else {
|
} else {
|
||||||
internal->_search = this->reader->getZimFileHandler()->search(
|
std::vector<const zim::File*> zims;
|
||||||
unaccentedSearch, resultStart, resultEnd);
|
for (auto current = this->readers.begin(); current != this->readers.end();
|
||||||
|
current++) {
|
||||||
|
zims.push_back((*current)->getZimFileHandler());
|
||||||
|
}
|
||||||
|
zim::Search* search = new zim::Search(zims);
|
||||||
|
search->set_query(unaccentedSearch);
|
||||||
|
search->set_range(resultStart, resultEnd);
|
||||||
|
internal->_search = search;
|
||||||
internal->current_iterator = internal->_search->begin();
|
internal->current_iterator = internal->_search->begin();
|
||||||
this->estimatedResultCount = internal->_search->get_matches_estimated();
|
this->estimatedResultCount = internal->_search->get_matches_estimated();
|
||||||
}
|
}
|
||||||
|
@ -190,8 +219,16 @@ void Searcher::suggestions(std::string& search, const bool verbose)
|
||||||
* We do not support that. */
|
* We do not support that. */
|
||||||
this->estimatedResultCount = 0;
|
this->estimatedResultCount = 0;
|
||||||
} else {
|
} else {
|
||||||
internal->_search = this->reader->getZimFileHandler()->suggestions(
|
std::vector<const zim::File*> zims;
|
||||||
unaccentedSearch, resultStart, resultEnd);
|
for (auto current = this->readers.begin(); current != this->readers.end();
|
||||||
|
current++) {
|
||||||
|
zims.push_back((*current)->getZimFileHandler());
|
||||||
|
}
|
||||||
|
zim::Search* search = new zim::Search(zims);
|
||||||
|
search->set_query(unaccentedSearch);
|
||||||
|
search->set_range(resultStart, resultEnd);
|
||||||
|
search->set_suggestion_mode(true);
|
||||||
|
internal->_search = search;
|
||||||
internal->current_iterator = internal->_search->begin();
|
internal->current_iterator = internal->_search->begin();
|
||||||
this->estimatedResultCount = internal->_search->get_matches_estimated();
|
this->estimatedResultCount = internal->_search->get_matches_estimated();
|
||||||
}
|
}
|
||||||
|
@ -241,6 +278,13 @@ std::string _Result::get_snippet()
|
||||||
{
|
{
|
||||||
return iterator.get_snippet();
|
return iterator.get_snippet();
|
||||||
}
|
}
|
||||||
|
std::string _Result::get_content()
|
||||||
|
{
|
||||||
|
if (iterator->good()) {
|
||||||
|
return iterator->getData();
|
||||||
|
}
|
||||||
|
return "";
|
||||||
|
}
|
||||||
int _Result::get_size()
|
int _Result::get_size()
|
||||||
{
|
{
|
||||||
return iterator.get_size();
|
return iterator.get_size();
|
||||||
|
@ -249,6 +293,10 @@ int _Result::get_wordCount()
|
||||||
{
|
{
|
||||||
return iterator.get_wordCount();
|
return iterator.get_wordCount();
|
||||||
}
|
}
|
||||||
|
int _Result::get_readerIndex()
|
||||||
|
{
|
||||||
|
return iterator.get_fileIndex();
|
||||||
|
}
|
||||||
#ifdef ENABLE_CTPP2
|
#ifdef ENABLE_CTPP2
|
||||||
|
|
||||||
string Searcher::getHtml()
|
string Searcher::getHtml()
|
||||||
|
@ -266,6 +314,7 @@ string Searcher::getHtml()
|
||||||
result["title"] = p_result->get_title();
|
result["title"] = p_result->get_title();
|
||||||
result["url"] = p_result->get_url();
|
result["url"] = p_result->get_url();
|
||||||
result["snippet"] = p_result->get_snippet();
|
result["snippet"] = p_result->get_snippet();
|
||||||
|
result["contentId"] = humanReaderNames[p_result->get_readerIndex()];
|
||||||
|
|
||||||
if (p_result->get_size() >= 0) {
|
if (p_result->get_size() >= 0) {
|
||||||
result["size"] = kiwix::beautifyInteger(p_result->get_size());
|
result["size"] = kiwix::beautifyInteger(p_result->get_size());
|
||||||
|
|
|
@ -177,11 +177,10 @@ std::string XapianResult::get_snippet()
|
||||||
We parse it and use the html dump to avoid remove html tags in the
|
We parse it and use the html dump to avoid remove html tags in the
|
||||||
content and be able to nicely cut the text at random place. */
|
content and be able to nicely cut the text at random place. */
|
||||||
MyHtmlParser htmlParser;
|
MyHtmlParser htmlParser;
|
||||||
std::string content;
|
std::string content = get_content();
|
||||||
unsigned int contentLength;
|
if (content.empty()) {
|
||||||
std::string contentType;
|
return content;
|
||||||
searcher->reader->getContentByUrl(
|
}
|
||||||
get_url(), content, contentLength, contentType);
|
|
||||||
try {
|
try {
|
||||||
htmlParser.parse_html(content, "UTF-8", true);
|
htmlParser.parse_html(content, "UTF-8", true);
|
||||||
} catch (...) {
|
} catch (...) {
|
||||||
|
@ -189,6 +188,19 @@ std::string XapianResult::get_snippet()
|
||||||
return searcher->results.snippet(htmlParser.dump, 500);
|
return searcher->results.snippet(htmlParser.dump, 500);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::string XapianResult::get_content()
|
||||||
|
{
|
||||||
|
if (!searcher->reader) {
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
std::string content;
|
||||||
|
unsigned int contentLength;
|
||||||
|
std::string contentType;
|
||||||
|
searcher->reader->getContentByUrl(
|
||||||
|
get_url(), content, contentLength, contentType);
|
||||||
|
return content;
|
||||||
|
}
|
||||||
|
|
||||||
int XapianResult::get_size()
|
int XapianResult::get_size()
|
||||||
{
|
{
|
||||||
if (searcher->valuesmap.empty()) {
|
if (searcher->valuesmap.empty()) {
|
||||||
|
|
|
@ -92,36 +92,68 @@
|
||||||
|
|
||||||
</style>
|
</style>
|
||||||
<title>Search: <TMPL_var searchPattern></title>
|
<title>Search: <TMPL_var searchPattern></title>
|
||||||
</head>
|
</head>
|
||||||
<body bgcolor="white">
|
<body bgcolor="white">
|
||||||
<div class="header">
|
<div class="header">
|
||||||
<TMPL_if results>Results <b><TMPL_var resultStart>-<TMPL_var resultEnd></b> of <b><TMPL_var count></b> for <b><TMPL_var searchPattern></b><TMPL_else>No result were found for <b><TMPL_var searchPattern></b></TMPL_if>
|
<TMPL_if results>
|
||||||
|
Results
|
||||||
|
<b>
|
||||||
|
<TMPL_var resultStart>-<TMPL_var resultEnd>
|
||||||
|
</b> of <b>
|
||||||
|
<TMPL_var count>
|
||||||
|
</b> for <b>
|
||||||
|
<TMPL_var searchPattern>
|
||||||
|
</b>
|
||||||
|
<TMPL_else>
|
||||||
|
No result were found for <b><TMPL_var searchPattern></b>
|
||||||
|
</TMPL_if>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="results">
|
<div class="results">
|
||||||
<ul>
|
<ul>
|
||||||
<TMPL_foreach results as result>
|
<TMPL_foreach results as result>
|
||||||
<li><a href="<TMPL_var protocolPrefix><TMPL_var contentId>/<TMPL_var result.url>"><TMPL_var result.title></a>
|
<li>
|
||||||
<cite><TMPL_if result.snippet><TMPL_var result.snippet>...</TMPL_if></cite>
|
<a href="<TMPL_var protocolPrefix><TMPL_var result.contentId>/<TMPL_var result.url>">
|
||||||
<TMPL_if wordCount><div class="informations"><TMPL_var wordCount> words</div></TMPL_if>
|
<TMPL_var result.title>
|
||||||
</li>
|
</a>
|
||||||
|
<cite>
|
||||||
|
<TMPL_if result.snippet>
|
||||||
|
<TMPL_var result.snippet>...
|
||||||
|
</TMPL_if>
|
||||||
|
</cite>
|
||||||
|
<TMPL_if wordCount>
|
||||||
|
<div class="informations"><TMPL_var wordCount> words</div>
|
||||||
|
</TMPL_if>
|
||||||
|
</li>
|
||||||
</TMPL_foreach>
|
</TMPL_foreach>
|
||||||
</ul>
|
</ul>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="footer">
|
<div class="footer">
|
||||||
<ul>
|
<ul>
|
||||||
<TMPL_if (resultLastPageStart>0)>
|
<TMPL_if (resultLastPageStart>0)>
|
||||||
<li><a href="<TMPL_var searchProtocolPrefix>content=<TMPL_var contentId>&pattern=<TMPL_var searchPatternEncoded>&start=0&end=<TMPL_var resultRange>">◀</a></li>
|
<li>
|
||||||
</TMPL_if>
|
<a href="<TMPL_var searchProtocolPrefix>pattern=<TMPL_var searchPatternEncoded><TMPL_if contentId>&content=<TMPL_var contentId></TMPL_if>&start=0&end=<TMPL_var resultRange>">
|
||||||
<TMPL_foreach pages as page>
|
◀
|
||||||
<li><a <TMPL_if page.selected>class="selected"</TMPL_if> href="<TMPL_var searchProtocolPrefix>content=<TMPL_var contentId>&pattern=<TMPL_var searchPatternEncoded>&start=<TMPL_var page.start>&end=<TMPL_var page.end>"><TMPL_var page.label></a></li>
|
</a>
|
||||||
</TMPL_foreach>
|
</li>
|
||||||
<TMPL_if (resultLastPageStart>0)>
|
</TMPL_if>
|
||||||
<li><a href="<TMPL_var searchProtocolPrefix>content=<TMPL_var contentId>&pattern=<TMPL_var searchPatternEncoded>&start=<TMPL_var resultLastPageStart>&end=<TMPL_var (resultLastPageStart+resultRange)>">▶</a></li>
|
<TMPL_foreach pages as page>
|
||||||
</TMPL_if>
|
<li>
|
||||||
</ul>
|
<a <TMPL_if page.selected>class="selected"</TMPL_if>
|
||||||
</div>
|
href="<TMPL_var searchProtocolPrefix>pattern=<TMPL_var searchPatternEncoded><TMPL_if contentId>&content=<TMPL_var contentId></TMPL_if>&start=<TMPL_var page.start>&end=<TMPL_var page.end>">
|
||||||
|
<TMPL_var page.label>
|
||||||
</body>
|
</a>
|
||||||
|
</li>
|
||||||
|
</TMPL_foreach>
|
||||||
|
<TMPL_if (resultLastPageStart>0)>
|
||||||
|
<li>
|
||||||
|
<a href="<TMPL_var searchProtocolPrefix>pattern=<TMPL_var searchPatternEncoded><TMPL_if contentId>&content=<TMPL_var contentId></TMPL_if>&start=<TMPL_var resultLastPageStart>&end=<TMPL_var (resultLastPageStart+resultRange)>">
|
||||||
|
▶
|
||||||
|
</a>
|
||||||
|
</li>
|
||||||
|
</TMPL_if>
|
||||||
|
</ul>
|
||||||
|
</div>
|
||||||
|
</body>
|
||||||
</html>
|
</html>
|
||||||
|
|
Loading…
Reference in New Issue