Merge pull request #66 from kiwix/multisearch

Multisearch
This commit is contained in:
Matthieu Gautier 2017-07-18 16:07:46 +02:00 committed by GitHub
commit 473b62c9b8
6 changed files with 144 additions and 37 deletions

View File

@ -46,17 +46,21 @@ class Result
virtual std::string get_title() = 0;
virtual int get_score() = 0;
virtual std::string get_snippet() = 0;
virtual std::string get_content() = 0;
virtual int get_wordCount() = 0;
virtual int get_size() = 0;
virtual int get_readerIndex() = 0;
};
struct SearcherInternal;
class Searcher
{
public:
Searcher();
Searcher(const string& xapianDirectoryPath, Reader* reader);
~Searcher();
void add_reader(Reader* reader, const std::string& humanReaderName);
void search(std::string& search,
unsigned int resultStart,
unsigned int resultEnd,
@ -82,7 +86,8 @@ class Searcher
const unsigned int resultEnd,
const bool verbose = false);
Reader* reader;
std::vector<Reader*> readers;
std::vector<std::string> humanReaderNames;
SearcherInternal* internal;
std::string searchPattern;
std::string protocolPrefix;

View File

@ -43,8 +43,10 @@ class XapianResult : public Result
virtual std::string get_title();
virtual int get_score();
virtual std::string get_snippet();
virtual std::string get_content();
virtual int get_wordCount();
virtual int get_size();
virtual int get_readerIndex() { return 0; };
private:
XapianSearcher* searcher;

View File

@ -486,12 +486,19 @@ JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_loadFulltextIndex(
std::string cPath = jni2c(path, env);
pthread_mutex_lock(&searcherLock);
searcher = NULL;
try {
if (searcher != NULL) {
delete searcher;
}
searcher = new kiwix::Searcher(cPath, reader);
if (!reader || !reader->hasFulltextIndex()) {
// Use old API (no embedded full text index).
searcher = new kiwix::Searcher(cPath, reader);
} else {
// Use the new API. We don't care about the human readable name as
// we don't use it (in android).
searcher = new kiwix::Searcher();
searcher->add_reader(reader, "");
}
} catch (...) {
searcher = NULL;
retVal = JNI_FALSE;

View File

@ -45,8 +45,10 @@ class _Result : public Result
virtual std::string get_title();
virtual int get_score();
virtual std::string get_snippet();
virtual std::string get_content();
virtual int get_wordCount();
virtual int get_size();
virtual int get_readerIndex();
private:
Searcher* searcher;
@ -72,8 +74,7 @@ struct SearcherInternal {
/* Constructor */
Searcher::Searcher(const string& xapianDirectoryPath, Reader* reader)
: reader(reader),
internal(new SearcherInternal()),
: internal(new SearcherInternal()),
searchPattern(""),
protocolPrefix("zim://"),
searchProtocolPrefix("search://?"),
@ -89,11 +90,32 @@ Searcher::Searcher(const string& xapianDirectoryPath, Reader* reader)
}
}
Searcher::Searcher()
: internal(new SearcherInternal()),
searchPattern(""),
protocolPrefix("zim://"),
searchProtocolPrefix("search://?"),
resultCountPerPage(0),
estimatedResultCount(0),
resultStart(0),
resultEnd(0)
{
template_ct2 = RESOURCE::results_ct2;
loadICUExternalTables();
}
/* Destructor */
Searcher::~Searcher()
{
delete internal;
}
void Searcher::add_reader(Reader* reader, const std::string& humanReadableName)
{
this->readers.push_back(reader);
this->humanReaderNames.push_back(humanReadableName);
}
/* Search strings in the database */
void Searcher::search(std::string& search,
unsigned int resultStart,
@ -133,8 +155,15 @@ void Searcher::search(std::string& search,
this->estimatedResultCount
= internal->_xapianSearcher->results.get_matches_estimated();
} else {
internal->_search = this->reader->getZimFileHandler()->search(
unaccentedSearch, resultStart, resultEnd);
std::vector<const zim::File*> zims;
for (auto current = this->readers.begin(); current != this->readers.end();
current++) {
zims.push_back((*current)->getZimFileHandler());
}
zim::Search* search = new zim::Search(zims);
search->set_query(unaccentedSearch);
search->set_range(resultStart, resultEnd);
internal->_search = search;
internal->current_iterator = internal->_search->begin();
this->estimatedResultCount = internal->_search->get_matches_estimated();
}
@ -190,8 +219,16 @@ void Searcher::suggestions(std::string& search, const bool verbose)
* We do not support that. */
this->estimatedResultCount = 0;
} else {
internal->_search = this->reader->getZimFileHandler()->suggestions(
unaccentedSearch, resultStart, resultEnd);
std::vector<const zim::File*> zims;
for (auto current = this->readers.begin(); current != this->readers.end();
current++) {
zims.push_back((*current)->getZimFileHandler());
}
zim::Search* search = new zim::Search(zims);
search->set_query(unaccentedSearch);
search->set_range(resultStart, resultEnd);
search->set_suggestion_mode(true);
internal->_search = search;
internal->current_iterator = internal->_search->begin();
this->estimatedResultCount = internal->_search->get_matches_estimated();
}
@ -241,6 +278,13 @@ std::string _Result::get_snippet()
{
return iterator.get_snippet();
}
std::string _Result::get_content()
{
if (iterator->good()) {
return iterator->getData();
}
return "";
}
int _Result::get_size()
{
return iterator.get_size();
@ -249,6 +293,10 @@ int _Result::get_wordCount()
{
return iterator.get_wordCount();
}
int _Result::get_readerIndex()
{
return iterator.get_fileIndex();
}
#ifdef ENABLE_CTPP2
string Searcher::getHtml()
@ -266,6 +314,7 @@ string Searcher::getHtml()
result["title"] = p_result->get_title();
result["url"] = p_result->get_url();
result["snippet"] = p_result->get_snippet();
result["contentId"] = humanReaderNames[p_result->get_readerIndex()];
if (p_result->get_size() >= 0) {
result["size"] = kiwix::beautifyInteger(p_result->get_size());

View File

@ -177,11 +177,10 @@ std::string XapianResult::get_snippet()
We parse it and use the html dump to avoid remove html tags in the
content and be able to nicely cut the text at random place. */
MyHtmlParser htmlParser;
std::string content;
unsigned int contentLength;
std::string contentType;
searcher->reader->getContentByUrl(
get_url(), content, contentLength, contentType);
std::string content = get_content();
if (content.empty()) {
return content;
}
try {
htmlParser.parse_html(content, "UTF-8", true);
} catch (...) {
@ -189,6 +188,19 @@ std::string XapianResult::get_snippet()
return searcher->results.snippet(htmlParser.dump, 500);
}
std::string XapianResult::get_content()
{
if (!searcher->reader) {
return "";
}
std::string content;
unsigned int contentLength;
std::string contentType;
searcher->reader->getContentByUrl(
get_url(), content, contentLength, contentType);
return content;
}
int XapianResult::get_size()
{
if (searcher->valuesmap.empty()) {

View File

@ -92,36 +92,68 @@
</style>
<title>Search: <TMPL_var searchPattern></title>
</head>
<body bgcolor="white">
</head>
<body bgcolor="white">
<div class="header">
<TMPL_if results>Results <b><TMPL_var resultStart>-<TMPL_var resultEnd></b> of <b><TMPL_var count></b> for <b><TMPL_var searchPattern></b><TMPL_else>No result were found for <b><TMPL_var searchPattern></b></TMPL_if>
<TMPL_if results>
Results
<b>
<TMPL_var resultStart>-<TMPL_var resultEnd>
</b> of <b>
<TMPL_var count>
</b> for <b>
<TMPL_var searchPattern>
</b>
<TMPL_else>
No result were found for <b><TMPL_var searchPattern></b>
</TMPL_if>
</div>
<div class="results">
<ul>
<ul>
<TMPL_foreach results as result>
<li><a href="<TMPL_var protocolPrefix><TMPL_var contentId>/<TMPL_var result.url>"><TMPL_var result.title></a>
<cite><TMPL_if result.snippet><TMPL_var result.snippet>...</TMPL_if></cite>
<TMPL_if wordCount><div class="informations"><TMPL_var wordCount> words</div></TMPL_if>
</li>
<li>
<a href="<TMPL_var protocolPrefix><TMPL_var result.contentId>/<TMPL_var result.url>">
<TMPL_var result.title>
</a>
<cite>
<TMPL_if result.snippet>
<TMPL_var result.snippet>...
</TMPL_if>
</cite>
<TMPL_if wordCount>
<div class="informations"><TMPL_var wordCount> words</div>
</TMPL_if>
</li>
</TMPL_foreach>
</ul>
</ul>
</div>
<div class="footer">
<ul>
<TMPL_if (resultLastPageStart>0)>
<li><a href="<TMPL_var searchProtocolPrefix>content=<TMPL_var contentId>&pattern=<TMPL_var searchPatternEncoded>&start=0&end=<TMPL_var resultRange>">◀</a></li>
</TMPL_if>
<TMPL_foreach pages as page>
<li><a <TMPL_if page.selected>class="selected"</TMPL_if> href="<TMPL_var searchProtocolPrefix>content=<TMPL_var contentId>&pattern=<TMPL_var searchPatternEncoded>&start=<TMPL_var page.start>&end=<TMPL_var page.end>"><TMPL_var page.label></a></li>
</TMPL_foreach>
<TMPL_if (resultLastPageStart>0)>
<li><a href="<TMPL_var searchProtocolPrefix>content=<TMPL_var contentId>&pattern=<TMPL_var searchPatternEncoded>&start=<TMPL_var resultLastPageStart>&end=<TMPL_var (resultLastPageStart+resultRange)>">▶</a></li>
</TMPL_if>
</ul>
</div>
</body>
<ul>
<TMPL_if (resultLastPageStart>0)>
<li>
<a href="<TMPL_var searchProtocolPrefix>pattern=<TMPL_var searchPatternEncoded><TMPL_if contentId>&content=<TMPL_var contentId></TMPL_if>&start=0&end=<TMPL_var resultRange>">
</a>
</li>
</TMPL_if>
<TMPL_foreach pages as page>
<li>
<a <TMPL_if page.selected>class="selected"</TMPL_if>
href="<TMPL_var searchProtocolPrefix>pattern=<TMPL_var searchPatternEncoded><TMPL_if contentId>&content=<TMPL_var contentId></TMPL_if>&start=<TMPL_var page.start>&end=<TMPL_var page.end>">
<TMPL_var page.label>
</a>
</li>
</TMPL_foreach>
<TMPL_if (resultLastPageStart>0)>
<li>
<a href="<TMPL_var searchProtocolPrefix>pattern=<TMPL_var searchPatternEncoded><TMPL_if contentId>&content=<TMPL_var contentId></TMPL_if>&start=<TMPL_var resultLastPageStart>&end=<TMPL_var (resultLastPageStart+resultRange)>">
</a>
</li>
</TMPL_if>
</ul>
</div>
</body>
</html>