Do not create all the results at once. Be a bit lazy.

We don't need to generate a vector of results when we do a search.
It is better to just keep a handle to the current MSetIterator and
generate the wanted values when they are needed.
This commit is contained in:
Matthieu Gautier 2017-03-21 16:20:17 +01:00
parent 72a6b578e6
commit 83d27255cf
5 changed files with 99 additions and 81 deletions

View File

@ -35,14 +35,16 @@
using namespace std; using namespace std;
struct Result class Result
{ {
string url; public:
string title; virtual ~Result() {};
int score; virtual std::string get_url() = 0;
string snippet; virtual std::string get_title() = 0;
int wordCount; virtual int get_score() = 0;
int size; virtual std::string get_snippet() = 0;
virtual int get_wordCount() = 0;
virtual int get_size() = 0;
}; };
namespace kiwix { namespace kiwix {
@ -55,7 +57,8 @@ namespace kiwix {
void search(std::string &search, unsigned int resultStart, void search(std::string &search, unsigned int resultStart,
unsigned int resultEnd, const bool verbose=false); unsigned int resultEnd, const bool verbose=false);
bool getNextResult(string &url, string &title, unsigned int &score); virtual Result* getNextResult() = 0;
virtual void restart_search() = 0;
unsigned int getEstimatedResultCount(); unsigned int getEstimatedResultCount();
bool setProtocolPrefix(const std::string prefix); bool setProtocolPrefix(const std::string prefix);
bool setSearchProtocolPrefix(const std::string prefix); bool setSearchProtocolPrefix(const std::string prefix);
@ -72,8 +75,6 @@ namespace kiwix {
virtual void searchInIndex(string &search, const unsigned int resultStart, virtual void searchInIndex(string &search, const unsigned int resultStart,
const unsigned int resultEnd, const bool verbose=false) = 0; const unsigned int resultEnd, const bool verbose=false) = 0;
std::vector<Result> results;
std::vector<Result>::iterator resultOffset;
std::string searchPattern; std::string searchPattern;
std::string protocolPrefix; std::string protocolPrefix;
std::string searchProtocolPrefix; std::string searchProtocolPrefix;

View File

@ -27,6 +27,23 @@ using namespace std;
namespace kiwix { namespace kiwix {
/**
 * Search result backed by a Xapian match-set position.
 *
 * The object keeps a copy of the MSetIterator it was built from together
 * with the matching Xapian::Document; every getter reads its value from
 * one of those two members at call time rather than from a precomputed
 * field.
 */
class XapianResult : public Result {
  public:
    /* explicit: an MSetIterator must never silently convert to a result. */
    explicit XapianResult(Xapian::MSetIterator& iterator);
    virtual ~XapianResult() {}

    /* Document data, used as the URL of the hit. */
    virtual std::string get_url();
    /* Document value slot 0. */
    virtual std::string get_title();
    /* Percentage score reported by the iterator. */
    virtual int get_score();
    /* Document value slot 1. */
    virtual std::string get_snippet();
    /* Document value slot 3 as an integer, or -1 when the slot is empty. */
    virtual int get_wordCount();
    /* Document value slot 2 as an integer, or -1 when the slot is empty. */
    virtual int get_size();

  private:
    /* Position of this hit inside the match set (source of the score). */
    Xapian::MSetIterator iterator;
    /* Document the iterator pointed at when the result was constructed. */
    Xapian::Document document;
};
class NoXapianIndexInZim: public exception { class NoXapianIndexInZim: public exception {
virtual const char* what() const throw() { virtual const char* what() const throw() {
return "There is no fulltext index in the zim file"; return "There is no fulltext index in the zim file";
@ -40,6 +57,8 @@ namespace kiwix {
virtual ~XapianSearcher() {}; virtual ~XapianSearcher() {};
void searchInIndex(string &search, const unsigned int resultStart, const unsigned int resultEnd, void searchInIndex(string &search, const unsigned int resultStart, const unsigned int resultEnd,
const bool verbose=false); const bool verbose=false);
virtual Result* getNextResult();
void restart_search();
protected: protected:
void closeIndex(); void closeIndex();
@ -47,6 +66,8 @@ namespace kiwix {
Xapian::Database readableDatabase; Xapian::Database readableDatabase;
Xapian::Stem stemmer; Xapian::Stem stemmer;
Xapian::MSet results;
Xapian::MSetIterator current_result;
}; };
} }

View File

@ -460,19 +460,18 @@ JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_indexedQuery
(JNIEnv *env, jclass obj, jstring query, jint count) { (JNIEnv *env, jclass obj, jstring query, jint count) {
std::string cQuery = jni2c(query, env); std::string cQuery = jni2c(query, env);
unsigned int cCount = jni2c(count); unsigned int cCount = jni2c(count);
std::string url; kiwix::Result *p_result;
std::string title;
std::string result; std::string result;
unsigned int score;
pthread_mutex_lock(&searcherLock); pthread_mutex_lock(&searcherLock);
try { try {
if (searcher != NULL) { if (searcher != NULL) {
searcher->search(cQuery, 0, count); searcher->search(cQuery, 0, count);
while (searcher->getNextResult(url, title, score) && while ( (p_result = searcher->getNextResult()) &&
!title.empty() && !(p_result->get_title().empty()) &&
!url.empty()) { !(p_result->get_url().empty())) {
result += title + "\n"; result += p_result->get_title() + "\n";
delete p_result;
} }
} }
} catch (...) { } catch (...) {

View File

@ -81,7 +81,6 @@ namespace kiwix {
this->resultEnd = resultEnd; this->resultEnd = resultEnd;
string unaccentedSearch = removeAccents(search); string unaccentedSearch = removeAccents(search);
searchInIndex(unaccentedSearch, resultStart, resultEnd, verbose); searchInIndex(unaccentedSearch, resultStart, resultEnd, verbose);
this->resultOffset = this->results.begin();
} }
return; return;
@ -89,8 +88,6 @@ namespace kiwix {
/* Reset the results */ /* Reset the results */
void Searcher::reset() { void Searcher::reset() {
this->results.clear();
this->resultOffset = this->results.begin();
this->estimatedResultCount = 0; this->estimatedResultCount = 0;
this->searchPattern = ""; this->searchPattern = "";
return; return;
@ -101,30 +98,6 @@ namespace kiwix {
return this->estimatedResultCount; return this->estimatedResultCount;
} }
/* Get next result */
bool Searcher::getNextResult(string &url, string &title, unsigned int &score) {
bool retVal = false;
if (this->resultOffset != this->results.end()) {
/* url */
url = this->resultOffset->url;
/* title */
title = this->resultOffset->title;
/* score */
score = this->resultOffset->score;
/* increment the cursor for the next call */
this->resultOffset++;
retVal = true;
}
return retVal;
}
bool Searcher::setProtocolPrefix(const std::string prefix) { bool Searcher::setProtocolPrefix(const std::string prefix) {
this->protocolPrefix = prefix; this->protocolPrefix = prefix;
return true; return true;
@ -149,23 +122,24 @@ namespace kiwix {
CDT oData; CDT oData;
CDT resultsCDT(CDT::ARRAY_VAL); CDT resultsCDT(CDT::ARRAY_VAL);
this->resultOffset = this->results.begin(); this->restart_search();
while (this->resultOffset != this->results.end()) { Result * p_result = NULL;
while ( (p_result = this->getNextResult()) ) {
CDT result; CDT result;
result["title"] = this->resultOffset->title; result["title"] = p_result->get_title();
result["url"] = this->resultOffset->url; result["url"] = p_result->get_url();
result["snippet"] = this->resultOffset->snippet; result["snippet"] = p_result->get_snippet();
if (this->resultOffset->size >= 0) if (p_result->get_size() >= 0)
result["size"] = kiwix::beautifyInteger(this->resultOffset->size); result["size"] = kiwix::beautifyInteger(p_result->get_size());
if (this->resultOffset->wordCount >= 0) if (p_result->get_wordCount() >= 0)
result["wordCount"] = kiwix::beautifyInteger(this->resultOffset->wordCount); result["wordCount"] = kiwix::beautifyInteger(p_result->get_wordCount());
resultsCDT.PushBack(result); resultsCDT.PushBack(result);
this->resultOffset++; delete p_result;
} }
this->resultOffset = this->results.begin(); this->restart_search();
oData["results"] = resultsCDT; oData["results"] = resultsCDT;
// pages // pages

View File

@ -68,32 +68,55 @@ namespace kiwix {
enquire.set_query(query); enquire.set_query(query);
/* Get the results */ /* Get the results */
Xapian::MSet matches = enquire.get_mset(resultStart, resultEnd - resultStart); this->results = enquire.get_mset(resultStart, resultEnd - resultStart);
this->current_result = this->results.begin();
Xapian::MSetIterator i;
for (i = matches.begin(); i != matches.end(); ++i) {
Xapian::Document doc = i.get_document();
Result result;
result.url = doc.get_data();
result.title = doc.get_value(0);
result.snippet = doc.get_value(1);
result.size = (doc.get_value(2).empty() == true ? -1 : atoi(doc.get_value(2).c_str()));
result.wordCount = (doc.get_value(3).empty() == true ? -1 : atoi(doc.get_value(3).c_str()));
result.score = i.get_percent();
this->results.push_back(result);
if (verbose) {
std::cout << "Document ID " << *i << " \t";
std::cout << i.get_percent() << "% ";
std::cout << "\t[" << doc.get_data() << "] - " << doc.get_value(0) << std::endl;
}
}
/* Update the global resultCount value*/ /* Update the global resultCount value*/
this->estimatedResultCount = matches.get_matches_estimated(); this->estimatedResultCount = this->results.get_matches_estimated();
}
return; /* Get next result */
/*
 * Return the result at the current cursor position and advance the cursor.
 *
 * Returns NULL once the cursor has reached the end of the match set.
 * The returned object is allocated with `new`; the caller owns it and is
 * responsible for deleting it.
 */
Result* XapianSearcher::getNextResult() {
  /* Nothing left to hand out. */
  if (this->current_result == this->results.end()) {
    return NULL;
  }

  XapianResult* next = new XapianResult(this->current_result);
  ++this->current_result;
  return next;
}
void XapianSearcher::restart_search() {
this->current_result = this->results.begin();
}
/*
 * Build a result from the given match-set position.
 *
 * The iterator is copied into the object and the document it designates is
 * fetched immediately, so the result does not depend on the caller's
 * iterator being left at the same position afterwards.
 */
XapianResult::XapianResult(Xapian::MSetIterator& it)
  : iterator(it),
    document(it.get_document())
{
}
std::string XapianResult::get_url() {
return document.get_data();
}
std::string XapianResult::get_title() {
return document.get_value(0);
}
int XapianResult::get_score() {
return iterator.get_percent();
}
std::string XapianResult::get_snippet() {
return document.get_value(1);
}
/*
 * Size of the article, parsed from value slot 2 of the document.
 *
 * Returns -1 when the slot is empty. The slot is read once and the copy is
 * reused for both the emptiness check and the conversion. The conversion
 * uses atoi(), so non-numeric content yields whatever atoi() returns for it.
 */
int XapianResult::get_size() {
  const std::string rawSize = document.get_value(2);
  if (rawSize.empty()) {
    return -1;
  }
  return atoi(rawSize.c_str());
}
/*
 * Word count of the article, parsed from value slot 3 of the document.
 *
 * Returns -1 when the slot is empty. The slot is read once and the copy is
 * reused for both the emptiness check and the conversion. The conversion
 * uses atoi(), so non-numeric content yields whatever atoi() returns for it.
 */
int XapianResult::get_wordCount() {
  const std::string rawWordCount = document.get_value(3);
  if (rawWordCount.empty()) {
    return -1;
  }
  return atoi(rawWordCount.c_str());
}
} // Kiwix namespace