mirror of https://github.com/kiwix/libkiwix.git
Updating CLucene search and index routines
This commit is contained in:
parent
b146d87158
commit
b7984c5138
|
@ -28,6 +28,7 @@ namespace kiwix {
|
||||||
|
|
||||||
this->dir = FSDirectory::getDirectory(cluceneDirectoryPath.c_str(), true);
|
this->dir = FSDirectory::getDirectory(cluceneDirectoryPath.c_str(), true);
|
||||||
this->writer = new IndexWriter(dir, &analyzer, true);
|
this->writer = new IndexWriter(dir, &analyzer, true);
|
||||||
|
writer->setUseCompoundFile(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
void CluceneIndexer::indexNextPercentPre() {
|
void CluceneIndexer::indexNextPercentPre() {
|
||||||
|
@ -47,7 +48,7 @@ namespace kiwix {
|
||||||
/* Not indexed but stored */
|
/* Not indexed but stored */
|
||||||
//STRCPY_AtoT(buffer, title.c_str(), MAX_BUFFER_SIZE);
|
//STRCPY_AtoT(buffer, title.c_str(), MAX_BUFFER_SIZE);
|
||||||
::mbstowcs(buffer,title.c_str(),MAX_BUFFER_SIZE);
|
::mbstowcs(buffer,title.c_str(),MAX_BUFFER_SIZE);
|
||||||
doc.add(*_CLNEW Field(_T("title"), buffer, Field::STORE_YES | Field::INDEX_UNTOKENIZED));
|
doc.add(*_CLNEW Field(_T("title"), buffer, Field::STORE_YES | Field::INDEX_UNTOKENIZED)); // TODO: Why store, not analyzed? what is utitle?
|
||||||
|
|
||||||
//STRCPY_AtoT(buffer, url.c_str(), MAX_BUFFER_SIZE);
|
//STRCPY_AtoT(buffer, url.c_str(), MAX_BUFFER_SIZE);
|
||||||
::mbstowcs(buffer,url.c_str(),MAX_BUFFER_SIZE);
|
::mbstowcs(buffer,url.c_str(),MAX_BUFFER_SIZE);
|
||||||
|
@ -68,7 +69,7 @@ namespace kiwix {
|
||||||
|
|
||||||
//STRCPY_AtoT(buffer, content.c_str(), MAX_BUFFER_SIZE);
|
//STRCPY_AtoT(buffer, content.c_str(), MAX_BUFFER_SIZE);
|
||||||
::mbstowcs(buffer,content.c_str(),MAX_BUFFER_SIZE);
|
::mbstowcs(buffer,content.c_str(),MAX_BUFFER_SIZE);
|
||||||
doc.add(*_CLNEW Field(_T("content"), buffer, Field::STORE_NO | Field::INDEX_TOKENIZED));
|
doc.add(*_CLNEW Field(_T("content"), buffer, Field::STORE_NO | Field::INDEX_TOKENIZED)); // TODO: TermVectors if you want to highlight
|
||||||
|
|
||||||
/* Add the document to the index */
|
/* Add the document to the index */
|
||||||
this->writer->addDocument(&doc);
|
this->writer->addDocument(&doc);
|
||||||
|
@ -78,9 +79,10 @@ namespace kiwix {
|
||||||
}
|
}
|
||||||
|
|
||||||
void CluceneIndexer::stopIndexing() {
|
void CluceneIndexer::stopIndexing() {
|
||||||
|
writer->setUseCompoundFile(true);
|
||||||
this->writer->optimize();
|
this->writer->optimize();
|
||||||
this->writer->close();
|
this->writer->close();
|
||||||
delete this->writer;
|
delete this->writer;
|
||||||
delete this->dir;
|
_CLDECDELETE(this->dir);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -70,50 +70,41 @@ std::string toString(const TCHAR* s){
|
||||||
QueryParser* parser = new QueryParser(_T("content"), analyzer);
|
QueryParser* parser = new QueryParser(_T("content"), analyzer);
|
||||||
STRCPY_AtoT(buffer, search.c_str(), MAX_BUFFER_SIZE);
|
STRCPY_AtoT(buffer, search.c_str(), MAX_BUFFER_SIZE);
|
||||||
|
|
||||||
//lucene::util::Misc::_cpycharToWide();
|
Query* query = parser->parse(buffer);
|
||||||
//::mbstowcs(buffer,search.c_str(),MAX_BUFFER_SIZE);
|
delete parser;
|
||||||
//search.c_str();
|
delete analyzer;
|
||||||
Query* query = parser->parse(_T("between"));
|
|
||||||
|
|
||||||
cout << "Query: " << search << endl;
|
cout << "Query: " << search << endl;
|
||||||
wcout << "Buffer: " << buffer;
|
wcout << "Buffer: " << buffer << endl;
|
||||||
cout << endl;
|
|
||||||
|
if (query == NULL){
|
||||||
|
cout << "Hits length:0 (null query)" << endl;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
const wchar_t* querystring = query->toString();
|
const wchar_t* querystring = query->toString();
|
||||||
wcout << L"Query2string: " << querystring << endl;
|
wcout << L"Query2string: " << querystring << endl;
|
||||||
|
|
||||||
//string q = toString(querystring);
|
|
||||||
//STRCPY_TtoA(querystring, buffer, MAX_BUFFER_SIZE);
|
|
||||||
//cout << "Query object: " << q << endl;
|
|
||||||
delete[] querystring;
|
delete[] querystring;
|
||||||
|
|
||||||
delete parser;
|
|
||||||
delete analyzer;
|
|
||||||
|
|
||||||
// Search
|
// Search
|
||||||
Hits* hits = searcher->search(query);
|
Hits* hits = searcher->search(query);
|
||||||
cout << "Hits length:" << hits->length() << endl;
|
cout << "Hits length:" << hits->length() << endl;
|
||||||
|
|
||||||
/*
|
for (int32_t i=0; i < hits->length() && i<10; i++) {
|
||||||
for (size_t i=0; i < hits->length() && i<10; i++) {
|
|
||||||
Document* d = &hits->doc(i);
|
Document* d = &hits->doc(i);
|
||||||
_tprintf(_T("#%d. %s (score: %f)\n"),
|
_tprintf(_T("#%d. %s, url: %s (score: %f)\n"),
|
||||||
i, d->get(_T("contents")),
|
i + 1, d->get(_T("title")), d->get(_T("url")),
|
||||||
hits->score(i));
|
hits->score(i));
|
||||||
}
|
}
|
||||||
*/
|
/*
|
||||||
/*
|
|
||||||
Result result;
|
Result result;
|
||||||
result.url = doc.get_data();
|
result.url = doc.get_data();
|
||||||
result.title = doc.get_value(0);
|
result.title = doc.get_value(0);
|
||||||
result.score = i.get_percent();
|
result.score = i.get_percent();
|
||||||
|
|
||||||
this->results.push_back(result);
|
this->results.push_back(result);
|
||||||
*/
|
*/
|
||||||
|
|
||||||
searcher->close();
|
|
||||||
|
|
||||||
delete searcher;
|
|
||||||
delete hits;
|
delete hits;
|
||||||
delete query;
|
delete query;
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue