Updating CLucene search and index routines

This commit is contained in:
synhershko 2011-08-03 07:48:31 +00:00
parent b146d87158
commit b7984c5138
2 changed files with 19 additions and 26 deletions

View File

@ -28,6 +28,7 @@ namespace kiwix {
this->dir = FSDirectory::getDirectory(cluceneDirectoryPath.c_str(), true);
this->writer = new IndexWriter(dir, &analyzer, true);
writer->setUseCompoundFile(false);
}
void CluceneIndexer::indexNextPercentPre() {
@ -47,7 +48,7 @@ namespace kiwix {
/* Not indexed but stored */
//STRCPY_AtoT(buffer, title.c_str(), MAX_BUFFER_SIZE);
::mbstowcs(buffer,title.c_str(),MAX_BUFFER_SIZE);
doc.add(*_CLNEW Field(_T("title"), buffer, Field::STORE_YES | Field::INDEX_UNTOKENIZED));
doc.add(*_CLNEW Field(_T("title"), buffer, Field::STORE_YES | Field::INDEX_UNTOKENIZED)); // TODO: Why store, not analyzed? what is utitle?
//STRCPY_AtoT(buffer, url.c_str(), MAX_BUFFER_SIZE);
::mbstowcs(buffer,url.c_str(),MAX_BUFFER_SIZE);
@ -68,7 +69,7 @@ namespace kiwix {
//STRCPY_AtoT(buffer, content.c_str(), MAX_BUFFER_SIZE);
::mbstowcs(buffer,content.c_str(),MAX_BUFFER_SIZE);
doc.add(*_CLNEW Field(_T("content"), buffer, Field::STORE_NO | Field::INDEX_TOKENIZED));
doc.add(*_CLNEW Field(_T("content"), buffer, Field::STORE_NO | Field::INDEX_TOKENIZED)); // TODO: TermVectors if you want to highlight
/* Add the document to the index */
this->writer->addDocument(&doc);
@ -78,9 +79,10 @@ namespace kiwix {
}
/**
 * Finalizes the index and releases the indexing resources.
 *
 * Re-enables the compound file format on the writer, optimizes the index,
 * closes the writer and then releases the writer and the directory.
 */
void CluceneIndexer::stopIndexing() {
  /* Compound files are re-enabled only now, right before optimize(). */
  this->writer->setUseCompoundFile(true);
  this->writer->optimize();
  this->writer->close();
  delete this->writer;

  /* The directory comes from FSDirectory::getDirectory(), so it is released
     exactly once through _CLDECDELETE. A plain `delete` followed by
     _CLDECDELETE would release the same object twice. */
  _CLDECDELETE(this->dir);
}
}

View File

@ -70,50 +70,41 @@ std::string toString(const TCHAR* s){
QueryParser* parser = new QueryParser(_T("content"), analyzer);
STRCPY_AtoT(buffer, search.c_str(), MAX_BUFFER_SIZE);
//lucene::util::Misc::_cpycharToWide();
//::mbstowcs(buffer,search.c_str(),MAX_BUFFER_SIZE);
//search.c_str();
Query* query = parser->parse(_T("between"));
Query* query = parser->parse(buffer);
delete parser;
delete analyzer;
cout << "Query: " << search << endl;
wcout << "Buffer: " << buffer;
cout << endl;
wcout << "Buffer: " << buffer << endl;
if (query == NULL){
cout << "Hits length:0 (null query)" << endl;
return;
}
const wchar_t* querystring = query->toString();
wcout << L"Query2string: " << querystring << endl;
//string q = toString(querystring);
//STRCPY_TtoA(querystring, buffer, MAX_BUFFER_SIZE);
//cout << "Query object: " << q << endl;
delete[] querystring;
delete parser;
delete analyzer;
// Search
Hits* hits = searcher->search(query);
cout << "Hits length:" << hits->length() << endl;
/*
for (size_t i=0; i < hits->length() && i<10; i++) {
for (int32_t i=0; i < hits->length() && i<10; i++) {
Document* d = &hits->doc(i);
_tprintf(_T("#%d. %s (score: %f)\n"),
i, d->get(_T("contents")),
_tprintf(_T("#%d. %s, url: %s (score: %f)\n"),
i + 1, d->get(_T("title")), d->get(_T("url")),
hits->score(i));
}
*/
/*
/*
Result result;
result.url = doc.get_data();
result.title = doc.get_value(0);
result.score = i.get_percent();
this->results.push_back(result);
*/
*/
searcher->close();
delete searcher;
delete hits;
delete query;