From b7984c5138baabb3cc4db1752725e7f054279ae7 Mon Sep 17 00:00:00 2001 From: synhershko Date: Wed, 3 Aug 2011 07:48:31 +0000 Subject: [PATCH] Updating CLucene search and index routines --- src/common/kiwix/cluceneIndexer.cpp | 8 +++--- src/common/kiwix/cluceneSearcher.cpp | 37 +++++++++++----------------- 2 files changed, 19 insertions(+), 26 deletions(-) diff --git a/src/common/kiwix/cluceneIndexer.cpp b/src/common/kiwix/cluceneIndexer.cpp index bc3d9ab9c..e66ce596d 100644 --- a/src/common/kiwix/cluceneIndexer.cpp +++ b/src/common/kiwix/cluceneIndexer.cpp @@ -28,6 +28,7 @@ namespace kiwix { this->dir = FSDirectory::getDirectory(cluceneDirectoryPath.c_str(), true); this->writer = new IndexWriter(dir, &analyzer, true); + writer->setUseCompoundFile(false); } void CluceneIndexer::indexNextPercentPre() { @@ -47,7 +48,7 @@ namespace kiwix { /* Not indexed but stored */ //STRCPY_AtoT(buffer, title.c_str(), MAX_BUFFER_SIZE); ::mbstowcs(buffer,title.c_str(),MAX_BUFFER_SIZE); - doc.add(*_CLNEW Field(_T("title"), buffer, Field::STORE_YES | Field::INDEX_UNTOKENIZED)); + doc.add(*_CLNEW Field(_T("title"), buffer, Field::STORE_YES | Field::INDEX_UNTOKENIZED)); // TODO: Why store, not analyzed? what is utitle? //STRCPY_AtoT(buffer, url.c_str(), MAX_BUFFER_SIZE); ::mbstowcs(buffer,url.c_str(),MAX_BUFFER_SIZE); @@ -68,7 +69,7 @@ namespace kiwix { //STRCPY_AtoT(buffer, content.c_str(), MAX_BUFFER_SIZE); ::mbstowcs(buffer,content.c_str(),MAX_BUFFER_SIZE); - doc.add(*_CLNEW Field(_T("content"), buffer, Field::STORE_NO | Field::INDEX_TOKENIZED)); + doc.add(*_CLNEW Field(_T("content"), buffer, Field::STORE_NO | Field::INDEX_TOKENIZED)); // TODO: TermVectors if you want to highlight /* Add the document to the index */ this->writer->addDocument(&doc); @@ -78,9 +79,10 @@ namespace kiwix { } void CluceneIndexer::stopIndexing() { + writer->setUseCompoundFile(true); this->writer->optimize(); this->writer->close(); delete this->writer; - delete this->dir; + _CLDECDELETE(this->dir); } } diff --git a/src/common/kiwix/cluceneSearcher.cpp b/src/common/kiwix/cluceneSearcher.cpp index 8a6ce714d..62f414677 100644 --- a/src/common/kiwix/cluceneSearcher.cpp +++ b/src/common/kiwix/cluceneSearcher.cpp @@ -70,50 +70,41 @@ std::string toString(const TCHAR* s){ QueryParser* parser = new QueryParser(_T("content"), analyzer); STRCPY_AtoT(buffer, search.c_str(), MAX_BUFFER_SIZE); - //lucene::util::Misc::_cpycharToWide(); - //::mbstowcs(buffer,search.c_str(),MAX_BUFFER_SIZE); - //search.c_str(); - Query* query = parser->parse(_T("between")); + Query* query = parser->parse(buffer); + delete parser; + delete analyzer; cout << "Query: " << search << endl; - wcout << "Buffer: " << buffer; - cout << endl; + wcout << "Buffer: " << buffer << endl; + + if (query == NULL){ + cout << "Hits length:0 (null query)" << endl; + return; + } const wchar_t* querystring = query->toString(); wcout << L"Query2string: " << querystring << endl; - - //string q = toString(querystring); - //STRCPY_TtoA(querystring, buffer, MAX_BUFFER_SIZE); - //cout << "Query object: " << q << endl; delete[] querystring; - delete parser; - delete analyzer; - // Search Hits* hits = searcher->search(query); cout << "Hits length:" << hits->length() << endl; - /* - for (size_t i=0; i < hits->length() && i<10; i++) { + for (int32_t i=0; i < hits->length() && i<10; i++) { Document* d = &hits->doc(i); - _tprintf(_T("#%d. %s (score: %f)\n"), - i, d->get(_T("contents")), + _tprintf(_T("#%d. %s, url: %s (score: %f)\n"), + i + 1, d->get(_T("title")), d->get(_T("url")), hits->score(i)); } - */ - /* +/* Result result; result.url = doc.get_data(); result.title = doc.get_value(0); result.score = i.get_percent(); this->results.push_back(result); - */ +*/ - searcher->close(); - - delete searcher; delete hits; delete query;