diff --git a/src/common/kiwix/cluceneIndexer.cpp b/src/common/kiwix/cluceneIndexer.cpp index 36e630ecc..033171056 100644 --- a/src/common/kiwix/cluceneIndexer.cpp +++ b/src/common/kiwix/cluceneIndexer.cpp @@ -2,6 +2,8 @@ namespace kiwix { + TCHAR buffer[MAX_BUFFER_SIZE]; + CluceneIndexer::CluceneIndexer(const string &zimFilePath, const string &cluceneDirectoryPath) : Indexer(zimFilePath) { @@ -21,27 +23,25 @@ namespace kiwix { Document doc; /* Not indexed but stored */ - doc.add(*_CLNEW Field((const wchar_t*)("title"), (const wchar_t*)(title.c_str()), - Field::STORE_YES | Field::INDEX_UNTOKENIZED)); - doc.add(*_CLNEW Field((const wchar_t*)("url"), (const wchar_t*)(url.c_str()), - Field::STORE_YES | Field::INDEX_UNTOKENIZED)); + STRCPY_AtoT(buffer, title.c_str(), MAX_BUFFER_SIZE); + doc.add(*_CLNEW Field(_T("title"), buffer, Field::STORE_YES | Field::INDEX_UNTOKENIZED)); + + STRCPY_AtoT(buffer, url.c_str(), MAX_BUFFER_SIZE); + doc.add(*_CLNEW Field(_T("url"), buffer, Field::STORE_YES | Field::INDEX_UNTOKENIZED)); /* indexed but not stored */ - Field *titleField = new Field((const wchar_t*)("unaccentedTitle"), - (const wchar_t*)(unaccentedTitle.c_str()), - Field::STORE_NO | Field::INDEX_TOKENIZED); + STRCPY_AtoT(buffer, unaccentedTitle.c_str(), MAX_BUFFER_SIZE); + Field *titleField = new Field(_T("utitle"), buffer, Field::STORE_NO | Field::INDEX_TOKENIZED); titleField->setBoost(getTitleBoostFactor(content.size())); doc.add(*titleField); - Field *keywordsField = new Field((const wchar_t*)("keywords"), - (const wchar_t*)(keywords.c_str()), - Field::STORE_NO | Field::INDEX_TOKENIZED); + STRCPY_AtoT(buffer, keywords.c_str(), MAX_BUFFER_SIZE); + Field *keywordsField = new Field(_T("keywords"), buffer, Field::STORE_NO | Field::INDEX_TOKENIZED); keywordsField->setBoost(keywordsBoostFactor); doc.add(*keywordsField); - doc.add(*_CLNEW Field((const wchar_t*)("content"), - (const wchar_t*)(content.c_str()), - Field::STORE_NO | Field::INDEX_TOKENIZED)); + STRCPY_AtoT(buffer, content.c_str(), MAX_BUFFER_SIZE); + doc.add(*_CLNEW Field(_T("content"), buffer, Field::STORE_NO | Field::INDEX_TOKENIZED)); /* Add the document to the index */ this->writer->addDocument(&doc); @@ -51,6 +51,9 @@ namespace kiwix { } void CluceneIndexer::stopIndexing() { + this->writer->optimize(); this->writer->close(); + delete this->writer; + delete this->dir; } } diff --git a/src/common/kiwix/cluceneIndexer.h b/src/common/kiwix/cluceneIndexer.h index ab328cd27..33075695d 100644 --- a/src/common/kiwix/cluceneIndexer.h +++ b/src/common/kiwix/cluceneIndexer.h @@ -4,6 +4,8 @@ #include #include "indexer.h" +#define MAX_BUFFER_SIZE 4200000 + using namespace std; using namespace lucene::analysis; @@ -32,7 +34,7 @@ namespace kiwix { FSDirectory* dir; IndexWriter* writer; - SimpleAnalyzer analyzer; + lucene::analysis::standard::StandardAnalyzer analyzer; }; } diff --git a/src/common/kiwix/cluceneSearcher.cpp b/src/common/kiwix/cluceneSearcher.cpp index 0d37ae648..a623fccbe 100644 --- a/src/common/kiwix/cluceneSearcher.cpp +++ b/src/common/kiwix/cluceneSearcher.cpp @@ -2,29 +2,7 @@ namespace kiwix { - - typedef std::basic_string tstring; - - TCHAR* StringToTCHAR(string& s) - { - tstring tstr; - const char* all = s.c_str(); - int len = 1 + strlen(all); - wchar_t* t = new wchar_t[len]; - if (NULL == t) throw std::bad_alloc(); - mbstowcs(t, all, len); - return (TCHAR*)t; - } - - std::string TCHARToString(const TCHAR* ptsz) - { - int len = wcslen((wchar_t*)ptsz); - char* psz = new char[2*len + 1]; - wcstombs(psz, (wchar_t*)ptsz, 2*len + 1); - std::string s = psz; - delete [] psz; - return s; - } + TCHAR buffer[MAX_BUFFER_SIZE]; /* Constructor */ CluceneSearcher::CluceneSearcher(const string &cluceneDirectoryPath) @@ -45,9 +23,9 @@ namespace kiwix { /* Search strings in the database */ void CluceneSearcher::searchInIndex(string &search, const unsigned int resultsCount, const bool verbose) { IndexSearcher searcher(reader); - SimpleAnalyzer analyzer; QueryParser parser(_T("content"), &analyzer); - Query* query = parser.parse(StringToTCHAR(search)); + STRCPY_AtoT(buffer, search.c_str(), MAX_BUFFER_SIZE); + Query* query = parser.parse(buffer); Hits* hits = searcher.search(query); cout << "--------------------------------" << hits->length() << endl; diff --git a/src/common/kiwix/cluceneSearcher.h b/src/common/kiwix/cluceneSearcher.h index d9d551c7c..44888ff2e 100644 --- a/src/common/kiwix/cluceneSearcher.h +++ b/src/common/kiwix/cluceneSearcher.h @@ -5,6 +5,8 @@ #include #include "searcher.h" +#define MAX_BUFFER_SIZE 4200000 + using namespace std; using namespace lucene::analysis; using namespace lucene::index; @@ -27,6 +29,8 @@ namespace kiwix { void openIndex(const string &cluceneDirectoryPath); IndexReader* reader; + lucene::analysis::standard::StandardAnalyzer analyzer; + }; }