+ Additional work on the CLucene backend... it still does not work :(

This commit is contained in:
kelson42 2010-11-17 20:58:19 +00:00
parent 6eb3da53cc
commit 4deacdd923
4 changed files with 26 additions and 39 deletions

View File

@ -2,6 +2,8 @@
namespace kiwix {
TCHAR buffer[MAX_BUFFER_SIZE];
CluceneIndexer::CluceneIndexer(const string &zimFilePath, const string &cluceneDirectoryPath) :
Indexer(zimFilePath) {
@ -21,27 +23,25 @@ namespace kiwix {
Document doc;
/* Stored, and indexed as a single untokenized term */
doc.add(*_CLNEW Field((const wchar_t*)("title"), (const wchar_t*)(title.c_str()),
Field::STORE_YES | Field::INDEX_UNTOKENIZED));
doc.add(*_CLNEW Field((const wchar_t*)("url"), (const wchar_t*)(url.c_str()),
Field::STORE_YES | Field::INDEX_UNTOKENIZED));
STRCPY_AtoT(buffer, title.c_str(), MAX_BUFFER_SIZE);
doc.add(*_CLNEW Field(_T("title"), buffer, Field::STORE_YES | Field::INDEX_UNTOKENIZED));
STRCPY_AtoT(buffer, url.c_str(), MAX_BUFFER_SIZE);
doc.add(*_CLNEW Field(_T("url"), buffer, Field::STORE_YES | Field::INDEX_UNTOKENIZED));
/* indexed but not stored */
Field *titleField = new Field((const wchar_t*)("unaccentedTitle"),
(const wchar_t*)(unaccentedTitle.c_str()),
Field::STORE_NO | Field::INDEX_TOKENIZED);
STRCPY_AtoT(buffer, unaccentedTitle.c_str(), MAX_BUFFER_SIZE);
Field *titleField = new Field(_T("utitle"), buffer, Field::STORE_NO | Field::INDEX_TOKENIZED);
titleField->setBoost(getTitleBoostFactor(content.size()));
doc.add(*titleField);
Field *keywordsField = new Field((const wchar_t*)("keywords"),
(const wchar_t*)(keywords.c_str()),
Field::STORE_NO | Field::INDEX_TOKENIZED);
STRCPY_AtoT(buffer, keywords.c_str(), MAX_BUFFER_SIZE);
Field *keywordsField = new Field(_T("keywords"), buffer, Field::STORE_NO | Field::INDEX_TOKENIZED);
keywordsField->setBoost(keywordsBoostFactor);
doc.add(*keywordsField);
doc.add(*_CLNEW Field((const wchar_t*)("content"),
(const wchar_t*)(content.c_str()),
Field::STORE_NO | Field::INDEX_TOKENIZED));
STRCPY_AtoT(buffer, content.c_str(), MAX_BUFFER_SIZE);
doc.add(*_CLNEW Field(_T("content"), buffer, Field::STORE_NO | Field::INDEX_TOKENIZED));
/* Add the document to the index */
this->writer->addDocument(&doc);
@ -51,6 +51,9 @@ namespace kiwix {
}
/*
 * Finish the indexing session: optimize and close the index writer, then
 * free the writer and the directory object held by this indexer.
 *
 * NOTE(review): neither pointer is reset after being deleted, so calling
 * this method a second time (or touching writer/dir afterwards) would act
 * on dangling pointers.
 */
void CluceneIndexer::stopIndexing() {
/* Presumably merges the index segments before the final flush -- confirm against the CLucene IndexWriter documentation */
this->writer->optimize();
this->writer->close();
/* The writer is deleted before the directory */
delete this->writer;
/* NOTE(review): CLucene directories may be reference counted; verify that a plain delete is the intended way to release this one */
delete this->dir;
}
}

View File

@ -4,6 +4,8 @@
#include <CLucene.h>
#include "indexer.h"
#define MAX_BUFFER_SIZE 4200000
using namespace std;
using namespace lucene::analysis;
@ -32,7 +34,7 @@ namespace kiwix {
FSDirectory* dir;
IndexWriter* writer;
SimpleAnalyzer analyzer;
lucene::analysis::standard::StandardAnalyzer analyzer;
};
}

View File

@ -2,29 +2,7 @@
namespace kiwix {
typedef std::basic_string<TCHAR> tstring;
/*
 * Convert a multibyte string (interpreted in the current C locale) into a
 * newly allocated, NUL-terminated wide string.
 *
 * The returned buffer is allocated with new[]; the caller owns it and must
 * release it with delete[]. If the input is not a valid multibyte sequence,
 * an empty string is returned rather than an unterminated buffer.
 * Throws std::bad_alloc if the buffer cannot be allocated.
 */
TCHAR* StringToTCHAR(string& s)
{
  const char* all = s.c_str();
  /* Each wide character consumes at least one input byte, so the byte
     count plus the terminator is always a sufficient element count. */
  const size_t len = 1 + strlen(all);
  /* new[] throws std::bad_alloc on failure, so no NULL check is needed */
  wchar_t* t = new wchar_t[len];
  /* mbstowcs() returns (size_t)-1 on an invalid sequence and then leaves
     the output unterminated; hand back an empty string in that case. */
  if (mbstowcs(t, all, len) == (size_t)-1) {
    t[0] = L'\0';
  }
  /* Defensive: guarantee termination whatever the conversion wrote */
  t[len - 1] = L'\0';
  return (TCHAR*)t;
}
/*
 * Convert a NUL-terminated wide string into a multibyte std::string using
 * the current C locale.
 *
 * Returns an empty string when ptsz is NULL or when it contains a character
 * that cannot be represented in the current locale.
 */
std::string TCHARToString(const TCHAR* ptsz)
{
  if (ptsz == NULL) {
    return std::string();
  }
  const wchar_t* wide = (const wchar_t*)ptsz;
  const size_t len = wcslen(wide);
  /* One wide character can expand to as many as MB_CUR_MAX bytes; sizing
     the buffer for the worst case means the output is never truncated. */
  std::string converted(len * MB_CUR_MAX + 1, '\0');
  const size_t written = wcstombs(&converted[0], wide, converted.size());
  /* (size_t)-1 signals an unconvertible character; the buffer content is
     then unspecified, so it must not be used. */
  if (written == (size_t)-1) {
    return std::string();
  }
  converted.resize(written);
  return converted;
}
TCHAR buffer[MAX_BUFFER_SIZE];
/* Constructor */
CluceneSearcher::CluceneSearcher(const string &cluceneDirectoryPath)
@ -45,9 +23,9 @@ namespace kiwix {
/* Search strings in the database */
void CluceneSearcher::searchInIndex(string &search, const unsigned int resultsCount, const bool verbose) {
IndexSearcher searcher(reader);
SimpleAnalyzer analyzer;
QueryParser parser(_T("content"), &analyzer);
Query* query = parser.parse(StringToTCHAR(search));
STRCPY_AtoT(buffer, search.c_str(), MAX_BUFFER_SIZE);
Query* query = parser.parse(buffer);
Hits* hits = searcher.search(query);
cout << "--------------------------------" << hits->length() << endl;

View File

@ -5,6 +5,8 @@
#include <CLucene/queryParser/MultiFieldQueryParser.h>
#include "searcher.h"
#define MAX_BUFFER_SIZE 4200000
using namespace std;
using namespace lucene::analysis;
using namespace lucene::index;
@ -27,6 +29,8 @@ namespace kiwix {
void openIndex(const string &cluceneDirectoryPath);
IndexReader* reader;
lucene::analysis::standard::StandardAnalyzer analyzer;
};
}