+ Additional work on the CLucene backend; it still does not work :(

This commit is contained in:
kelson42 2010-11-17 20:58:19 +00:00
parent 6eb3da53cc
commit 4deacdd923
4 changed files with 26 additions and 39 deletions

View File

@ -2,6 +2,8 @@
namespace kiwix { namespace kiwix {
TCHAR buffer[MAX_BUFFER_SIZE];
CluceneIndexer::CluceneIndexer(const string &zimFilePath, const string &cluceneDirectoryPath) : CluceneIndexer::CluceneIndexer(const string &zimFilePath, const string &cluceneDirectoryPath) :
Indexer(zimFilePath) { Indexer(zimFilePath) {
@ -21,27 +23,25 @@ namespace kiwix {
Document doc; Document doc;
/* Not indexed but stored */ /* Not indexed but stored */
doc.add(*_CLNEW Field((const wchar_t*)("title"), (const wchar_t*)(title.c_str()), STRCPY_AtoT(buffer, title.c_str(), MAX_BUFFER_SIZE);
Field::STORE_YES | Field::INDEX_UNTOKENIZED)); doc.add(*_CLNEW Field(_T("title"), buffer, Field::STORE_YES | Field::INDEX_UNTOKENIZED));
doc.add(*_CLNEW Field((const wchar_t*)("url"), (const wchar_t*)(url.c_str()),
Field::STORE_YES | Field::INDEX_UNTOKENIZED)); STRCPY_AtoT(buffer, url.c_str(), MAX_BUFFER_SIZE);
doc.add(*_CLNEW Field(_T("url"), buffer, Field::STORE_YES | Field::INDEX_UNTOKENIZED));
/* indexed but not stored */ /* indexed but not stored */
Field *titleField = new Field((const wchar_t*)("unaccentedTitle"), STRCPY_AtoT(buffer, unaccentedTitle.c_str(), MAX_BUFFER_SIZE);
(const wchar_t*)(unaccentedTitle.c_str()), Field *titleField = new Field(_T("utitle"), buffer, Field::STORE_NO | Field::INDEX_TOKENIZED);
Field::STORE_NO | Field::INDEX_TOKENIZED);
titleField->setBoost(getTitleBoostFactor(content.size())); titleField->setBoost(getTitleBoostFactor(content.size()));
doc.add(*titleField); doc.add(*titleField);
Field *keywordsField = new Field((const wchar_t*)("keywords"), STRCPY_AtoT(buffer, keywords.c_str(), MAX_BUFFER_SIZE);
(const wchar_t*)(keywords.c_str()), Field *keywordsField = new Field(_T("keywords"), buffer, Field::STORE_NO | Field::INDEX_TOKENIZED);
Field::STORE_NO | Field::INDEX_TOKENIZED);
keywordsField->setBoost(keywordsBoostFactor); keywordsField->setBoost(keywordsBoostFactor);
doc.add(*keywordsField); doc.add(*keywordsField);
doc.add(*_CLNEW Field((const wchar_t*)("content"), STRCPY_AtoT(buffer, content.c_str(), MAX_BUFFER_SIZE);
(const wchar_t*)(content.c_str()), doc.add(*_CLNEW Field(_T("content"), buffer, Field::STORE_NO | Field::INDEX_TOKENIZED));
Field::STORE_NO | Field::INDEX_TOKENIZED));
/* Add the document to the index */ /* Add the document to the index */
this->writer->addDocument(&doc); this->writer->addDocument(&doc);
@ -51,6 +51,9 @@ namespace kiwix {
} }
void CluceneIndexer::stopIndexing() { void CluceneIndexer::stopIndexing() {
this->writer->optimize();
this->writer->close(); this->writer->close();
delete this->writer;
delete this->dir;
} }
} }

View File

@ -4,6 +4,8 @@
#include <CLucene.h> #include <CLucene.h>
#include "indexer.h" #include "indexer.h"
#define MAX_BUFFER_SIZE 4200000
using namespace std; using namespace std;
using namespace lucene::analysis; using namespace lucene::analysis;
@ -32,7 +34,7 @@ namespace kiwix {
FSDirectory* dir; FSDirectory* dir;
IndexWriter* writer; IndexWriter* writer;
SimpleAnalyzer analyzer; lucene::analysis::standard::StandardAnalyzer analyzer;
}; };
} }

View File

@ -2,29 +2,7 @@
namespace kiwix { namespace kiwix {
TCHAR buffer[MAX_BUFFER_SIZE];
typedef std::basic_string<TCHAR> tstring;
/*
 * Convert a multibyte (locale-encoded) string to a newly allocated,
 * NUL-terminated wide-character string.
 *
 * The result is allocated with new[]; the caller owns it and must release
 * it with delete[]. If the allocation fails, new[] throws std::bad_alloc.
 *
 * NOTE(review): the final cast assumes TCHAR is wchar_t -- confirm for
 * builds where TCHAR is a narrow character type.
 */
TCHAR* StringToTCHAR(string& s)
{
  /* Worst case is one wide character per input byte, plus the terminator. */
  const size_t capacity = s.size() + 1;

  /* Value-initialise so the buffer starts out zero-filled. */
  wchar_t* wide = new wchar_t[capacity]();

  const size_t converted = mbstowcs(wide, s.c_str(), capacity);
  if (converted == static_cast<size_t>(-1)) {
    /* Invalid multibyte sequence: mbstowcs() stopped early without writing
       a terminator. Force one so callers never read past the buffer. */
    wide[capacity - 1] = L'\0';
  }

  return reinterpret_cast<TCHAR*>(wide);
}
/*
 * Convert a NUL-terminated wide-character string to a multibyte
 * (locale-encoded) std::string.
 *
 * Returns an empty string when ptsz is null. If wcstombs() meets a
 * character it cannot represent in the current locale, the result holds
 * only what was converted before the failure.
 *
 * NOTE(review): the cast assumes TCHAR is wchar_t -- confirm for builds
 * where TCHAR is a narrow character type.
 */
std::string TCHARToString(const TCHAR* ptsz)
{
  if (ptsz == NULL) {
    return std::string();
  }

  const wchar_t* wide = reinterpret_cast<const wchar_t*>(ptsz);
  const size_t len = wcslen(wide);

  /* A wide character may need up to MB_CUR_MAX bytes in the current
     locale (more than 2 in UTF-8), so size the buffer for that worst
     case plus one byte for the terminator. */
  const size_t capacity = len * MB_CUR_MAX + 1;
  std::string bytes(capacity, '\0');

  /* Hand wcstombs() one byte less than the buffer so the final byte is
     always NUL, whether the conversion succeeds or stops on an error. */
  wcstombs(&bytes[0], wide, capacity - 1);

  /* Cut at the first NUL: the real end of the converted text. */
  bytes.erase(bytes.find('\0'));
  return bytes;
}
/* Constructor */ /* Constructor */
CluceneSearcher::CluceneSearcher(const string &cluceneDirectoryPath) CluceneSearcher::CluceneSearcher(const string &cluceneDirectoryPath)
@ -45,9 +23,9 @@ namespace kiwix {
/* Search strings in the database */ /* Search strings in the database */
void CluceneSearcher::searchInIndex(string &search, const unsigned int resultsCount, const bool verbose) { void CluceneSearcher::searchInIndex(string &search, const unsigned int resultsCount, const bool verbose) {
IndexSearcher searcher(reader); IndexSearcher searcher(reader);
SimpleAnalyzer analyzer;
QueryParser parser(_T("content"), &analyzer); QueryParser parser(_T("content"), &analyzer);
Query* query = parser.parse(StringToTCHAR(search)); STRCPY_AtoT(buffer, search.c_str(), MAX_BUFFER_SIZE);
Query* query = parser.parse(buffer);
Hits* hits = searcher.search(query); Hits* hits = searcher.search(query);
cout << "--------------------------------" << hits->length() << endl; cout << "--------------------------------" << hits->length() << endl;

View File

@ -5,6 +5,8 @@
#include <CLucene/queryParser/MultiFieldQueryParser.h> #include <CLucene/queryParser/MultiFieldQueryParser.h>
#include "searcher.h" #include "searcher.h"
#define MAX_BUFFER_SIZE 4200000
using namespace std; using namespace std;
using namespace lucene::analysis; using namespace lucene::analysis;
using namespace lucene::index; using namespace lucene::index;
@ -27,6 +29,8 @@ namespace kiwix {
void openIndex(const string &cluceneDirectoryPath); void openIndex(const string &cluceneDirectoryPath);
IndexReader* reader; IndexReader* reader;
lucene::analysis::standard::StandardAnalyzer analyzer;
}; };
} }