mirror of https://github.com/kiwix/libkiwix.git
+ additional work on the CLucene backend... it still does not work :(
This commit is contained in:
parent
6eb3da53cc
commit
4deacdd923
|
@ -2,6 +2,8 @@
|
|||
|
||||
namespace kiwix {
|
||||
|
||||
/* Scratch buffer used to convert narrow strings to TCHAR before handing
   them to CLucene. Declared static (internal linkage) so it cannot clash at
   link time with an identically named buffer defined in another .cpp file.
   NOTE(review): the buffer is shared by every call in this file, so the
   code using it is not reentrant -- confirm this is acceptable. */
static TCHAR buffer[MAX_BUFFER_SIZE];
|
||||
|
||||
CluceneIndexer::CluceneIndexer(const string &zimFilePath, const string &cluceneDirectoryPath) :
|
||||
Indexer(zimFilePath) {
|
||||
|
||||
|
@ -21,27 +23,25 @@ namespace kiwix {
|
|||
Document doc;
|
||||
|
||||
/* Not indexed but stored */
|
||||
doc.add(*_CLNEW Field((const wchar_t*)("title"), (const wchar_t*)(title.c_str()),
|
||||
Field::STORE_YES | Field::INDEX_UNTOKENIZED));
|
||||
doc.add(*_CLNEW Field((const wchar_t*)("url"), (const wchar_t*)(url.c_str()),
|
||||
Field::STORE_YES | Field::INDEX_UNTOKENIZED));
|
||||
STRCPY_AtoT(buffer, title.c_str(), MAX_BUFFER_SIZE);
|
||||
doc.add(*_CLNEW Field(_T("title"), buffer, Field::STORE_YES | Field::INDEX_UNTOKENIZED));
|
||||
|
||||
STRCPY_AtoT(buffer, url.c_str(), MAX_BUFFER_SIZE);
|
||||
doc.add(*_CLNEW Field(_T("url"), buffer, Field::STORE_YES | Field::INDEX_UNTOKENIZED));
|
||||
|
||||
/* indexed but not stored */
|
||||
Field *titleField = new Field((const wchar_t*)("unaccentedTitle"),
|
||||
(const wchar_t*)(unaccentedTitle.c_str()),
|
||||
Field::STORE_NO | Field::INDEX_TOKENIZED);
|
||||
STRCPY_AtoT(buffer, unaccentedTitle.c_str(), MAX_BUFFER_SIZE);
|
||||
Field *titleField = new Field(_T("utitle"), buffer, Field::STORE_NO | Field::INDEX_TOKENIZED);
|
||||
titleField->setBoost(getTitleBoostFactor(content.size()));
|
||||
doc.add(*titleField);
|
||||
|
||||
Field *keywordsField = new Field((const wchar_t*)("keywords"),
|
||||
(const wchar_t*)(keywords.c_str()),
|
||||
Field::STORE_NO | Field::INDEX_TOKENIZED);
|
||||
STRCPY_AtoT(buffer, keywords.c_str(), MAX_BUFFER_SIZE);
|
||||
Field *keywordsField = new Field(_T("keywords"), buffer, Field::STORE_NO | Field::INDEX_TOKENIZED);
|
||||
keywordsField->setBoost(keywordsBoostFactor);
|
||||
doc.add(*keywordsField);
|
||||
|
||||
doc.add(*_CLNEW Field((const wchar_t*)("content"),
|
||||
(const wchar_t*)(content.c_str()),
|
||||
Field::STORE_NO | Field::INDEX_TOKENIZED));
|
||||
STRCPY_AtoT(buffer, content.c_str(), MAX_BUFFER_SIZE);
|
||||
doc.add(*_CLNEW Field(_T("content"), buffer, Field::STORE_NO | Field::INDEX_TOKENIZED));
|
||||
|
||||
/* Add the document to the index */
|
||||
this->writer->addDocument(&doc);
|
||||
|
@ -51,6 +51,9 @@ namespace kiwix {
|
|||
}
|
||||
|
||||
/* Finish the indexing session: optimize and close the index writer, then
   free the writer and the directory objects.
   Both members are reset to NULL afterwards so that a second call (or a
   later cleanup path) does not dereference or delete freed memory. */
void CluceneIndexer::stopIndexing() {
  if (this->writer != NULL) {
    this->writer->optimize();
    this->writer->close();
    delete this->writer;
    this->writer = NULL;
  }

  /* delete on a NULL pointer is a no-op, so no guard is needed here. */
  delete this->dir;
  this->dir = NULL;
}
|
||||
}
|
||||
|
|
|
@ -4,6 +4,8 @@
|
|||
#include <CLucene.h>
|
||||
#include "indexer.h"
|
||||
|
||||
#define MAX_BUFFER_SIZE 4200000
|
||||
|
||||
using namespace std;
|
||||
|
||||
using namespace lucene::analysis;
|
||||
|
@ -32,7 +34,7 @@ namespace kiwix {
|
|||
|
||||
FSDirectory* dir;
|
||||
IndexWriter* writer;
|
||||
SimpleAnalyzer analyzer;
|
||||
lucene::analysis::standard::StandardAnalyzer analyzer;
|
||||
};
|
||||
|
||||
}
|
||||
|
|
|
@ -2,29 +2,7 @@
|
|||
|
||||
namespace kiwix {
|
||||
|
||||
|
||||
typedef std::basic_string<TCHAR> tstring;
|
||||
|
||||
/*
 * Convert a multibyte string (interpreted in the current C locale) into a
 * newly allocated, NUL-terminated wide-character array.
 *
 * The array is allocated with new[]; the caller owns it and must release
 * it with delete[]. If the input contains a byte sequence that mbstowcs()
 * rejects, an empty string is returned instead of an unspecified buffer.
 * Throws std::bad_alloc (raised by new[]) when the allocation fails.
 *
 * NOTE(review): the final cast is only meaningful when TCHAR is wchar_t --
 * confirm against the CLucene build configuration.
 */
TCHAR* StringToTCHAR(string& s)
{
  const char* all = s.c_str();

  /* One wide character per input byte is an upper bound; +1 for the NUL. */
  const size_t len = 1 + strlen(all);
  wchar_t* t = new wchar_t[len];

  /* mbstowcs() returns (size_t)-1 on an invalid sequence; do not hand a
     half-written buffer back to the caller in that case. */
  const size_t converted = mbstowcs(t, all, len);
  if (converted == static_cast<size_t>(-1)) {
    t[0] = L'\0';
  }

  /* Guarantee termination whatever mbstowcs() did. */
  t[len - 1] = L'\0';

  return reinterpret_cast<TCHAR*>(t);
}
|
||||
|
||||
/*
 * Convert a NUL-terminated wide-character string into a multibyte
 * std::string using the current C locale.
 *
 * Returns an empty string when ptsz is NULL or when wcstombs() reports a
 * character that cannot be represented in the current locale.
 *
 * NOTE(review): ptsz is reinterpreted as wchar_t*, which is only correct
 * when TCHAR is wchar_t -- confirm against the CLucene build configuration.
 */
std::string TCHARToString(const TCHAR* ptsz)
{
  if (ptsz == NULL) {
    return std::string();
  }

  const wchar_t* wide = reinterpret_cast<const wchar_t*>(ptsz);
  const size_t len = wcslen(wide);

  /* A wide character may need up to MB_CUR_MAX bytes; sizing the buffer
     for the worst case means wcstombs() always has room for the NUL. */
  std::string out(len * MB_CUR_MAX + 1, '\0');

  const size_t written = wcstombs(&out[0], wide, out.size());
  if (written == static_cast<size_t>(-1)) {
    return std::string();
  }

  /* Drop the unused tail of the worst-case buffer. */
  out.resize(written);
  return out;
}
|
||||
/* Scratch buffer used to convert the narrow query string to TCHAR before
   passing it to the CLucene query parser. Declared static (internal
   linkage) so it cannot clash at link time with an identically named
   buffer defined in another .cpp file.
   NOTE(review): the buffer is shared by every call in this file, so the
   code using it is not reentrant -- confirm this is acceptable. */
static TCHAR buffer[MAX_BUFFER_SIZE];
|
||||
|
||||
/* Constructor */
|
||||
CluceneSearcher::CluceneSearcher(const string &cluceneDirectoryPath)
|
||||
|
@ -45,9 +23,9 @@ namespace kiwix {
|
|||
/* Search strings in the database */
|
||||
void CluceneSearcher::searchInIndex(string &search, const unsigned int resultsCount, const bool verbose) {
|
||||
IndexSearcher searcher(reader);
|
||||
SimpleAnalyzer analyzer;
|
||||
QueryParser parser(_T("content"), &analyzer);
|
||||
Query* query = parser.parse(StringToTCHAR(search));
|
||||
STRCPY_AtoT(buffer, search.c_str(), MAX_BUFFER_SIZE);
|
||||
Query* query = parser.parse(buffer);
|
||||
Hits* hits = searcher.search(query);
|
||||
cout << "--------------------------------" << hits->length() << endl;
|
||||
|
||||
|
|
|
@ -5,6 +5,8 @@
|
|||
#include <CLucene/queryParser/MultiFieldQueryParser.h>
|
||||
#include "searcher.h"
|
||||
|
||||
#define MAX_BUFFER_SIZE 4200000
|
||||
|
||||
using namespace std;
|
||||
using namespace lucene::analysis;
|
||||
using namespace lucene::index;
|
||||
|
@ -27,6 +29,8 @@ namespace kiwix {
|
|||
void openIndex(const string &cluceneDirectoryPath);
|
||||
|
||||
IndexReader* reader;
|
||||
lucene::analysis::standard::StandardAnalyzer analyzer;
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue