diff --git a/src/common/kiwix/cluceneIndexer.cpp b/src/common/kiwix/cluceneIndexer.cpp
index 813500bea..f2502e882 100644
--- a/src/common/kiwix/cluceneIndexer.cpp
+++ b/src/common/kiwix/cluceneIndexer.cpp
@@ -4,19 +4,68 @@ namespace kiwix {
 
   CluceneIndexer::CluceneIndexer(const string &zimFilePath, const string &cluceneDirectoryPath) :
     Indexer(zimFilePath) {
+    /* Get the index directory, then create the writer on top of it. */
+    this->dir = FSDirectory::getDirectory(cluceneDirectoryPath.c_str(), true);
+    this->writer = new IndexWriter(dir, &analyzer, true);
   }
 
   void CluceneIndexer::indexNextPercentPre() {
   }
 
-  void CluceneIndexer::indexNextArticle(string &url, string &title, string &unaccentedTitle,
-                                        string &keywords, string &content) {
+  /* Widen a narrow string byte by byte into the wchar_t string handed to
+     CLucene's Field. A (const wchar_t*) cast on a char buffer converts
+     nothing: the same bytes are merely re-read as wide characters.
+     NOTE(review): bytes >= 0x80 are widened one to one; if these strings
+     are UTF-8 a real decoder is needed instead -- confirm with the caller. */
+  static std::wstring widen(const string &narrow) {
+    std::wstring wide;
+    wide.reserve(narrow.size());
+    for (string::const_iterator it = narrow.begin(); it != narrow.end(); ++it) {
+      /* Go through unsigned char so high bytes are not sign-extended. */
+      wide.push_back(static_cast<wchar_t>(static_cast<unsigned char>(*it)));
+    }
+    return wide;
+  }
+
+  /* Add one article to the index as a single CLucene document. */
+  void CluceneIndexer::indexNextArticle(const string &url,
+                                        const string &title,
+                                        const string &unaccentedTitle,
+                                        const string &keywords,
+                                        const string &content) {
+    /* Named wide copies stay alive until addDocument() has returned. */
+    const std::wstring wideTitle = widen(title);
+    const std::wstring wideUrl = widen(url);
+    const std::wstring wideUnaccentedTitle = widen(unaccentedTitle);
+    const std::wstring wideKeywords = widen(keywords);
+    const std::wstring wideContent = widen(content);
+
+    Document doc;
+
+    /* Stored; indexed without tokenizing */
+    doc.add(*_CLNEW Field(_T("title"), wideTitle.c_str(),
+                          Field::STORE_YES | Field::INDEX_UNTOKENIZED));
+    doc.add(*_CLNEW Field(_T("url"), wideUrl.c_str(),
+                          Field::STORE_YES | Field::INDEX_UNTOKENIZED));
+
+    /* Indexed with tokenizing; not stored */
+    doc.add(*_CLNEW Field(_T("unaccentedTitle"), wideUnaccentedTitle.c_str(),
+                          Field::STORE_NO | Field::INDEX_TOKENIZED));
+    doc.add(*_CLNEW Field(_T("keywords"), wideKeywords.c_str(),
+                          Field::STORE_NO | Field::INDEX_TOKENIZED));
+    doc.add(*_CLNEW Field(_T("content"), wideContent.c_str(),
+                          Field::STORE_NO | Field::INDEX_TOKENIZED));
+
+    this->writer->addDocument(&doc);
   }
 
   void CluceneIndexer::indexNextPercentPost() {
   }
 
   void CluceneIndexer::stopIndexing() {
+    /* NOTE(review): the writer is closed here but never deleted, and dir is
+       never released in the code visible in this patch -- confirm ownership. */
+    this->writer->close();
   }
 
 }
diff --git a/src/common/kiwix/cluceneIndexer.h b/src/common/kiwix/cluceneIndexer.h
index 6a2454b2a..ab328cd27 100644
--- a/src/common/kiwix/cluceneIndexer.h
+++ b/src/common/kiwix/cluceneIndexer.h
@@ -1,20 +1,18 @@
 #ifndef KIWIX_CLUCENE_INDEXER_H
 #define KIWIX_CLUCENE_INDEXER_H
 
-#include
-#include
-#include
-#include
-
-#include
-#include
-#include
-#include
-#include "xapian/myhtmlparse.h"
+#include <CLucene.h>
 #include "indexer.h"
 
 using namespace std;
 
+using namespace lucene::analysis;
+using namespace lucene::index;
+using namespace lucene::document;
+using namespace lucene::queryParser;
+using namespace lucene::search;
+using namespace lucene::store;
+
 namespace kiwix {
 
   class CluceneIndexer : public Indexer {
@@ -24,10 +22,18 @@ namespace kiwix {
 
   protected:
     void indexNextPercentPre();
-    void indexNextArticle(string &url, string &title, string &unaccentedTitle,
-                          string &keywords, string &content);
+    void indexNextArticle(const string &url,
+                          const string &title,
+                          const string &unaccentedTitle,
+                          const string &keywords,
+                          const string &content);
     void indexNextPercentPost();
     void stopIndexing();
+
+    /* Index directory, the writer created on it, and the analyzer passed to that writer. */
+    FSDirectory* dir;
+    IndexWriter* writer;
+    SimpleAnalyzer analyzer;
   };
 
 }