+ first working version of the CLucene indexer class

This commit is contained in:
kelson42 2010-11-02 19:00:37 +00:00
parent dd4913a77c
commit b4ab94e4c0
2 changed files with 42 additions and 14 deletions

View File

@ -4,19 +4,42 @@ namespace kiwix {
/*
 * Constructs the CLucene-backed indexer.
 *
 * zimFilePath is forwarded unchanged to the Indexer base class.
 * cluceneDirectoryPath is handed to FSDirectory::getDirectory() to obtain
 * the directory the index writer operates on.
 *
 * NOTE(review): both `true` arguments are presumably CLucene's "create"
 * flags (start from a fresh index) - confirm against the CLucene API.
 */
CluceneIndexer::CluceneIndexer(const string &zimFilePath,
                               const string &cluceneDirectoryPath)
  : Indexer(zimFilePath) {
  /* The directory must exist before the writer, which is built on top of it. */
  dir = FSDirectory::getDirectory(cluceneDirectoryPath.c_str(), true);

  /* The writer receives the member analyzer by address. */
  writer = new IndexWriter(dir, &analyzer, true);
}
/*
 * Performs no work for the CLucene backend.
 * NOTE(review): presumably invoked by the Indexer base class before each
 * percent-sized batch of articles - confirm against indexer.h.
 */
void CluceneIndexer::indexNextPercentPre() {}
void CluceneIndexer::indexNextArticle(string &url, string &title, string &unaccentedTitle, void CluceneIndexer::indexNextArticle(const string &url,
string &keywords, string &content) { const string &title,
const string &unaccentedTitle,
const string &keywords,
const string &content) {
Document doc;
/* Not indexed */
doc.add(*_CLNEW Field((const wchar_t*)("title"), (const wchar_t*)(title.c_str()),
Field::STORE_YES | Field::INDEX_UNTOKENIZED));
doc.add(*_CLNEW Field((const wchar_t*)("url"), (const wchar_t*)(url.c_str()),
Field::STORE_YES | Field::INDEX_UNTOKENIZED));
/* indexed */
doc.add(*_CLNEW Field((const wchar_t*)("unaccentedTitle"), (const wchar_t*)(unaccentedTitle.c_str()),
Field::STORE_NO | Field::INDEX_TOKENIZED));
doc.add(*_CLNEW Field((const wchar_t*)("keywords"), (const wchar_t*)(keywords.c_str()),
Field::STORE_NO | Field::INDEX_TOKENIZED));
doc.add(*_CLNEW Field((const wchar_t*)("content"), (const wchar_t*)(content.c_str()),
Field::STORE_NO | Field::INDEX_TOKENIZED));
this->writer->addDocument(&doc);
} }
/*
 * Performs no work for the CLucene backend.
 * NOTE(review): presumably invoked by the Indexer base class after each
 * percent-sized batch of articles - confirm against indexer.h.
 */
void CluceneIndexer::indexNextPercentPost() {}
void CluceneIndexer::stopIndexing() { void CluceneIndexer::stopIndexing() {
this->writer->close();
} }
} }

View File

@ -1,20 +1,18 @@
#ifndef KIWIX_CLUCENE_INDEXER_H #ifndef KIWIX_CLUCENE_INDEXER_H
#define KIWIX_CLUCENE_INDEXER_H #define KIWIX_CLUCENE_INDEXER_H
#include <string> #include <CLucene.h>
#include <vector>
#include <fstream>
#include <iostream>
#include <unaccent.h>
#include <zim/file.h>
#include <zim/article.h>
#include <zim/fileiterator.h>
#include "xapian/myhtmlparse.h"
#include "indexer.h" #include "indexer.h"
using namespace std; using namespace std;
using namespace lucene::analysis;
using namespace lucene::index;
using namespace lucene::document;
using namespace lucene::queryParser;
using namespace lucene::search;
using namespace lucene::store;
namespace kiwix { namespace kiwix {
class CluceneIndexer : public Indexer { class CluceneIndexer : public Indexer {
@ -24,10 +22,17 @@ namespace kiwix {
/* Indexing steps implemented by the CLucene backend.
   NOTE(review): presumably hooks declared by the Indexer base class -
   confirm against indexer.h. */
protected: protected:
/* Has an empty body in the CLucene implementation. */
void indexNextPercentPre(); void indexNextPercentPre();
/* Adds one article to the index: title and url are stored untokenized;
   unaccentedTitle, keywords and content are tokenized and not stored. */
void indexNextArticle(string &url, string &title, string &unaccentedTitle, void indexNextArticle(const string &url,
string &keywords, string &content); const string &title,
const string &unaccentedTitle,
const string &keywords,
const string &content);
/* Has an empty body in the CLucene implementation. */
void indexNextPercentPost(); void indexNextPercentPost();
/* Closes the index writer. */
void stopIndexing(); void stopIndexing();
/* Index directory, obtained from FSDirectory::getDirectory() in the constructor. */
FSDirectory* dir;
/* Writer allocated in the constructor on top of `dir`; closed by stopIndexing(). */
IndexWriter* writer;
/* Analyzer whose address is passed to the IndexWriter constructor. */
SimpleAnalyzer analyzer;
}; };
} }