+ First working version of the CLucene indexer class

This commit is contained in:
kelson42 2010-11-02 19:00:37 +00:00
parent dd4913a77c
commit b4ab94e4c0
2 changed files with 42 additions and 14 deletions

View File

@ -4,19 +4,42 @@ namespace kiwix {
/*
 * Builds an indexer that reads the ZIM file at zimFilePath (handled by the
 * Indexer base class) and writes a CLucene index into cluceneDirectoryPath.
 */
CluceneIndexer::CluceneIndexer(const string &zimFilePath, const string &cluceneDirectoryPath) :
  Indexer(zimFilePath) {
  /* The same flag is handed to both calls; per the CLucene API it asks for
     a new directory/index to be created rather than an existing one opened. */
  const bool createNew = true;
  const char *indexPath = cluceneDirectoryPath.c_str();

  this->dir = FSDirectory::getDirectory(indexPath, createNew);
  this->writer = new IndexWriter(this->dir, &this->analyzer, createNew);
}
/*
 * Currently a no-op for the CLucene backend.
 * NOTE(review): presumably called by the Indexer base class before each
 * percent of progress is processed -- confirm against indexer.h.
 */
void CluceneIndexer::indexNextPercentPre() {
}
void CluceneIndexer::indexNextArticle(string &url, string &title, string &unaccentedTitle,
string &keywords, string &content) {
void CluceneIndexer::indexNextArticle(const string &url,
const string &title,
const string &unaccentedTitle,
const string &keywords,
const string &content) {
Document doc;
/* Not indexed */
doc.add(*_CLNEW Field((const wchar_t*)("title"), (const wchar_t*)(title.c_str()),
Field::STORE_YES | Field::INDEX_UNTOKENIZED));
doc.add(*_CLNEW Field((const wchar_t*)("url"), (const wchar_t*)(url.c_str()),
Field::STORE_YES | Field::INDEX_UNTOKENIZED));
/* indexed */
doc.add(*_CLNEW Field((const wchar_t*)("unaccentedTitle"), (const wchar_t*)(unaccentedTitle.c_str()),
Field::STORE_NO | Field::INDEX_TOKENIZED));
doc.add(*_CLNEW Field((const wchar_t*)("keywords"), (const wchar_t*)(keywords.c_str()),
Field::STORE_NO | Field::INDEX_TOKENIZED));
doc.add(*_CLNEW Field((const wchar_t*)("content"), (const wchar_t*)(content.c_str()),
Field::STORE_NO | Field::INDEX_TOKENIZED));
this->writer->addDocument(&doc);
}
/*
 * Currently a no-op for the CLucene backend.
 * NOTE(review): presumably called by the Indexer base class after each
 * percent of progress has been processed -- confirm against indexer.h.
 */
void CluceneIndexer::indexNextPercentPost() {
}
/*
 * Finishes indexing: closes the index writer and frees it.
 *
 * The writer is allocated with "new" in the constructor, so it is deleted
 * here once closed. The pointer is reset afterwards, which makes a second
 * call a harmless no-op instead of closing the writer twice.
 */
void CluceneIndexer::stopIndexing() {
  if (this->writer == NULL) {
    return;
  }

  this->writer->close();
  delete this->writer;
  this->writer = NULL;
}
}

View File

@ -1,20 +1,18 @@
#ifndef KIWIX_CLUCENE_INDEXER_H
#define KIWIX_CLUCENE_INDEXER_H
#include <string>
#include <vector>
#include <fstream>
#include <iostream>
#include <unaccent.h>
#include <zim/file.h>
#include <zim/article.h>
#include <zim/fileiterator.h>
#include "xapian/myhtmlparse.h"
#include <CLucene.h>
#include "indexer.h"
using namespace std;
using namespace lucene::analysis;
using namespace lucene::index;
using namespace lucene::document;
using namespace lucene::queryParser;
using namespace lucene::search;
using namespace lucene::store;
namespace kiwix {
class CluceneIndexer : public Indexer {
@ -24,10 +22,17 @@ namespace kiwix {
protected:
/* Hook implemented as an empty function in the CLucene backend. */
void indexNextPercentPre();
void indexNextArticle(string &url, string &title, string &unaccentedTitle,
string &keywords, string &content);
/* Adds one article (url, title, unaccented title, keywords, content) to the
   index as a single CLucene document. */
void indexNextArticle(const string &url,
const string &title,
const string &unaccentedTitle,
const string &keywords,
const string &content);
/* Hook implemented as an empty function in the CLucene backend. */
void indexNextPercentPost();
/* Closes the index writer. */
void stopIndexing();
/* Index directory, obtained from FSDirectory::getDirectory() in the constructor. */
FSDirectory* dir;
/* Writer created in the constructor on top of dir; closed by stopIndexing(). */
IndexWriter* writer;
/* Analyzer whose address is handed to the IndexWriter in the constructor. */
SimpleAnalyzer analyzer;
};
}