mirror of https://github.com/kiwix/libkiwix.git
+ first working version of the CLucene indexer class
This commit is contained in:
parent
dd4913a77c
commit
b4ab94e4c0
|
@ -4,19 +4,42 @@ namespace kiwix {
|
||||||
|
|
||||||
/* Construct a CLucene-backed indexer.
 *
 * zimFilePath is forwarded unchanged to the Indexer base class.
 * cluceneDirectoryPath is the filesystem path handed to
 * FSDirectory::getDirectory() for the index.
 *
 * The writer is created in the constructor body (not in the initializer
 * list) so that the 'analyzer' member, whose address is passed to it, has
 * already been constructed.
 *
 * NOTE(review): both 'true' arguments presumably mean "create / overwrite
 * the index" -- confirm against the CLucene version in use.
 */
CluceneIndexer::CluceneIndexer(const string &zimFilePath,
                               const string &cluceneDirectoryPath)
  : Indexer(zimFilePath) {
  const char *indexPath = cluceneDirectoryPath.c_str();

  dir = FSDirectory::getDirectory(indexPath, true);
  writer = new IndexWriter(dir, &analyzer, true);
}
|
||||||
|
|
||||||
/* No-op in this implementation.
 *
 * NOTE(review): presumably invoked by the Indexer base class before a
 * progress step is indexed -- confirm against indexer.h.
 */
void CluceneIndexer::indexNextPercentPre() {
  /* Intentionally empty. */
}
|
||||||
|
|
||||||
void CluceneIndexer::indexNextArticle(string &url, string &title, string &unaccentedTitle,
|
void CluceneIndexer::indexNextArticle(const string &url,
|
||||||
string &keywords, string &content) {
|
const string &title,
|
||||||
|
const string &unaccentedTitle,
|
||||||
|
const string &keywords,
|
||||||
|
const string &content) {
|
||||||
|
|
||||||
|
Document doc;
|
||||||
|
|
||||||
|
/* Not indexed */
|
||||||
|
doc.add(*_CLNEW Field((const wchar_t*)("title"), (const wchar_t*)(title.c_str()),
|
||||||
|
Field::STORE_YES | Field::INDEX_UNTOKENIZED));
|
||||||
|
doc.add(*_CLNEW Field((const wchar_t*)("url"), (const wchar_t*)(url.c_str()),
|
||||||
|
Field::STORE_YES | Field::INDEX_UNTOKENIZED));
|
||||||
|
|
||||||
|
/* indexed */
|
||||||
|
doc.add(*_CLNEW Field((const wchar_t*)("unaccentedTitle"), (const wchar_t*)(unaccentedTitle.c_str()),
|
||||||
|
Field::STORE_NO | Field::INDEX_TOKENIZED));
|
||||||
|
doc.add(*_CLNEW Field((const wchar_t*)("keywords"), (const wchar_t*)(keywords.c_str()),
|
||||||
|
Field::STORE_NO | Field::INDEX_TOKENIZED));
|
||||||
|
doc.add(*_CLNEW Field((const wchar_t*)("content"), (const wchar_t*)(content.c_str()),
|
||||||
|
Field::STORE_NO | Field::INDEX_TOKENIZED));
|
||||||
|
this->writer->addDocument(&doc);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* No-op in this implementation.
 *
 * NOTE(review): presumably invoked by the Indexer base class after a
 * progress step has been indexed -- confirm against indexer.h.
 */
void CluceneIndexer::indexNextPercentPost() {
  /* Intentionally empty. */
}
|
||||||
|
|
||||||
void CluceneIndexer::stopIndexing() {
|
void CluceneIndexer::stopIndexing() {
|
||||||
|
this->writer->close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,20 +1,18 @@
|
||||||
#ifndef KIWIX_CLUCENE_INDEXER_H
|
#ifndef KIWIX_CLUCENE_INDEXER_H
|
||||||
#define KIWIX_CLUCENE_INDEXER_H
|
#define KIWIX_CLUCENE_INDEXER_H
|
||||||
|
|
||||||
#include <string>
|
#include <CLucene.h>
|
||||||
#include <vector>
|
|
||||||
#include <fstream>
|
|
||||||
#include <iostream>
|
|
||||||
|
|
||||||
#include <unaccent.h>
|
|
||||||
#include <zim/file.h>
|
|
||||||
#include <zim/article.h>
|
|
||||||
#include <zim/fileiterator.h>
|
|
||||||
#include "xapian/myhtmlparse.h"
|
|
||||||
#include "indexer.h"
|
#include "indexer.h"
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
|
using namespace lucene::analysis;
|
||||||
|
using namespace lucene::index;
|
||||||
|
using namespace lucene::document;
|
||||||
|
using namespace lucene::queryParser;
|
||||||
|
using namespace lucene::search;
|
||||||
|
using namespace lucene::store;
|
||||||
|
|
||||||
namespace kiwix {
|
namespace kiwix {
|
||||||
|
|
||||||
class CluceneIndexer : public Indexer {
|
class CluceneIndexer : public Indexer {
|
||||||
|
@ -24,10 +22,17 @@ namespace kiwix {
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
void indexNextPercentPre();
|
void indexNextPercentPre();
|
||||||
void indexNextArticle(string &url, string &title, string &unaccentedTitle,
|
void indexNextArticle(const string &url,
|
||||||
string &keywords, string &content);
|
const string &title,
|
||||||
|
const string &unaccentedTitle,
|
||||||
|
const string &keywords,
|
||||||
|
const string &content);
|
||||||
void indexNextPercentPost();
|
void indexNextPercentPost();
|
||||||
void stopIndexing();
|
void stopIndexing();
|
||||||
|
|
||||||
|
FSDirectory* dir;
|
||||||
|
IndexWriter* writer;
|
||||||
|
SimpleAnalyzer analyzer;
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue