+ remove clucene source files

This commit is contained in:
kelson42 2013-12-16 15:22:21 +01:00
parent 840442fccd
commit fc6254090a
4 changed files with 0 additions and 330 deletions

View File

@ -1,86 +0,0 @@
/*
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#include "cluceneIndexer.h"
namespace kiwix {
/* Scratch buffer receiving the wide-character form of each field value
 * converted in CluceneIndexer::index().
 * It is declared static (internal linkage) because cluceneSearcher.cpp
 * defines a kiwix::buffer of its own: two external definitions of the
 * same object break the one-definition rule as soon as both files are
 * linked into the same binary. */
static TCHAR buffer[MAX_BUFFER_SIZE];
/* Builds an indexer with no index opened yet.
 * The directory and writer pointers start out as NULL instead of holding
 * indeterminate values; indexingPrelude() assigns the real objects. */
CluceneIndexer::CluceneIndexer()
  : dir(NULL),
    writer(NULL) {
}
void CluceneIndexer::indexingPrelude(const string indexPath) {
this->dir = FSDirectory::getDirectory(indexPath.c_str(), true);
this->writer = new IndexWriter(this->dir, &analyzer, true);
this->writer->setUseCompoundFile(false);
}
void CluceneIndexer::index(const string &url,
const string &title,
const string &unaccentedTitle,
const string &keywords,
const string &content,
const string &snippet,
const string &size,
const string &wordCount) {
Document doc;
/* Not indexed but stored */
//STRCPY_AtoT(buffer, title.c_str(), MAX_BUFFER_SIZE);
::mbstowcs(buffer,title.c_str(),MAX_BUFFER_SIZE);
doc.add(*_CLNEW Field(_T("title"), buffer, Field::STORE_YES | Field::INDEX_UNTOKENIZED)); // TODO: Why store, not analyzed? what is utitle?
//STRCPY_AtoT(buffer, url.c_str(), MAX_BUFFER_SIZE);
::mbstowcs(buffer,url.c_str(),MAX_BUFFER_SIZE);
doc.add(*_CLNEW Field(_T("url"), buffer, Field::STORE_YES | Field::INDEX_UNTOKENIZED));
/* indexed but not stored */
//STRCPY_AtoT(buffer, unaccentedTitle.c_str(), MAX_BUFFER_SIZE);
::mbstowcs(buffer,unaccentedTitle.c_str(),MAX_BUFFER_SIZE);
Field *titleField = new Field(_T("utitle"), buffer, Field::STORE_NO | Field::INDEX_TOKENIZED);
titleField->setBoost(getTitleBoostFactor(content.size()));
doc.add(*titleField);
//STRCPY_AtoT(buffer, keywords.c_str(), MAX_BUFFER_SIZE);
::mbstowcs(buffer,keywords.c_str(),MAX_BUFFER_SIZE);
Field *keywordsField = new Field(_T("keywords"), buffer, Field::STORE_NO | Field::INDEX_TOKENIZED);
keywordsField->setBoost(keywordsBoostFactor);
doc.add(*keywordsField);
//STRCPY_AtoT(buffer, content.c_str(), MAX_BUFFER_SIZE);
::mbstowcs(buffer,content.c_str(),MAX_BUFFER_SIZE);
doc.add(*_CLNEW Field(_T("content"), buffer, Field::STORE_NO | Field::INDEX_TOKENIZED)); // TODO: TermVectors if you want to highlight
/* Add the document to the index */
this->writer->addDocument(&doc);
}
/* Intentionally empty: this backend performs no work when asked to flush. */
void CluceneIndexer::flush() {}
/* Finalizes the index once all documents have been added: switches the
 * compound file format back on, optimizes, closes and destroys the writer,
 * then drops this object's reference on the directory.
 *
 * The writer pointer is reset to NULL after deletion and checked on entry,
 * so calling this method a second time does not touch a freed writer. */
void CluceneIndexer::indexingPostlude() {
  if (this->writer != NULL) {
    this->writer->setUseCompoundFile(true);
    this->writer->optimize();
    this->writer->close();
    delete this->writer;
    this->writer = NULL;
  }

  _CLDECDELETE(this->dir);
}
}

View File

@ -1,65 +0,0 @@
/*
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#ifndef KIWIX_CLUCENE_INDEXER_H
#define KIWIX_CLUCENE_INDEXER_H
#include <CLucene.h>
#include <assert.h>
#include "indexer.h"
/* Number of TCHAR elements in the file-scope conversion buffer that
 * cluceneIndexer.cpp declares as `TCHAR buffer[MAX_BUFFER_SIZE]`. */
#define MAX_BUFFER_SIZE 4200000
/* NOTE(review): these using-directives sit in a header, so every file that
 * includes it gets the names below injected into its global scope. The
 * class declaration that follows relies on them for unqualified names such
 * as string, FSDirectory and IndexWriter. */
using namespace std;
using namespace lucene::analysis;
using namespace lucene::index;
using namespace lucene::document;
using namespace lucene::queryParser;
using namespace lucene::search;
using namespace lucene::store;
namespace kiwix {
/* Indexer implementation that writes articles into a CLucene index stored
 * in a directory on disk.
 * NOTE(review): the protected methods look like hooks called by the
 * Indexer base class - confirm in indexer.h. */
class CluceneIndexer : public Indexer {

 public:
  CluceneIndexer();

 protected:
  /* Opens the index at indexPath and creates the writer. */
  void indexingPrelude(const std::string indexPath);

  /* Adds one article (a single CLucene document) to the index. */
  void index(const std::string &url,
             const std::string &title,
             const std::string &unaccentedTitle,
             const std::string &keywords,
             const std::string &content,
             const std::string &snippet,
             const std::string &size,
             const std::string &wordCount);

  /* Empty in this backend. */
  void flush();

  /* Optimizes and closes the index, then releases writer and directory. */
  void indexingPostlude();

  /* Directory holding the index; assigned by indexingPrelude(). */
  FSDirectory* dir;

  /* Writer bound to dir; created by indexingPrelude(). */
  IndexWriter* writer;

  /* Analyzer handed to the writer when it is created. */
  lucene::analysis::standard::StandardAnalyzer analyzer;
};
}
#endif

View File

@ -1,118 +0,0 @@
/*
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#include "cluceneSearcher.h"
namespace kiwix {
/* Index state shared by every CluceneSearcher instance; both pointers stay
 * NULL until the first instance opens the index. */
IndexSearcher* CluceneSearcher::searcher = NULL;
Directory* CluceneSearcher::dir = NULL;

/* Scratch buffer for converting query text to TCHAR; it is referenced only
 * by the query code currently disabled in searchInIndex().
 * It is declared static (internal linkage) because cluceneIndexer.cpp
 * defines a kiwix::buffer of its own: two external definitions of the
 * same object break the one-definition rule as soon as both files are
 * linked into the same binary. */
static TCHAR buffer[MAX_BUFFER_SIZE];
/* Constructor: only an instance created while no shared searcher exists
 * opens the index; later instances reuse the one already opened. */
CluceneSearcher::CluceneSearcher(const string &cluceneDirectoryPath)
  : kiwix::Searcher() {
  const bool indexAlreadyOpen = (searcher != NULL);

  if (!indexAlreadyOpen) {
    this->openIndex(cluceneDirectoryPath);
  }
}
/* Open the CLucene index stored in directoryPath for reading and keep the
 * directory and searcher in the class-wide statics. */
void CluceneSearcher::openIndex(const string &directoryPath) {
  cout << "Open index folder at " << directoryPath << endl;

  const char *path = directoryPath.c_str();
  dir = FSDirectory::getDirectory(path, false);
  searcher = new IndexSearcher(dir);
}
/* Per-instance close: intentionally empty. The shared directory and
 * searcher are released by the static terminate() instead. */
void CluceneSearcher::closeIndex() {}
/* Releases the index shared by all CluceneSearcher instances.
 *
 * Safe to call when no index was ever opened (both statics still NULL).
 * Both statics are reset to NULL afterwards, so that the constructor's
 * `searcher == NULL` check reopens the index for the next instance rather
 * than reusing a deleted searcher, and a second call is a no-op. */
void CluceneSearcher::terminate()
{
  if (dir != NULL) {
    dir->close();
  }

  if (searcher != NULL) {
    searcher->close();
    delete searcher;
    searcher = NULL;
  }

  if (dir != NULL) {
    /* Drop our reference on the directory, then forget the pointer. */
    _CLLDECDELETE(dir);
    dir = NULL;
  }
}
/* Placeholder for a TCHAR -> std::string conversion.
 * The argument is ignored and an empty string is always returned; the
 * real conversion below was disabled for CLucene 2.3. */
std::string toString(const TCHAR* s){
  /* Comment out for CLucene 2.3
     int32_t len = _tcslen(s);
     char* buf = new char[len+1];
     STRCPY_WtoA(buf,s,len+1);
     string ret = buf;
     delete[] buf;
     return ret;
  */
  return std::string();
}
/* Search strings in the database.
 * Currently a no-op: the whole query pipeline below was disabled for
 * CLucene 2.3, so none of the arguments is read and no result is produced. */
void CluceneSearcher::searchInIndex(string &search, const unsigned int resultStart,
				    const unsigned int resultEnd, const bool verbose) {

  /* Disabled: parse the query, run it and print the first hits.

  Comment out for Clucene 2.3
  lucene::analysis::standard::StandardAnalyzer* analyzer = new lucene::analysis::standard::StandardAnalyzer();
  QueryParser* parser = new QueryParser(_T("content"), analyzer);
  STRCPY_AtoT(buffer, search.c_str(), MAX_BUFFER_SIZE);
  Query* query = parser->parse(buffer);
  delete parser;
  delete analyzer;
  cout << "Query: " << search << endl;
  wcout << "Buffer: " << buffer << endl;
  if (query == NULL){
  cout << "Hits length:0 (null query)" << endl;
  return;
  }
  const wchar_t* querystring = query->toString();
  wcout << L"Query2string: " << querystring << endl;
  delete[] querystring;
  // Search
  Hits* hits = searcher->search(query);
  cout << "Hits length:" << hits->length() << endl;
  for (int32_t i=0; i < hits->length() && i<10; i++) {
  Document* d = &hits->doc(i);
  _tprintf(_T("#%d. %s, url: %s (score: %f)\n"),
  i + 1, d->get(_T("title")), d->get(_T("url")),
  hits->score(i));
  }
  */

  /* Disabled: copy each hit into this->results.

  Result result;
  result.url = doc.get_data();
  result.title = doc.get_value(0);
  result.score = i.get_percent();
  this->results.push_back(result);
  */

  // delete hits;
  // delete query;
}
}

View File

@ -1,61 +0,0 @@
/*
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#ifndef KIWIX_CLUCENE_SEARCHER_H
#define KIWIX_CLUCENE_SEARCHER_H
#include <CLucene.h>
#include <assert.h>
#include <CLucene/queryParser/MultiFieldQueryParser.h>
#include "searcher.h"
/* Number of TCHAR elements in the file-scope conversion buffer that
 * cluceneSearcher.cpp declares as `TCHAR buffer[MAX_BUFFER_SIZE]`. */
#define MAX_BUFFER_SIZE 4200000
/* NOTE(review): these using-directives sit in a header, so every file that
 * includes it gets the names below injected into its global scope. The
 * class declaration that follows relies on them for unqualified names such
 * as string, IndexSearcher and Directory. */
using namespace std;
using namespace lucene::analysis;
using namespace lucene::index;
using namespace lucene::document;
using namespace lucene::queryParser;
using namespace lucene::search;
using namespace lucene::store;
namespace kiwix {
/* Searcher implementation backed by a CLucene index.
 * The opened directory and the searcher are static, i.e. shared by every
 * instance, and are released through the static terminate(). */
class CluceneSearcher : public Searcher {

 public:
  /* Opens the index at cluceneDirectoryPath unless one is already open. */
  CluceneSearcher(const std::string &cluceneDirectoryPath);

  /* Runs a query against the index (the implementation is a no-op). */
  void searchInIndex(std::string &search, const unsigned int resultStart,
                     const unsigned int resultEnd, const bool verbose=false);

  /* Closes and frees the shared directory and searcher. */
  static void terminate();

 protected:
  /* Empty in this backend. */
  void closeIndex();

  /* Opens the index directory and creates the shared searcher. */
  void openIndex(const std::string &cluceneDirectoryPath);

  /* Searcher shared by all instances; NULL until an index is opened. */
  static IndexSearcher* searcher;

  /* Directory the shared searcher reads from. */
  static Directory* dir;
};
}
#endif