diff --git a/src/common/kiwix/cluceneIndexer.cpp b/src/common/kiwix/cluceneIndexer.cpp deleted file mode 100644 index 0e939a7ec..000000000 --- a/src/common/kiwix/cluceneIndexer.cpp +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright 2011 Emmanuel Engelhart - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - * MA 02110-1301, USA. - */ - -#include "cluceneIndexer.h" - -namespace kiwix { - - TCHAR buffer[MAX_BUFFER_SIZE]; - - CluceneIndexer::CluceneIndexer() { - } - - void CluceneIndexer::indexingPrelude(const string indexPath) { - this->dir = FSDirectory::getDirectory(indexPath.c_str(), true); - this->writer = new IndexWriter(this->dir, &analyzer, true); - this->writer->setUseCompoundFile(false); - } - - void CluceneIndexer::index(const string &url, - const string &title, - const string &unaccentedTitle, - const string &keywords, - const string &content, - const string &snippet, - const string &size, - const string &wordCount) { - - Document doc; - - /* Not indexed but stored */ - //STRCPY_AtoT(buffer, title.c_str(), MAX_BUFFER_SIZE); - ::mbstowcs(buffer,title.c_str(),MAX_BUFFER_SIZE); - doc.add(*_CLNEW Field(_T("title"), buffer, Field::STORE_YES | Field::INDEX_UNTOKENIZED)); // TODO: Why store, not analyzed? what is utitle? - - //STRCPY_AtoT(buffer, url.c_str(), MAX_BUFFER_SIZE); - ::mbstowcs(buffer,url.c_str(),MAX_BUFFER_SIZE); - doc.add(*_CLNEW Field(_T("url"), buffer, Field::STORE_YES | Field::INDEX_UNTOKENIZED)); - - /* indexed but not stored */ - //STRCPY_AtoT(buffer, unaccentedTitle.c_str(), MAX_BUFFER_SIZE); - ::mbstowcs(buffer,unaccentedTitle.c_str(),MAX_BUFFER_SIZE); - Field *titleField = new Field(_T("utitle"), buffer, Field::STORE_NO | Field::INDEX_TOKENIZED); - titleField->setBoost(getTitleBoostFactor(content.size())); - doc.add(*titleField); - - //STRCPY_AtoT(buffer, keywords.c_str(), MAX_BUFFER_SIZE); - ::mbstowcs(buffer,keywords.c_str(),MAX_BUFFER_SIZE); - Field *keywordsField = new Field(_T("keywords"), buffer, Field::STORE_NO | Field::INDEX_TOKENIZED); - keywordsField->setBoost(keywordsBoostFactor); - doc.add(*keywordsField); - - //STRCPY_AtoT(buffer, content.c_str(), MAX_BUFFER_SIZE); - ::mbstowcs(buffer,content.c_str(),MAX_BUFFER_SIZE); - doc.add(*_CLNEW Field(_T("content"), buffer, Field::STORE_NO | Field::INDEX_TOKENIZED)); // TODO: TermVectors if you want to highlight - - /* Add the document to the index */ - this->writer->addDocument(&doc); - } - - void CluceneIndexer::flush() { - } - - void CluceneIndexer::indexingPostlude() { - this->writer->setUseCompoundFile(true); - this->writer->optimize(); - this->writer->close(); - delete this->writer; - _CLDECDELETE(this->dir); - } -} diff --git a/src/common/kiwix/cluceneIndexer.h b/src/common/kiwix/cluceneIndexer.h deleted file mode 100644 index a397a2bb7..000000000 --- a/src/common/kiwix/cluceneIndexer.h +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright 2011 Emmanuel Engelhart - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - * MA 02110-1301, USA. - */ - -#ifndef KIWIX_CLUCENE_INDEXER_H -#define KIWIX_CLUCENE_INDEXER_H - -#include -#include -#include "indexer.h" - -#define MAX_BUFFER_SIZE 4200000 - -using namespace std; - -using namespace lucene::analysis; -using namespace lucene::index; -using namespace lucene::document; -using namespace lucene::queryParser; -using namespace lucene::search; -using namespace lucene::store; - -namespace kiwix { - - class CluceneIndexer : public Indexer { - - public: - CluceneIndexer(); - - protected: - void indexingPrelude(const string indexPath); - void index(const string &url, - const string &title, - const string &unaccentedTitle, - const string &keywords, - const string &content, - const string &snippet, - const string &size, - const string &wordCount); - void flush(); - void indexingPostlude(); - - FSDirectory* dir; - IndexWriter* writer; - lucene::analysis::standard::StandardAnalyzer analyzer; - }; - -} - -#endif diff --git a/src/common/kiwix/cluceneSearcher.cpp b/src/common/kiwix/cluceneSearcher.cpp deleted file mode 100644 index 7b1425252..000000000 --- a/src/common/kiwix/cluceneSearcher.cpp +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Copyright 2011 Emmanuel Engelhart - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - * MA 02110-1301, USA. - */ - -#include "cluceneSearcher.h" - -namespace kiwix { - - IndexSearcher* CluceneSearcher::searcher = NULL; - Directory* CluceneSearcher::dir = NULL; - - TCHAR buffer[MAX_BUFFER_SIZE]; - - /* Constructor */ - CluceneSearcher::CluceneSearcher(const string &cluceneDirectoryPath) - : kiwix::Searcher() { - if (searcher == NULL) - this->openIndex(cluceneDirectoryPath); - } - - /* Open Clucene readable database */ - void CluceneSearcher::openIndex(const string &directoryPath) { - cout << "Open index folder at " << directoryPath << endl; - dir = FSDirectory::getDirectory(directoryPath.c_str(), false); - searcher = new IndexSearcher(dir); - } - - /* Close Clucene writable database */ - void CluceneSearcher::closeIndex() { - } - -void CluceneSearcher::terminate() -{ - dir->close(); - searcher->close(); - delete searcher; - _CLLDECDELETE(dir); -} - -std::string toString(const TCHAR* s){ - /* Comment out for CLucene 2.3 - int32_t len = _tcslen(s); - char* buf = new char[len+1]; - STRCPY_WtoA(buf,s,len+1); - string ret = buf; - delete[] buf; - */ - return ""; - // return ret; -} - - /* Search strings in the database */ - void CluceneSearcher::searchInIndex(string &search, const unsigned int resultStart, - const unsigned int resultEnd, const bool verbose) { - - // Parse query - /* Comment out for Clucene 2.3 - lucene::analysis::standard::StandardAnalyzer* analyzer = new lucene::analysis::standard::StandardAnalyzer(); - QueryParser* parser = new QueryParser(_T("content"), analyzer); - STRCPY_AtoT(buffer, search.c_str(), MAX_BUFFER_SIZE); - - Query* query = parser->parse(buffer); - delete parser; - delete analyzer; - - cout << "Query: " << search << endl; - wcout << "Buffer: " << buffer << endl; - - if (query == NULL){ - cout << "Hits length:0 (null query)" << endl; - return; - } - - const wchar_t* querystring = query->toString(); - wcout << L"Query2string: " << querystring << endl; - delete[] querystring; - - // Search - Hits* hits = searcher->search(query); - cout << "Hits length:" << hits->length() << endl; - - for (int32_t i=0; i < hits->length() && i<10; i++) { - Document* d = &hits->doc(i); - _tprintf(_T("#%d. %s, url: %s (score: %f)\n"), - i + 1, d->get(_T("title")), d->get(_T("url")), - hits->score(i)); - } - */ -/* - Result result; - result.url = doc.get_data(); - result.title = doc.get_value(0); - result.score = i.get_percent(); - - this->results.push_back(result); -*/ - -// delete hits; - // delete query; - - return; - } -} diff --git a/src/common/kiwix/cluceneSearcher.h b/src/common/kiwix/cluceneSearcher.h deleted file mode 100644 index f6c2f4a35..000000000 --- a/src/common/kiwix/cluceneSearcher.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright 2011 Emmanuel Engelhart - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - * MA 02110-1301, USA. - */ - -#ifndef KIWIX_CLUCENE_SEARCHER_H -#define KIWIX_CLUCENE_SEARCHER_H - -#include -#include -#include -#include "searcher.h" - -#define MAX_BUFFER_SIZE 4200000 - -using namespace std; -using namespace lucene::analysis; -using namespace lucene::index; -using namespace lucene::document; -using namespace lucene::queryParser; -using namespace lucene::search; -using namespace lucene::store; - -namespace kiwix { - - class CluceneSearcher : public Searcher { - - public: - CluceneSearcher(const string &cluceneDirectoryPath); - - void searchInIndex(string &search, const unsigned int resultStart, - const unsigned int resultEnd, const bool verbose=false); - - static void terminate(); - - protected: - void closeIndex(); - void openIndex(const string &cluceneDirectoryPath); - - static IndexSearcher* searcher; - static Directory* dir; - - }; - -} - -#endif diff --git a/src/common/kiwix/ctpp2/CTPP2VMStringLoader.cpp b/src/common/kiwix/ctpp2/CTPP2VMStringLoader.cpp index cb910dce0..206856017 100644 --- a/src/common/kiwix/ctpp2/CTPP2VMStringLoader.cpp +++ b/src/common/kiwix/ctpp2/CTPP2VMStringLoader.cpp @@ -213,11 +213,7 @@ VMStringLoader::VMStringLoader(CCHAR_P rawContent, size_t rawContentSize) } } - std::cout << "Last ?" << std::endl; - pVMMemoryCore = new VMMemoryCore(oCore); - - std::cout << "last2" << std::endl; } else {