// Patch summary: move the search boost factors into kiwix::Indexer and use
// them from both indexer implementations touched here.
//
// - indexer.h: adds the const member `keywordsBoostFactor` and the helper
//   getTitleBoostFactor(contentLength), which returns contentLength / 500 + 1
//   (unsigned integer division: 0..499 -> 1, 500..999 -> 2, ...).
// - indexer.cpp: the Indexer constructor initialises keywordsBoostFactor to 3.
// - xapianIndexer.cpp: the inline expressions `content.size() / 500 + 1` and
//   `3` passed to index_text_without_positions() are replaced by
//   getTitleBoostFactor() and keywordsBoostFactor (same formula, same constant).
// - cluceneIndexer.cpp: the "unaccentedTitle" and "keywords" fields are now
//   built as named Field objects so that setBoost() can be called on them with
//   those two factors before they are added to the document.
//
// NOTE(review): the two new Field objects are allocated with plain `new`,
//   while every other field in the same function uses `_CLNEW` - presumably
//   these should match; confirm.
// NOTE(review): the first comment becomes "Not indexed but stored", yet those
//   fields carry Field::INDEX_UNTOKENIZED; the flag name suggests they are
//   indexed without tokenisation - confirm against the CLucene Field docs.
// NOTE(review): getTitleBoostFactor() returns `const unsigned int` by value
//   (the const has no effect there), repeats `inline` on an in-class
//   definition, and is not a const member although it reads no members.
// NOTE(review): both boost values are unsigned int; presumably setBoost()
//   takes a floating-point factor and the conversion is implicit - confirm.
// NOTE: indentation and blank-line placement inside the hunks could not be
//   checked against the target files; verify before applying this patch.
diff --git a/src/common/kiwix/cluceneIndexer.cpp b/src/common/kiwix/cluceneIndexer.cpp
index f2502e882..36e630ecc 100644
--- a/src/common/kiwix/cluceneIndexer.cpp
+++ b/src/common/kiwix/cluceneIndexer.cpp
@@ -20,19 +20,30 @@ namespace kiwix {
 
     Document doc;
 
-    /* Not indexed */
+    /* Not indexed but stored */
     doc.add(*_CLNEW Field((const wchar_t*)("title"), (const wchar_t*)(title.c_str()),
                           Field::STORE_YES | Field::INDEX_UNTOKENIZED));
     doc.add(*_CLNEW Field((const wchar_t*)("url"), (const wchar_t*)(url.c_str()),
                           Field::STORE_YES | Field::INDEX_UNTOKENIZED));
 
-    /* indexed */
-    doc.add(*_CLNEW Field((const wchar_t*)("unaccentedTitle"), (const wchar_t*)(unaccentedTitle.c_str()),
-                          Field::STORE_NO | Field::INDEX_TOKENIZED));
-    doc.add(*_CLNEW Field((const wchar_t*)("keywords"), (const wchar_t*)(keywords.c_str()),
-                          Field::STORE_NO | Field::INDEX_TOKENIZED));
-    doc.add(*_CLNEW Field((const wchar_t*)("content"), (const wchar_t*)(content.c_str()),
+    /* indexed but not stored */
+    Field *titleField = new Field((const wchar_t*)("unaccentedTitle"),
+                                  (const wchar_t*)(unaccentedTitle.c_str()),
+                                  Field::STORE_NO | Field::INDEX_TOKENIZED);
+    titleField->setBoost(getTitleBoostFactor(content.size()));
+    doc.add(*titleField);
+
+    Field *keywordsField = new Field((const wchar_t*)("keywords"),
+                                     (const wchar_t*)(keywords.c_str()),
+                                     Field::STORE_NO | Field::INDEX_TOKENIZED);
+    keywordsField->setBoost(keywordsBoostFactor);
+    doc.add(*keywordsField);
+
+    doc.add(*_CLNEW Field((const wchar_t*)("content"),
+                          (const wchar_t*)(content.c_str()),
                           Field::STORE_NO | Field::INDEX_TOKENIZED));
+
+    /* Add the document to the index */
     this->writer->addDocument(&doc);
   }
 
diff --git a/src/common/kiwix/indexer.cpp b/src/common/kiwix/indexer.cpp
index 4b91b67ac..3bbb4b82e 100644
--- a/src/common/kiwix/indexer.cpp
+++ b/src/common/kiwix/indexer.cpp
@@ -19,7 +19,8 @@ namespace kiwix {
   Indexer::Indexer(const string &zimFilePath) :
     zimFileHandler(NULL),
     articleCount(0),
-    stepSize(0) {
+    stepSize(0),
+    keywordsBoostFactor(3) {
 
     /* Open the ZIM file */
     this->zimFileHandler = new zim::File(zimFilePath);
diff --git a/src/common/kiwix/indexer.h b/src/common/kiwix/indexer.h
index 27fd521bb..6ecbe8b76 100644
--- a/src/common/kiwix/indexer.h
+++ b/src/common/kiwix/indexer.h
@@ -32,7 +32,7 @@ namespace kiwix {
                                   const string &content) = 0;
     virtual void indexNextPercentPost() = 0;
     virtual void stopIndexing() = 0;
-    
+
     /* ZIM file handling */
     zim::File* zimFileHandler;
     zim::size_type firstArticleOffset;
@@ -50,6 +50,12 @@ namespace kiwix {
     /* Others */
     unsigned int articleCount;
     float stepSize;
+
+    /* Boost factor */
+    const unsigned int keywordsBoostFactor;
+    inline const unsigned int getTitleBoostFactor(const unsigned int contentLength) {
+      return contentLength / 500 + 1;
+    }
   };
 
 }
diff --git a/src/common/kiwix/xapianIndexer.cpp b/src/common/kiwix/xapianIndexer.cpp
index 12308d68e..e3d95d76c 100644
--- a/src/common/kiwix/xapianIndexer.cpp
+++ b/src/common/kiwix/xapianIndexer.cpp
@@ -46,12 +46,12 @@ namespace kiwix {
 
     /* Index the title */
     if (!unaccentedTitle.empty()) {
-      this->indexer.index_text_without_positions(unaccentedTitle, content.size() / 500 + 1);
+      this->indexer.index_text_without_positions(unaccentedTitle, this->getTitleBoostFactor(content.size()));
     }
 
     /* Index the keywords */
     if (!keywords.empty()) {
-      this->indexer.index_text_without_positions(keywords, 3);
+      this->indexer.index_text_without_positions(keywords, keywordsBoostFactor);
     }
 
     /* Index the content */