+ introduce standard boost factor for title & keywords during the index process

This commit is contained in:
kelson42 2010-11-05 14:51:36 +00:00
parent 4484623e67
commit e48d5b5a24
4 changed files with 29 additions and 11 deletions

View File

@ -20,19 +20,30 @@ namespace kiwix {
Document doc; Document doc;
/* Not indexed */ /* Not indexed but stored */
doc.add(*_CLNEW Field((const wchar_t*)("title"), (const wchar_t*)(title.c_str()), doc.add(*_CLNEW Field((const wchar_t*)("title"), (const wchar_t*)(title.c_str()),
Field::STORE_YES | Field::INDEX_UNTOKENIZED)); Field::STORE_YES | Field::INDEX_UNTOKENIZED));
doc.add(*_CLNEW Field((const wchar_t*)("url"), (const wchar_t*)(url.c_str()), doc.add(*_CLNEW Field((const wchar_t*)("url"), (const wchar_t*)(url.c_str()),
Field::STORE_YES | Field::INDEX_UNTOKENIZED)); Field::STORE_YES | Field::INDEX_UNTOKENIZED));
/* indexed */ /* indexed but not stored */
doc.add(*_CLNEW Field((const wchar_t*)("unaccentedTitle"), (const wchar_t*)(unaccentedTitle.c_str()), Field *titleField = new Field((const wchar_t*)("unaccentedTitle"),
Field::STORE_NO | Field::INDEX_TOKENIZED)); (const wchar_t*)(unaccentedTitle.c_str()),
doc.add(*_CLNEW Field((const wchar_t*)("keywords"), (const wchar_t*)(keywords.c_str()), Field::STORE_NO | Field::INDEX_TOKENIZED);
Field::STORE_NO | Field::INDEX_TOKENIZED)); titleField->setBoost(getTitleBoostFactor(content.size()));
doc.add(*_CLNEW Field((const wchar_t*)("content"), (const wchar_t*)(content.c_str()), doc.add(*titleField);
Field *keywordsField = new Field((const wchar_t*)("keywords"),
(const wchar_t*)(keywords.c_str()),
Field::STORE_NO | Field::INDEX_TOKENIZED);
keywordsField->setBoost(keywordsBoostFactor);
doc.add(*keywordsField);
doc.add(*_CLNEW Field((const wchar_t*)("content"),
(const wchar_t*)(content.c_str()),
Field::STORE_NO | Field::INDEX_TOKENIZED)); Field::STORE_NO | Field::INDEX_TOKENIZED));
/* Add the document to the index */
this->writer->addDocument(&doc); this->writer->addDocument(&doc);
} }

View File

@ -19,7 +19,8 @@ namespace kiwix {
Indexer::Indexer(const string &zimFilePath) Indexer::Indexer(const string &zimFilePath)
: zimFileHandler(NULL), : zimFileHandler(NULL),
articleCount(0), articleCount(0),
stepSize(0) { stepSize(0),
keywordsBoostFactor(3) {
/* Open the ZIM file */ /* Open the ZIM file */
this->zimFileHandler = new zim::File(zimFilePath); this->zimFileHandler = new zim::File(zimFilePath);

View File

@ -50,6 +50,12 @@ namespace kiwix {
/* Others */ /* Others */
unsigned int articleCount; unsigned int articleCount;
float stepSize; float stepSize;
/* Boost factor */
const unsigned int keywordsBoostFactor;
/*
 * Compute the boost factor to apply to the title of an article.
 *
 * The factor starts at 1 and increases by 1 for every full 500 units
 * of contentLength (callers pass content.size()), so the title of a
 * longer article gets a larger boost.
 *
 * contentLength: length of the article content.
 * Returns contentLength / 500 + 1 (integer division), always >= 1.
 */
inline unsigned int getTitleBoostFactor(const unsigned int contentLength) {
  /* Content length covered by each additional boost step */
  const unsigned int contentLengthPerBoostStep = 500;
  return contentLength / contentLengthPerBoostStep + 1;
}
}; };
} }

View File

@ -46,12 +46,12 @@ namespace kiwix {
/* Index the title */ /* Index the title */
if (!unaccentedTitle.empty()) { if (!unaccentedTitle.empty()) {
this->indexer.index_text_without_positions(unaccentedTitle, content.size() / 500 + 1); this->indexer.index_text_without_positions(unaccentedTitle, this->getTitleBoostFactor(content.size()));
} }
/* Index the keywords */ /* Index the keywords */
if (!keywords.empty()) { if (!keywords.empty()) {
this->indexer.index_text_without_positions(keywords, 3); this->indexer.index_text_without_positions(keywords, keywordsBoostFactor);
} }
/* Index the content */ /* Index the content */