+ introduce standard boost factor for title & keywords during the index process

This commit is contained in:
kelson42 2010-11-05 14:51:36 +00:00
parent 4484623e67
commit e48d5b5a24
4 changed files with 29 additions and 11 deletions

View File

@ -20,19 +20,30 @@ namespace kiwix {
Document doc;
/* Not indexed */
/* Not indexed but stored */
doc.add(*_CLNEW Field((const wchar_t*)("title"), (const wchar_t*)(title.c_str()),
Field::STORE_YES | Field::INDEX_UNTOKENIZED));
doc.add(*_CLNEW Field((const wchar_t*)("url"), (const wchar_t*)(url.c_str()),
Field::STORE_YES | Field::INDEX_UNTOKENIZED));
/* indexed */
doc.add(*_CLNEW Field((const wchar_t*)("unaccentedTitle"), (const wchar_t*)(unaccentedTitle.c_str()),
Field::STORE_NO | Field::INDEX_TOKENIZED));
doc.add(*_CLNEW Field((const wchar_t*)("keywords"), (const wchar_t*)(keywords.c_str()),
Field::STORE_NO | Field::INDEX_TOKENIZED));
doc.add(*_CLNEW Field((const wchar_t*)("content"), (const wchar_t*)(content.c_str()),
/* indexed but not stored */
Field *titleField = new Field((const wchar_t*)("unaccentedTitle"),
(const wchar_t*)(unaccentedTitle.c_str()),
Field::STORE_NO | Field::INDEX_TOKENIZED);
titleField->setBoost(getTitleBoostFactor(content.size()));
doc.add(*titleField);
Field *keywordsField = new Field((const wchar_t*)("keywords"),
(const wchar_t*)(keywords.c_str()),
Field::STORE_NO | Field::INDEX_TOKENIZED);
keywordsField->setBoost(keywordsBoostFactor);
doc.add(*keywordsField);
doc.add(*_CLNEW Field((const wchar_t*)("content"),
(const wchar_t*)(content.c_str()),
Field::STORE_NO | Field::INDEX_TOKENIZED));
/* Add the document to the index */
this->writer->addDocument(&doc);
}

View File

@ -19,7 +19,8 @@ namespace kiwix {
Indexer::Indexer(const string &zimFilePath)
: zimFileHandler(NULL),
articleCount(0),
stepSize(0) {
stepSize(0),
keywordsBoostFactor(3) {
/* Open the ZIM file */
this->zimFileHandler = new zim::File(zimFilePath);

View File

@ -32,7 +32,7 @@ namespace kiwix {
const string &content) = 0;
virtual void indexNextPercentPost() = 0;
virtual void stopIndexing() = 0;
/* ZIM file handling */
zim::File* zimFileHandler;
zim::size_type firstArticleOffset;
@ -50,6 +50,12 @@ namespace kiwix {
/* Others */
unsigned int articleCount;
float stepSize;
/* Boost factor */
const unsigned int keywordsBoostFactor;
/* Title boost factor derived from the content length: the result is 1
   plus the number of complete blocks of 500 in contentLength (unsigned
   integer division, so any remainder is discarded). */
inline const unsigned int getTitleBoostFactor(const unsigned int contentLength) {
  const unsigned int completeBlocks = contentLength / 500;
  return completeBlocks + 1;
}
};
}

View File

@ -46,12 +46,12 @@ namespace kiwix {
/* Index the title */
if (!unaccentedTitle.empty()) {
this->indexer.index_text_without_positions(unaccentedTitle, content.size() / 500 + 1);
this->indexer.index_text_without_positions(unaccentedTitle, this->getTitleBoostFactor(content.size()));
}
/* Index the keywords */
if (!keywords.empty()) {
this->indexer.index_text_without_positions(keywords, 3);
this->indexer.index_text_without_positions(keywords, keywordsBoostFactor);
}
/* Index the content */