mirror of https://github.com/kiwix/libkiwix.git
+ introduce standard boost factor for title & keywords during the indexing process
This commit is contained in:
parent
4484623e67
commit
e48d5b5a24
|
@ -20,19 +20,30 @@ namespace kiwix {
|
|||
|
||||
Document doc;
|
||||
|
||||
/* Not indexed */
|
||||
/* Not indexed but stored */
|
||||
doc.add(*_CLNEW Field((const wchar_t*)("title"), (const wchar_t*)(title.c_str()),
|
||||
Field::STORE_YES | Field::INDEX_UNTOKENIZED));
|
||||
doc.add(*_CLNEW Field((const wchar_t*)("url"), (const wchar_t*)(url.c_str()),
|
||||
Field::STORE_YES | Field::INDEX_UNTOKENIZED));
|
||||
|
||||
/* indexed */
|
||||
doc.add(*_CLNEW Field((const wchar_t*)("unaccentedTitle"), (const wchar_t*)(unaccentedTitle.c_str()),
|
||||
Field::STORE_NO | Field::INDEX_TOKENIZED));
|
||||
doc.add(*_CLNEW Field((const wchar_t*)("keywords"), (const wchar_t*)(keywords.c_str()),
|
||||
Field::STORE_NO | Field::INDEX_TOKENIZED));
|
||||
doc.add(*_CLNEW Field((const wchar_t*)("content"), (const wchar_t*)(content.c_str()),
|
||||
/* indexed but not stored */
|
||||
Field *titleField = new Field((const wchar_t*)("unaccentedTitle"),
|
||||
(const wchar_t*)(unaccentedTitle.c_str()),
|
||||
Field::STORE_NO | Field::INDEX_TOKENIZED);
|
||||
titleField->setBoost(getTitleBoostFactor(content.size()));
|
||||
doc.add(*titleField);
|
||||
|
||||
Field *keywordsField = new Field((const wchar_t*)("keywords"),
|
||||
(const wchar_t*)(keywords.c_str()),
|
||||
Field::STORE_NO | Field::INDEX_TOKENIZED);
|
||||
keywordsField->setBoost(keywordsBoostFactor);
|
||||
doc.add(*keywordsField);
|
||||
|
||||
doc.add(*_CLNEW Field((const wchar_t*)("content"),
|
||||
(const wchar_t*)(content.c_str()),
|
||||
Field::STORE_NO | Field::INDEX_TOKENIZED));
|
||||
|
||||
/* Add the document to the index */
|
||||
this->writer->addDocument(&doc);
|
||||
}
|
||||
|
||||
|
|
|
@ -19,7 +19,8 @@ namespace kiwix {
|
|||
Indexer::Indexer(const string &zimFilePath)
|
||||
: zimFileHandler(NULL),
|
||||
articleCount(0),
|
||||
stepSize(0) {
|
||||
stepSize(0),
|
||||
keywordsBoostFactor(3) {
|
||||
|
||||
/* Open the ZIM file */
|
||||
this->zimFileHandler = new zim::File(zimFilePath);
|
||||
|
|
|
@ -32,7 +32,7 @@ namespace kiwix {
|
|||
const string &content) = 0;
|
||||
virtual void indexNextPercentPost() = 0;
|
||||
virtual void stopIndexing() = 0;
|
||||
|
||||
|
||||
/* ZIM file handling */
|
||||
zim::File* zimFileHandler;
|
||||
zim::size_type firstArticleOffset;
|
||||
|
@ -50,6 +50,12 @@ namespace kiwix {
|
|||
/* Others */
|
||||
unsigned int articleCount;
|
||||
float stepSize;
|
||||
|
||||
/* Boost factor */
|
||||
const unsigned int keywordsBoostFactor;
|
||||
/* Compute the boost factor used for a title from a content length.
 *
 * contentLength: length of the article content (the callers visible in
 *                this change pass content.size()).
 * Returns contentLength / 500 + 1 using integer division, i.e. 1 for
 * lengths below 500, 2 for 500..999, and so on; never less than 1.
 *
 * The return type is a plain unsigned int: a top-level const on a scalar
 * returned by value is ignored by the language and only produces an
 * "ignored qualifiers" compiler warning. */
inline unsigned int getTitleBoostFactor(const unsigned int contentLength) {
  return contentLength / 500 + 1;
}
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
@ -46,12 +46,12 @@ namespace kiwix {
|
|||
|
||||
/* Index the title */
|
||||
if (!unaccentedTitle.empty()) {
|
||||
this->indexer.index_text_without_positions(unaccentedTitle, content.size() / 500 + 1);
|
||||
this->indexer.index_text_without_positions(unaccentedTitle, this->getTitleBoostFactor(content.size()));
|
||||
}
|
||||
|
||||
/* Index the keywords */
|
||||
if (!keywords.empty()) {
|
||||
this->indexer.index_text_without_positions(keywords, 3);
|
||||
this->indexer.index_text_without_positions(keywords, keywordsBoostFactor);
|
||||
}
|
||||
|
||||
/* Index the content */
|
||||
|
|
Loading…
Reference in New Issue