From 801e6226444dd3dc80fab6190ce76e9ffe8dca72 Mon Sep 17 00:00:00 2001 From: Chris Li Date: Tue, 7 Jun 2016 14:32:11 -0400 Subject: [PATCH] add "eng" to stop word map + pragma mark in indexer.cpp + progress calculation --- src/common/kiwix/indexer.cpp | 51 +++++++++++++++++++++++++----------- src/common/resourceTools.cpp | 0 src/common/resourceTools.h | 3 ++- 3 files changed, 37 insertions(+), 17 deletions(-) mode change 100644 => 100755 src/common/resourceTools.cpp mode change 100644 => 100755 src/common/resourceTools.h diff --git a/src/common/kiwix/indexer.cpp b/src/common/kiwix/indexer.cpp index 53aa0236f..2bd442fd8 100755 --- a/src/common/kiwix/indexer.cpp +++ b/src/common/kiwix/indexer.cpp @@ -71,8 +71,14 @@ namespace kiwix { while (getline(file, stopWord, '\n')) { this->stopWords.push_back(stopWord); } + + if (this->verboseFlag) { + std::cout << "Read stop words, lang code:" << languageCode << ", count:" << this->stopWords.size() << std::endl; + } } - + +#pragma mark - Extractor + /* Article extractor methods */ void *Indexer::extractArticles(void *ptr) { pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, NULL); @@ -89,7 +95,7 @@ namespace kiwix { unsigned int readArticleCount = 0; unsigned int currentProgression = 0; self->setProgression(currentProgression); - unsigned int tmpProgression; + unsigned int newProgress; /* StopWords */ self->readStopWords(reader.getLanguage()); @@ -101,35 +107,34 @@ namespace kiwix { zim::Article currentArticle; while (currentOffset < lastOffset) { - if (self->getVerboseFlag()) { - std::cout << "currentOffset:" << currentOffset << " lastOffset:" << lastOffset - << " readArticleCount:" << readArticleCount << " totalArticleCount:" << articleCount <getVerboseFlag()) { +// std::cout << "currentOffset:" << currentOffset << " lastOffset:" << lastOffset +// << " readArticleCount:" << readArticleCount << " totalArticleCount:" << articleCount <getArticle(currentOffset); if (!currentArticle.isRedirect()) { - /* Add articles to the queue */ - indexerToken token; + /* Add articles to the queue */ + indexerToken token; token.title = currentArticle.getTitle(); token.url = currentArticle.getLongUrl(); token.content = string(currentArticle.getData().data(), currentArticle.getData().size()); self->pushToParseQueue(token); readArticleCount += 1; + + /* Update progress */ if (self->progressCallback) { self->progressCallback(readArticleCount, articleCount); } + newProgress = (unsigned int)((float)readArticleCount / (float)articleCount * 100); + if (newProgress != currentProgression) { + self->setProgression(newProgress); + } } currentOffset += 1; - /* Update the progression counter (in percent) */ - tmpProgression = (unsigned int)((float)readArticleCount/(float)articleCount*100 - 1); - if (tmpProgression > currentProgression) { - currentProgression = tmpProgression; - self->setProgression(currentProgression); - } - /* Test if the thread should be cancelled */ pthread_testcancel(); } @@ -152,6 +157,8 @@ namespace kiwix { return retVal; } +#pragma mark - Parser + /* Article parser methods */ void *Indexer::parseArticles(void *ptr) { pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, NULL); @@ -223,6 +230,8 @@ namespace kiwix { pthread_mutex_unlock(&articleParserRunningMutex); return retVal; } + +#pragma mark - Indexer /* Article indexer methods */ void *Indexer::indexArticles(void *ptr) { @@ -280,6 +289,8 @@ namespace kiwix { pthread_mutex_unlock(&articleIndexerRunningMutex); return retVal; } + +#pragma mark - Parse Queue /* ToParseQueue methods */ bool Indexer::isToParseQueueEmpty() { @@ -317,6 +328,8 @@ namespace kiwix { return true; } + +#pragma mark - Index Queue /* ToIndexQueue methods */ bool Indexer::isToIndexQueueEmpty() { @@ -354,6 +367,8 @@ namespace kiwix { return true; } + +#pragma mark - Properties Getter & Setter /* ZIM & Index methods */ void Indexer::setZimPath(const string path) { @@ -420,6 +435,8 @@ namespace kiwix { pthread_mutex_unlock(&zimIdMutex); return retVal; } + +#pragma mark - Status Management /* Manage */ bool Indexer::start(const string zimPath, const string indexPath, ProgressCallback callback) { @@ -493,6 +510,8 @@ namespace kiwix { return true; } + +#pragma mark - verbose /* Manage the verboseFlag */ void Indexer::setVerboseFlag(const bool value) { diff --git a/src/common/resourceTools.cpp b/src/common/resourceTools.cpp old mode 100644 new mode 100755 diff --git a/src/common/resourceTools.h b/src/common/resourceTools.h old mode 100644 new mode 100755 index 32e06efec..daff8121a --- a/src/common/resourceTools.h +++ b/src/common/resourceTools.h @@ -39180,7 +39180,8 @@ const unsigned char server_include_html_part[]={ static std::map > createResourceMap() { std::map > m; - m["stopwords/en"] = std::pair (stopwords_en, sizeof stopwords_en); + m["stopwords/en"] = std::pair (stopwords_en, sizeof stopwords_en); + m["stopwords/eng"] = std::pair (stopwords_en, sizeof stopwords_en); m["stopwords/fra"] = std::pair (stopwords_fra, sizeof stopwords_fra); m["stopwords/he"] = std::pair (stopwords_he, sizeof stopwords_he); m["results.tmpl"] = std::pair (results_tmpl, sizeof results_tmpl);