From 1acb7b4d29167a7699341c9db2e24ba04a8e6d28 Mon Sep 17 00:00:00 2001 From: kelson42 Date: Fri, 19 Oct 2012 18:47:02 +0000 Subject: [PATCH] * NEW: Indexing progression more accurate --- src/common/kiwix/indexer.cpp | 35 ++++++++++++++++++++--------------- src/common/kiwix/reader.cpp | 6 ++++++ src/common/kiwix/reader.h | 1 + 3 files changed, 27 insertions(+), 15 deletions(-) diff --git a/src/common/kiwix/indexer.cpp b/src/common/kiwix/indexer.cpp index 444685163..d84951d7f 100644 --- a/src/common/kiwix/indexer.cpp +++ b/src/common/kiwix/indexer.cpp @@ -72,9 +72,15 @@ namespace kiwix { /* Get the number of article to index */ kiwix::Reader reader(self->getZimPath()); - unsigned int articleCount = reader.getArticleCount(); + unsigned int articleCount = reader.getGlobalCount(); self->setArticleCount(articleCount); + /* Progression */ + unsigned int readArticleCount = 0; + unsigned int currentProgression = 0; + self->setProgression(currentProgression); + unsigned int tmpProgression; + /* StopWords */ self->readStopWords(reader.getLanguage()); @@ -95,8 +101,16 @@ namespace kiwix { token.content = string(currentArticle.getData().data(), currentArticle.getData().size()); self->pushToParseQueue(token); } - - currentOffset++; + + readArticleCount += 1; + currentOffset += 1; + + /* Update the progression counter (in percent) */ + tmpProgression = (unsigned int)((float)readArticleCount/(float)articleCount*100 - 1); + if (tmpProgression > currentProgression) { + currentProgression = tmpProgression; + self->setProgression(currentProgression); + } /* Test if the thread should be cancelled */ pthread_testcancel(); @@ -196,11 +210,8 @@ namespace kiwix { void *Indexer::indexArticles(void *ptr) { pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, NULL); kiwix::Indexer *self = (kiwix::Indexer *)ptr; + unsigned int indexedArticleCount = 0; indexerToken token; - unsigned indexedArticleCount = 0; - unsigned int articleCount = self->getArticleCount(); - unsigned int currentProgression = self->getProgression(); - unsigned int tmpProgression; self->indexingPrelude(self->getIndexPath()); @@ -214,15 +225,8 @@ namespace kiwix { token.size, token.wordCount ); - - indexedArticleCount += 1; - /* Update the progression counter (in percent) */ - tmpProgression = (unsigned int)((float)indexedArticleCount/(float)articleCount*100); - if (tmpProgression > currentProgression) { - currentProgression = tmpProgression; - self->setProgression(currentProgression); - } + indexedArticleCount += 1; /* Make a hard-disk flush every 10.000 articles */ if (indexedArticleCount % 5000 == 0) { @@ -283,6 +287,7 @@ namespace kiwix { #else usleep(500000); #endif + if (this->getVerboseFlag()) { std::cout << "Waiting... ToParseQueue is empty for now..." << std::endl; } diff --git a/src/common/kiwix/reader.cpp b/src/common/kiwix/reader.cpp index 8b063e16e..c2fa9810a 100644 --- a/src/common/kiwix/reader.cpp +++ b/src/common/kiwix/reader.cpp @@ -119,6 +119,7 @@ namespace kiwix { return counter; } + /* Get the count of medias content in the ZIM file */ unsigned int Reader::getMediaCount() { std::map counterMap = this->parseCounterMetadata(); unsigned int counter = 0; @@ -144,6 +145,11 @@ namespace kiwix { return counter; } + /* Get the total of all items of a ZIM file, redirects included */ + unsigned int Reader::getGlobalCount() { + return this->zimFileHandler->getCountArticles(); + } + /* Return the UID of the ZIM file */ string Reader::getId() { std::ostringstream s; diff --git a/src/common/kiwix/reader.h b/src/common/kiwix/reader.h index fa658d291..0f51f3b3a 100644 --- a/src/common/kiwix/reader.h +++ b/src/common/kiwix/reader.h @@ -46,6 +46,7 @@ namespace kiwix { void reset(); unsigned int getArticleCount(); unsigned int getMediaCount(); + unsigned int getGlobalCount(); string getId(); string getRandomPageUrl(); string getFirstPageUrl();