* NEW: Indexing progression more accurate

This commit is contained in:
kelson42 2012-10-19 18:47:02 +00:00
parent 49ec0b5652
commit 1acb7b4d29
3 changed files with 27 additions and 15 deletions

View File

@ -72,9 +72,15 @@ namespace kiwix {
/* Get the number of articles to index */ /* Get the number of articles to index */
kiwix::Reader reader(self->getZimPath()); kiwix::Reader reader(self->getZimPath());
unsigned int articleCount = reader.getArticleCount(); unsigned int articleCount = reader.getGlobalCount();
self->setArticleCount(articleCount); self->setArticleCount(articleCount);
/* Progression */
unsigned int readArticleCount = 0;
unsigned int currentProgression = 0;
self->setProgression(currentProgression);
unsigned int tmpProgression;
/* StopWords */ /* StopWords */
self->readStopWords(reader.getLanguage()); self->readStopWords(reader.getLanguage());
@ -96,7 +102,15 @@ namespace kiwix {
self->pushToParseQueue(token); self->pushToParseQueue(token);
} }
currentOffset++; readArticleCount += 1;
currentOffset += 1;
/* Update the progression counter (in percent) */
tmpProgression = (unsigned int)((float)readArticleCount/(float)articleCount*100 - 1);
if (tmpProgression > currentProgression) {
currentProgression = tmpProgression;
self->setProgression(currentProgression);
}
/* Test if the thread should be cancelled */ /* Test if the thread should be cancelled */
pthread_testcancel(); pthread_testcancel();
@ -196,11 +210,8 @@ namespace kiwix {
void *Indexer::indexArticles(void *ptr) { void *Indexer::indexArticles(void *ptr) {
pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, NULL); pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, NULL);
kiwix::Indexer *self = (kiwix::Indexer *)ptr; kiwix::Indexer *self = (kiwix::Indexer *)ptr;
unsigned int indexedArticleCount = 0;
indexerToken token; indexerToken token;
unsigned indexedArticleCount = 0;
unsigned int articleCount = self->getArticleCount();
unsigned int currentProgression = self->getProgression();
unsigned int tmpProgression;
self->indexingPrelude(self->getIndexPath()); self->indexingPrelude(self->getIndexPath());
@ -217,13 +228,6 @@ namespace kiwix {
indexedArticleCount += 1; indexedArticleCount += 1;
/* Update the progression counter (in percent) */
tmpProgression = (unsigned int)((float)indexedArticleCount/(float)articleCount*100);
if (tmpProgression > currentProgression) {
currentProgression = tmpProgression;
self->setProgression(currentProgression);
}
/* Make a hard-disk flush every 5000 articles */ /* Make a hard-disk flush every 5000 articles */
if (indexedArticleCount % 5000 == 0) { if (indexedArticleCount % 5000 == 0) {
self->flush(); self->flush();
@ -283,6 +287,7 @@ namespace kiwix {
#else #else
usleep(500000); usleep(500000);
#endif #endif
if (this->getVerboseFlag()) { if (this->getVerboseFlag()) {
std::cout << "Waiting... ToParseQueue is empty for now..." << std::endl; std::cout << "Waiting... ToParseQueue is empty for now..." << std::endl;
} }

View File

@ -119,6 +119,7 @@ namespace kiwix {
return counter; return counter;
} }
/* Get the count of media items in the ZIM file */
unsigned int Reader::getMediaCount() { unsigned int Reader::getMediaCount() {
std::map<std::string, unsigned int> counterMap = this->parseCounterMetadata(); std::map<std::string, unsigned int> counterMap = this->parseCounterMetadata();
unsigned int counter = 0; unsigned int counter = 0;
@ -144,6 +145,11 @@ namespace kiwix {
return counter; return counter;
} }
/* Report how many items the ZIM file holds in total (redirects are counted too) */
unsigned int Reader::getGlobalCount() {
  /* The count comes straight from the underlying ZIM file handler */
  const unsigned int totalItems = this->zimFileHandler->getCountArticles();
  return totalItems;
}
/* Return the UID of the ZIM file */ /* Return the UID of the ZIM file */
string Reader::getId() { string Reader::getId() {
std::ostringstream s; std::ostringstream s;

View File

@ -46,6 +46,7 @@ namespace kiwix {
void reset(); void reset();
unsigned int getArticleCount(); unsigned int getArticleCount();
unsigned int getMediaCount(); unsigned int getMediaCount();
unsigned int getGlobalCount();
string getId(); string getId();
string getRandomPageUrl(); string getRandomPageUrl();
string getFirstPageUrl(); string getFirstPageUrl();