From 65b4015f03ecafe891a28657e63d36348e523ca2 Mon Sep 17 00:00:00 2001 From: kelson42 Date: Sat, 7 Apr 2012 14:09:47 +0000 Subject: [PATCH] + new indexer --- src/common/kiwix/indexer.cpp | 24 ++++++++++++++++++++---- src/common/kiwix/reader.cpp | 7 +++++-- src/common/kiwix/xapianIndexer.h | 1 - 3 files changed, 25 insertions(+), 7 deletions(-) diff --git a/src/common/kiwix/indexer.cpp b/src/common/kiwix/indexer.cpp index e114c20f3..0babb0042 100644 --- a/src/common/kiwix/indexer.cpp +++ b/src/common/kiwix/indexer.cpp @@ -61,7 +61,9 @@ namespace kiwix { /* Get the number of article to index */ kiwix::Reader reader(self->getZimPath()); - self->setArticleCount(reader.getArticleCount()); + unsigned int articleCount = reader.getArticleCount(); + self->setArticleCount(articleCount); + cout << "Article Count from reader: " << articleCount << endl; /* Goes trough all articles */ zim::File *zimHandler = reader.getZimFileHandler(); @@ -184,7 +186,14 @@ namespace kiwix { indexerToken token; unsigned indexedArticleCount = 0; - unsigned int stepSize = ((self->getArticleCount() / 100) < 1 ? 1 : (self->getArticleCount() / 100)); + unsigned int articleCount = self->getArticleCount(); + unsigned int currentProgression = self->getProgression(); + float stepSize = articleCount / 100; + + cout << "Article count: " << articleCount << endl; + cout << "Progression step size: " << stepSize << endl; + cout << "Curent progression: " << currentProgression << endl; + self->indexingPrelude(self->getIndexPath()); while (self->popFromToIndexQueue(token)) { @@ -198,10 +207,16 @@ namespace kiwix { token.wordCount ); - if (++indexedArticleCount % stepSize == 0) { - self->setProgression(self->getProgression() + 1); + indexedArticleCount += 1; + + /* Update the progression counter (in percent) */ + if ((unsigned int)((float)indexedArticleCount/(float)articleCount*100) > currentProgression) { + self->setProgression((unsigned int)((float)indexedArticleCount/(float)articleCount*100)); + currentProgression = self->getProgression(); + cout << indexedArticleCount << " articles indexed, that means a progression of " << currentProgression << endl; } + /* Make a hard-disk flush every 10.000 articles */ if (indexedArticleCount % 10000 == 0) { self->flush(); } @@ -211,6 +226,7 @@ namespace kiwix { } self->setProgression(100); self->indexingPostlude(); + sleep(1); self->articleIndexerRunning(false); pthread_exit(NULL); return NULL; diff --git a/src/common/kiwix/reader.cpp b/src/common/kiwix/reader.cpp index 2d92a1c7f..2386f8eef 100644 --- a/src/common/kiwix/reader.cpp +++ b/src/common/kiwix/reader.cpp @@ -79,6 +79,7 @@ namespace kiwix { string counterUrl = "/M/Counter"; this->getContentByUrl(counterUrl, content, contentLength, mimeType); + cout << "Counter URL: " << content << endl; stringstream ssContent(content); while(getline(ssContent, item, ';')) { @@ -99,12 +100,14 @@ namespace kiwix { std::map counterMap = this->parseCounterMetadata(); unsigned int counter = 0; - if (counterMap.empty()) + if (counterMap.empty()) { counter = this->articleCount; - else { + cout << "Article count from offsets: " << counter << endl; + } else { std::map::const_iterator it = counterMap.find("text/html"); if (it != counterMap.end()) counter = it->second; + cout << "Article count from metadata: " << counter << endl; } return counter; diff --git a/src/common/kiwix/xapianIndexer.h b/src/common/kiwix/xapianIndexer.h index 635fc80de..06fadbe7a 100644 --- a/src/common/kiwix/xapianIndexer.h +++ b/src/common/kiwix/xapianIndexer.h @@ -31,7 +31,6 @@ namespace kiwix { public: XapianIndexer(); - ~XapianIndexer(); protected: void indexingPrelude(const string &indexPath);