mirror of https://github.com/kiwix/libkiwix.git
* NEW: Indexing progression more accurate
This commit is contained in:
parent
49ec0b5652
commit
1acb7b4d29
|
@ -72,9 +72,15 @@ namespace kiwix {
|
||||||
|
|
||||||
/* Get the number of articles to index */
|
/* Get the number of articles to index */
|
||||||
kiwix::Reader reader(self->getZimPath());
|
kiwix::Reader reader(self->getZimPath());
|
||||||
unsigned int articleCount = reader.getArticleCount();
|
unsigned int articleCount = reader.getGlobalCount();
|
||||||
self->setArticleCount(articleCount);
|
self->setArticleCount(articleCount);
|
||||||
|
|
||||||
|
/* Progression */
|
||||||
|
unsigned int readArticleCount = 0;
|
||||||
|
unsigned int currentProgression = 0;
|
||||||
|
self->setProgression(currentProgression);
|
||||||
|
unsigned int tmpProgression;
|
||||||
|
|
||||||
/* StopWords */
|
/* StopWords */
|
||||||
self->readStopWords(reader.getLanguage());
|
self->readStopWords(reader.getLanguage());
|
||||||
|
|
||||||
|
@ -96,7 +102,15 @@ namespace kiwix {
|
||||||
self->pushToParseQueue(token);
|
self->pushToParseQueue(token);
|
||||||
}
|
}
|
||||||
|
|
||||||
currentOffset++;
|
readArticleCount += 1;
|
||||||
|
currentOffset += 1;
|
||||||
|
|
||||||
|
/* Update the progression counter (in percent) */
|
||||||
|
tmpProgression = (unsigned int)((float)readArticleCount/(float)articleCount*100 - 1);
|
||||||
|
if (tmpProgression > currentProgression) {
|
||||||
|
currentProgression = tmpProgression;
|
||||||
|
self->setProgression(currentProgression);
|
||||||
|
}
|
||||||
|
|
||||||
/* Test if the thread should be cancelled */
|
/* Test if the thread should be cancelled */
|
||||||
pthread_testcancel();
|
pthread_testcancel();
|
||||||
|
@ -196,11 +210,8 @@ namespace kiwix {
|
||||||
void *Indexer::indexArticles(void *ptr) {
|
void *Indexer::indexArticles(void *ptr) {
|
||||||
pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, NULL);
|
pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, NULL);
|
||||||
kiwix::Indexer *self = (kiwix::Indexer *)ptr;
|
kiwix::Indexer *self = (kiwix::Indexer *)ptr;
|
||||||
|
unsigned int indexedArticleCount = 0;
|
||||||
indexerToken token;
|
indexerToken token;
|
||||||
unsigned indexedArticleCount = 0;
|
|
||||||
unsigned int articleCount = self->getArticleCount();
|
|
||||||
unsigned int currentProgression = self->getProgression();
|
|
||||||
unsigned int tmpProgression;
|
|
||||||
|
|
||||||
self->indexingPrelude(self->getIndexPath());
|
self->indexingPrelude(self->getIndexPath());
|
||||||
|
|
||||||
|
@ -217,13 +228,6 @@ namespace kiwix {
|
||||||
|
|
||||||
indexedArticleCount += 1;
|
indexedArticleCount += 1;
|
||||||
|
|
||||||
/* Update the progression counter (in percent) */
|
|
||||||
tmpProgression = (unsigned int)((float)indexedArticleCount/(float)articleCount*100);
|
|
||||||
if (tmpProgression > currentProgression) {
|
|
||||||
currentProgression = tmpProgression;
|
|
||||||
self->setProgression(currentProgression);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Make a hard-disk flush every 5000 articles */
|
/* Make a hard-disk flush every 5000 articles */
|
||||||
if (indexedArticleCount % 5000 == 0) {
|
if (indexedArticleCount % 5000 == 0) {
|
||||||
self->flush();
|
self->flush();
|
||||||
|
@ -283,6 +287,7 @@ namespace kiwix {
|
||||||
#else
|
#else
|
||||||
usleep(500000);
|
usleep(500000);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (this->getVerboseFlag()) {
|
if (this->getVerboseFlag()) {
|
||||||
std::cout << "Waiting... ToParseQueue is empty for now..." << std::endl;
|
std::cout << "Waiting... ToParseQueue is empty for now..." << std::endl;
|
||||||
}
|
}
|
||||||
|
|
|
@ -119,6 +119,7 @@ namespace kiwix {
|
||||||
return counter;
|
return counter;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Get the count of media content in the ZIM file */
|
||||||
unsigned int Reader::getMediaCount() {
|
unsigned int Reader::getMediaCount() {
|
||||||
std::map<std::string, unsigned int> counterMap = this->parseCounterMetadata();
|
std::map<std::string, unsigned int> counterMap = this->parseCounterMetadata();
|
||||||
unsigned int counter = 0;
|
unsigned int counter = 0;
|
||||||
|
@ -144,6 +145,11 @@ namespace kiwix {
|
||||||
return counter;
|
return counter;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Get the total number of entries in the ZIM file, redirects included, as reported by zimFileHandler->getCountArticles() */
|
||||||
|
unsigned int Reader::getGlobalCount() {
|
||||||
|
return this->zimFileHandler->getCountArticles();
|
||||||
|
}
|
||||||
|
|
||||||
/* Return the UID of the ZIM file */
|
/* Return the UID of the ZIM file */
|
||||||
string Reader::getId() {
|
string Reader::getId() {
|
||||||
std::ostringstream s;
|
std::ostringstream s;
|
||||||
|
|
|
@ -46,6 +46,7 @@ namespace kiwix {
|
||||||
void reset();
|
void reset();
|
||||||
unsigned int getArticleCount();
|
unsigned int getArticleCount();
|
||||||
unsigned int getMediaCount();
|
unsigned int getMediaCount();
|
||||||
|
unsigned int getGlobalCount();
|
||||||
string getId();
|
string getId();
|
||||||
string getRandomPageUrl();
|
string getRandomPageUrl();
|
||||||
string getFirstPageUrl();
|
string getFirstPageUrl();
|
||||||
|
|
Loading…
Reference in New Issue