From 20daa92b65bc0e81db1978aeb2f6d6d3d7a277da Mon Sep 17 00:00:00 2001 From: kelson42 Date: Wed, 28 Mar 2012 14:59:57 +0000 Subject: [PATCH] + start of the work on the multithreading indexer --- src/common/kiwix/indexer.cpp | 66 ++++++++++++++++++++++++++++++++++-- src/common/kiwix/indexer.h | 24 +++++++++---- 2 files changed, 81 insertions(+), 9 deletions(-) diff --git a/src/common/kiwix/indexer.cpp b/src/common/kiwix/indexer.cpp index c2941cd91..02e67a275 100644 --- a/src/common/kiwix/indexer.cpp +++ b/src/common/kiwix/indexer.cpp @@ -59,18 +59,76 @@ namespace kiwix { /* Compute few things */ this->articleCount = this->zimFileHandler->getNamespaceCount('A'); this->stepSize = (float)this->articleCount / (float)100; + + /* Thread mgmt */ + this->runningStatus = 0; } - + + void *Indexer::extractArticles(void *ptr) { /* Thread entry point handed to pthread_create() by Indexer::start(); ptr is the Indexer instance. Walks the article offsets of namespace A and prints each title. The running counter is raised on entry and lowered on normal exit. NOTE(review): a thread cancelled at pthread_testcancel() never reaches decrementRunningStatus(), so isRunning() would stay true - confirm intended handling. */ + kiwix::Indexer *self = (kiwix::Indexer *)ptr; + self->incrementRunningStatus(); + unsigned int startOffset = self->zimFileHandler->getNamespaceBeginOffset('A'); + unsigned int endOffset = self->zimFileHandler->getNamespaceEndOffset('A'); + + /* Goes through all articles */ + unsigned int currentOffset = startOffset; + zim::Article currentArticle; + + while (currentOffset <= endOffset) { /* NOTE(review): treats endOffset as inclusive - confirm against getNamespaceEndOffset() */ + /* Test if the thread should be cancelled */ + pthread_testcancel(); + + /* Redirects are not indexed: keep fetching until a non-redirect is found or the offset passes endOffset */ + do { + currentArticle = self->zimFileHandler->getArticle(currentOffset++); + } while (currentArticle.isRedirect() && currentOffset <= endOffset); /* the guard must not increment: the body already advanced currentOffset, and a second increment here skipped the article following every redirect and could step over endOffset */ + + cout << currentArticle.getTitle() << endl; + + } + + self->decrementRunningStatus(); + pthread_exit(NULL); + return NULL; /* not reached, pthread_exit() does not return; kept to satisfy the void* return type */ + } + + void *Indexer::parseArticles(void *ptr) { /* Placeholder thread entry point: exits immediately */ + pthread_exit(NULL); + return NULL; + } + + void *Indexer::writeIndex(void *ptr) { /* Placeholder thread entry point: exits immediately */ + pthread_exit(NULL); + return NULL; + } + bool Indexer::start() { + if (pthread_create(&(this->articleExtracter), NULL, Indexer::extractArticles, ( void *)this) != 0) return false; /* pthread_create() returns non-zero on failure and then leaves the handle undefined, so it must not be detached; report the failure to the caller instead */ + pthread_detach(this->articleExtracter); + cout << "end" << endl; + return true; } bool Indexer::stop() 
{ - return true; + + return true; /* does not cancel the extractor thread yet; always reports success */ + } + + void Indexer::incrementRunningStatus() { /* NOTE(review): called from the extractor thread while isRunning() reads the counter from the caller thread; no synchronization is visible in this patch - confirm */ + this->runningStatus++; + } + + void Indexer::decrementRunningStatus() { + this->runningStatus--; + } + + unsigned int Indexer::getRunningStatus() { + return this->runningStatus; } bool Indexer::isRunning() { - return true; + return this->runningStatus > 0; /* true while at least one worker has incremented and not yet decremented the counter */ } void Indexer::setCurrentArticleOffset(unsigned int offset) { @@ -82,6 +140,8 @@ namespace kiwix { } unsigned int Indexer::getProgression() { + unsigned int progression = 0; /* placeholder: progression is not computed yet, always reports 0 */ + return progression; } /* Read the file containing the stopwords */ diff --git a/src/common/kiwix/indexer.h b/src/common/kiwix/indexer.h index f97dc3075..31a68c744 100644 --- a/src/common/kiwix/indexer.h +++ b/src/common/kiwix/indexer.h @@ -27,6 +27,7 @@ #include #include +#include #include #include #include @@ -42,7 +43,23 @@ namespace kiwix { public: Indexer(const string &zimFilePath); bool indexNextPercent(const bool &verbose = false); + bool setZimFilePath(const string &zimFilePath); + bool start(); + bool stop(); + bool isRunning(); + unsigned int getProgression(); + + private: + pthread_t articleExtracter, articleParser, indexWriter; + static void *extractArticles(void *ptr); + static void *parseArticles(void *ptr); + static void *writeIndex(void *ptr); + unsigned int runningStatus; + void incrementRunningStatus(); + void decrementRunningStatus(); + unsigned int getRunningStatus(); + protected: virtual void indexNextPercentPre() = 0; virtual void indexNextArticle(const string &url, @@ -56,14 +73,9 @@ namespace kiwix { virtual void indexNextPercentPost() = 0; virtual void stopIndexing() = 0; - /* General */ - bool setZimFilePath(const string &zimFilePath); - bool start(); - bool stop(); - bool isRunning(); + /* Article offset */ void setCurrentArticleOffset(unsigned int offset); unsigned int getCurrentArticleOffset(); - unsigned int getProgression(); /* ZIM file handling */ zim::File* zimFileHandler;