+ start of the work on the multithreaded indexer

This commit is contained in:
kelson42 2012-03-28 14:59:57 +00:00
parent 81424fedf3
commit 20daa92b65
2 changed files with 81 additions and 9 deletions

View File

@ -59,18 +59,76 @@ namespace kiwix {
/* Compute few things */
this->articleCount = this->zimFileHandler->getNamespaceCount('A');
this->stepSize = (float)this->articleCount / (float)100;
/* Thread mgmt */
this->runningStatus = 0;
}
void *Indexer::extractArticles(void *ptr) {
kiwix::Indexer *self = (kiwix::Indexer *)ptr;
self->incrementRunningStatus();
unsigned int startOffset = self->zimFileHandler->getNamespaceBeginOffset('A');
unsigned int endOffset = self->zimFileHandler->getNamespaceEndOffset('A');
/* Goes trough all articles */
unsigned int currentOffset = startOffset;
zim::Article currentArticle;
while (currentOffset <= endOffset) {
/* Test if the thread should be cancelled */
pthread_testcancel();
/* Redirects are not indexed */
do {
currentArticle = self->zimFileHandler->getArticle(currentOffset++);
} while (currentArticle.isRedirect() && currentOffset++ != endOffset);
cout << currentArticle.getTitle() << endl;
}
self->decrementRunningStatus();
pthread_exit(NULL);
return NULL;
}
/*
 * Thread entry point, presumably meant for the article parsing stage
 * (placeholder for now): it ignores ptr and terminates the calling thread
 * immediately with a NULL exit value.
 */
void *Indexer::parseArticles(void *ptr) {
  void *const exitValue = NULL;
  pthread_exit(exitValue);
  /* Not reached; present to satisfy the function's return type. */
  return exitValue;
}
/*
 * Thread entry point, presumably meant for the index writing stage
 * (placeholder for now): it ignores ptr and terminates the calling thread
 * immediately with a NULL exit value.
 */
void *Indexer::writeIndex(void *ptr) {
  void *const exitValue = NULL;
  pthread_exit(exitValue);
  /* Not reached; present to satisfy the function's return type. */
  return exitValue;
}
/*
 * Launches the article extraction thread (Indexer::extractArticles) and
 * detaches it, so it is never joined.
 *
 * Returns true when the thread was created, false when pthread_create
 * failed (in which case nothing is detached).
 */
bool Indexer::start() {
  const int creationStatus = pthread_create(&(this->articleExtracter), NULL,
                                            Indexer::extractArticles,
                                            static_cast<void *>(this));

  /* pthread_create reports failure through a non-zero return code; the
     thread handle is not usable in that case. */
  if (creationStatus != 0) {
    return false;
  }

  pthread_detach(this->articleExtracter);
  cout << "end" << endl;
  return true;
}
/*
 * Requests the indexer to stop.
 * Currently a stub: it performs no action and always returns true.
 */
bool Indexer::stop() {
  return true;
}
/* Adds one to the runningStatus counter (no synchronization is done here). */
void Indexer::incrementRunningStatus() {
  this->runningStatus += 1;
}
/* Subtracts one from the runningStatus counter (no synchronization is done
   here; the counter is unsigned, so it wraps if it is already 0). */
void Indexer::decrementRunningStatus() {
  this->runningStatus -= 1;
}
/* Returns the current value of the runningStatus counter. */
unsigned int Indexer::getRunningStatus() {
  const unsigned int currentStatus = this->runningStatus;
  return currentStatus;
}
/*
 * Tells whether the indexer is running, i.e. whether the runningStatus
 * counter is greater than zero.
 */
bool Indexer::isRunning() {
  return this->runningStatus > 0;
}
void Indexer::setCurrentArticleOffset(unsigned int offset) {
@ -82,6 +140,8 @@ namespace kiwix {
}
/* Returns the indexing progression. As written here it is a placeholder
   that always reports 0. */
unsigned int Indexer::getProgression() {
  return 0u;
}
/* Read the file containing the stopwords */

View File

@ -27,6 +27,7 @@
#include <sstream>
#include <xapian.h>
#include <pthread.h>
#include <unaccent.h>
#include <zim/file.h>
#include <zim/article.h>
@ -42,7 +43,23 @@ namespace kiwix {
public:
/* Constructs an indexer from the path of a ZIM file. */
Indexer(const string &zimFilePath);
/* Presumably indexes the next 1% slice of the articles; confirm against
   the implementation, which is not visible here. */
bool indexNextPercent(const bool &verbose = false);
bool setZimFilePath(const string &zimFilePath);
/* Launches the article extraction thread. */
bool start();
bool stop();
/* Reports whether the indexer is running. */
bool isRunning();
unsigned int getProgression();
private:
/* Thread handles. articleExtracter is the one created by start(); the other
   two presumably belong to parseArticles/writeIndex. */
pthread_t articleExtracter, articleParser, indexWriter;
/* Thread entry points (pthread start routines); ptr carries the Indexer. */
static void *extractArticles(void *ptr);
static void *parseArticles(void *ptr);
static void *writeIndex(void *ptr);
/* Counter incremented when extractArticles() begins and decremented when
   it finishes; read by getRunningStatus(). */
unsigned int runningStatus;
void incrementRunningStatus();
void decrementRunningStatus();
unsigned int getRunningStatus();
protected:
virtual void indexNextPercentPre() = 0;
virtual void indexNextArticle(const string &url,
@ -56,14 +73,9 @@ namespace kiwix {
/* Pure virtual hooks: must be implemented by concrete indexer subclasses. */
virtual void indexNextPercentPost() = 0;
virtual void stopIndexing() = 0;
/* General */
/* NOTE(review): these four declarations also appear in the public section
   earlier in this view; one of the two copies is presumably diff residue. */
bool setZimFilePath(const string &zimFilePath);
bool start();
bool stop();
bool isRunning();
/* Article offset */
void setCurrentArticleOffset(unsigned int offset);
unsigned int getCurrentArticleOffset();
unsigned int getProgression();
/* ZIM file handling */
/* Handle on the ZIM file; used to query namespace 'A' offsets and articles. */
zim::File* zimFileHandler;