mirror of https://github.com/kiwix/libkiwix.git
+ start of the work on the multithreading indexer
This commit is contained in:
parent
81424fedf3
commit
20daa92b65
|
@ -59,18 +59,76 @@ namespace kiwix {
|
|||
/* Compute a few things */
|
||||
this->articleCount = this->zimFileHandler->getNamespaceCount('A');
|
||||
this->stepSize = (float)this->articleCount / (float)100;
|
||||
|
||||
/* Thread mgmt */
|
||||
this->runningStatus = 0;
|
||||
}
|
||||
|
||||
|
||||
void *Indexer::extractArticles(void *ptr) {
|
||||
kiwix::Indexer *self = (kiwix::Indexer *)ptr;
|
||||
self->incrementRunningStatus();
|
||||
unsigned int startOffset = self->zimFileHandler->getNamespaceBeginOffset('A');
|
||||
unsigned int endOffset = self->zimFileHandler->getNamespaceEndOffset('A');
|
||||
|
||||
/* Goes trough all articles */
|
||||
unsigned int currentOffset = startOffset;
|
||||
zim::Article currentArticle;
|
||||
|
||||
while (currentOffset <= endOffset) {
|
||||
/* Test if the thread should be cancelled */
|
||||
pthread_testcancel();
|
||||
|
||||
/* Redirects are not indexed */
|
||||
do {
|
||||
currentArticle = self->zimFileHandler->getArticle(currentOffset++);
|
||||
} while (currentArticle.isRedirect() && currentOffset++ != endOffset);
|
||||
|
||||
cout << currentArticle.getTitle() << endl;
|
||||
|
||||
}
|
||||
|
||||
self->decrementRunningStatus();
|
||||
pthread_exit(NULL);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
 * Thread entry point of the article parser.
 *
 * Placeholder: does no work yet and terminates the thread immediately.
 *
 * ptr: unused for now.
 * Returns NULL in all cases.
 */
void *Indexer::parseArticles(void *ptr) {
  (void)ptr; /* not used yet */

  /* Returning from the start routine ends the thread with this value as
     exit status; no explicit pthread_exit() call is needed. */
  return NULL;
}
|
||||
|
||||
/*
 * Thread entry point of the index writer.
 *
 * Placeholder: does no work yet and terminates the thread immediately.
 *
 * ptr: unused for now.
 * Returns NULL in all cases.
 */
void *Indexer::writeIndex(void *ptr) {
  (void)ptr; /* not used yet */

  /* Returning from the start routine ends the thread with this value as
     exit status; no explicit pthread_exit() call is needed. */
  return NULL;
}
|
||||
|
||||
/*
 * Starts the indexing by spawning the article extracter thread
 * (Indexer::extractArticles) and detaching it.
 *
 * Returns true if the thread was created, false if pthread_create() failed
 * (in which case nothing is detached and nothing runs).
 *
 * NOTE(review): the running status is incremented by the thread itself, so
 * isRunning() may still see 0 right after this call returns.
 */
bool Indexer::start() {
  const int creationStatus = pthread_create(&(this->articleExtracter), NULL,
                                            Indexer::extractArticles,
                                            static_cast<void *>(this));

  /* On failure the content of articleExtracter is undefined: it must not be
     passed to pthread_detach(). */
  if (creationStatus != 0) {
    return false;
  }

  pthread_detach(this->articleExtracter);
  cout << "end" << endl;

  return true;
}
|
||||
|
||||
/*
 * Stops the indexing.
 *
 * Placeholder: performs no action yet and always returns true.
 * NOTE(review): the extracter thread started by start() is not cancelled
 * here - confirm whether this is still to be implemented.
 */
bool Indexer::stop() {
  return true;
}
|
||||
|
||||
void Indexer::incrementRunningStatus() {
|
||||
this->runningStatus++;
|
||||
}
|
||||
|
||||
void Indexer::decrementRunningStatus() {
|
||||
this->runningStatus--;
|
||||
}
|
||||
|
||||
unsigned int Indexer::getRunningStatus() {
|
||||
return this->runningStatus;
|
||||
}
|
||||
|
||||
bool Indexer::isRunning() {
|
||||
return true;
|
||||
return this->runningStatus > 0;
|
||||
}
|
||||
|
||||
void Indexer::setCurrentArticleOffset(unsigned int offset) {
|
||||
|
@ -82,6 +140,8 @@ namespace kiwix {
|
|||
}
|
||||
|
||||
/*
 * Returns the progression of the indexing.
 * Placeholder: no progression is computed yet, the result is always 0.
 */
unsigned int Indexer::getProgression() {
  return 0;
}
|
||||
|
||||
/* Read the file containing the stopwords */
|
||||
|
|
|
@ -27,6 +27,7 @@
|
|||
#include <sstream>
|
||||
|
||||
#include <xapian.h>
|
||||
#include <pthread.h>
|
||||
#include <unaccent.h>
|
||||
#include <zim/file.h>
|
||||
#include <zim/article.h>
|
||||
|
@ -42,7 +43,23 @@ namespace kiwix {
|
|||
public:
|
||||
Indexer(const string &zimFilePath);
|
||||
bool indexNextPercent(const bool &verbose = false);
|
||||
bool setZimFilePath(const string &zimFilePath);
|
||||
bool start();
|
||||
bool stop();
|
||||
bool isRunning();
|
||||
unsigned int getProgression();
|
||||
|
||||
private:
|
||||
pthread_t articleExtracter, articleParser, indexWriter;
|
||||
static void *extractArticles(void *ptr);
|
||||
static void *parseArticles(void *ptr);
|
||||
static void *writeIndex(void *ptr);
|
||||
|
||||
unsigned int runningStatus;
|
||||
void incrementRunningStatus();
|
||||
void decrementRunningStatus();
|
||||
unsigned int getRunningStatus();
|
||||
|
||||
protected:
|
||||
virtual void indexNextPercentPre() = 0;
|
||||
virtual void indexNextArticle(const string &url,
|
||||
|
@ -56,14 +73,9 @@ namespace kiwix {
|
|||
virtual void indexNextPercentPost() = 0;
|
||||
virtual void stopIndexing() = 0;
|
||||
|
||||
/* General */
|
||||
bool setZimFilePath(const string &zimFilePath);
|
||||
bool start();
|
||||
bool stop();
|
||||
bool isRunning();
|
||||
/* Article offset */
|
||||
void setCurrentArticleOffset(unsigned int offset);
|
||||
unsigned int getCurrentArticleOffset();
|
||||
unsigned int getProgression();
|
||||
|
||||
/* ZIM file handling */
|
||||
zim::File* zimFileHandler;
|
||||
|
|
Loading…
Reference in New Issue