mirror of https://github.com/kiwix/libkiwix.git
+ start of the work on the multithreading indexer
This commit is contained in:
parent
81424fedf3
commit
20daa92b65
|
@ -59,18 +59,76 @@ namespace kiwix {
|
||||||
/* Compute few things */
|
/* Compute few things */
|
||||||
this->articleCount = this->zimFileHandler->getNamespaceCount('A');
|
this->articleCount = this->zimFileHandler->getNamespaceCount('A');
|
||||||
this->stepSize = (float)this->articleCount / (float)100;
|
this->stepSize = (float)this->articleCount / (float)100;
|
||||||
|
|
||||||
|
/* Thread mgmt */
|
||||||
|
this->runningStatus = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void *Indexer::extractArticles(void *ptr) {
|
||||||
|
kiwix::Indexer *self = (kiwix::Indexer *)ptr;
|
||||||
|
self->incrementRunningStatus();
|
||||||
|
unsigned int startOffset = self->zimFileHandler->getNamespaceBeginOffset('A');
|
||||||
|
unsigned int endOffset = self->zimFileHandler->getNamespaceEndOffset('A');
|
||||||
|
|
||||||
|
/* Goes trough all articles */
|
||||||
|
unsigned int currentOffset = startOffset;
|
||||||
|
zim::Article currentArticle;
|
||||||
|
|
||||||
|
while (currentOffset <= endOffset) {
|
||||||
|
/* Test if the thread should be cancelled */
|
||||||
|
pthread_testcancel();
|
||||||
|
|
||||||
|
/* Redirects are not indexed */
|
||||||
|
do {
|
||||||
|
currentArticle = self->zimFileHandler->getArticle(currentOffset++);
|
||||||
|
} while (currentArticle.isRedirect() && currentOffset++ != endOffset);
|
||||||
|
|
||||||
|
cout << currentArticle.getTitle() << endl;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
self->decrementRunningStatus();
|
||||||
|
pthread_exit(NULL);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Thread start routine reserved for article parsing. It currently does no
 * work: ptr is ignored and the thread ends immediately.
 *
 * Always returns NULL.
 */
void *Indexer::parseArticles(void *ptr) {
  /* Returning from a start routine ends the thread with this value; the
     former pthread_exit() call made the return statement unreachable. */
  return NULL;
}
|
||||||
|
|
||||||
|
/*
 * Thread start routine reserved for writing the index. It currently does no
 * work: ptr is ignored and the thread ends immediately.
 *
 * Always returns NULL.
 */
void *Indexer::writeIndex(void *ptr) {
  /* Returning from a start routine ends the thread with this value; the
     former pthread_exit() call made the return statement unreachable. */
  return NULL;
}
|
||||||
|
|
||||||
bool Indexer::start() {
|
bool Indexer::start() {
|
||||||
|
pthread_create(&(this->articleExtracter), NULL, Indexer::extractArticles, ( void *)this);
|
||||||
|
pthread_detach(this->articleExtracter);
|
||||||
|
cout << "end" << endl;
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
 * Placeholder for stopping the indexing. The body performs no action and
 * unconditionally reports success.
 */
bool Indexer::stop() {
  const bool stopped = true;
  return stopped;
}
|
||||||
|
|
||||||
|
/*
 * Adds one to the runningStatus counter.
 * NOTE(review): this is a plain read-modify-write with no lock or atomic;
 * confirm whether concurrent callers are expected.
 */
void Indexer::incrementRunningStatus() {
  this->runningStatus += 1;
}
|
||||||
|
|
||||||
|
/*
 * Subtracts one from the runningStatus counter.
 * NOTE(review): the counter is unsigned, so a call made while it is zero
 * wraps around; the body does not guard against that.
 */
void Indexer::decrementRunningStatus() {
  this->runningStatus -= 1;
}
|
||||||
|
|
||||||
|
unsigned int Indexer::getRunningStatus() {
|
||||||
|
return this->runningStatus;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Indexer::isRunning() {
|
bool Indexer::isRunning() {
|
||||||
return true;
|
return this->runningStatus > 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Indexer::setCurrentArticleOffset(unsigned int offset) {
|
void Indexer::setCurrentArticleOffset(unsigned int offset) {
|
||||||
|
@ -82,6 +140,8 @@ namespace kiwix {
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
 * Placeholder for reporting the indexing progression. The body does not
 * compute anything yet and always returns 0.
 */
unsigned int Indexer::getProgression() {
  return 0u;
}
|
||||||
|
|
||||||
/* Read the file containing the stopwords */
|
/* Read the file containing the stopwords */
|
||||||
|
|
|
@ -27,6 +27,7 @@
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
|
|
||||||
#include <xapian.h>
|
#include <xapian.h>
|
||||||
|
#include <pthread.h>
|
||||||
#include <unaccent.h>
|
#include <unaccent.h>
|
||||||
#include <zim/file.h>
|
#include <zim/file.h>
|
||||||
#include <zim/article.h>
|
#include <zim/article.h>
|
||||||
|
@ -42,7 +43,23 @@ namespace kiwix {
|
||||||
public:
|
public:
|
||||||
Indexer(const string &zimFilePath);
|
Indexer(const string &zimFilePath);
|
||||||
bool indexNextPercent(const bool &verbose = false);
|
bool indexNextPercent(const bool &verbose = false);
|
||||||
|
bool setZimFilePath(const string &zimFilePath);
|
||||||
|
bool start();
|
||||||
|
bool stop();
|
||||||
|
bool isRunning();
|
||||||
|
unsigned int getProgression();
|
||||||
|
|
||||||
|
private:
|
||||||
|
pthread_t articleExtracter, articleParser, indexWriter;
|
||||||
|
static void *extractArticles(void *ptr);
|
||||||
|
static void *parseArticles(void *ptr);
|
||||||
|
static void *writeIndex(void *ptr);
|
||||||
|
|
||||||
|
unsigned int runningStatus;
|
||||||
|
void incrementRunningStatus();
|
||||||
|
void decrementRunningStatus();
|
||||||
|
unsigned int getRunningStatus();
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
virtual void indexNextPercentPre() = 0;
|
virtual void indexNextPercentPre() = 0;
|
||||||
virtual void indexNextArticle(const string &url,
|
virtual void indexNextArticle(const string &url,
|
||||||
|
@ -56,14 +73,9 @@ namespace kiwix {
|
||||||
virtual void indexNextPercentPost() = 0;
|
virtual void indexNextPercentPost() = 0;
|
||||||
virtual void stopIndexing() = 0;
|
virtual void stopIndexing() = 0;
|
||||||
|
|
||||||
/* General */
|
/* Article offset */
|
||||||
bool setZimFilePath(const string &zimFilePath);
|
|
||||||
bool start();
|
|
||||||
bool stop();
|
|
||||||
bool isRunning();
|
|
||||||
void setCurrentArticleOffset(unsigned int offset);
|
void setCurrentArticleOffset(unsigned int offset);
|
||||||
unsigned int getCurrentArticleOffset();
|
unsigned int getCurrentArticleOffset();
|
||||||
unsigned int getProgression();
|
|
||||||
|
|
||||||
/* ZIM file handling */
|
/* ZIM file handling */
|
||||||
zim::File* zimFileHandler;
|
zim::File* zimFileHandler;
|
||||||
|
|
Loading…
Reference in New Issue