mirror of https://github.com/kiwix/libkiwix.git
+ new indexer
This commit is contained in:
parent
62daa9ffe5
commit
65b4015f03
|
@ -61,7 +61,9 @@ namespace kiwix {
|
||||||
|
|
||||||
/* Get the number of articles to index */
|
/* Get the number of articles to index */
|
||||||
kiwix::Reader reader(self->getZimPath());
|
kiwix::Reader reader(self->getZimPath());
|
||||||
self->setArticleCount(reader.getArticleCount());
|
unsigned int articleCount = reader.getArticleCount();
|
||||||
|
self->setArticleCount(articleCount);
|
||||||
|
cout << "Article Count from reader: " << articleCount << endl;
|
||||||
|
|
||||||
/* Go through all articles */
|
/* Go through all articles */
|
||||||
zim::File *zimHandler = reader.getZimFileHandler();
|
zim::File *zimHandler = reader.getZimFileHandler();
|
||||||
|
@ -184,7 +186,14 @@ namespace kiwix {
|
||||||
|
|
||||||
indexerToken token;
|
indexerToken token;
|
||||||
unsigned indexedArticleCount = 0;
|
unsigned indexedArticleCount = 0;
|
||||||
unsigned int stepSize = ((self->getArticleCount() / 100) < 1 ? 1 : (self->getArticleCount() / 100));
|
unsigned int articleCount = self->getArticleCount();
|
||||||
|
unsigned int currentProgression = self->getProgression();
|
||||||
|
float stepSize = articleCount / 100;
|
||||||
|
|
||||||
|
cout << "Article count: " << articleCount << endl;
|
||||||
|
cout << "Progression step size: " << stepSize << endl;
|
||||||
|
cout << "Curent progression: " << currentProgression << endl;
|
||||||
|
|
||||||
self->indexingPrelude(self->getIndexPath());
|
self->indexingPrelude(self->getIndexPath());
|
||||||
|
|
||||||
while (self->popFromToIndexQueue(token)) {
|
while (self->popFromToIndexQueue(token)) {
|
||||||
|
@ -198,10 +207,16 @@ namespace kiwix {
|
||||||
token.wordCount
|
token.wordCount
|
||||||
);
|
);
|
||||||
|
|
||||||
if (++indexedArticleCount % stepSize == 0) {
|
indexedArticleCount += 1;
|
||||||
self->setProgression(self->getProgression() + 1);
|
|
||||||
|
/* Update the progression counter (in percent) */
|
||||||
|
if ((unsigned int)((float)indexedArticleCount/(float)articleCount*100) > currentProgression) {
|
||||||
|
self->setProgression((unsigned int)((float)indexedArticleCount/(float)articleCount*100));
|
||||||
|
currentProgression = self->getProgression();
|
||||||
|
cout << indexedArticleCount << " articles indexed, that means a progression of " << currentProgression << endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Flush the index to disk every 10,000 articles */
|
||||||
if (indexedArticleCount % 10000 == 0) {
|
if (indexedArticleCount % 10000 == 0) {
|
||||||
self->flush();
|
self->flush();
|
||||||
}
|
}
|
||||||
|
@ -211,6 +226,7 @@ namespace kiwix {
|
||||||
}
|
}
|
||||||
self->setProgression(100);
|
self->setProgression(100);
|
||||||
self->indexingPostlude();
|
self->indexingPostlude();
|
||||||
|
sleep(1);
|
||||||
self->articleIndexerRunning(false);
|
self->articleIndexerRunning(false);
|
||||||
pthread_exit(NULL);
|
pthread_exit(NULL);
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
|
@ -79,6 +79,7 @@ namespace kiwix {
|
||||||
string counterUrl = "/M/Counter";
|
string counterUrl = "/M/Counter";
|
||||||
|
|
||||||
this->getContentByUrl(counterUrl, content, contentLength, mimeType);
|
this->getContentByUrl(counterUrl, content, contentLength, mimeType);
|
||||||
|
cout << "Counter URL: " << content << endl;
|
||||||
stringstream ssContent(content);
|
stringstream ssContent(content);
|
||||||
|
|
||||||
while(getline(ssContent, item, ';')) {
|
while(getline(ssContent, item, ';')) {
|
||||||
|
@ -99,12 +100,14 @@ namespace kiwix {
|
||||||
std::map<std::string, unsigned int> counterMap = this->parseCounterMetadata();
|
std::map<std::string, unsigned int> counterMap = this->parseCounterMetadata();
|
||||||
unsigned int counter = 0;
|
unsigned int counter = 0;
|
||||||
|
|
||||||
if (counterMap.empty())
|
if (counterMap.empty()) {
|
||||||
counter = this->articleCount;
|
counter = this->articleCount;
|
||||||
else {
|
cout << "Article count from offsets: " << counter << endl;
|
||||||
|
} else {
|
||||||
std::map<std::string, unsigned int>::const_iterator it = counterMap.find("text/html");
|
std::map<std::string, unsigned int>::const_iterator it = counterMap.find("text/html");
|
||||||
if (it != counterMap.end())
|
if (it != counterMap.end())
|
||||||
counter = it->second;
|
counter = it->second;
|
||||||
|
cout << "Article count from metadata: " << counter << endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
return counter;
|
return counter;
|
||||||
|
|
|
@ -31,7 +31,6 @@ namespace kiwix {
|
||||||
|
|
||||||
public:
|
public:
|
||||||
XapianIndexer();
|
XapianIndexer();
|
||||||
~XapianIndexer();
|
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
void indexingPrelude(const string &indexPath);
|
void indexingPrelude(const string &indexPath);
|
||||||
|
|
Loading…
Reference in New Issue