mirror of https://github.com/kiwix/libkiwix.git
+ new indexer
This commit is contained in:
parent
62daa9ffe5
commit
65b4015f03
|
@ -61,7 +61,9 @@ namespace kiwix {
|
|||
|
||||
/* Get the number of articles to index */
|
||||
kiwix::Reader reader(self->getZimPath());
|
||||
self->setArticleCount(reader.getArticleCount());
|
||||
unsigned int articleCount = reader.getArticleCount();
|
||||
self->setArticleCount(articleCount);
|
||||
cout << "Article Count from reader: " << articleCount << endl;
|
||||
|
||||
/* Go through all articles */
|
||||
zim::File *zimHandler = reader.getZimFileHandler();
|
||||
|
@ -184,7 +186,14 @@ namespace kiwix {
|
|||
|
||||
indexerToken token;
|
||||
unsigned indexedArticleCount = 0;
|
||||
unsigned int stepSize = ((self->getArticleCount() / 100) < 1 ? 1 : (self->getArticleCount() / 100));
|
||||
unsigned int articleCount = self->getArticleCount();
|
||||
unsigned int currentProgression = self->getProgression();
|
||||
float stepSize = articleCount / 100;
|
||||
|
||||
cout << "Article count: " << articleCount << endl;
|
||||
cout << "Progression step size: " << stepSize << endl;
|
||||
cout << "Curent progression: " << currentProgression << endl;
|
||||
|
||||
self->indexingPrelude(self->getIndexPath());
|
||||
|
||||
while (self->popFromToIndexQueue(token)) {
|
||||
|
@ -198,10 +207,16 @@ namespace kiwix {
|
|||
token.wordCount
|
||||
);
|
||||
|
||||
if (++indexedArticleCount % stepSize == 0) {
|
||||
self->setProgression(self->getProgression() + 1);
|
||||
indexedArticleCount += 1;
|
||||
|
||||
/* Update the progression counter (in percent) */
|
||||
if ((unsigned int)((float)indexedArticleCount/(float)articleCount*100) > currentProgression) {
|
||||
self->setProgression((unsigned int)((float)indexedArticleCount/(float)articleCount*100));
|
||||
currentProgression = self->getProgression();
|
||||
cout << indexedArticleCount << " articles indexed, that means a progression of " << currentProgression << endl;
|
||||
}
|
||||
|
||||
/* Flush to the hard disk every 10,000 articles */
|
||||
if (indexedArticleCount % 10000 == 0) {
|
||||
self->flush();
|
||||
}
|
||||
|
@ -211,6 +226,7 @@ namespace kiwix {
|
|||
}
|
||||
self->setProgression(100);
|
||||
self->indexingPostlude();
|
||||
sleep(1);
|
||||
self->articleIndexerRunning(false);
|
||||
pthread_exit(NULL);
|
||||
return NULL;
|
||||
|
|
|
@ -79,6 +79,7 @@ namespace kiwix {
|
|||
string counterUrl = "/M/Counter";
|
||||
|
||||
this->getContentByUrl(counterUrl, content, contentLength, mimeType);
|
||||
cout << "Counter URL: " << content << endl;
|
||||
stringstream ssContent(content);
|
||||
|
||||
while(getline(ssContent, item, ';')) {
|
||||
|
@ -99,12 +100,14 @@ namespace kiwix {
|
|||
std::map<std::string, unsigned int> counterMap = this->parseCounterMetadata();
|
||||
unsigned int counter = 0;
|
||||
|
||||
if (counterMap.empty())
|
||||
if (counterMap.empty()) {
|
||||
counter = this->articleCount;
|
||||
else {
|
||||
cout << "Article count from offsets: " << counter << endl;
|
||||
} else {
|
||||
std::map<std::string, unsigned int>::const_iterator it = counterMap.find("text/html");
|
||||
if (it != counterMap.end())
|
||||
counter = it->second;
|
||||
cout << "Article count from metadata: " << counter << endl;
|
||||
}
|
||||
|
||||
return counter;
|
||||
|
|
|
@ -31,7 +31,6 @@ namespace kiwix {
|
|||
|
||||
public:
|
||||
XapianIndexer();
|
||||
~XapianIndexer();
|
||||
|
||||
protected:
|
||||
void indexingPrelude(const string &indexPath);
|
||||
|
|
Loading…
Reference in New Issue