+ new indexer

This commit is contained in:
kelson42 2012-04-07 14:09:47 +00:00
parent 62daa9ffe5
commit 65b4015f03
3 changed files with 25 additions and 7 deletions

View File

@ -61,7 +61,9 @@ namespace kiwix {
/* Get the number of articles to index */ /* Get the number of articles to index */
kiwix::Reader reader(self->getZimPath()); kiwix::Reader reader(self->getZimPath());
self->setArticleCount(reader.getArticleCount()); unsigned int articleCount = reader.getArticleCount();
self->setArticleCount(articleCount);
cout << "Article Count from reader: " << articleCount << endl;
/* Goes through all articles */ /* Goes through all articles */
zim::File *zimHandler = reader.getZimFileHandler(); zim::File *zimHandler = reader.getZimFileHandler();
@ -184,7 +186,14 @@ namespace kiwix {
indexerToken token; indexerToken token;
unsigned indexedArticleCount = 0; unsigned indexedArticleCount = 0;
unsigned int stepSize = ((self->getArticleCount() / 100) < 1 ? 1 : (self->getArticleCount() / 100)); unsigned int articleCount = self->getArticleCount();
unsigned int currentProgression = self->getProgression();
float stepSize = articleCount / 100;
cout << "Article count: " << articleCount << endl;
cout << "Progression step size: " << stepSize << endl;
cout << "Curent progression: " << currentProgression << endl;
self->indexingPrelude(self->getIndexPath()); self->indexingPrelude(self->getIndexPath());
while (self->popFromToIndexQueue(token)) { while (self->popFromToIndexQueue(token)) {
@ -198,10 +207,16 @@ namespace kiwix {
token.wordCount token.wordCount
); );
if (++indexedArticleCount % stepSize == 0) { indexedArticleCount += 1;
self->setProgression(self->getProgression() + 1);
/* Update the progression counter (in percent) */
if ((unsigned int)((float)indexedArticleCount/(float)articleCount*100) > currentProgression) {
self->setProgression((unsigned int)((float)indexedArticleCount/(float)articleCount*100));
currentProgression = self->getProgression();
cout << indexedArticleCount << " articles indexed, that means a progression of " << currentProgression << endl;
} }
/* Flush the index to disk every 10,000 articles */
if (indexedArticleCount % 10000 == 0) { if (indexedArticleCount % 10000 == 0) {
self->flush(); self->flush();
} }
@ -211,6 +226,7 @@ namespace kiwix {
} }
self->setProgression(100); self->setProgression(100);
self->indexingPostlude(); self->indexingPostlude();
sleep(1);
self->articleIndexerRunning(false); self->articleIndexerRunning(false);
pthread_exit(NULL); pthread_exit(NULL);
return NULL; return NULL;

View File

@ -79,6 +79,7 @@ namespace kiwix {
string counterUrl = "/M/Counter"; string counterUrl = "/M/Counter";
this->getContentByUrl(counterUrl, content, contentLength, mimeType); this->getContentByUrl(counterUrl, content, contentLength, mimeType);
cout << "Counter URL: " << content << endl;
stringstream ssContent(content); stringstream ssContent(content);
while(getline(ssContent, item, ';')) { while(getline(ssContent, item, ';')) {
@ -99,12 +100,14 @@ namespace kiwix {
std::map<std::string, unsigned int> counterMap = this->parseCounterMetadata(); std::map<std::string, unsigned int> counterMap = this->parseCounterMetadata();
unsigned int counter = 0; unsigned int counter = 0;
if (counterMap.empty()) if (counterMap.empty()) {
counter = this->articleCount; counter = this->articleCount;
else { cout << "Article count from offsets: " << counter << endl;
} else {
std::map<std::string, unsigned int>::const_iterator it = counterMap.find("text/html"); std::map<std::string, unsigned int>::const_iterator it = counterMap.find("text/html");
if (it != counterMap.end()) if (it != counterMap.end())
counter = it->second; counter = it->second;
cout << "Article count from metadata: " << counter << endl;
} }
return counter; return counter;

View File

@ -31,7 +31,6 @@ namespace kiwix {
public: public:
XapianIndexer(); XapianIndexer();
~XapianIndexer();
protected: protected:
void indexingPrelude(const string &indexPath); void indexingPrelude(const string &indexPath);