+ fix synchronization bug in the multithreaded indexer

This commit is contained in:
kelson42 2012-09-05 15:43:53 +00:00
parent 3a884083de
commit 3e24b168da
3 changed files with 21 additions and 14 deletions

View File

@ -85,10 +85,6 @@ namespace kiwix {
zim::Article currentArticle;
while (currentOffset < lastOffset) {
if (self->getVerboseFlag()) {
std::cout << "Extracting article with offset: " << currentOffset << std::endl;
}
currentArticle = zimHandler->getArticle(currentOffset);
if (!currentArticle.isRedirect()) {
@ -410,6 +406,7 @@ namespace kiwix {
std::cout << "Indexing of '" << zimPath << "' starting..." <<std::endl;
}
this->setArticleCount(0);
this->setProgression(0);
this->setZimPath(zimPath);
this->setIndexPath(indexPath);
@ -418,9 +415,19 @@ namespace kiwix {
this->articleExtractorRunning(true);
pthread_create(&(this->articleExtractor), NULL, Indexer::extractArticles, (void*)this);
pthread_detach(this->articleExtractor);
while(this->isArticleExtractorRunning() && this->getArticleCount() == 0) {
#ifdef _WIN32
Sleep(100);
#else
usleep(100000);
#endif
}
this->articleParserRunning(true);
pthread_create(&(this->articleParser), NULL, Indexer::parseArticles, (void*)this);
pthread_detach(this->articleParser);
this->articleIndexerRunning(true);
pthread_create(&(this->articleIndexer), NULL, Indexer::indexArticles, (void*)this);
pthread_detach(this->articleIndexer);

View File

@ -48,8 +48,8 @@ namespace kiwix {
this->firstArticleOffset = this->zimFileHandler->getNamespaceBeginOffset('A');
this->lastArticleOffset = this->zimFileHandler->getNamespaceEndOffset('A');
this->currentArticleOffset = this->firstArticleOffset;
this->articleCount = this->zimFileHandler->getNamespaceCount('A');
this->mediaCount = this->zimFileHandler->getNamespaceCount('I');
this->nsACount = this->zimFileHandler->getNamespaceCount('A');
this->nsICount = this->zimFileHandler->getNamespaceCount('I');
}
/* initialize random seed: */
@ -100,7 +100,7 @@ namespace kiwix {
unsigned int counter = 0;
if (counterMap.empty()) {
counter = this->articleCount;
counter = this->nsACount;
} else {
std::map<std::string, unsigned int>::const_iterator it = counterMap.find("text/html");
if (it != counterMap.end())
@ -115,7 +115,7 @@ namespace kiwix {
unsigned int counter = 0;
if (counterMap.empty())
counter = this->mediaCount;
counter = this->nsICount;
else {
std::map<std::string, unsigned int>::const_iterator it;
@ -169,7 +169,7 @@ namespace kiwix {
/* Return an URL from a title*/
string Reader::getRandomPageUrl() {
zim::size_type idx = this->firstArticleOffset +
(zim::size_type)((double)rand() / ((double)RAND_MAX + 1) * this->articleCount);
(zim::size_type)((double)rand() / ((double)RAND_MAX + 1) * this->nsACount);
zim::Article article = zimFileHandler->getArticle(idx);
return article.getLongUrl().c_str();

View File

@ -74,8 +74,8 @@ namespace kiwix {
zim::size_type firstArticleOffset;
zim::size_type lastArticleOffset;
zim::size_type currentArticleOffset;
zim::size_type articleCount;
zim::size_type mediaCount;
zim::size_type nsACount;
zim::size_type nsICount;
std::vector<std::string> suggestions;
std::vector<std::string>::iterator suggestionsOffset;