+ fix synchronization bug in the multithreaded indexer

This commit is contained in:
kelson42 2012-09-05 15:43:53 +00:00
parent 3a884083de
commit 3e24b168da
3 changed files with 21 additions and 14 deletions

View File

@ -85,10 +85,6 @@ namespace kiwix {
zim::Article currentArticle; zim::Article currentArticle;
while (currentOffset < lastOffset) { while (currentOffset < lastOffset) {
if (self->getVerboseFlag()) {
std::cout << "Extracting article with offset: " << currentOffset << std::endl;
}
currentArticle = zimHandler->getArticle(currentOffset); currentArticle = zimHandler->getArticle(currentOffset);
if (!currentArticle.isRedirect()) { if (!currentArticle.isRedirect()) {
@ -410,6 +406,7 @@ namespace kiwix {
std::cout << "Indexing of '" << zimPath << "' starting..." <<std::endl; std::cout << "Indexing of '" << zimPath << "' starting..." <<std::endl;
} }
this->setArticleCount(0);
this->setProgression(0); this->setProgression(0);
this->setZimPath(zimPath); this->setZimPath(zimPath);
this->setIndexPath(indexPath); this->setIndexPath(indexPath);
@ -418,9 +415,19 @@ namespace kiwix {
this->articleExtractorRunning(true); this->articleExtractorRunning(true);
pthread_create(&(this->articleExtractor), NULL, Indexer::extractArticles, (void*)this); pthread_create(&(this->articleExtractor), NULL, Indexer::extractArticles, (void*)this);
pthread_detach(this->articleExtractor); pthread_detach(this->articleExtractor);
while(this->isArticleExtractorRunning() && this->getArticleCount() == 0) {
#ifdef _WIN32
Sleep(100);
#else
usleep(100000);
#endif
}
this->articleParserRunning(true); this->articleParserRunning(true);
pthread_create(&(this->articleParser), NULL, Indexer::parseArticles, (void*)this); pthread_create(&(this->articleParser), NULL, Indexer::parseArticles, (void*)this);
pthread_detach(this->articleParser); pthread_detach(this->articleParser);
this->articleIndexerRunning(true); this->articleIndexerRunning(true);
pthread_create(&(this->articleIndexer), NULL, Indexer::indexArticles, (void*)this); pthread_create(&(this->articleIndexer), NULL, Indexer::indexArticles, (void*)this);
pthread_detach(this->articleIndexer); pthread_detach(this->articleIndexer);

View File

@ -48,8 +48,8 @@ namespace kiwix {
this->firstArticleOffset = this->zimFileHandler->getNamespaceBeginOffset('A'); this->firstArticleOffset = this->zimFileHandler->getNamespaceBeginOffset('A');
this->lastArticleOffset = this->zimFileHandler->getNamespaceEndOffset('A'); this->lastArticleOffset = this->zimFileHandler->getNamespaceEndOffset('A');
this->currentArticleOffset = this->firstArticleOffset; this->currentArticleOffset = this->firstArticleOffset;
this->articleCount = this->zimFileHandler->getNamespaceCount('A'); this->nsACount = this->zimFileHandler->getNamespaceCount('A');
this->mediaCount = this->zimFileHandler->getNamespaceCount('I'); this->nsICount = this->zimFileHandler->getNamespaceCount('I');
} }
/* initialize random seed: */ /* initialize random seed: */
@ -100,7 +100,7 @@ namespace kiwix {
unsigned int counter = 0; unsigned int counter = 0;
if (counterMap.empty()) { if (counterMap.empty()) {
counter = this->articleCount; counter = this->nsACount;
} else { } else {
std::map<std::string, unsigned int>::const_iterator it = counterMap.find("text/html"); std::map<std::string, unsigned int>::const_iterator it = counterMap.find("text/html");
if (it != counterMap.end()) if (it != counterMap.end())
@ -115,7 +115,7 @@ namespace kiwix {
unsigned int counter = 0; unsigned int counter = 0;
if (counterMap.empty()) if (counterMap.empty())
counter = this->mediaCount; counter = this->nsICount;
else { else {
std::map<std::string, unsigned int>::const_iterator it; std::map<std::string, unsigned int>::const_iterator it;
@ -169,7 +169,7 @@ namespace kiwix {
/* Return an URL from a title*/ /* Return an URL from a title*/
string Reader::getRandomPageUrl() { string Reader::getRandomPageUrl() {
zim::size_type idx = this->firstArticleOffset + zim::size_type idx = this->firstArticleOffset +
(zim::size_type)((double)rand() / ((double)RAND_MAX + 1) * this->articleCount); (zim::size_type)((double)rand() / ((double)RAND_MAX + 1) * this->nsACount);
zim::Article article = zimFileHandler->getArticle(idx); zim::Article article = zimFileHandler->getArticle(idx);
return article.getLongUrl().c_str(); return article.getLongUrl().c_str();

View File

@ -74,8 +74,8 @@ namespace kiwix {
zim::size_type firstArticleOffset; zim::size_type firstArticleOffset;
zim::size_type lastArticleOffset; zim::size_type lastArticleOffset;
zim::size_type currentArticleOffset; zim::size_type currentArticleOffset;
zim::size_type articleCount; zim::size_type nsACount;
zim::size_type mediaCount; zim::size_type nsICount;
std::vector<std::string> suggestions; std::vector<std::string> suggestions;
std::vector<std::string>::iterator suggestionsOffset; std::vector<std::string>::iterator suggestionsOffset;