mirror of https://github.com/kiwix/libkiwix.git
+ fix synchronization bug in the multithreaded indexer
This commit is contained in:
parent
3a884083de
commit
3e24b168da
|
@ -85,10 +85,6 @@ namespace kiwix {
|
||||||
zim::Article currentArticle;
|
zim::Article currentArticle;
|
||||||
|
|
||||||
while (currentOffset < lastOffset) {
|
while (currentOffset < lastOffset) {
|
||||||
if (self->getVerboseFlag()) {
|
|
||||||
std::cout << "Extracting article with offset: " << currentOffset << std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
currentArticle = zimHandler->getArticle(currentOffset);
|
currentArticle = zimHandler->getArticle(currentOffset);
|
||||||
|
|
||||||
if (!currentArticle.isRedirect()) {
|
if (!currentArticle.isRedirect()) {
|
||||||
|
@ -410,6 +406,7 @@ namespace kiwix {
|
||||||
std::cout << "Indexing of '" << zimPath << "' starting..." <<std::endl;
|
std::cout << "Indexing of '" << zimPath << "' starting..." <<std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
this->setArticleCount(0);
|
||||||
this->setProgression(0);
|
this->setProgression(0);
|
||||||
this->setZimPath(zimPath);
|
this->setZimPath(zimPath);
|
||||||
this->setIndexPath(indexPath);
|
this->setIndexPath(indexPath);
|
||||||
|
@ -418,9 +415,19 @@ namespace kiwix {
|
||||||
this->articleExtractorRunning(true);
|
this->articleExtractorRunning(true);
|
||||||
pthread_create(&(this->articleExtractor), NULL, Indexer::extractArticles, (void*)this);
|
pthread_create(&(this->articleExtractor), NULL, Indexer::extractArticles, (void*)this);
|
||||||
pthread_detach(this->articleExtractor);
|
pthread_detach(this->articleExtractor);
|
||||||
|
|
||||||
|
while(this->isArticleExtractorRunning() && this->getArticleCount() == 0) {
|
||||||
|
#ifdef _WIN32
|
||||||
|
Sleep(100);
|
||||||
|
#else
|
||||||
|
usleep(100000);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
this->articleParserRunning(true);
|
this->articleParserRunning(true);
|
||||||
pthread_create(&(this->articleParser), NULL, Indexer::parseArticles, (void*)this);
|
pthread_create(&(this->articleParser), NULL, Indexer::parseArticles, (void*)this);
|
||||||
pthread_detach(this->articleParser);
|
pthread_detach(this->articleParser);
|
||||||
|
|
||||||
this->articleIndexerRunning(true);
|
this->articleIndexerRunning(true);
|
||||||
pthread_create(&(this->articleIndexer), NULL, Indexer::indexArticles, (void*)this);
|
pthread_create(&(this->articleIndexer), NULL, Indexer::indexArticles, (void*)this);
|
||||||
pthread_detach(this->articleIndexer);
|
pthread_detach(this->articleIndexer);
|
||||||
|
|
|
@ -48,8 +48,8 @@ namespace kiwix {
|
||||||
this->firstArticleOffset = this->zimFileHandler->getNamespaceBeginOffset('A');
|
this->firstArticleOffset = this->zimFileHandler->getNamespaceBeginOffset('A');
|
||||||
this->lastArticleOffset = this->zimFileHandler->getNamespaceEndOffset('A');
|
this->lastArticleOffset = this->zimFileHandler->getNamespaceEndOffset('A');
|
||||||
this->currentArticleOffset = this->firstArticleOffset;
|
this->currentArticleOffset = this->firstArticleOffset;
|
||||||
this->articleCount = this->zimFileHandler->getNamespaceCount('A');
|
this->nsACount = this->zimFileHandler->getNamespaceCount('A');
|
||||||
this->mediaCount = this->zimFileHandler->getNamespaceCount('I');
|
this->nsICount = this->zimFileHandler->getNamespaceCount('I');
|
||||||
}
|
}
|
||||||
|
|
||||||
/* initialize random seed: */
|
/* initialize random seed: */
|
||||||
|
@ -100,7 +100,7 @@ namespace kiwix {
|
||||||
unsigned int counter = 0;
|
unsigned int counter = 0;
|
||||||
|
|
||||||
if (counterMap.empty()) {
|
if (counterMap.empty()) {
|
||||||
counter = this->articleCount;
|
counter = this->nsACount;
|
||||||
} else {
|
} else {
|
||||||
std::map<std::string, unsigned int>::const_iterator it = counterMap.find("text/html");
|
std::map<std::string, unsigned int>::const_iterator it = counterMap.find("text/html");
|
||||||
if (it != counterMap.end())
|
if (it != counterMap.end())
|
||||||
|
@ -115,7 +115,7 @@ namespace kiwix {
|
||||||
unsigned int counter = 0;
|
unsigned int counter = 0;
|
||||||
|
|
||||||
if (counterMap.empty())
|
if (counterMap.empty())
|
||||||
counter = this->mediaCount;
|
counter = this->nsICount;
|
||||||
else {
|
else {
|
||||||
std::map<std::string, unsigned int>::const_iterator it;
|
std::map<std::string, unsigned int>::const_iterator it;
|
||||||
|
|
||||||
|
@ -169,7 +169,7 @@ namespace kiwix {
|
||||||
/* Return an URL from a title*/
|
/* Return an URL from a title*/
|
||||||
string Reader::getRandomPageUrl() {
|
string Reader::getRandomPageUrl() {
|
||||||
zim::size_type idx = this->firstArticleOffset +
|
zim::size_type idx = this->firstArticleOffset +
|
||||||
(zim::size_type)((double)rand() / ((double)RAND_MAX + 1) * this->articleCount);
|
(zim::size_type)((double)rand() / ((double)RAND_MAX + 1) * this->nsACount);
|
||||||
zim::Article article = zimFileHandler->getArticle(idx);
|
zim::Article article = zimFileHandler->getArticle(idx);
|
||||||
|
|
||||||
return article.getLongUrl().c_str();
|
return article.getLongUrl().c_str();
|
||||||
|
|
|
@ -74,8 +74,8 @@ namespace kiwix {
|
||||||
zim::size_type firstArticleOffset;
|
zim::size_type firstArticleOffset;
|
||||||
zim::size_type lastArticleOffset;
|
zim::size_type lastArticleOffset;
|
||||||
zim::size_type currentArticleOffset;
|
zim::size_type currentArticleOffset;
|
||||||
zim::size_type articleCount;
|
zim::size_type nsACount;
|
||||||
zim::size_type mediaCount;
|
zim::size_type nsICount;
|
||||||
|
|
||||||
std::vector<std::string> suggestions;
|
std::vector<std::string> suggestions;
|
||||||
std::vector<std::string>::iterator suggestionsOffset;
|
std::vector<std::string>::iterator suggestionsOffset;
|
||||||
|
|
Loading…
Reference in New Issue