From 14a4394f6d8f4cb5d40793ac1be80459e9b89fdd Mon Sep 17 00:00:00 2001 From: Kiran Mathew Koshy Date: Fri, 20 Sep 2013 18:35:20 +0530 Subject: [PATCH] Permanent fix for bug651. Kiwix manager class is back to original state, additional function for obtaining origID has been added to Kiwix::Reader class. Kiwix-manage and Kiwix-serve back to normal. --- src/common/kiwix/manager.cpp | 9 +- src/common/kiwix/manager.h | 4 +- src/common/kiwix/reader.cpp | 197 +++++++++++++++++++++++------------ src/common/kiwix/reader.h | 7 +- 4 files changed, 138 insertions(+), 79 deletions(-) diff --git a/src/common/kiwix/manager.cpp b/src/common/kiwix/manager.cpp index b4f017954..e0da1e423 100644 --- a/src/common/kiwix/manager.cpp +++ b/src/common/kiwix/manager.cpp @@ -225,7 +225,7 @@ namespace kiwix { /* Add a book to the library. Return empty string if failed, book id otherwise */ string Manager::addBookFromPathAndGetId(const string pathToOpen, const string pathToSave, - const string url, const bool checkMetaData, const string origId) { + const string url, const bool checkMetaData) { kiwix::Book book; if (this->readBookFromPath(pathToOpen, &book)) { @@ -239,7 +239,6 @@ namespace kiwix { if (!checkMetaData || (checkMetaData && !book.title.empty() && !book.language.empty() && !book.date.empty())) { book.url = url; - book.origID=origId; library.addBook(book); return book.id; } @@ -249,8 +248,8 @@ namespace kiwix { } /* Wrapper over Manager::addBookFromPath which return a bool instead of a string */ - bool Manager::addBookFromPath(const string pathToOpen, const string pathToSave, const string url, const bool checkMetaData, const string origId) { - return !(this->addBookFromPathAndGetId(pathToOpen, pathToSave, url, checkMetaData, origId).empty()); + bool Manager::addBookFromPath(const string pathToOpen, const string pathToSave, const string url, const bool checkMetaData) { + return !(this->addBookFromPathAndGetId(pathToOpen, pathToSave, url, checkMetaData).empty()); } bool 
Manager::readBookFromPath(const string path, kiwix::Book *book) { @@ -267,7 +266,7 @@ namespace kiwix { book->creator = reader->getCreator(); book->publisher = reader->getPublisher(); book->title = reader->getTitle(); - + book->origID=reader->getOrigID(); std::ostringstream articleCountStream; articleCountStream << reader->getArticleCount(); book->articleCount = articleCountStream.str(); diff --git a/src/common/kiwix/manager.h b/src/common/kiwix/manager.h index dc2544866..6190a1c65 100644 --- a/src/common/kiwix/manager.h +++ b/src/common/kiwix/manager.h @@ -56,9 +56,9 @@ namespace kiwix { bool setBookIndex(const string id, const string path, const supportedIndexType type); bool setBookPath(const string id, const string path); string addBookFromPathAndGetId(const string pathToOpen, const string pathToSave = "", const string url = "", - const bool checkMetaData = false, const string origID=""); + const bool checkMetaData = false); bool addBookFromPath(const string pathToOpen, const string pathToSave = "", const string url = "", - const bool checkMetaData = false, const string origID=""); + const bool checkMetaData = false); Library cloneLibrary(); bool getBookById(const string id, Book &book); bool getCurrentBook(Book &book); diff --git a/src/common/kiwix/reader.cpp b/src/common/kiwix/reader.cpp index 2a928c080..c355994ef 100644 --- a/src/common/kiwix/reader.cpp +++ b/src/common/kiwix/reader.cpp @@ -19,6 +19,38 @@ #include "reader.h" +inline char hi(char v) { + char hex[] = "0123456789abcdef"; + return hex[(v >> 4) & 0xf]; +} + +inline char lo(char v) { + char hex[] = "0123456789abcdef"; + return hex[v & 0xf]; +} + +std::string hexUUID (std::string in) { + std::ostringstream out; + for (unsigned n = 0; n < 4; ++n) + out << hi(in[n]) << lo(in[n]); + out << '-'; + for (unsigned n = 4; n < 6; ++n) + out << hi(in[n]) << lo(in[n]); + out << '-'; + for (unsigned n = 6; n < 8; ++n) + out << hi(in[n]) << lo(in[n]); + out << '-'; + for (unsigned n = 8; n < 10; ++n) + out << 
hi(in[n]) << lo(in[n]); + out << '-'; + for (unsigned n = 10; n < 16; ++n) + out << hi(in[n]) << lo(in[n]); + std::string op=out.str(); + return op; +} + + + static char charFromHex(std::string a) { std::istringstream Blat (a); int Z; @@ -28,7 +60,7 @@ static char charFromHex(std::string a) { void unescapeUrl(string &url) { std::string::size_type pos = 0; - while ((pos = url.find('%', pos + 1)) != std::string::npos && + while ((pos = url.find('%', pos + 1)) != std::string::npos && pos + 3 <= url.length()) { url.replace(pos, 3, 1, charFromHex(url.substr(pos + 1, 2))); } @@ -38,14 +70,14 @@ void unescapeUrl(string &url) { namespace kiwix { /* Constructor */ - Reader::Reader(const string zimFilePath) + Reader::Reader(const string zimFilePath) : zimFileHandler(NULL) { string tmpZimFilePath = zimFilePath; /* Remove potential trailing zimaa */ size_t found = tmpZimFilePath.rfind("zimaa"); - if (found != string::npos && - tmpZimFilePath.size() > 5 && + if (found != string::npos && + tmpZimFilePath.size() > 5 && found == tmpZimFilePath.size() - 5) { tmpZimFilePath.resize(tmpZimFilePath.size() - 2); } @@ -63,7 +95,7 @@ namespace kiwix { /* initialize random seed: */ srand ( time(NULL) ); } - + /* Destructor */ Reader::~Reader() { if (this->zimFileHandler != NULL) { @@ -74,7 +106,7 @@ namespace kiwix { zim::File* Reader::getZimFileHandler() { return this->zimFileHandler; } - + /* Reset the cursor for GetNextArticle() */ void Reader::reset() { this->currentArticleOffset = this->firstArticleOffset; @@ -101,12 +133,12 @@ namespace kiwix { return counters; } - + /* Get the count of articles which can be indexed/displayed */ unsigned int Reader::getArticleCount() { std::map counterMap = this->parseCounterMetadata(); unsigned int counter = 0; - + if (counterMap.empty()) { counter = this->nsACount; } else { @@ -114,7 +146,7 @@ namespace kiwix { if (it != counterMap.end()) counter = it->second; } - + return counter; } @@ -140,10 +172,10 @@ namespace kiwix { if (it != 
counterMap.end()) counter += it->second; } - + return counter; } - + /* Get the total of all items of a ZIM file, redirects included */ unsigned int Reader::getGlobalCount() { return this->zimFileHandler->getCountArticles(); @@ -155,7 +187,7 @@ namespace kiwix { s << this->zimFileHandler->getFileheader().getUuid(); return s.str(); } - + /* Return a page url from a title */ bool Reader::getPageUrlFromTitle(const string &title, string &url) { /* Extract the content from the zim file */ @@ -163,7 +195,7 @@ namespace kiwix { /* Test if the article was found */ if (resultPair.first == true) { - + /* Get the article */ zim::Article article = *resultPair.second; @@ -172,7 +204,7 @@ namespace kiwix { while (article.isRedirect() && loopCounter++<42) { article = article.getRedirectArticle(); } - + url = article.getLongUrl(); return true; } @@ -182,53 +214,53 @@ namespace kiwix { /* Return an URL from a title*/ string Reader::getRandomPageUrl() { - zim::size_type idx = this->firstArticleOffset + - (zim::size_type)((double)rand() / ((double)RAND_MAX + 1) * this->nsACount); + zim::size_type idx = this->firstArticleOffset + + (zim::size_type)((double)rand() / ((double)RAND_MAX + 1) * this->nsACount); zim::Article article = zimFileHandler->getArticle(idx); return article.getLongUrl().c_str(); } - + /* Return the welcome page URL */ string Reader::getMainPageUrl() { string url = ""; - + if (this->zimFileHandler->getFileheader().hasMainPage()) { zim::Article article = zimFileHandler->getArticle(this->zimFileHandler->getFileheader().getMainPage()); url = article.getLongUrl(); if (url.empty()) { - url = getFirstPageUrl(); + url = getFirstPageUrl(); } } else { - url = getFirstPageUrl(); + url = getFirstPageUrl(); } - + return url; } - + bool Reader::getFavicon(string &content, string &mimeType) { unsigned int contentLength = 0; - - this->getContentByUrl( "/-/favicon.png", content, + + this->getContentByUrl( "/-/favicon.png", content, contentLength, mimeType); - + if (content.empty()) 
{ - this->getContentByUrl( "/I/favicon.png", content, + this->getContentByUrl( "/I/favicon.png", content, contentLength, mimeType); if (content.empty()) { - this->getContentByUrl( "/I/favicon", content, + this->getContentByUrl( "/I/favicon", content, contentLength, mimeType); - + if (content.empty()) { - this->getContentByUrl( "/-/favicon", content, + this->getContentByUrl( "/-/favicon", content, contentLength, mimeType); } } } - + return content.empty() ? false : true; } @@ -236,11 +268,11 @@ namespace kiwix { bool Reader::getMetatag(const string &name, string &value) { unsigned int contentLength = 0; string contentType = ""; - - return this->getContentByUrl( "/M/" + name, value, + + return this->getContentByUrl( "/M/" + name, value, contentLength, contentType); } - + string Reader::getTitle() { string value; this->getMetatag("Title", value); @@ -256,7 +288,7 @@ namespace kiwix { string Reader::getDescription() { string value; this->getMetatag("Description", value); - + /* Mediawiki Collection tends to use the "Subtitle" name */ if (value.empty()) { this->getMetatag("Subtitle", value); @@ -289,34 +321,61 @@ namespace kiwix { return value; } + string Reader::getOrigID() { + string value; + this->getMetatag("startfileuid", value); + if(value.empty()) + return ""; + std::string id=value; + std::string origID; + std::string temp=""; + unsigned int k=0; + char tempArray[16]=""; + for(unsigned int i=0; igetNamespaceBeginOffset('A'); zim::Article article = zimFileHandler->getArticle(firstPageOffset); url = article.getLongUrl(); - + return url; } - + bool Reader::parseUrl(const string &url, char *ns, string &title) { /* Offset to visit the url */ unsigned int urlLength = url.size(); unsigned int offset = 0; - + /* Ignore the '/' */ while ((offset < urlLength) && (url[offset] == '/')) offset++; - + /* Get namespace */ while ((offset < urlLength) && (url[offset] != '/')) { *ns= url[offset]; offset++; } - + /* Ignore the '/' */ - while ((offset < urlLength) && (url[offset] 
== '/')) offset++; - + while ((offset < urlLength) && (url[offset] == '/')) offset++; + /* Get content title */ unsigned int titleOffset = offset; while (offset < urlLength) { @@ -338,7 +397,7 @@ namespace kiwix { contentLength = 0; if (this->zimFileHandler != NULL) { - + /* Parse the url */ char ns = 0; string titleStr; @@ -348,48 +407,48 @@ namespace kiwix { if (titleStr.empty() && ns == 0) { this->parseUrl(this->getMainPageUrl(), &ns, titleStr); } - + /* Extract the content from the zim file */ std::pair resultPair = zimFileHandler->findx(ns, titleStr); - + /* Test if the article was found */ if (resultPair.first == true) { - + /* Get the article */ zim::Article article = zimFileHandler->getArticle(resultPair.second.getIndex()); - + /* If redirect */ unsigned int loopCounter = 0; while (article.isRedirect() && loopCounter++<42) { article = article.getRedirectArticle(); } - + /* Get the content mime-type */ - contentType = string(article.getMimeType().data(), article.getMimeType().size()); - + contentType = string(article.getMimeType().data(), article.getMimeType().size()); + /* Get the data */ content = string(article.getData().data(), article.getArticleSize()); - + /* Try to set a stub HTML header/footer if necesssary */ if (contentType == "text/html" && std::string::npos == content.find("")) { content = "" + article.getTitle() + "" + content + ""; } - + /* Get the data length */ contentLength = article.getArticleSize(); - + /* Set return value */ retVal = true; } } - + return retVal; } - + /* Search titles by prefix */ bool Reader::searchSuggestions(const string &prefix, unsigned int suggestionsCount, const bool reset) { bool retVal = false; - zim::File::const_iterator articleItr; + zim::File::const_iterator articleItr; std::vector::iterator suggestionItr; int result; @@ -400,16 +459,16 @@ namespace kiwix { if (prefix.size()) { for (articleItr = zimFileHandler->findByTitle('A', prefix); - articleItr != zimFileHandler->end() && - 
articleItr->getTitle().compare(0, prefix.size(), prefix) == 0 && - this->suggestions.size() < suggestionsCount ; + articleItr != zimFileHandler->end() && + articleItr->getTitle().compare(0, prefix.size(), prefix) == 0 && + this->suggestions.size() < suggestionsCount ; ++articleItr) { if (this->suggestions.size() == 0) { this->suggestions.push_back(articleItr->getTitle()); } else { - for (suggestionItr = this->suggestions.begin() ; - suggestionItr != this->suggestions.end(); + for (suggestionItr = this->suggestions.begin() ; + suggestionItr != this->suggestions.end(); ++suggestionItr) { result = articleItr->getTitle().compare(*suggestionItr); @@ -425,25 +484,25 @@ namespace kiwix { this->suggestions.push_back(articleItr->getTitle()); } } - + /* Suggestions where found */ retVal = true; } } - + /* Set the cursor to the begining */ this->suggestionsOffset = this->suggestions.begin(); - + return retVal; } - + /* Try also a few variations of the prefix to have better results */ bool Reader::searchSuggestionsSmart(const string &prefix, unsigned int suggestionsCount) { std::string myPrefix = prefix; /* Normal suggestion request */ bool retVal = this->searchSuggestions(prefix, suggestionsCount, true); - + /* Try with first letter uppercase */ myPrefix = kiwix::ucFirst(myPrefix); this->searchSuggestions(myPrefix, suggestionsCount, false); @@ -460,10 +519,10 @@ namespace kiwix { if (this->suggestionsOffset != this->suggestions.end()) { /* title */ title = *(this->suggestionsOffset); - + /* increment the cursor for the next call */ this->suggestionsOffset++; - + return true; } @@ -492,7 +551,7 @@ namespace kiwix { unsigned int Reader::getFileSize() { zim::File *file = this->getZimFileHandler(); zim::offset_type size = 0; - + if (file != NULL) { size = file->getFilesize(); } diff --git a/src/common/kiwix/reader.h b/src/common/kiwix/reader.h index dec5c8be7..5325e5be0 100644 --- a/src/common/kiwix/reader.h +++ b/src/common/kiwix/reader.h @@ -38,7 +38,7 @@ using namespace std; 
namespace kiwix { class Reader { - + public: Reader(const string zimFilePath); ~Reader(); @@ -58,6 +58,7 @@ namespace kiwix { string getDate(); string getCreator(); string getPublisher(); + string getOrigID(); bool getFavicon(string &content, string &mimeType); bool getPageUrlFromTitle(const string &title, string &url); bool getContentByUrl(const string &url, string &content, unsigned int &contentLength, string &contentType); @@ -69,7 +70,7 @@ namespace kiwix { bool parseUrl(const string &url, char *ns, string &title); unsigned int getFileSize(); zim::File* getZimFileHandler(); - + protected: zim::File* zimFileHandler; zim::size_type firstArticleOffset; @@ -77,7 +78,7 @@ namespace kiwix { zim::size_type currentArticleOffset; zim::size_type nsACount; zim::size_type nsICount; - + std::vector suggestions; std::vector::iterator suggestionsOffset;