From 14a4394f6d8f4cb5d40793ac1be80459e9b89fdd Mon Sep 17 00:00:00 2001
From: Kiran Mathew Koshy <kiranmathewkoshy@gmail.com>
Date: Fri, 20 Sep 2013 18:35:20 +0530
Subject: [PATCH] Permanent fix for bug651. Kiwix manager class is back to
 original state, additional function for obtaining origID has been added to
 Kiwix::Reader class. Kiwix-manage and Kiwix-serve back to normal.

---
 src/common/kiwix/manager.cpp |   9 +-
 src/common/kiwix/manager.h   |   4 +-
 src/common/kiwix/reader.cpp  | 197 +++++++++++++++++++++++------------
 src/common/kiwix/reader.h    |   7 +-
 4 files changed, 138 insertions(+), 79 deletions(-)

diff --git a/src/common/kiwix/manager.cpp b/src/common/kiwix/manager.cpp
index b4f017954..e0da1e423 100644
--- a/src/common/kiwix/manager.cpp
+++ b/src/common/kiwix/manager.cpp
@@ -225,7 +225,7 @@ namespace kiwix {
 
   /* Add a book to the library. Return empty string if failed, book id otherwise */
   string Manager::addBookFromPathAndGetId(const string pathToOpen, const string pathToSave,
-					  const string url, const bool checkMetaData, const string origId) {
+					  const string url, const bool checkMetaData) {
     kiwix::Book book;
 
     if (this->readBookFromPath(pathToOpen, &book)) {
@@ -239,7 +239,6 @@ namespace kiwix {
       if (!checkMetaData ||
 	  (checkMetaData && !book.title.empty() && !book.language.empty() && !book.date.empty())) {
 	book.url = url;
-	book.origID=origId;
 	library.addBook(book);
 	return book.id;
       }
@@ -249,8 +248,8 @@ namespace kiwix {
   }
 
   /* Wrapper over Manager::addBookFromPath which return a bool instead of a string */
-  bool Manager::addBookFromPath(const string pathToOpen, const string pathToSave, const string url, const bool checkMetaData, const string origId) {
-    return !(this->addBookFromPathAndGetId(pathToOpen, pathToSave, url, checkMetaData, origId).empty());
+  bool Manager::addBookFromPath(const string pathToOpen, const string pathToSave, const string url, const bool checkMetaData) {
+    return !(this->addBookFromPathAndGetId(pathToOpen, pathToSave, url, checkMetaData).empty());
   }
 
   bool Manager::readBookFromPath(const string path, kiwix::Book *book) {
@@ -267,7 +266,7 @@ namespace kiwix {
 	book->creator = reader->getCreator();
 	book->publisher = reader->getPublisher();
 	book->title = reader->getTitle();
-
+    book->origID=reader->getOrigID();
 	std::ostringstream articleCountStream;
 	articleCountStream << reader->getArticleCount();
 	book->articleCount = articleCountStream.str();
diff --git a/src/common/kiwix/manager.h b/src/common/kiwix/manager.h
index dc2544866..6190a1c65 100644
--- a/src/common/kiwix/manager.h
+++ b/src/common/kiwix/manager.h
@@ -56,9 +56,9 @@ namespace kiwix {
     bool setBookIndex(const string id, const string path, const supportedIndexType type);
     bool setBookPath(const string id, const string path);
     string addBookFromPathAndGetId(const string pathToOpen, const string pathToSave = "", const string url = "",
-				   const bool checkMetaData = false, const string origID="");
+				   const bool checkMetaData = false);
     bool addBookFromPath(const string pathToOpen, const string pathToSave = "", const string url = "",
-			 const bool checkMetaData = false, const string origID="");
+			 const bool checkMetaData = false);
     Library cloneLibrary();
     bool getBookById(const string id, Book &book);
     bool getCurrentBook(Book &book);
diff --git a/src/common/kiwix/reader.cpp b/src/common/kiwix/reader.cpp
index 2a928c080..c355994ef 100644
--- a/src/common/kiwix/reader.cpp
+++ b/src/common/kiwix/reader.cpp
@@ -19,6 +19,38 @@
 
 #include "reader.h"
 
+/* Lowercase hex digit for the upper four bits of v. */
+inline char hi(char v) {
+    return "0123456789abcdef"[(v >> 4) & 0xf];
+}
+
+/* Lowercase hex digit for the lower four bits of v. */
+inline char lo(char v) {
+    return "0123456789abcdef"[v & 0xf];
+}
+
+/* Format the first 16 bytes of `in` as a lowercase 8-4-4-4-12 hex UUID string.
+   Bytes missing from a shorter input are treated as zero. */
+std::string hexUUID (std::string in) {
+    /* Pad so that every index below is valid even for a short input. */
+    if (in.size() < 16)
+      in.resize(16, '\0');
+
+    /* Byte offsets at which each dash-separated group ends. */
+    const unsigned groupEnds[] = { 4, 6, 8, 10, 16 };
+    std::ostringstream out;
+    unsigned n = 0;
+    for (unsigned g = 0; g < 5; ++g) {
+      if (g != 0)
+        out << '-';
+      for (; n < groupEnds[g]; ++n)
+        out << hi(in[n]) << lo(in[n]);
+    }
+    return out.str();
+}
+
+
+
 static char charFromHex(std::string a) {
   std::istringstream Blat (a);
   int Z;
@@ -28,7 +60,7 @@ static char charFromHex(std::string a) {
 
 void unescapeUrl(string &url) {
   std::string::size_type pos = 0;
-  while ((pos = url.find('%', pos + 1)) != std::string::npos && 
+  while ((pos = url.find('%', pos + 1)) != std::string::npos &&
 	 pos + 3 <= url.length()) {
     url.replace(pos, 3, 1, charFromHex(url.substr(pos + 1, 2)));
   }
@@ -38,14 +70,14 @@ void unescapeUrl(string &url) {
 namespace kiwix {
 
   /* Constructor */
-  Reader::Reader(const string zimFilePath) 
+  Reader::Reader(const string zimFilePath)
     : zimFileHandler(NULL) {
     string tmpZimFilePath = zimFilePath;
 
     /* Remove potential trailing zimaa */
     size_t found = tmpZimFilePath.rfind("zimaa");
-    if (found != string::npos && 
-	tmpZimFilePath.size() > 5 && 
+    if (found != string::npos &&
+	tmpZimFilePath.size() > 5 &&
 	found == tmpZimFilePath.size() - 5) {
       tmpZimFilePath.resize(tmpZimFilePath.size() - 2);
     }
@@ -63,7 +95,7 @@ namespace kiwix {
     /* initialize random seed: */
     srand ( time(NULL) );
   }
-  
+
   /* Destructor */
   Reader::~Reader() {
     if (this->zimFileHandler != NULL) {
@@ -74,7 +106,7 @@ namespace kiwix {
   zim::File* Reader::getZimFileHandler() {
     return this->zimFileHandler;
   }
-  
+
   /* Reset the cursor for GetNextArticle() */
   void Reader::reset() {
     this->currentArticleOffset = this->firstArticleOffset;
@@ -101,12 +133,12 @@ namespace kiwix {
 
     return counters;
   }
-  
+
   /* Get the count of articles which can be indexed/displayed */
   unsigned int Reader::getArticleCount() {
     std::map<std::string, unsigned int> counterMap = this->parseCounterMetadata();
     unsigned int counter = 0;
-    
+
     if (counterMap.empty()) {
       counter = this->nsACount;
     } else {
@@ -114,7 +146,7 @@ namespace kiwix {
       if (it != counterMap.end())
 	counter = it->second;
     }
-    
+
     return counter;
   }
 
@@ -140,10 +172,10 @@ namespace kiwix {
       if (it != counterMap.end())
 	counter += it->second;
     }
-    
+
     return counter;
   }
-  
+
   /* Get the total of all items of a ZIM file, redirects included */
   unsigned int Reader::getGlobalCount() {
     return this->zimFileHandler->getCountArticles();
@@ -155,7 +187,7 @@ namespace kiwix {
     s << this->zimFileHandler->getFileheader().getUuid();
     return  s.str();
   }
-  
+
   /* Return a page url from a title */
   bool Reader::getPageUrlFromTitle(const string &title, string &url) {
     /* Extract the content from the zim file */
@@ -163,7 +195,7 @@ namespace kiwix {
 
     /* Test if the article was found */
     if (resultPair.first == true) {
-      
+
       /* Get the article */
       zim::Article article = *resultPair.second;
 
@@ -172,7 +204,7 @@ namespace kiwix {
       while (article.isRedirect() && loopCounter++<42) {
 	article = article.getRedirectArticle();
       }
-      
+
       url = article.getLongUrl();
       return true;
     }
@@ -182,53 +214,53 @@ namespace kiwix {
 
   /* Return an URL from a title*/
   string Reader::getRandomPageUrl() {
-    zim::size_type idx = this->firstArticleOffset + 
-      (zim::size_type)((double)rand() / ((double)RAND_MAX + 1) * this->nsACount); 
+    zim::size_type idx = this->firstArticleOffset +
+      (zim::size_type)((double)rand() / ((double)RAND_MAX + 1) * this->nsACount);
     zim::Article article = zimFileHandler->getArticle(idx);
 
     return article.getLongUrl().c_str();
   }
-  
+
   /* Return the welcome page URL */
   string Reader::getMainPageUrl() {
     string url = "";
-    
+
     if (this->zimFileHandler->getFileheader().hasMainPage()) {
       zim::Article article = zimFileHandler->getArticle(this->zimFileHandler->getFileheader().getMainPage());
       url = article.getLongUrl();
 
       if (url.empty()) {
-	url = getFirstPageUrl(); 
+	url = getFirstPageUrl();
       }
     } else {
-	url = getFirstPageUrl(); 
+	url = getFirstPageUrl();
     }
-    
+
     return url;
   }
-  
+
   bool Reader::getFavicon(string &content, string &mimeType) {
     unsigned int contentLength = 0;
-    
-    this->getContentByUrl( "/-/favicon.png", content, 
+
+    this->getContentByUrl( "/-/favicon.png", content,
 			   contentLength, mimeType);
-    
+
     if (content.empty()) {
-      this->getContentByUrl( "/I/favicon.png", content, 
+      this->getContentByUrl( "/I/favicon.png", content,
 			     contentLength, mimeType);
 
 
       if (content.empty()) {
-	this->getContentByUrl( "/I/favicon", content, 
+	this->getContentByUrl( "/I/favicon", content,
 			       contentLength, mimeType);
-	
+
 	if (content.empty()) {
-	  this->getContentByUrl( "/-/favicon", content, 
+	  this->getContentByUrl( "/-/favicon", content,
 				 contentLength, mimeType);
 	}
       }
     }
-    
+
     return content.empty() ? false : true;
   }
 
@@ -236,11 +268,11 @@ namespace kiwix {
   bool Reader::getMetatag(const string &name, string &value) {
     unsigned int contentLength = 0;
     string contentType = "";
-    
-    return this->getContentByUrl( "/M/" + name, value, 
+
+    return this->getContentByUrl( "/M/" + name, value,
 				  contentLength, contentType);
   }
-  
+
   string Reader::getTitle() {
     string value;
     this->getMetatag("Title", value);
@@ -256,7 +288,7 @@ namespace kiwix {
   string Reader::getDescription() {
     string value;
     this->getMetatag("Description", value);
-    
+
     /* Mediawiki Collection tends to use the "Subtitle" name */
     if (value.empty()) {
       this->getMetatag("Subtitle", value);
@@ -289,34 +321,61 @@ namespace kiwix {
     return value;
   }
 
+  /* Return the "startfileuid" metadata as a hex UUID string, or "" when the
+     metadata entry is empty. The entry is parsed as newline-terminated decimal
+     byte values; at most 16 are used and missing ones are left as zero. */
+  string Reader::getOrigID() {
+    string value;
+    this->getMetatag("startfileuid", value);
+    if (value.empty())
+      return "";
+
+    char uuidBytes[16] = "";
+    unsigned int byteCount = 0;
+    std::string number;
+    for (std::string::size_type i = 0; i < value.size(); i++) {
+      if (value[i] != '\n') {
+        number += value[i];
+        continue;
+      }
+      /* Stop at 16 values so a malformed entry cannot overflow uuidBytes. */
+      if (byteCount == sizeof(uuidBytes))
+        break;
+      uuidBytes[byteCount++] = atoi(number.c_str());
+      number = "";
+    }
+    /* Pass the length explicitly: a plain char* conversion stops at the first zero byte. */
+    return hexUUID(std::string(uuidBytes, sizeof(uuidBytes)));
+  }
+
   /* Return the first page URL */
   string Reader::getFirstPageUrl() {
     string url;
-    
+
     zim::size_type firstPageOffset = zimFileHandler->getNamespaceBeginOffset('A');
     zim::Article article = zimFileHandler->getArticle(firstPageOffset);
     url = article.getLongUrl();
-    
+
     return url;
   }
-  
+
   bool Reader::parseUrl(const string &url, char *ns, string &title) {
     /* Offset to visit the url */
     unsigned int urlLength = url.size();
     unsigned int offset = 0;
-    
+
     /* Ignore the '/' */
     while ((offset < urlLength) && (url[offset] == '/')) offset++;
-    
+
     /* Get namespace */
     while ((offset < urlLength) && (url[offset] != '/')) {
       *ns= url[offset];
       offset++;
     }
-    
+
     /* Ignore the '/' */
-    while ((offset < urlLength) && (url[offset] == '/')) offset++;  
-      
+    while ((offset < urlLength) && (url[offset] == '/')) offset++;
+
     /* Get content title */
     unsigned int titleOffset = offset;
     while (offset < urlLength) {
@@ -338,7 +397,7 @@ namespace kiwix {
     contentLength = 0;
 
     if (this->zimFileHandler != NULL) {
-      
+
       /* Parse the url */
       char ns = 0;
       string titleStr;
@@ -348,48 +407,48 @@ namespace kiwix {
       if (titleStr.empty() && ns == 0) {
 	this->parseUrl(this->getMainPageUrl(), &ns, titleStr);
       }
-      
+
       /* Extract the content from the zim file */
       std::pair<bool, zim::File::const_iterator> resultPair = zimFileHandler->findx(ns, titleStr);
-      
+
       /* Test if the article was found */
       if (resultPair.first == true) {
-	
+
 	/* Get the article */
 	zim::Article article = zimFileHandler->getArticle(resultPair.second.getIndex());
-	
+
 	/* If redirect */
 	unsigned int loopCounter = 0;
 	while (article.isRedirect() && loopCounter++<42) {
 	  article = article.getRedirectArticle();
 	}
-	
+
 	/* Get the content mime-type */
-	contentType = string(article.getMimeType().data(), article.getMimeType().size()); 
-	
+	contentType = string(article.getMimeType().data(), article.getMimeType().size());
+
 	/* Get the data */
 	content = string(article.getData().data(), article.getArticleSize());
-	
+
 	/* Try to set a stub HTML header/footer if necesssary */
 	if (contentType == "text/html" && std::string::npos == content.find("<body>")) {
 	  content = "<html><head><title>" + article.getTitle() + "</title><meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" /></head><body>" + content + "</body></html>";
 	}
-	
+
 	/* Get the data length */
 	contentLength = article.getArticleSize();
-	
+
 	/* Set return value */
 	retVal = true;
       }
     }
-    
+
     return retVal;
   }
-  
+
   /* Search titles by prefix */
   bool Reader::searchSuggestions(const string &prefix, unsigned int suggestionsCount, const bool reset) {
     bool retVal = false;
-    zim::File::const_iterator articleItr; 
+    zim::File::const_iterator articleItr;
     std::vector<std::string>::iterator suggestionItr;
     int result;
 
@@ -400,16 +459,16 @@ namespace kiwix {
 
     if (prefix.size()) {
       for (articleItr = zimFileHandler->findByTitle('A', prefix);
-	   articleItr != zimFileHandler->end() && 
-	     articleItr->getTitle().compare(0, prefix.size(), prefix) == 0 && 
-	     this->suggestions.size() < suggestionsCount ; 
+	   articleItr != zimFileHandler->end() &&
+	     articleItr->getTitle().compare(0, prefix.size(), prefix) == 0 &&
+	     this->suggestions.size() < suggestionsCount ;
 	   ++articleItr) {
 
 	  if (this->suggestions.size() == 0) {
 	    this->suggestions.push_back(articleItr->getTitle());
 	  } else {
-	    for (suggestionItr = this->suggestions.begin() ; 
-		 suggestionItr != this->suggestions.end(); 
+	    for (suggestionItr = this->suggestions.begin() ;
+		 suggestionItr != this->suggestions.end();
 		 ++suggestionItr) {
 
 	      result = articleItr->getTitle().compare(*suggestionItr);
@@ -425,25 +484,25 @@ namespace kiwix {
 	      this->suggestions.push_back(articleItr->getTitle());
 	    }
 	  }
-	  
+
 	  /* Suggestions where found */
 	  retVal = true;
       }
     }
-    
+
     /* Set the cursor to the begining */
     this->suggestionsOffset = this->suggestions.begin();
-    
+
     return retVal;
   }
-  
+
   /* Try also a few variations of the prefix to have better results */
   bool Reader::searchSuggestionsSmart(const string &prefix, unsigned int suggestionsCount) {
     std::string myPrefix = prefix;
 
     /* Normal suggestion request */
     bool retVal = this->searchSuggestions(prefix, suggestionsCount, true);
-    
+
     /* Try with first letter uppercase */
     myPrefix = kiwix::ucFirst(myPrefix);
     this->searchSuggestions(myPrefix, suggestionsCount, false);
@@ -460,10 +519,10 @@ namespace kiwix {
     if (this->suggestionsOffset != this->suggestions.end()) {
       /* title */
       title = *(this->suggestionsOffset);
-      
+
       /* increment the cursor for the next call */
       this->suggestionsOffset++;
-      
+
       return true;
     }
 
@@ -492,7 +551,7 @@ namespace kiwix {
   unsigned int Reader::getFileSize() {
     zim::File *file = this->getZimFileHandler();
     zim::offset_type size = 0;
-    
+
     if (file != NULL) {
       size = file->getFilesize();
     }
diff --git a/src/common/kiwix/reader.h b/src/common/kiwix/reader.h
index dec5c8be7..5325e5be0 100644
--- a/src/common/kiwix/reader.h
+++ b/src/common/kiwix/reader.h
@@ -38,7 +38,7 @@ using namespace std;
 namespace kiwix {
 
   class Reader {
-    
+
   public:
     Reader(const string zimFilePath);
     ~Reader();
@@ -58,6 +58,7 @@ namespace kiwix {
     string getDate();
     string getCreator();
     string getPublisher();
+    string getOrigID();
     bool getFavicon(string &content, string &mimeType);
     bool getPageUrlFromTitle(const string &title, string &url);
     bool getContentByUrl(const string &url, string &content, unsigned int &contentLength, string &contentType);
@@ -69,7 +70,7 @@ namespace kiwix {
     bool parseUrl(const string &url, char *ns, string &title);
     unsigned int getFileSize();
     zim::File* getZimFileHandler();
-    
+
   protected:
     zim::File* zimFileHandler;
     zim::size_type firstArticleOffset;
@@ -77,7 +78,7 @@ namespace kiwix {
     zim::size_type currentArticleOffset;
     zim::size_type nsACount;
     zim::size_type nsICount;
-    
+
     std::vector<std::string> suggestions;
     std::vector<std::string>::iterator suggestionsOffset;