Permanent fix for bug651.

The Kiwix manager class is back to its original state; an additional function for obtaining origID has been added to the Kiwix::Reader class.
Kiwix-manage and Kiwix-serve back to normal.
This commit is contained in:
Kiran Mathew Koshy 2013-09-20 18:35:20 +05:30
parent 2f89926616
commit 14a4394f6d
4 changed files with 138 additions and 79 deletions

View File

@ -225,7 +225,7 @@ namespace kiwix {
/* Add a book to the library. Return empty string if failed, book id otherwise */ /* Add a book to the library. Return empty string if failed, book id otherwise */
string Manager::addBookFromPathAndGetId(const string pathToOpen, const string pathToSave, string Manager::addBookFromPathAndGetId(const string pathToOpen, const string pathToSave,
const string url, const bool checkMetaData, const string origId) { const string url, const bool checkMetaData) {
kiwix::Book book; kiwix::Book book;
if (this->readBookFromPath(pathToOpen, &book)) { if (this->readBookFromPath(pathToOpen, &book)) {
@ -239,7 +239,6 @@ namespace kiwix {
if (!checkMetaData || if (!checkMetaData ||
(checkMetaData && !book.title.empty() && !book.language.empty() && !book.date.empty())) { (checkMetaData && !book.title.empty() && !book.language.empty() && !book.date.empty())) {
book.url = url; book.url = url;
book.origID=origId;
library.addBook(book); library.addBook(book);
return book.id; return book.id;
} }
@ -249,8 +248,8 @@ namespace kiwix {
} }
/* Wrapper over Manager::addBookFromPath which return a bool instead of a string */ /* Wrapper over Manager::addBookFromPath which return a bool instead of a string */
bool Manager::addBookFromPath(const string pathToOpen, const string pathToSave, const string url, const bool checkMetaData, const string origId) { bool Manager::addBookFromPath(const string pathToOpen, const string pathToSave, const string url, const bool checkMetaData) {
return !(this->addBookFromPathAndGetId(pathToOpen, pathToSave, url, checkMetaData, origId).empty()); return !(this->addBookFromPathAndGetId(pathToOpen, pathToSave, url, checkMetaData).empty());
} }
bool Manager::readBookFromPath(const string path, kiwix::Book *book) { bool Manager::readBookFromPath(const string path, kiwix::Book *book) {
@ -267,7 +266,7 @@ namespace kiwix {
book->creator = reader->getCreator(); book->creator = reader->getCreator();
book->publisher = reader->getPublisher(); book->publisher = reader->getPublisher();
book->title = reader->getTitle(); book->title = reader->getTitle();
book->origID=reader->getOrigID();
std::ostringstream articleCountStream; std::ostringstream articleCountStream;
articleCountStream << reader->getArticleCount(); articleCountStream << reader->getArticleCount();
book->articleCount = articleCountStream.str(); book->articleCount = articleCountStream.str();

View File

@ -56,9 +56,9 @@ namespace kiwix {
bool setBookIndex(const string id, const string path, const supportedIndexType type); bool setBookIndex(const string id, const string path, const supportedIndexType type);
bool setBookPath(const string id, const string path); bool setBookPath(const string id, const string path);
string addBookFromPathAndGetId(const string pathToOpen, const string pathToSave = "", const string url = "", string addBookFromPathAndGetId(const string pathToOpen, const string pathToSave = "", const string url = "",
const bool checkMetaData = false, const string origID=""); const bool checkMetaData = false);
bool addBookFromPath(const string pathToOpen, const string pathToSave = "", const string url = "", bool addBookFromPath(const string pathToOpen, const string pathToSave = "", const string url = "",
const bool checkMetaData = false, const string origID=""); const bool checkMetaData = false);
Library cloneLibrary(); Library cloneLibrary();
bool getBookById(const string id, Book &book); bool getBookById(const string id, Book &book);
bool getCurrentBook(Book &book); bool getCurrentBook(Book &book);

View File

@ -19,6 +19,38 @@
#include "reader.h" #include "reader.h"
/* Return the lowercase hexadecimal digit for the upper four bits of v */
inline char hi(char v) {
  static const char digits[] = "0123456789abcdef";
  /* Go through unsigned char so the shift never operates on a negative value */
  return digits[(static_cast<unsigned char>(v) >> 4) & 0xf];
}

/* Return the lowercase hexadecimal digit for the lower four bits of v */
inline char lo(char v) {
  static const char digits[] = "0123456789abcdef";
  return digits[static_cast<unsigned char>(v) & 0xf];
}

/* Format the first 16 bytes of `in` as lowercase hexadecimal, grouped
 * as 4-2-2-2-6 bytes separated by '-', i.e.
 * "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" (always 36 characters).
 *
 * Bytes beyond the 16th are ignored. If `in` holds fewer than 16 bytes,
 * the missing ones are rendered as "00" instead of being read from
 * past the end of the string. */
std::string hexUUID (std::string in) {
  const std::string::size_type byteCount = 16;

  /* `in` is a by-value copy, so normalising its length here is local.
     Indexing a std::string beyond size() is undefined behaviour. */
  in.resize(byteCount, '\0');

  std::string out;
  out.reserve(36);
  for (std::string::size_type n = 0; n < byteCount; ++n) {
    /* A dash precedes the 5th, 7th, 9th and 11th byte */
    if (n == 4 || n == 6 || n == 8 || n == 10)
      out += '-';
    out += hi(in[n]);
    out += lo(in[n]);
  }
  return out;
}
static char charFromHex(std::string a) { static char charFromHex(std::string a) {
std::istringstream Blat (a); std::istringstream Blat (a);
int Z; int Z;
@ -28,7 +60,7 @@ static char charFromHex(std::string a) {
void unescapeUrl(string &url) { void unescapeUrl(string &url) {
std::string::size_type pos = 0; std::string::size_type pos = 0;
while ((pos = url.find('%', pos + 1)) != std::string::npos && while ((pos = url.find('%', pos + 1)) != std::string::npos &&
pos + 3 <= url.length()) { pos + 3 <= url.length()) {
url.replace(pos, 3, 1, charFromHex(url.substr(pos + 1, 2))); url.replace(pos, 3, 1, charFromHex(url.substr(pos + 1, 2)));
} }
@ -38,14 +70,14 @@ void unescapeUrl(string &url) {
namespace kiwix { namespace kiwix {
/* Constructor */ /* Constructor */
Reader::Reader(const string zimFilePath) Reader::Reader(const string zimFilePath)
: zimFileHandler(NULL) { : zimFileHandler(NULL) {
string tmpZimFilePath = zimFilePath; string tmpZimFilePath = zimFilePath;
/* Remove potential trailing zimaa */ /* Remove potential trailing zimaa */
size_t found = tmpZimFilePath.rfind("zimaa"); size_t found = tmpZimFilePath.rfind("zimaa");
if (found != string::npos && if (found != string::npos &&
tmpZimFilePath.size() > 5 && tmpZimFilePath.size() > 5 &&
found == tmpZimFilePath.size() - 5) { found == tmpZimFilePath.size() - 5) {
tmpZimFilePath.resize(tmpZimFilePath.size() - 2); tmpZimFilePath.resize(tmpZimFilePath.size() - 2);
} }
@ -63,7 +95,7 @@ namespace kiwix {
/* initialize random seed: */ /* initialize random seed: */
srand ( time(NULL) ); srand ( time(NULL) );
} }
/* Destructor */ /* Destructor */
Reader::~Reader() { Reader::~Reader() {
if (this->zimFileHandler != NULL) { if (this->zimFileHandler != NULL) {
@ -74,7 +106,7 @@ namespace kiwix {
zim::File* Reader::getZimFileHandler() { zim::File* Reader::getZimFileHandler() {
return this->zimFileHandler; return this->zimFileHandler;
} }
/* Reset the cursor for GetNextArticle() */ /* Reset the cursor for GetNextArticle() */
void Reader::reset() { void Reader::reset() {
this->currentArticleOffset = this->firstArticleOffset; this->currentArticleOffset = this->firstArticleOffset;
@ -101,12 +133,12 @@ namespace kiwix {
return counters; return counters;
} }
/* Get the count of articles which can be indexed/displayed */ /* Get the count of articles which can be indexed/displayed */
unsigned int Reader::getArticleCount() { unsigned int Reader::getArticleCount() {
std::map<std::string, unsigned int> counterMap = this->parseCounterMetadata(); std::map<std::string, unsigned int> counterMap = this->parseCounterMetadata();
unsigned int counter = 0; unsigned int counter = 0;
if (counterMap.empty()) { if (counterMap.empty()) {
counter = this->nsACount; counter = this->nsACount;
} else { } else {
@ -114,7 +146,7 @@ namespace kiwix {
if (it != counterMap.end()) if (it != counterMap.end())
counter = it->second; counter = it->second;
} }
return counter; return counter;
} }
@ -140,10 +172,10 @@ namespace kiwix {
if (it != counterMap.end()) if (it != counterMap.end())
counter += it->second; counter += it->second;
} }
return counter; return counter;
} }
/* Get the total of all items of a ZIM file, redirects included */ /* Get the total of all items of a ZIM file, redirects included */
unsigned int Reader::getGlobalCount() { unsigned int Reader::getGlobalCount() {
return this->zimFileHandler->getCountArticles(); return this->zimFileHandler->getCountArticles();
@ -155,7 +187,7 @@ namespace kiwix {
s << this->zimFileHandler->getFileheader().getUuid(); s << this->zimFileHandler->getFileheader().getUuid();
return s.str(); return s.str();
} }
/* Return a page url from a title */ /* Return a page url from a title */
bool Reader::getPageUrlFromTitle(const string &title, string &url) { bool Reader::getPageUrlFromTitle(const string &title, string &url) {
/* Extract the content from the zim file */ /* Extract the content from the zim file */
@ -163,7 +195,7 @@ namespace kiwix {
/* Test if the article was found */ /* Test if the article was found */
if (resultPair.first == true) { if (resultPair.first == true) {
/* Get the article */ /* Get the article */
zim::Article article = *resultPair.second; zim::Article article = *resultPair.second;
@ -172,7 +204,7 @@ namespace kiwix {
while (article.isRedirect() && loopCounter++<42) { while (article.isRedirect() && loopCounter++<42) {
article = article.getRedirectArticle(); article = article.getRedirectArticle();
} }
url = article.getLongUrl(); url = article.getLongUrl();
return true; return true;
} }
@ -182,53 +214,53 @@ namespace kiwix {
/* Return an URL from a title*/ /* Return an URL from a title*/
string Reader::getRandomPageUrl() { string Reader::getRandomPageUrl() {
zim::size_type idx = this->firstArticleOffset + zim::size_type idx = this->firstArticleOffset +
(zim::size_type)((double)rand() / ((double)RAND_MAX + 1) * this->nsACount); (zim::size_type)((double)rand() / ((double)RAND_MAX + 1) * this->nsACount);
zim::Article article = zimFileHandler->getArticle(idx); zim::Article article = zimFileHandler->getArticle(idx);
return article.getLongUrl().c_str(); return article.getLongUrl().c_str();
} }
/* Return the welcome page URL */ /* Return the welcome page URL */
string Reader::getMainPageUrl() { string Reader::getMainPageUrl() {
string url = ""; string url = "";
if (this->zimFileHandler->getFileheader().hasMainPage()) { if (this->zimFileHandler->getFileheader().hasMainPage()) {
zim::Article article = zimFileHandler->getArticle(this->zimFileHandler->getFileheader().getMainPage()); zim::Article article = zimFileHandler->getArticle(this->zimFileHandler->getFileheader().getMainPage());
url = article.getLongUrl(); url = article.getLongUrl();
if (url.empty()) { if (url.empty()) {
url = getFirstPageUrl(); url = getFirstPageUrl();
} }
} else { } else {
url = getFirstPageUrl(); url = getFirstPageUrl();
} }
return url; return url;
} }
bool Reader::getFavicon(string &content, string &mimeType) { bool Reader::getFavicon(string &content, string &mimeType) {
unsigned int contentLength = 0; unsigned int contentLength = 0;
this->getContentByUrl( "/-/favicon.png", content, this->getContentByUrl( "/-/favicon.png", content,
contentLength, mimeType); contentLength, mimeType);
if (content.empty()) { if (content.empty()) {
this->getContentByUrl( "/I/favicon.png", content, this->getContentByUrl( "/I/favicon.png", content,
contentLength, mimeType); contentLength, mimeType);
if (content.empty()) { if (content.empty()) {
this->getContentByUrl( "/I/favicon", content, this->getContentByUrl( "/I/favicon", content,
contentLength, mimeType); contentLength, mimeType);
if (content.empty()) { if (content.empty()) {
this->getContentByUrl( "/-/favicon", content, this->getContentByUrl( "/-/favicon", content,
contentLength, mimeType); contentLength, mimeType);
} }
} }
} }
return content.empty() ? false : true; return content.empty() ? false : true;
} }
@ -236,11 +268,11 @@ namespace kiwix {
bool Reader::getMetatag(const string &name, string &value) { bool Reader::getMetatag(const string &name, string &value) {
unsigned int contentLength = 0; unsigned int contentLength = 0;
string contentType = ""; string contentType = "";
return this->getContentByUrl( "/M/" + name, value, return this->getContentByUrl( "/M/" + name, value,
contentLength, contentType); contentLength, contentType);
} }
string Reader::getTitle() { string Reader::getTitle() {
string value; string value;
this->getMetatag("Title", value); this->getMetatag("Title", value);
@ -256,7 +288,7 @@ namespace kiwix {
string Reader::getDescription() { string Reader::getDescription() {
string value; string value;
this->getMetatag("Description", value); this->getMetatag("Description", value);
/* Mediawiki Collection tends to use the "Subtitle" name */ /* Mediawiki Collection tends to use the "Subtitle" name */
if (value.empty()) { if (value.empty()) {
this->getMetatag("Subtitle", value); this->getMetatag("Subtitle", value);
@ -289,34 +321,61 @@ namespace kiwix {
return value; return value;
} }
/* Return the "startfileuid" metadata entry formatted by hexUUID(), or
 * an empty string if that entry is missing or empty.
 *
 * The entry is parsed as a sequence of decimal byte values, each
 * terminated by '\n'. At most 16 values are used; if fewer are present
 * the remaining bytes are zero. */
string Reader::getOrigID() {
  string value;
  this->getMetatag("startfileuid", value);
  if (value.empty())
    return "";

  const unsigned int uuidSize = 16;
  char uuidBytes[uuidSize] = "";  /* zero-filled */
  unsigned int byteCount = 0;
  std::string token;

  /* Stop once 16 bytes are collected so that malformed metadata with
     extra lines cannot write past the end of uuidBytes */
  for (std::string::size_type i = 0;
       i < value.size() && byteCount < uuidSize;
       i++) {
    if (value[i] == '\n') {
      uuidBytes[byteCount++] = atoi(token.c_str());
      token.clear();
    } else {
      token += value[i];
    }
  }

  /* Keep a last value that is not followed by a newline */
  if (!token.empty() && byteCount < uuidSize) {
    uuidBytes[byteCount++] = atoi(token.c_str());
  }

  /* Pass the length explicitly: the buffer is binary data, so the
     C-string conversion would stop at the first zero byte, or run past
     the array when none of the 16 bytes is zero */
  return hexUUID(std::string(uuidBytes, uuidSize));
}
/* Return the first page URL */ /* Return the first page URL */
string Reader::getFirstPageUrl() { string Reader::getFirstPageUrl() {
string url; string url;
zim::size_type firstPageOffset = zimFileHandler->getNamespaceBeginOffset('A'); zim::size_type firstPageOffset = zimFileHandler->getNamespaceBeginOffset('A');
zim::Article article = zimFileHandler->getArticle(firstPageOffset); zim::Article article = zimFileHandler->getArticle(firstPageOffset);
url = article.getLongUrl(); url = article.getLongUrl();
return url; return url;
} }
bool Reader::parseUrl(const string &url, char *ns, string &title) { bool Reader::parseUrl(const string &url, char *ns, string &title) {
/* Offset to visit the url */ /* Offset to visit the url */
unsigned int urlLength = url.size(); unsigned int urlLength = url.size();
unsigned int offset = 0; unsigned int offset = 0;
/* Ignore the '/' */ /* Ignore the '/' */
while ((offset < urlLength) && (url[offset] == '/')) offset++; while ((offset < urlLength) && (url[offset] == '/')) offset++;
/* Get namespace */ /* Get namespace */
while ((offset < urlLength) && (url[offset] != '/')) { while ((offset < urlLength) && (url[offset] != '/')) {
*ns= url[offset]; *ns= url[offset];
offset++; offset++;
} }
/* Ignore the '/' */ /* Ignore the '/' */
while ((offset < urlLength) && (url[offset] == '/')) offset++; while ((offset < urlLength) && (url[offset] == '/')) offset++;
/* Get content title */ /* Get content title */
unsigned int titleOffset = offset; unsigned int titleOffset = offset;
while (offset < urlLength) { while (offset < urlLength) {
@ -338,7 +397,7 @@ namespace kiwix {
contentLength = 0; contentLength = 0;
if (this->zimFileHandler != NULL) { if (this->zimFileHandler != NULL) {
/* Parse the url */ /* Parse the url */
char ns = 0; char ns = 0;
string titleStr; string titleStr;
@ -348,48 +407,48 @@ namespace kiwix {
if (titleStr.empty() && ns == 0) { if (titleStr.empty() && ns == 0) {
this->parseUrl(this->getMainPageUrl(), &ns, titleStr); this->parseUrl(this->getMainPageUrl(), &ns, titleStr);
} }
/* Extract the content from the zim file */ /* Extract the content from the zim file */
std::pair<bool, zim::File::const_iterator> resultPair = zimFileHandler->findx(ns, titleStr); std::pair<bool, zim::File::const_iterator> resultPair = zimFileHandler->findx(ns, titleStr);
/* Test if the article was found */ /* Test if the article was found */
if (resultPair.first == true) { if (resultPair.first == true) {
/* Get the article */ /* Get the article */
zim::Article article = zimFileHandler->getArticle(resultPair.second.getIndex()); zim::Article article = zimFileHandler->getArticle(resultPair.second.getIndex());
/* If redirect */ /* If redirect */
unsigned int loopCounter = 0; unsigned int loopCounter = 0;
while (article.isRedirect() && loopCounter++<42) { while (article.isRedirect() && loopCounter++<42) {
article = article.getRedirectArticle(); article = article.getRedirectArticle();
} }
/* Get the content mime-type */ /* Get the content mime-type */
contentType = string(article.getMimeType().data(), article.getMimeType().size()); contentType = string(article.getMimeType().data(), article.getMimeType().size());
/* Get the data */ /* Get the data */
content = string(article.getData().data(), article.getArticleSize()); content = string(article.getData().data(), article.getArticleSize());
/* Try to set a stub HTML header/footer if necesssary */ /* Try to set a stub HTML header/footer if necesssary */
if (contentType == "text/html" && std::string::npos == content.find("<body>")) { if (contentType == "text/html" && std::string::npos == content.find("<body>")) {
content = "<html><head><title>" + article.getTitle() + "</title><meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" /></head><body>" + content + "</body></html>"; content = "<html><head><title>" + article.getTitle() + "</title><meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" /></head><body>" + content + "</body></html>";
} }
/* Get the data length */ /* Get the data length */
contentLength = article.getArticleSize(); contentLength = article.getArticleSize();
/* Set return value */ /* Set return value */
retVal = true; retVal = true;
} }
} }
return retVal; return retVal;
} }
/* Search titles by prefix */ /* Search titles by prefix */
bool Reader::searchSuggestions(const string &prefix, unsigned int suggestionsCount, const bool reset) { bool Reader::searchSuggestions(const string &prefix, unsigned int suggestionsCount, const bool reset) {
bool retVal = false; bool retVal = false;
zim::File::const_iterator articleItr; zim::File::const_iterator articleItr;
std::vector<std::string>::iterator suggestionItr; std::vector<std::string>::iterator suggestionItr;
int result; int result;
@ -400,16 +459,16 @@ namespace kiwix {
if (prefix.size()) { if (prefix.size()) {
for (articleItr = zimFileHandler->findByTitle('A', prefix); for (articleItr = zimFileHandler->findByTitle('A', prefix);
articleItr != zimFileHandler->end() && articleItr != zimFileHandler->end() &&
articleItr->getTitle().compare(0, prefix.size(), prefix) == 0 && articleItr->getTitle().compare(0, prefix.size(), prefix) == 0 &&
this->suggestions.size() < suggestionsCount ; this->suggestions.size() < suggestionsCount ;
++articleItr) { ++articleItr) {
if (this->suggestions.size() == 0) { if (this->suggestions.size() == 0) {
this->suggestions.push_back(articleItr->getTitle()); this->suggestions.push_back(articleItr->getTitle());
} else { } else {
for (suggestionItr = this->suggestions.begin() ; for (suggestionItr = this->suggestions.begin() ;
suggestionItr != this->suggestions.end(); suggestionItr != this->suggestions.end();
++suggestionItr) { ++suggestionItr) {
result = articleItr->getTitle().compare(*suggestionItr); result = articleItr->getTitle().compare(*suggestionItr);
@ -425,25 +484,25 @@ namespace kiwix {
this->suggestions.push_back(articleItr->getTitle()); this->suggestions.push_back(articleItr->getTitle());
} }
} }
/* Suggestions where found */ /* Suggestions where found */
retVal = true; retVal = true;
} }
} }
/* Set the cursor to the begining */ /* Set the cursor to the begining */
this->suggestionsOffset = this->suggestions.begin(); this->suggestionsOffset = this->suggestions.begin();
return retVal; return retVal;
} }
/* Try also a few variations of the prefix to have better results */ /* Try also a few variations of the prefix to have better results */
bool Reader::searchSuggestionsSmart(const string &prefix, unsigned int suggestionsCount) { bool Reader::searchSuggestionsSmart(const string &prefix, unsigned int suggestionsCount) {
std::string myPrefix = prefix; std::string myPrefix = prefix;
/* Normal suggestion request */ /* Normal suggestion request */
bool retVal = this->searchSuggestions(prefix, suggestionsCount, true); bool retVal = this->searchSuggestions(prefix, suggestionsCount, true);
/* Try with first letter uppercase */ /* Try with first letter uppercase */
myPrefix = kiwix::ucFirst(myPrefix); myPrefix = kiwix::ucFirst(myPrefix);
this->searchSuggestions(myPrefix, suggestionsCount, false); this->searchSuggestions(myPrefix, suggestionsCount, false);
@ -460,10 +519,10 @@ namespace kiwix {
if (this->suggestionsOffset != this->suggestions.end()) { if (this->suggestionsOffset != this->suggestions.end()) {
/* title */ /* title */
title = *(this->suggestionsOffset); title = *(this->suggestionsOffset);
/* increment the cursor for the next call */ /* increment the cursor for the next call */
this->suggestionsOffset++; this->suggestionsOffset++;
return true; return true;
} }
@ -492,7 +551,7 @@ namespace kiwix {
unsigned int Reader::getFileSize() { unsigned int Reader::getFileSize() {
zim::File *file = this->getZimFileHandler(); zim::File *file = this->getZimFileHandler();
zim::offset_type size = 0; zim::offset_type size = 0;
if (file != NULL) { if (file != NULL) {
size = file->getFilesize(); size = file->getFilesize();
} }

View File

@ -38,7 +38,7 @@ using namespace std;
namespace kiwix { namespace kiwix {
class Reader { class Reader {
public: public:
Reader(const string zimFilePath); Reader(const string zimFilePath);
~Reader(); ~Reader();
@ -58,6 +58,7 @@ namespace kiwix {
string getDate(); string getDate();
string getCreator(); string getCreator();
string getPublisher(); string getPublisher();
string getOrigID();
bool getFavicon(string &content, string &mimeType); bool getFavicon(string &content, string &mimeType);
bool getPageUrlFromTitle(const string &title, string &url); bool getPageUrlFromTitle(const string &title, string &url);
bool getContentByUrl(const string &url, string &content, unsigned int &contentLength, string &contentType); bool getContentByUrl(const string &url, string &content, unsigned int &contentLength, string &contentType);
@ -69,7 +70,7 @@ namespace kiwix {
bool parseUrl(const string &url, char *ns, string &title); bool parseUrl(const string &url, char *ns, string &title);
unsigned int getFileSize(); unsigned int getFileSize();
zim::File* getZimFileHandler(); zim::File* getZimFileHandler();
protected: protected:
zim::File* zimFileHandler; zim::File* zimFileHandler;
zim::size_type firstArticleOffset; zim::size_type firstArticleOffset;
@ -77,7 +78,7 @@ namespace kiwix {
zim::size_type currentArticleOffset; zim::size_type currentArticleOffset;
zim::size_type nsACount; zim::size_type nsACount;
zim::size_type nsICount; zim::size_type nsICount;
std::vector<std::string> suggestions; std::vector<std::string> suggestions;
std::vector<std::string>::iterator suggestionsOffset; std::vector<std::string>::iterator suggestionsOffset;