From fb2dfdd67778723a97e574913c8a609b209308aa Mon Sep 17 00:00:00 2001
From: kelson42
Date: Wed, 20 Jan 2010 19:46:13 +0000
Subject: [PATCH] + factorization of the zim reader code

---
 src/common/kiwix/reader.cpp | 239 ++++++++++++++++++++++++++++++++++++
 src/common/kiwix/reader.h   |  57 ++++++++
 2 files changed, 296 insertions(+)
 create mode 100644 src/common/kiwix/reader.cpp
 create mode 100644 src/common/kiwix/reader.h

diff --git a/src/common/kiwix/reader.cpp b/src/common/kiwix/reader.cpp
new file mode 100644
index 000000000..5e2e2d637
--- /dev/null
+++ b/src/common/kiwix/reader.cpp
@@ -0,0 +1,239 @@
+#include "reader.h"
+
+namespace kiwix {
+
+  /* Upper bound on the redirect hops followed by getContent(), so that a
+     redirect cycle cannot loop forever */
+  static const unsigned int MAX_REDIRECT_HOPS = 42;
+
+  /* Skip any run of '/' found at offset, then return the characters up to
+     (not including) the next '/' or the end of url. On return, offset sits
+     just past the returned segment. Returns "" when nothing is left. */
+  static string nextUrlSegment(const string &url, string::size_type &offset) {
+    const string::size_type begin = url.find_first_not_of('/', offset);
+    if (begin == string::npos) {
+      offset = url.size();
+      return string();
+    }
+
+    string::size_type end = url.find('/', begin);
+    if (end == string::npos) {
+      end = url.size();
+    }
+
+    offset = end;
+    return url.substr(begin, end - begin);
+  }
+
+  /* Constructor: open the ZIM file and cache the bounds and the size of its
+     article namespace ('A'). Any exception raised while opening or querying
+     the file is swallowed: the reader is then left without a file handler,
+     every counter is zero, and the accessors below return empty/false
+     results instead of dereferencing a NULL handler. */
+  Reader::Reader(const string &zimFilePath)
+    : zimFileHandler(NULL),
+      firstArticleOffset(0),
+      lastArticleOffset(0),
+      currentArticleOffset(0),
+      articleCount(0) {
+
+    try {
+      this->zimFileHandler = new zim::File(zimFilePath);
+      this->firstArticleOffset = this->zimFileHandler->getNamespaceBeginOffset('A');
+      this->lastArticleOffset = this->zimFileHandler->getNamespaceEndOffset('A');
+      this->currentArticleOffset = this->firstArticleOffset;
+      this->articleCount = this->zimFileHandler->getNamespaceCount('A');
+    } catch(...) {
+      /* Do not keep a half-initialised reader around */
+      delete this->zimFileHandler;
+      this->zimFileHandler = NULL;
+      this->firstArticleOffset = 0;
+      this->lastArticleOffset = 0;
+      this->currentArticleOffset = 0;
+      this->articleCount = 0;
+    }
+
+    /* Give the suggestion cursor a valid value so that getNextSuggestion()
+       is safe to call before any searchSuggestions() */
+    this->suggestionsOffset = this->suggestions.begin();
+  }
+
+  /* Destructor: release the ZIM file handler (delete on NULL is a no-op) */
+  Reader::~Reader() {
+    delete this->zimFileHandler;
+  }
+
+  /* Reset the article cursor to the first article of the 'A' namespace */
+  void Reader::reset() {
+    this->currentArticleOffset = this->firstArticleOffset;
+  }
+
+  /* Get the count of articles which can be indexed/displayed */
+  unsigned int Reader::getArticleCount() {
+    return this->articleCount;
+  }
+
+  /* Return the UID of the ZIM file, or "" if no file is open */
+  string Reader::getId() {
+    if (this->zimFileHandler == NULL) {
+      return "";
+    }
+
+    return string(this->zimFileHandler->getFileheader().getUuid().data,
+                  this->zimFileHandler->getFileheader().getUuid().size());
+  }
+
+  /* Return the URL of a randomly picked article, or "" if no file is open
+     or the file holds no article. The pick relies on rand(); this file
+     never calls srand(). */
+  string Reader::getRandomPageUrl() {
+    if (this->zimFileHandler == NULL || this->articleCount == 0) {
+      return "";
+    }
+
+    /* rand() / (RAND_MAX + 1) lies in [0, 1), which keeps the scaled value
+       below articleCount */
+    const double fraction = static_cast<double>(rand()) / (static_cast<double>(RAND_MAX) + 1);
+    const zim::size_type idx = this->firstArticleOffset
+      + static_cast<zim::size_type>(fraction * this->articleCount);
+
+    zim::Article article = this->zimFileHandler->getArticle(idx);
+    return article.getLongUrl().c_str();
+  }
+
+  /* Return the welcome page URL, or "" if no file is open or the file
+     header declares no main page */
+  string Reader::getMainPageUrl() {
+    string url = "";
+
+    if (this->zimFileHandler != NULL
+        && this->zimFileHandler->getFileheader().hasMainPage()) {
+      zim::Article article =
+        this->zimFileHandler->getArticle(this->zimFileHandler->getFileheader().getMainPage());
+      url = article.getLongUrl();
+    }
+
+    return url;
+  }
+
+  /* Get a content from a zim file.
+
+     urlStr is read as "/<namespace>/<title>": leading and repeated '/' are
+     skipped, only the first character of <namespace> takes part in the
+     lookup and <title> ends at the next '/'. Redirects are followed for at
+     most MAX_REDIRECT_HOPS hops.
+
+     Returns true and fills content, contentLength and contentType when the
+     article is found. Returns false, with content and contentType emptied
+     and contentLength set to 0, when no file is open, the article is not
+     found or the lookup throws. */
+  bool Reader::getContent(const string &urlStr, string &content, unsigned int &contentLength, string &contentType) {
+
+    /* Parse a private copy, cut at the first NUL byte, before touching the
+       output parameters: urlStr may alias one of them. Segments are plain
+       strings, so a long url cannot overrun a fixed-size buffer. */
+    const string url(urlStr.c_str());
+    string::size_type offset = 0;
+    const string ns = nextUrlSegment(url, offset);
+    const string title = nextUrlSegment(url, offset);
+
+    /* Only the first character of the namespace takes part in the lookup */
+    const char nsChar = ns.empty() ? '\0' : ns[0];
+
+    /* Start from the "not found" state so that every failure path reports
+       the same thing */
+    content = "";
+    contentType = "";
+    contentLength = 0;
+
+    if (this->zimFileHandler == NULL) {
+      return false;
+    }
+
+    /* Extract the content from the zim file */
+    try {
+      std::pair<bool, zim::File::const_iterator> resultPair =
+        this->zimFileHandler->findx(nsChar, title.c_str());
+
+      /* Test if the article was found */
+      if (!resultPair.first) {
+        return false;
+      }
+
+      /* Get the article */
+      zim::Article article = this->zimFileHandler->getArticle(resultPair.second.getIndex());
+
+      /* If redirect */
+      for (unsigned int hops = 0; article.isRedirect() && hops < MAX_REDIRECT_HOPS; ++hops) {
+        article = article.getRedirectArticle();
+      }
+
+      /* Build the result in locals first so that a throwing accessor cannot
+         leave the output parameters half filled */
+      string mimeType(article.getMimeType().data(), article.getMimeType().size());
+      const unsigned int dataLength = article.getArticleSize();
+      string data(article.getData().data(), dataLength);
+
+      contentType.swap(mimeType);
+      content.swap(data);
+      contentLength = dataLength;
+    } catch(...) {
+      content = "";
+      contentType = "";
+      contentLength = 0;
+      return false;
+    }
+
+    return true;
+  }
+
+  /* Search titles by prefix.
+
+     Replaces the stored suggestions with up to suggestionsCount titles of
+     the 'A' namespace which start with prefix, then rewinds the cursor read
+     by getNextSuggestion(). Returns false when prefix is empty or no file
+     is open, true otherwise (even if nothing matched). */
+  bool Reader::searchSuggestions(const string &prefix, unsigned int suggestionsCount) {
+
+    bool retVal = false;
+
+    /* Reset the suggestions */
+    this->suggestions.clear();
+
+    if (this->zimFileHandler != NULL && !prefix.empty()) {
+      for (zim::File::const_iterator it = this->zimFileHandler->findByTitle('A', prefix);
+           it != this->zimFileHandler->end() && this->suggestions.size() < suggestionsCount;
+           ++it) {
+
+        /* Stop at the first title which does not start with the prefix */
+        const string title = it->getTitle();
+        if (title.compare(0, prefix.size(), prefix) != 0) {
+          break;
+        }
+
+        this->suggestions.push_back(title);
+      }
+
+      retVal = true;
+    }
+
+    /* Set the cursor to the beginning */
+    this->suggestionsOffset = this->suggestions.begin();
+
+    return retVal;
+  }
+
+  /* Get next suggestion: copy the title under the cursor into title and
+     advance the cursor. Returns false, leaving title untouched, once every
+     suggestion has been handed out. */
+  bool Reader::getNextSuggestion(string &title) {
+    if (this->suggestionsOffset == this->suggestions.end()) {
+      return false;
+    }
+
+    title = *(this->suggestionsOffset);
+    ++this->suggestionsOffset;
+    return true;
+  }
+
+}
diff --git a/src/common/kiwix/reader.h b/src/common/kiwix/reader.h
new file mode 100644
index 000000000..de15ede38
--- /dev/null
+++ b/src/common/kiwix/reader.h
@@ -0,0 +1,57 @@
+#ifndef KIWIX_READER_H
+#define KIWIX_READER_H
+
+/* NOTE(review): header list rebuilt from the names reader.cpp and this
+   file use -- confirm the zim paths against the zimlib version in use */
+#include <cstdlib>
+#include <string>
+#include <utility>
+#include <vector>
+#include <zim/zim.h>
+#include <zim/file.h>
+#include <zim/article.h>
+#include <zim/fileiterator.h>
+
+/* NOTE(review): a using-directive in a header leaks std into every includer;
+   kept because existing callers may rely on the unqualified names */
+using namespace std;
+
+namespace kiwix {
+
+  /* Read-only access to one ZIM file: article count, file id, main/random
+     page URLs, content lookup by URL and title suggestions by prefix */
+  class Reader {
+
+  public:
+    Reader(const string &zimFilePath);
+    ~Reader();
+
+    void reset();
+    unsigned int getArticleCount();
+    string getId();
+    string getRandomPageUrl();
+    string getMainPageUrl();
+    bool getContent(const string &url, string &content, unsigned int &contentLength, string &contentType);
+    bool searchSuggestions(const string &prefix, unsigned int suggestionsCount);
+    bool getNextSuggestion(string &title);
+
+  protected:
+    zim::File* zimFileHandler;
+    zim::size_type firstArticleOffset;
+    zim::size_type lastArticleOffset;
+    zim::size_type currentArticleOffset;
+    zim::size_type articleCount;
+
+    std::vector<std::string> suggestions;
+    std::vector<std::string>::iterator suggestionsOffset;
+
+  private:
+    /* The reader owns zimFileHandler and deletes it in the destructor, so
+       copying would end in a double delete: declared, never defined */
+    Reader(const Reader &);
+    Reader &operator=(const Reader &);
+  };
+
+}
+
+#endif