mirror of https://github.com/kiwix/libkiwix.git
catching up with master
This commit is contained in:
commit
fd9c9ac17e
|
@ -67,6 +67,7 @@ namespace kiwix {
|
||||||
string publisher;
|
string publisher;
|
||||||
string date;
|
string date;
|
||||||
string url;
|
string url;
|
||||||
|
string origId;
|
||||||
string articleCount;
|
string articleCount;
|
||||||
string mediaCount;
|
string mediaCount;
|
||||||
bool readOnly;
|
bool readOnly;
|
||||||
|
|
|
@ -56,6 +56,7 @@ namespace kiwix {
|
||||||
book.creator = bookNode.attribute("creator").value();
|
book.creator = bookNode.attribute("creator").value();
|
||||||
book.publisher = bookNode.attribute("publisher").value();
|
book.publisher = bookNode.attribute("publisher").value();
|
||||||
book.url = bookNode.attribute("url").value();
|
book.url = bookNode.attribute("url").value();
|
||||||
|
book.origId = bookNode.attribute("origId").value();
|
||||||
book.articleCount = bookNode.attribute("articleCount").value();
|
book.articleCount = bookNode.attribute("articleCount").value();
|
||||||
book.mediaCount = bookNode.attribute("mediaCount").value();
|
book.mediaCount = bookNode.attribute("mediaCount").value();
|
||||||
book.size = bookNode.attribute("size").value();
|
book.size = bookNode.attribute("size").value();
|
||||||
|
@ -154,41 +155,46 @@ namespace kiwix {
|
||||||
bookNode.append_attribute("indexType") = "xapian";
|
bookNode.append_attribute("indexType") = "xapian";
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!itr->title.empty())
|
if (itr->origId.empty()) {
|
||||||
bookNode.append_attribute("title") = itr->title.c_str();
|
if (!itr->title.empty())
|
||||||
|
bookNode.append_attribute("title") = itr->title.c_str();
|
||||||
|
|
||||||
|
if (!itr->description.empty())
|
||||||
|
bookNode.append_attribute("description") = itr->description.c_str();
|
||||||
|
|
||||||
|
if (!itr->language.empty())
|
||||||
|
bookNode.append_attribute("language") = itr->language.c_str();
|
||||||
|
|
||||||
|
if (!itr->creator.empty())
|
||||||
|
bookNode.append_attribute("creator") = itr->creator.c_str();
|
||||||
|
|
||||||
|
if (!itr->publisher.empty())
|
||||||
|
bookNode.append_attribute("publisher") = itr->publisher.c_str();
|
||||||
|
|
||||||
|
if (!itr->favicon.empty())
|
||||||
|
bookNode.append_attribute("favicon") = itr->favicon.c_str();
|
||||||
|
|
||||||
|
if (!itr->faviconMimeType.empty())
|
||||||
|
bookNode.append_attribute("faviconMimeType") = itr->faviconMimeType.c_str();
|
||||||
|
}
|
||||||
|
|
||||||
if (itr->description != "")
|
if (!itr->date.empty())
|
||||||
bookNode.append_attribute("description") = itr->description.c_str();
|
|
||||||
|
|
||||||
if (itr->language != "")
|
|
||||||
bookNode.append_attribute("language") = itr->language.c_str();
|
|
||||||
|
|
||||||
if (itr->date != "")
|
|
||||||
bookNode.append_attribute("date") = itr->date.c_str();
|
bookNode.append_attribute("date") = itr->date.c_str();
|
||||||
|
|
||||||
if (itr->creator != "")
|
if (!itr->url.empty())
|
||||||
bookNode.append_attribute("creator") = itr->creator.c_str();
|
|
||||||
|
|
||||||
if (itr->publisher != "")
|
|
||||||
bookNode.append_attribute("publisher") = itr->publisher.c_str();
|
|
||||||
|
|
||||||
if (itr->url != "")
|
|
||||||
bookNode.append_attribute("url") = itr->url.c_str();
|
bookNode.append_attribute("url") = itr->url.c_str();
|
||||||
|
|
||||||
if (itr->articleCount != "")
|
if (!itr->origId.empty())
|
||||||
|
bookNode.append_attribute("origId") = itr->origId.c_str();
|
||||||
|
|
||||||
|
if (!itr->articleCount.empty())
|
||||||
bookNode.append_attribute("articleCount") = itr->articleCount.c_str();
|
bookNode.append_attribute("articleCount") = itr->articleCount.c_str();
|
||||||
|
|
||||||
if (itr->mediaCount != "")
|
if (!itr->mediaCount.empty())
|
||||||
bookNode.append_attribute("mediaCount") = itr->mediaCount.c_str();
|
bookNode.append_attribute("mediaCount") = itr->mediaCount.c_str();
|
||||||
|
|
||||||
if (itr->size != "")
|
if (!itr->size.empty())
|
||||||
bookNode.append_attribute("size") = itr->size.c_str();
|
bookNode.append_attribute("size") = itr->size.c_str();
|
||||||
|
|
||||||
if (itr->favicon != "")
|
|
||||||
bookNode.append_attribute("favicon") = itr->favicon.c_str();
|
|
||||||
|
|
||||||
if (itr->faviconMimeType != "")
|
|
||||||
bookNode.append_attribute("faviconMimeType") = itr->faviconMimeType.c_str();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -256,7 +262,7 @@ namespace kiwix {
|
||||||
book->creator = reader->getCreator();
|
book->creator = reader->getCreator();
|
||||||
book->publisher = reader->getPublisher();
|
book->publisher = reader->getPublisher();
|
||||||
book->title = reader->getTitle();
|
book->title = reader->getTitle();
|
||||||
|
book->origId = reader->getOrigId();
|
||||||
std::ostringstream articleCountStream;
|
std::ostringstream articleCountStream;
|
||||||
articleCountStream << reader->getArticleCount();
|
articleCountStream << reader->getArticleCount();
|
||||||
book->articleCount = articleCountStream.str();
|
book->articleCount = articleCountStream.str();
|
||||||
|
@ -307,10 +313,12 @@ namespace kiwix {
|
||||||
std::map<string, bool> booksLanguagesMap;
|
std::map<string, bool> booksLanguagesMap;
|
||||||
|
|
||||||
std::sort(library.books.begin(), library.books.end(), kiwix::Book::sortByLanguage);
|
std::sort(library.books.begin(), library.books.end(), kiwix::Book::sortByLanguage);
|
||||||
for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) {
|
for (itr = library.books.begin(); itr != library.books.end(); ++itr) {
|
||||||
if (booksLanguagesMap.find(itr->language) == booksLanguagesMap.end()) {
|
if (booksLanguagesMap.find(itr->language) == booksLanguagesMap.end()) {
|
||||||
booksLanguagesMap[itr->language] = true;
|
if (itr->origId.empty()) {
|
||||||
booksLanguages.push_back(itr->language);
|
booksLanguagesMap[itr->language] = true;
|
||||||
|
booksLanguages.push_back(itr->language);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -323,10 +331,12 @@ namespace kiwix {
|
||||||
std::map<string, bool> booksCreatorsMap;
|
std::map<string, bool> booksCreatorsMap;
|
||||||
|
|
||||||
std::sort(library.books.begin(), library.books.end(), kiwix::Book::sortByCreator);
|
std::sort(library.books.begin(), library.books.end(), kiwix::Book::sortByCreator);
|
||||||
for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) {
|
for (itr = library.books.begin(); itr != library.books.end(); ++itr) {
|
||||||
if (booksCreatorsMap.find(itr->creator) == booksCreatorsMap.end()) {
|
if (booksCreatorsMap.find(itr->creator) == booksCreatorsMap.end()) {
|
||||||
booksCreatorsMap[itr->creator] = true;
|
if (itr->origId.empty()) {
|
||||||
booksCreators.push_back(itr->creator);
|
booksCreatorsMap[itr->creator] = true;
|
||||||
|
booksCreators.push_back(itr->creator);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -353,8 +363,10 @@ namespace kiwix {
|
||||||
std::sort(library.books.begin(), library.books.end(), kiwix::Book::sortByPublisher);
|
std::sort(library.books.begin(), library.books.end(), kiwix::Book::sortByPublisher);
|
||||||
for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) {
|
for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) {
|
||||||
if (booksPublishersMap.find(itr->publisher) == booksPublishersMap.end()) {
|
if (booksPublishersMap.find(itr->publisher) == booksPublishersMap.end()) {
|
||||||
booksPublishersMap[itr->publisher] = true;
|
if (itr->origId.empty()) {
|
||||||
booksPublishers.push_back(itr->publisher);
|
booksPublishersMap[itr->publisher] = true;
|
||||||
|
booksPublishers.push_back(itr->publisher);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -40,7 +40,7 @@ namespace kiwix {
|
||||||
enum supportedListSortBy { TITLE, SIZE, DATE, CREATOR, PUBLISHER };
|
enum supportedListSortBy { TITLE, SIZE, DATE, CREATOR, PUBLISHER };
|
||||||
|
|
||||||
class Manager {
|
class Manager {
|
||||||
|
|
||||||
public:
|
public:
|
||||||
Manager();
|
Manager();
|
||||||
~Manager();
|
~Manager();
|
||||||
|
@ -55,9 +55,9 @@ namespace kiwix {
|
||||||
string getCurrentBookId();
|
string getCurrentBookId();
|
||||||
bool setBookIndex(const string id, const string path, const supportedIndexType type);
|
bool setBookIndex(const string id, const string path, const supportedIndexType type);
|
||||||
bool setBookPath(const string id, const string path);
|
bool setBookPath(const string id, const string path);
|
||||||
string addBookFromPathAndGetId(const string pathToOpen, const string pathToSave = "", const string url = "",
|
string addBookFromPathAndGetId(const string pathToOpen, const string pathToSave = "", const string url = "",
|
||||||
const bool checkMetaData = false);
|
const bool checkMetaData = false);
|
||||||
bool addBookFromPath(const string pathToOpen, const string pathToSave = "", const string url = "",
|
bool addBookFromPath(const string pathToOpen, const string pathToSave = "", const string url = "",
|
||||||
const bool checkMetaData = false);
|
const bool checkMetaData = false);
|
||||||
Library cloneLibrary();
|
Library cloneLibrary();
|
||||||
bool getBookById(const string id, Book &book);
|
bool getBookById(const string id, Book &book);
|
||||||
|
@ -65,7 +65,7 @@ namespace kiwix {
|
||||||
unsigned int getBookCount(const bool localBooks, const bool remoteBooks);
|
unsigned int getBookCount(const bool localBooks, const bool remoteBooks);
|
||||||
bool updateBookLastOpenDateById(const string id);
|
bool updateBookLastOpenDateById(const string id);
|
||||||
void removeBookPaths();
|
void removeBookPaths();
|
||||||
bool listBooks(const supportedListMode mode, const supportedListSortBy sortBy, const unsigned int maxSize,
|
bool listBooks(const supportedListMode mode, const supportedListSortBy sortBy, const unsigned int maxSize,
|
||||||
const string language, const string creator, const string publisher, const string search);
|
const string language, const string creator, const string publisher, const string search);
|
||||||
vector<string> getBooksLanguages();
|
vector<string> getBooksLanguages();
|
||||||
vector<string> getBooksCreators();
|
vector<string> getBooksCreators();
|
||||||
|
@ -75,10 +75,10 @@ namespace kiwix {
|
||||||
string writableLibraryPath;
|
string writableLibraryPath;
|
||||||
|
|
||||||
vector<std::string> bookIdList;
|
vector<std::string> bookIdList;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
kiwix::Library library;
|
kiwix::Library library;
|
||||||
|
|
||||||
bool readBookFromPath(const string path, Book *book = NULL);
|
bool readBookFromPath(const string path, Book *book = NULL);
|
||||||
bool parseXmlDom(const pugi::xml_document &doc, const bool readOnly, const string libraryPath);
|
bool parseXmlDom(const pugi::xml_document &doc, const bool readOnly, const string libraryPath);
|
||||||
|
|
||||||
|
|
|
@ -19,6 +19,38 @@
|
||||||
|
|
||||||
#include "reader.h"
|
#include "reader.h"
|
||||||
|
|
||||||
|
/* Return the lowercase hexadecimal digit for the high nibble (bits 4-7) of v */
inline char hi(char v) {
  static const char digits[] = "0123456789abcdef";
  const unsigned char byte = static_cast<unsigned char>(v);
  return digits[byte >> 4];
}
|
||||||
|
|
||||||
|
/* Return the lowercase hexadecimal digit for the low nibble (bits 0-3) of v */
inline char lo(char v) {
  static const char digits[] = "0123456789abcdef";
  const unsigned int nibble = static_cast<unsigned char>(v) & 0x0fu;
  return digits[nibble];
}
|
||||||
|
|
||||||
|
std::string hexUUID (std::string in) {
|
||||||
|
std::ostringstream out;
|
||||||
|
for (unsigned n = 0; n < 4; ++n)
|
||||||
|
out << hi(in[n]) << lo(in[n]);
|
||||||
|
out << '-';
|
||||||
|
for (unsigned n = 4; n < 6; ++n)
|
||||||
|
out << hi(in[n]) << lo(in[n]);
|
||||||
|
out << '-';
|
||||||
|
for (unsigned n = 6; n < 8; ++n)
|
||||||
|
out << hi(in[n]) << lo(in[n]);
|
||||||
|
out << '-';
|
||||||
|
for (unsigned n = 8; n < 10; ++n)
|
||||||
|
out << hi(in[n]) << lo(in[n]);
|
||||||
|
out << '-';
|
||||||
|
for (unsigned n = 10; n < 16; ++n)
|
||||||
|
out << hi(in[n]) << lo(in[n]);
|
||||||
|
std::string op=out.str();
|
||||||
|
return op;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
static char charFromHex(std::string a) {
|
static char charFromHex(std::string a) {
|
||||||
std::istringstream Blat (a);
|
std::istringstream Blat (a);
|
||||||
int Z;
|
int Z;
|
||||||
|
@ -28,9 +60,10 @@ static char charFromHex(std::string a) {
|
||||||
|
|
||||||
void unescapeUrl(string &url) {
|
void unescapeUrl(string &url) {
|
||||||
std::string::size_type pos = 0;
|
std::string::size_type pos = 0;
|
||||||
while ((pos = url.find('%', pos + 1)) != std::string::npos &&
|
while ((pos = url.find('%', pos)) != std::string::npos &&
|
||||||
pos + 3 <= url.length()) {
|
pos + 2 < url.length()) {
|
||||||
url.replace(pos, 3, 1, charFromHex(url.substr(pos + 1, 2)));
|
url.replace(pos, 3, 1, charFromHex(url.substr(pos + 1, 2)));
|
||||||
|
++pos;
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -38,14 +71,14 @@ void unescapeUrl(string &url) {
|
||||||
namespace kiwix {
|
namespace kiwix {
|
||||||
|
|
||||||
/* Constructor */
|
/* Constructor */
|
||||||
Reader::Reader(const string zimFilePath)
|
Reader::Reader(const string zimFilePath)
|
||||||
: zimFileHandler(NULL) {
|
: zimFileHandler(NULL) {
|
||||||
string tmpZimFilePath = zimFilePath;
|
string tmpZimFilePath = zimFilePath;
|
||||||
|
|
||||||
/* Remove potential trailing zimaa */
|
/* Remove potential trailing zimaa */
|
||||||
size_t found = tmpZimFilePath.rfind("zimaa");
|
size_t found = tmpZimFilePath.rfind("zimaa");
|
||||||
if (found != string::npos &&
|
if (found != string::npos &&
|
||||||
tmpZimFilePath.size() > 5 &&
|
tmpZimFilePath.size() > 5 &&
|
||||||
found == tmpZimFilePath.size() - 5) {
|
found == tmpZimFilePath.size() - 5) {
|
||||||
tmpZimFilePath.resize(tmpZimFilePath.size() - 2);
|
tmpZimFilePath.resize(tmpZimFilePath.size() - 2);
|
||||||
}
|
}
|
||||||
|
@ -63,7 +96,7 @@ namespace kiwix {
|
||||||
/* initialize random seed: */
|
/* initialize random seed: */
|
||||||
srand ( time(NULL) );
|
srand ( time(NULL) );
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Destructor */
|
/* Destructor */
|
||||||
Reader::~Reader() {
|
Reader::~Reader() {
|
||||||
if (this->zimFileHandler != NULL) {
|
if (this->zimFileHandler != NULL) {
|
||||||
|
@ -74,7 +107,7 @@ namespace kiwix {
|
||||||
zim::File* Reader::getZimFileHandler() {
|
zim::File* Reader::getZimFileHandler() {
|
||||||
return this->zimFileHandler;
|
return this->zimFileHandler;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Reset the cursor for GetNextArticle() */
|
/* Reset the cursor for GetNextArticle() */
|
||||||
void Reader::reset() {
|
void Reader::reset() {
|
||||||
this->currentArticleOffset = this->firstArticleOffset;
|
this->currentArticleOffset = this->firstArticleOffset;
|
||||||
|
@ -101,12 +134,12 @@ namespace kiwix {
|
||||||
|
|
||||||
return counters;
|
return counters;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Get the count of articles which can be indexed/displayed */
|
/* Get the count of articles which can be indexed/displayed */
|
||||||
unsigned int Reader::getArticleCount() {
|
unsigned int Reader::getArticleCount() {
|
||||||
std::map<std::string, unsigned int> counterMap = this->parseCounterMetadata();
|
std::map<std::string, unsigned int> counterMap = this->parseCounterMetadata();
|
||||||
unsigned int counter = 0;
|
unsigned int counter = 0;
|
||||||
|
|
||||||
if (counterMap.empty()) {
|
if (counterMap.empty()) {
|
||||||
counter = this->nsACount;
|
counter = this->nsACount;
|
||||||
} else {
|
} else {
|
||||||
|
@ -114,7 +147,7 @@ namespace kiwix {
|
||||||
if (it != counterMap.end())
|
if (it != counterMap.end())
|
||||||
counter = it->second;
|
counter = it->second;
|
||||||
}
|
}
|
||||||
|
|
||||||
return counter;
|
return counter;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -140,10 +173,10 @@ namespace kiwix {
|
||||||
if (it != counterMap.end())
|
if (it != counterMap.end())
|
||||||
counter += it->second;
|
counter += it->second;
|
||||||
}
|
}
|
||||||
|
|
||||||
return counter;
|
return counter;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Get the total of all items of a ZIM file, redirects included */
|
/* Get the total of all items of a ZIM file, redirects included */
|
||||||
unsigned int Reader::getGlobalCount() {
|
unsigned int Reader::getGlobalCount() {
|
||||||
return this->zimFileHandler->getCountArticles();
|
return this->zimFileHandler->getCountArticles();
|
||||||
|
@ -155,7 +188,7 @@ namespace kiwix {
|
||||||
s << this->zimFileHandler->getFileheader().getUuid();
|
s << this->zimFileHandler->getFileheader().getUuid();
|
||||||
return s.str();
|
return s.str();
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Return a page url from a title */
|
/* Return a page url from a title */
|
||||||
bool Reader::getPageUrlFromTitle(const string &title, string &url) {
|
bool Reader::getPageUrlFromTitle(const string &title, string &url) {
|
||||||
/* Extract the content from the zim file */
|
/* Extract the content from the zim file */
|
||||||
|
@ -163,7 +196,7 @@ namespace kiwix {
|
||||||
|
|
||||||
/* Test if the article was found */
|
/* Test if the article was found */
|
||||||
if (resultPair.first == true) {
|
if (resultPair.first == true) {
|
||||||
|
|
||||||
/* Get the article */
|
/* Get the article */
|
||||||
zim::Article article = *resultPair.second;
|
zim::Article article = *resultPair.second;
|
||||||
|
|
||||||
|
@ -172,7 +205,7 @@ namespace kiwix {
|
||||||
while (article.isRedirect() && loopCounter++<42) {
|
while (article.isRedirect() && loopCounter++<42) {
|
||||||
article = article.getRedirectArticle();
|
article = article.getRedirectArticle();
|
||||||
}
|
}
|
||||||
|
|
||||||
url = article.getLongUrl();
|
url = article.getLongUrl();
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -182,53 +215,53 @@ namespace kiwix {
|
||||||
|
|
||||||
/* Return an URL from a title*/
|
/* Return an URL from a title*/
|
||||||
string Reader::getRandomPageUrl() {
|
string Reader::getRandomPageUrl() {
|
||||||
zim::size_type idx = this->firstArticleOffset +
|
zim::size_type idx = this->firstArticleOffset +
|
||||||
(zim::size_type)((double)rand() / ((double)RAND_MAX + 1) * this->nsACount);
|
(zim::size_type)((double)rand() / ((double)RAND_MAX + 1) * this->nsACount);
|
||||||
zim::Article article = zimFileHandler->getArticle(idx);
|
zim::Article article = zimFileHandler->getArticle(idx);
|
||||||
|
|
||||||
return article.getLongUrl().c_str();
|
return article.getLongUrl().c_str();
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Return the welcome page URL */
|
/* Return the welcome page URL */
|
||||||
string Reader::getMainPageUrl() {
|
string Reader::getMainPageUrl() {
|
||||||
string url = "";
|
string url = "";
|
||||||
|
|
||||||
if (this->zimFileHandler->getFileheader().hasMainPage()) {
|
if (this->zimFileHandler->getFileheader().hasMainPage()) {
|
||||||
zim::Article article = zimFileHandler->getArticle(this->zimFileHandler->getFileheader().getMainPage());
|
zim::Article article = zimFileHandler->getArticle(this->zimFileHandler->getFileheader().getMainPage());
|
||||||
url = article.getLongUrl();
|
url = article.getLongUrl();
|
||||||
|
|
||||||
if (url.empty()) {
|
if (url.empty()) {
|
||||||
url = getFirstPageUrl();
|
url = getFirstPageUrl();
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
url = getFirstPageUrl();
|
url = getFirstPageUrl();
|
||||||
}
|
}
|
||||||
|
|
||||||
return url;
|
return url;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Reader::getFavicon(string &content, string &mimeType) {
|
bool Reader::getFavicon(string &content, string &mimeType) {
|
||||||
unsigned int contentLength = 0;
|
unsigned int contentLength = 0;
|
||||||
|
|
||||||
this->getContentByUrl( "/-/favicon.png", content,
|
this->getContentByUrl( "/-/favicon.png", content,
|
||||||
contentLength, mimeType);
|
contentLength, mimeType);
|
||||||
|
|
||||||
if (content.empty()) {
|
if (content.empty()) {
|
||||||
this->getContentByUrl( "/I/favicon.png", content,
|
this->getContentByUrl( "/I/favicon.png", content,
|
||||||
contentLength, mimeType);
|
contentLength, mimeType);
|
||||||
|
|
||||||
|
|
||||||
if (content.empty()) {
|
if (content.empty()) {
|
||||||
this->getContentByUrl( "/I/favicon", content,
|
this->getContentByUrl( "/I/favicon", content,
|
||||||
contentLength, mimeType);
|
contentLength, mimeType);
|
||||||
|
|
||||||
if (content.empty()) {
|
if (content.empty()) {
|
||||||
this->getContentByUrl( "/-/favicon", content,
|
this->getContentByUrl( "/-/favicon", content,
|
||||||
contentLength, mimeType);
|
contentLength, mimeType);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return content.empty() ? false : true;
|
return content.empty() ? false : true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -236,11 +269,11 @@ namespace kiwix {
|
||||||
bool Reader::getMetatag(const string &name, string &value) {
|
bool Reader::getMetatag(const string &name, string &value) {
|
||||||
unsigned int contentLength = 0;
|
unsigned int contentLength = 0;
|
||||||
string contentType = "";
|
string contentType = "";
|
||||||
|
|
||||||
return this->getContentByUrl( "/M/" + name, value,
|
return this->getContentByUrl( "/M/" + name, value,
|
||||||
contentLength, contentType);
|
contentLength, contentType);
|
||||||
}
|
}
|
||||||
|
|
||||||
string Reader::getTitle() {
|
string Reader::getTitle() {
|
||||||
string value;
|
string value;
|
||||||
this->getMetatag("Title", value);
|
this->getMetatag("Title", value);
|
||||||
|
@ -256,7 +289,7 @@ namespace kiwix {
|
||||||
string Reader::getDescription() {
|
string Reader::getDescription() {
|
||||||
string value;
|
string value;
|
||||||
this->getMetatag("Description", value);
|
this->getMetatag("Description", value);
|
||||||
|
|
||||||
/* Mediawiki Collection tends to use the "Subtitle" name */
|
/* Mediawiki Collection tends to use the "Subtitle" name */
|
||||||
if (value.empty()) {
|
if (value.empty()) {
|
||||||
this->getMetatag("Subtitle", value);
|
this->getMetatag("Subtitle", value);
|
||||||
|
@ -289,34 +322,61 @@ namespace kiwix {
|
||||||
return value;
|
return value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Return the identifier stored in the "startfileuid" metadata entry,
 * formatted by hexUUID(). Returns an empty string when the entry is
 * missing or empty.
 *
 * The entry is parsed as a list of decimal byte values, each terminated
 * by a newline. At most 16 values are consumed; bytes that are not
 * provided stay 0.
 */
string Reader::getOrigId() {
  string value;
  this->getMetatag("startfileuid", value);
  if (value.empty())
    return "";

  const std::string::size_type uuidSize = 16;
  char uuidBytes[uuidSize] = {0};
  std::string::size_type count = 0;
  std::string token;

  /* Stop after uuidSize values: the metadata comes from the ZIM file and
     must not be able to write past the end of uuidBytes */
  for (std::string::size_type i = 0; i < value.size() && count < uuidSize; ++i) {
    if (value[i] == '\n') {
      uuidBytes[count++] = static_cast<char>(atoi(token.c_str()));
      token.clear();
    } else {
      token += value[i];
    }
  }
  /* NOTE(review): a final value that is not followed by '\n' is ignored,
     as before -- confirm against whatever writes "startfileuid" */

  /* Pass an explicit length: the bytes may legitimately contain zeros and
     the buffer is not NUL-terminated */
  return hexUUID(std::string(uuidBytes, uuidSize));
}
|
||||||
|
|
||||||
/* Return the first page URL */
|
/* Return the first page URL */
|
||||||
string Reader::getFirstPageUrl() {
|
string Reader::getFirstPageUrl() {
|
||||||
string url;
|
string url;
|
||||||
|
|
||||||
zim::size_type firstPageOffset = zimFileHandler->getNamespaceBeginOffset('A');
|
zim::size_type firstPageOffset = zimFileHandler->getNamespaceBeginOffset('A');
|
||||||
zim::Article article = zimFileHandler->getArticle(firstPageOffset);
|
zim::Article article = zimFileHandler->getArticle(firstPageOffset);
|
||||||
url = article.getLongUrl();
|
url = article.getLongUrl();
|
||||||
|
|
||||||
return url;
|
return url;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Reader::parseUrl(const string &url, char *ns, string &title) {
|
bool Reader::parseUrl(const string &url, char *ns, string &title) {
|
||||||
/* Offset to visit the url */
|
/* Offset to visit the url */
|
||||||
unsigned int urlLength = url.size();
|
unsigned int urlLength = url.size();
|
||||||
unsigned int offset = 0;
|
unsigned int offset = 0;
|
||||||
|
|
||||||
/* Ignore the '/' */
|
/* Ignore the '/' */
|
||||||
while ((offset < urlLength) && (url[offset] == '/')) offset++;
|
while ((offset < urlLength) && (url[offset] == '/')) offset++;
|
||||||
|
|
||||||
/* Get namespace */
|
/* Get namespace */
|
||||||
while ((offset < urlLength) && (url[offset] != '/')) {
|
while ((offset < urlLength) && (url[offset] != '/')) {
|
||||||
*ns= url[offset];
|
*ns= url[offset];
|
||||||
offset++;
|
offset++;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Ignore the '/' */
|
/* Ignore the '/' */
|
||||||
while ((offset < urlLength) && (url[offset] == '/')) offset++;
|
while ((offset < urlLength) && (url[offset] == '/')) offset++;
|
||||||
|
|
||||||
/* Get content title */
|
/* Get content title */
|
||||||
unsigned int titleOffset = offset;
|
unsigned int titleOffset = offset;
|
||||||
while (offset < urlLength) {
|
while (offset < urlLength) {
|
||||||
|
@ -338,7 +398,7 @@ namespace kiwix {
|
||||||
contentLength = 0;
|
contentLength = 0;
|
||||||
|
|
||||||
if (this->zimFileHandler != NULL) {
|
if (this->zimFileHandler != NULL) {
|
||||||
|
|
||||||
/* Parse the url */
|
/* Parse the url */
|
||||||
char ns = 0;
|
char ns = 0;
|
||||||
string titleStr;
|
string titleStr;
|
||||||
|
@ -348,68 +408,72 @@ namespace kiwix {
|
||||||
if (titleStr.empty() && ns == 0) {
|
if (titleStr.empty() && ns == 0) {
|
||||||
this->parseUrl(this->getMainPageUrl(), &ns, titleStr);
|
this->parseUrl(this->getMainPageUrl(), &ns, titleStr);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Extract the content from the zim file */
|
/* Extract the content from the zim file */
|
||||||
std::pair<bool, zim::File::const_iterator> resultPair = zimFileHandler->findx(ns, titleStr);
|
std::pair<bool, zim::File::const_iterator> resultPair = zimFileHandler->findx(ns, titleStr);
|
||||||
|
|
||||||
/* Test if the article was found */
|
/* Test if the article was found */
|
||||||
if (resultPair.first == true) {
|
if (resultPair.first == true) {
|
||||||
|
|
||||||
/* Get the article */
|
/* Get the article */
|
||||||
zim::Article article = zimFileHandler->getArticle(resultPair.second.getIndex());
|
zim::Article article = zimFileHandler->getArticle(resultPair.second.getIndex());
|
||||||
|
|
||||||
/* If redirect */
|
/* If redirect */
|
||||||
unsigned int loopCounter = 0;
|
unsigned int loopCounter = 0;
|
||||||
while (article.isRedirect() && loopCounter++<42) {
|
while (article.isRedirect() && loopCounter++<42) {
|
||||||
article = article.getRedirectArticle();
|
article = article.getRedirectArticle();
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Get the content mime-type */
|
/* Get the content mime-type */
|
||||||
contentType = string(article.getMimeType().data(), article.getMimeType().size());
|
contentType = string(article.getMimeType().data(), article.getMimeType().size());
|
||||||
|
|
||||||
/* Get the data */
|
/* Get the data */
|
||||||
content = string(article.getData().data(), article.getArticleSize());
|
content = string(article.getData().data(), article.getArticleSize());
|
||||||
|
|
||||||
/* Try to set a stub HTML header/footer if necesssary */
|
/* Try to set a stub HTML header/footer if necesssary */
|
||||||
if (contentType == "text/html" && std::string::npos == content.find("<body>")) {
|
if (contentType == "text/html" && std::string::npos == content.find("<body>")) {
|
||||||
content = "<html><head><title>" + article.getTitle() + "</title><meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" /></head><body>" + content + "</body></html>";
|
content = "<html><head><title>" + article.getTitle() + "</title><meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" /></head><body>" + content + "</body></html>";
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Get the data length */
|
/* Get the data length */
|
||||||
contentLength = article.getArticleSize();
|
contentLength = article.getArticleSize();
|
||||||
|
|
||||||
/* Set return value */
|
/* Set return value */
|
||||||
retVal = true;
|
retVal = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return retVal;
|
return retVal;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Search titles by prefix */
|
/* Search titles by prefix */
|
||||||
bool Reader::searchSuggestions(const string &prefix, unsigned int suggestionsCount, const bool reset) {
|
bool Reader::searchSuggestions(const string &prefix, unsigned int suggestionsCount, const bool reset) {
|
||||||
bool retVal = false;
|
bool retVal = false;
|
||||||
zim::File::const_iterator articleItr;
|
zim::File::const_iterator articleItr;
|
||||||
std::vector<std::string>::iterator suggestionItr;
|
std::vector<std::string>::iterator suggestionItr;
|
||||||
int result;
|
int result;
|
||||||
|
|
||||||
/* Reset the suggestions */
|
/* Reset the suggestions otherwise check if the suggestions number is less than the suggestionsCount */
|
||||||
if (reset) {
|
if (reset) {
|
||||||
this->suggestions.clear();
|
this->suggestions.clear();
|
||||||
|
} else {
|
||||||
|
if (this->suggestions.size() > suggestionsCount) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (prefix.size()) {
|
if (prefix.size()) {
|
||||||
for (articleItr = zimFileHandler->findByTitle('A', prefix);
|
for (articleItr = zimFileHandler->findByTitle('A', prefix);
|
||||||
articleItr != zimFileHandler->end() &&
|
articleItr != zimFileHandler->end() &&
|
||||||
articleItr->getTitle().compare(0, prefix.size(), prefix) == 0 &&
|
articleItr->getTitle().compare(0, prefix.size(), prefix) == 0 &&
|
||||||
this->suggestions.size() < suggestionsCount ;
|
this->suggestions.size() < suggestionsCount ;
|
||||||
++articleItr) {
|
++articleItr) {
|
||||||
|
|
||||||
if (this->suggestions.size() == 0) {
|
if (this->suggestions.size() == 0) {
|
||||||
this->suggestions.push_back(articleItr->getTitle());
|
this->suggestions.push_back(articleItr->getTitle());
|
||||||
} else {
|
} else if (this->suggestions.size() < suggestionsCount) {
|
||||||
for (suggestionItr = this->suggestions.begin() ;
|
for (suggestionItr = this->suggestions.begin() ;
|
||||||
suggestionItr != this->suggestions.end();
|
suggestionItr != this->suggestions.end();
|
||||||
++suggestionItr) {
|
++suggestionItr) {
|
||||||
|
|
||||||
result = articleItr->getTitle().compare(*suggestionItr);
|
result = articleItr->getTitle().compare(*suggestionItr);
|
||||||
|
@ -425,25 +489,25 @@ namespace kiwix {
|
||||||
this->suggestions.push_back(articleItr->getTitle());
|
this->suggestions.push_back(articleItr->getTitle());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Suggestions where found */
|
/* Suggestions where found */
|
||||||
retVal = true;
|
retVal = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Set the cursor to the begining */
|
/* Set the cursor to the begining */
|
||||||
this->suggestionsOffset = this->suggestions.begin();
|
this->suggestionsOffset = this->suggestions.begin();
|
||||||
|
|
||||||
return retVal;
|
return retVal;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Try also a few variations of the prefix to have better results */
|
/* Try also a few variations of the prefix to have better results */
|
||||||
bool Reader::searchSuggestionsSmart(const string &prefix, unsigned int suggestionsCount) {
|
bool Reader::searchSuggestionsSmart(const string &prefix, unsigned int suggestionsCount) {
|
||||||
std::string myPrefix = prefix;
|
std::string myPrefix = prefix;
|
||||||
|
|
||||||
/* Normal suggestion request */
|
/* Normal suggestion request */
|
||||||
bool retVal = this->searchSuggestions(prefix, suggestionsCount, true);
|
bool retVal = this->searchSuggestions(prefix, suggestionsCount, true);
|
||||||
|
|
||||||
/* Try with first letter uppercase */
|
/* Try with first letter uppercase */
|
||||||
myPrefix = kiwix::ucFirst(myPrefix);
|
myPrefix = kiwix::ucFirst(myPrefix);
|
||||||
this->searchSuggestions(myPrefix, suggestionsCount, false);
|
this->searchSuggestions(myPrefix, suggestionsCount, false);
|
||||||
|
@ -452,6 +516,10 @@ namespace kiwix {
|
||||||
myPrefix = kiwix::lcFirst(myPrefix);
|
myPrefix = kiwix::lcFirst(myPrefix);
|
||||||
this->searchSuggestions(myPrefix, suggestionsCount, false);
|
this->searchSuggestions(myPrefix, suggestionsCount, false);
|
||||||
|
|
||||||
|
/* Try with title words */
|
||||||
|
myPrefix = kiwix::toTitle(myPrefix);
|
||||||
|
this->searchSuggestions(myPrefix, suggestionsCount, false);
|
||||||
|
|
||||||
return retVal;
|
return retVal;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -460,10 +528,10 @@ namespace kiwix {
|
||||||
if (this->suggestionsOffset != this->suggestions.end()) {
|
if (this->suggestionsOffset != this->suggestions.end()) {
|
||||||
/* title */
|
/* title */
|
||||||
title = *(this->suggestionsOffset);
|
title = *(this->suggestionsOffset);
|
||||||
|
|
||||||
/* increment the cursor for the next call */
|
/* increment the cursor for the next call */
|
||||||
this->suggestionsOffset++;
|
this->suggestionsOffset++;
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -492,7 +560,7 @@ namespace kiwix {
|
||||||
unsigned int Reader::getFileSize() {
|
unsigned int Reader::getFileSize() {
|
||||||
zim::File *file = this->getZimFileHandler();
|
zim::File *file = this->getZimFileHandler();
|
||||||
zim::offset_type size = 0;
|
zim::offset_type size = 0;
|
||||||
|
|
||||||
if (file != NULL) {
|
if (file != NULL) {
|
||||||
size = file->getFilesize();
|
size = file->getFilesize();
|
||||||
}
|
}
|
||||||
|
|
|
@ -38,7 +38,7 @@ using namespace std;
|
||||||
namespace kiwix {
|
namespace kiwix {
|
||||||
|
|
||||||
class Reader {
|
class Reader {
|
||||||
|
|
||||||
public:
|
public:
|
||||||
Reader(const string zimFilePath);
|
Reader(const string zimFilePath);
|
||||||
~Reader();
|
~Reader();
|
||||||
|
@ -58,6 +58,7 @@ namespace kiwix {
|
||||||
string getDate();
|
string getDate();
|
||||||
string getCreator();
|
string getCreator();
|
||||||
string getPublisher();
|
string getPublisher();
|
||||||
|
string getOrigId();
|
||||||
bool getFavicon(string &content, string &mimeType);
|
bool getFavicon(string &content, string &mimeType);
|
||||||
bool getPageUrlFromTitle(const string &title, string &url);
|
bool getPageUrlFromTitle(const string &title, string &url);
|
||||||
bool getContentByUrl(const string &url, string &content, unsigned int &contentLength, string &contentType);
|
bool getContentByUrl(const string &url, string &content, unsigned int &contentLength, string &contentType);
|
||||||
|
@ -69,7 +70,7 @@ namespace kiwix {
|
||||||
bool parseUrl(const string &url, char *ns, string &title);
|
bool parseUrl(const string &url, char *ns, string &title);
|
||||||
unsigned int getFileSize();
|
unsigned int getFileSize();
|
||||||
zim::File* getZimFileHandler();
|
zim::File* getZimFileHandler();
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
zim::File* zimFileHandler;
|
zim::File* zimFileHandler;
|
||||||
zim::size_type firstArticleOffset;
|
zim::size_type firstArticleOffset;
|
||||||
|
@ -77,7 +78,7 @@ namespace kiwix {
|
||||||
zim::size_type currentArticleOffset;
|
zim::size_type currentArticleOffset;
|
||||||
zim::size_type nsACount;
|
zim::size_type nsACount;
|
||||||
zim::size_type nsICount;
|
zim::size_type nsICount;
|
||||||
|
|
||||||
std::vector<std::string> suggestions;
|
std::vector<std::string> suggestions;
|
||||||
std::vector<std::string>::iterator suggestionsOffset;
|
std::vector<std::string>::iterator suggestionsOffset;
|
||||||
|
|
||||||
|
|
|
@ -174,36 +174,40 @@ std::string kiwix::ucFirst (const std::string &word) {
|
||||||
if (word.empty())
|
if (word.empty())
|
||||||
return "";
|
return "";
|
||||||
|
|
||||||
std::string ucFirstWord;
|
std::string result;
|
||||||
|
|
||||||
#ifdef __ANDROID__
|
UnicodeString unicodeWord(word.c_str());
|
||||||
ucFirstWord = word;
|
UnicodeString unicodeFirstLetter = unicodeWord.tempSubString(0, 1).toUpper();
|
||||||
ucFirstWord[0] = toupper(ucFirstWord[0]);
|
unicodeWord.replace(0, 1, unicodeFirstLetter);
|
||||||
#else
|
unicodeWord.toUTF8String(result);
|
||||||
UnicodeString firstLetter = UnicodeString(word.substr(0, 1).c_str());
|
|
||||||
UnicodeString ucFirstLetter = firstLetter.toUpper();
|
|
||||||
ucFirstLetter.toUTF8String(ucFirstWord);
|
|
||||||
ucFirstWord += word.substr(1);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
return ucFirstWord;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string kiwix::lcFirst (const std::string &word) {
|
std::string kiwix::lcFirst (const std::string &word) {
|
||||||
if (word.empty())
|
if (word.empty())
|
||||||
return "";
|
return "";
|
||||||
|
|
||||||
std::string ucFirstWord;
|
std::string result;
|
||||||
|
|
||||||
#ifdef __ANDROID__
|
UnicodeString unicodeWord(word.c_str());
|
||||||
ucFirstWord = word;
|
UnicodeString unicodeFirstLetter = unicodeWord.tempSubString(0, 1).toLower();
|
||||||
ucFirstWord[0] = tolower(ucFirstWord[0]);
|
unicodeWord.replace(0, 1, unicodeFirstLetter);
|
||||||
#else
|
unicodeWord.toUTF8String(result);
|
||||||
UnicodeString firstLetter = UnicodeString(word.substr(0, 1).c_str());
|
|
||||||
UnicodeString ucFirstLetter = firstLetter.toLower();
|
|
||||||
ucFirstLetter.toUTF8String(ucFirstWord);
|
|
||||||
ucFirstWord += word.substr(1);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
return ucFirstWord;
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
std::string kiwix::toTitle (const std::string &word) {
|
||||||
|
if (word.empty())
|
||||||
|
return "";
|
||||||
|
|
||||||
|
std::string result;
|
||||||
|
|
||||||
|
UnicodeString unicodeWord(word.c_str());
|
||||||
|
unicodeWord = unicodeWord.toTitle(0);
|
||||||
|
unicodeWord.toUTF8String(result);
|
||||||
|
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
|
|
|
@ -20,7 +20,6 @@
|
||||||
#ifndef KIWIX_STRINGTOOLS_H
|
#ifndef KIWIX_STRINGTOOLS_H
|
||||||
#define KIWIX_STRINGTOOLS_H
|
#define KIWIX_STRINGTOOLS_H
|
||||||
|
|
||||||
#ifndef __ANDROID__
|
|
||||||
#include <unicode/translit.h>
|
#include <unicode/translit.h>
|
||||||
#include <unicode/normlzr.h>
|
#include <unicode/normlzr.h>
|
||||||
#include <unicode/unistr.h>
|
#include <unicode/unistr.h>
|
||||||
|
@ -29,7 +28,6 @@
|
||||||
#include <unicode/uniset.h>
|
#include <unicode/uniset.h>
|
||||||
#include <unicode/ustring.h>
|
#include <unicode/ustring.h>
|
||||||
#include <unicode/ucnv.h>
|
#include <unicode/ucnv.h>
|
||||||
#endif
|
|
||||||
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
@ -58,6 +56,7 @@ namespace kiwix {
|
||||||
|
|
||||||
std::string ucFirst(const std::string &word);
|
std::string ucFirst(const std::string &word);
|
||||||
std::string lcFirst(const std::string &word);
|
std::string lcFirst(const std::string &word);
|
||||||
|
std::string toTitle(const std::string &word);
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
Loading…
Reference in New Issue