catching up with master

This commit is contained in:
renaud gaudin 2013-12-09 12:02:58 +00:00
commit fd9c9ac17e
7 changed files with 225 additions and 140 deletions

View File

@ -67,6 +67,7 @@ namespace kiwix {
string publisher; string publisher;
string date; string date;
string url; string url;
string origId;
string articleCount; string articleCount;
string mediaCount; string mediaCount;
bool readOnly; bool readOnly;

View File

@ -56,6 +56,7 @@ namespace kiwix {
book.creator = bookNode.attribute("creator").value(); book.creator = bookNode.attribute("creator").value();
book.publisher = bookNode.attribute("publisher").value(); book.publisher = bookNode.attribute("publisher").value();
book.url = bookNode.attribute("url").value(); book.url = bookNode.attribute("url").value();
book.origId = bookNode.attribute("origId").value();
book.articleCount = bookNode.attribute("articleCount").value(); book.articleCount = bookNode.attribute("articleCount").value();
book.mediaCount = bookNode.attribute("mediaCount").value(); book.mediaCount = bookNode.attribute("mediaCount").value();
book.size = bookNode.attribute("size").value(); book.size = bookNode.attribute("size").value();
@ -154,41 +155,46 @@ namespace kiwix {
bookNode.append_attribute("indexType") = "xapian"; bookNode.append_attribute("indexType") = "xapian";
} }
if (!itr->title.empty()) if (itr->origId.empty()) {
bookNode.append_attribute("title") = itr->title.c_str(); if (!itr->title.empty())
bookNode.append_attribute("title") = itr->title.c_str();
if (!itr->description.empty())
bookNode.append_attribute("description") = itr->description.c_str();
if (!itr->language.empty())
bookNode.append_attribute("language") = itr->language.c_str();
if (!itr->creator.empty())
bookNode.append_attribute("creator") = itr->creator.c_str();
if (!itr->publisher.empty())
bookNode.append_attribute("publisher") = itr->publisher.c_str();
if (!itr->favicon.empty())
bookNode.append_attribute("favicon") = itr->favicon.c_str();
if (!itr->faviconMimeType.empty())
bookNode.append_attribute("faviconMimeType") = itr->faviconMimeType.c_str();
}
if (itr->description != "") if (!itr->date.empty())
bookNode.append_attribute("description") = itr->description.c_str();
if (itr->language != "")
bookNode.append_attribute("language") = itr->language.c_str();
if (itr->date != "")
bookNode.append_attribute("date") = itr->date.c_str(); bookNode.append_attribute("date") = itr->date.c_str();
if (itr->creator != "") if (!itr->url.empty())
bookNode.append_attribute("creator") = itr->creator.c_str();
if (itr->publisher != "")
bookNode.append_attribute("publisher") = itr->publisher.c_str();
if (itr->url != "")
bookNode.append_attribute("url") = itr->url.c_str(); bookNode.append_attribute("url") = itr->url.c_str();
if (itr->articleCount != "") if (!itr->origId.empty())
bookNode.append_attribute("origId") = itr->origId.c_str();
if (!itr->articleCount.empty())
bookNode.append_attribute("articleCount") = itr->articleCount.c_str(); bookNode.append_attribute("articleCount") = itr->articleCount.c_str();
if (itr->mediaCount != "") if (!itr->mediaCount.empty())
bookNode.append_attribute("mediaCount") = itr->mediaCount.c_str(); bookNode.append_attribute("mediaCount") = itr->mediaCount.c_str();
if (itr->size != "") if (!itr->size.empty())
bookNode.append_attribute("size") = itr->size.c_str(); bookNode.append_attribute("size") = itr->size.c_str();
if (itr->favicon != "")
bookNode.append_attribute("favicon") = itr->favicon.c_str();
if (itr->faviconMimeType != "")
bookNode.append_attribute("faviconMimeType") = itr->faviconMimeType.c_str();
} }
} }
@ -256,7 +262,7 @@ namespace kiwix {
book->creator = reader->getCreator(); book->creator = reader->getCreator();
book->publisher = reader->getPublisher(); book->publisher = reader->getPublisher();
book->title = reader->getTitle(); book->title = reader->getTitle();
book->origId = reader->getOrigId();
std::ostringstream articleCountStream; std::ostringstream articleCountStream;
articleCountStream << reader->getArticleCount(); articleCountStream << reader->getArticleCount();
book->articleCount = articleCountStream.str(); book->articleCount = articleCountStream.str();
@ -307,10 +313,12 @@ namespace kiwix {
std::map<string, bool> booksLanguagesMap; std::map<string, bool> booksLanguagesMap;
std::sort(library.books.begin(), library.books.end(), kiwix::Book::sortByLanguage); std::sort(library.books.begin(), library.books.end(), kiwix::Book::sortByLanguage);
for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) { for (itr = library.books.begin(); itr != library.books.end(); ++itr) {
if (booksLanguagesMap.find(itr->language) == booksLanguagesMap.end()) { if (booksLanguagesMap.find(itr->language) == booksLanguagesMap.end()) {
booksLanguagesMap[itr->language] = true; if (itr->origId.empty()) {
booksLanguages.push_back(itr->language); booksLanguagesMap[itr->language] = true;
booksLanguages.push_back(itr->language);
}
} }
} }
@ -323,10 +331,12 @@ namespace kiwix {
std::map<string, bool> booksCreatorsMap; std::map<string, bool> booksCreatorsMap;
std::sort(library.books.begin(), library.books.end(), kiwix::Book::sortByCreator); std::sort(library.books.begin(), library.books.end(), kiwix::Book::sortByCreator);
for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) { for (itr = library.books.begin(); itr != library.books.end(); ++itr) {
if (booksCreatorsMap.find(itr->creator) == booksCreatorsMap.end()) { if (booksCreatorsMap.find(itr->creator) == booksCreatorsMap.end()) {
booksCreatorsMap[itr->creator] = true; if (itr->origId.empty()) {
booksCreators.push_back(itr->creator); booksCreatorsMap[itr->creator] = true;
booksCreators.push_back(itr->creator);
}
} }
} }
@ -353,8 +363,10 @@ namespace kiwix {
std::sort(library.books.begin(), library.books.end(), kiwix::Book::sortByPublisher); std::sort(library.books.begin(), library.books.end(), kiwix::Book::sortByPublisher);
for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) { for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) {
if (booksPublishersMap.find(itr->publisher) == booksPublishersMap.end()) { if (booksPublishersMap.find(itr->publisher) == booksPublishersMap.end()) {
booksPublishersMap[itr->publisher] = true; if (itr->origId.empty()) {
booksPublishers.push_back(itr->publisher); booksPublishersMap[itr->publisher] = true;
booksPublishers.push_back(itr->publisher);
}
} }
} }

View File

@ -40,7 +40,7 @@ namespace kiwix {
enum supportedListSortBy { TITLE, SIZE, DATE, CREATOR, PUBLISHER }; enum supportedListSortBy { TITLE, SIZE, DATE, CREATOR, PUBLISHER };
class Manager { class Manager {
public: public:
Manager(); Manager();
~Manager(); ~Manager();
@ -55,9 +55,9 @@ namespace kiwix {
string getCurrentBookId(); string getCurrentBookId();
bool setBookIndex(const string id, const string path, const supportedIndexType type); bool setBookIndex(const string id, const string path, const supportedIndexType type);
bool setBookPath(const string id, const string path); bool setBookPath(const string id, const string path);
string addBookFromPathAndGetId(const string pathToOpen, const string pathToSave = "", const string url = "", string addBookFromPathAndGetId(const string pathToOpen, const string pathToSave = "", const string url = "",
const bool checkMetaData = false); const bool checkMetaData = false);
bool addBookFromPath(const string pathToOpen, const string pathToSave = "", const string url = "", bool addBookFromPath(const string pathToOpen, const string pathToSave = "", const string url = "",
const bool checkMetaData = false); const bool checkMetaData = false);
Library cloneLibrary(); Library cloneLibrary();
bool getBookById(const string id, Book &book); bool getBookById(const string id, Book &book);
@ -65,7 +65,7 @@ namespace kiwix {
unsigned int getBookCount(const bool localBooks, const bool remoteBooks); unsigned int getBookCount(const bool localBooks, const bool remoteBooks);
bool updateBookLastOpenDateById(const string id); bool updateBookLastOpenDateById(const string id);
void removeBookPaths(); void removeBookPaths();
bool listBooks(const supportedListMode mode, const supportedListSortBy sortBy, const unsigned int maxSize, bool listBooks(const supportedListMode mode, const supportedListSortBy sortBy, const unsigned int maxSize,
const string language, const string creator, const string publisher, const string search); const string language, const string creator, const string publisher, const string search);
vector<string> getBooksLanguages(); vector<string> getBooksLanguages();
vector<string> getBooksCreators(); vector<string> getBooksCreators();
@ -75,10 +75,10 @@ namespace kiwix {
string writableLibraryPath; string writableLibraryPath;
vector<std::string> bookIdList; vector<std::string> bookIdList;
protected: protected:
kiwix::Library library; kiwix::Library library;
bool readBookFromPath(const string path, Book *book = NULL); bool readBookFromPath(const string path, Book *book = NULL);
bool parseXmlDom(const pugi::xml_document &doc, const bool readOnly, const string libraryPath); bool parseXmlDom(const pugi::xml_document &doc, const bool readOnly, const string libraryPath);

View File

@ -19,6 +19,38 @@
#include "reader.h" #include "reader.h"
/* Return the lowercase hexadecimal digit for the high nibble (bits 4-7) of v */
inline char hi(char v) {
  static const char hex[] = "0123456789abcdef";
  /* Go through unsigned char so the shift never sees a negative value */
  return hex[(static_cast<unsigned char>(v) >> 4) & 0xf];
}

/* Return the lowercase hexadecimal digit for the low nibble (bits 0-3) of v */
inline char lo(char v) {
  static const char hex[] = "0123456789abcdef";
  return hex[static_cast<unsigned char>(v) & 0xf];
}

/*
 * Format the first 16 bytes of 'in' as a lowercase, dash-separated UUID
 * string laid out as 8-4-4-4-12 hexadecimal digits.
 *
 * Bytes missing from an input shorter than 16 bytes are rendered as "00"
 * instead of being read past the end of the string; bytes beyond the 16th
 * are ignored.
 */
std::string hexUUID (std::string in) {
  const std::string::size_type uuidSize = 16;

  /* 'in' is a private copy, so padding it does not affect the caller */
  if (in.size() < uuidSize)
    in.resize(uuidSize, '\0');

  std::string out;
  out.reserve(2 * uuidSize + 4);
  for (std::string::size_type n = 0; n < uuidSize; ++n) {
    /* Group boundaries of the 8-4-4-4-12 layout, expressed in bytes */
    if (n == 4 || n == 6 || n == 8 || n == 10)
      out += '-';
    out += hi(in[n]);
    out += lo(in[n]);
  }

  return out;
}
static char charFromHex(std::string a) { static char charFromHex(std::string a) {
std::istringstream Blat (a); std::istringstream Blat (a);
int Z; int Z;
@ -28,9 +60,10 @@ static char charFromHex(std::string a) {
void unescapeUrl(string &url) { void unescapeUrl(string &url) {
std::string::size_type pos = 0; std::string::size_type pos = 0;
while ((pos = url.find('%', pos + 1)) != std::string::npos && while ((pos = url.find('%', pos)) != std::string::npos &&
pos + 3 <= url.length()) { pos + 2 < url.length()) {
url.replace(pos, 3, 1, charFromHex(url.substr(pos + 1, 2))); url.replace(pos, 3, 1, charFromHex(url.substr(pos + 1, 2)));
++pos;
} }
return; return;
} }
@ -38,14 +71,14 @@ void unescapeUrl(string &url) {
namespace kiwix { namespace kiwix {
/* Constructor */ /* Constructor */
Reader::Reader(const string zimFilePath) Reader::Reader(const string zimFilePath)
: zimFileHandler(NULL) { : zimFileHandler(NULL) {
string tmpZimFilePath = zimFilePath; string tmpZimFilePath = zimFilePath;
/* Remove potential trailing zimaa */ /* Remove potential trailing zimaa */
size_t found = tmpZimFilePath.rfind("zimaa"); size_t found = tmpZimFilePath.rfind("zimaa");
if (found != string::npos && if (found != string::npos &&
tmpZimFilePath.size() > 5 && tmpZimFilePath.size() > 5 &&
found == tmpZimFilePath.size() - 5) { found == tmpZimFilePath.size() - 5) {
tmpZimFilePath.resize(tmpZimFilePath.size() - 2); tmpZimFilePath.resize(tmpZimFilePath.size() - 2);
} }
@ -63,7 +96,7 @@ namespace kiwix {
/* initialize random seed: */ /* initialize random seed: */
srand ( time(NULL) ); srand ( time(NULL) );
} }
/* Destructor */ /* Destructor */
Reader::~Reader() { Reader::~Reader() {
if (this->zimFileHandler != NULL) { if (this->zimFileHandler != NULL) {
@ -74,7 +107,7 @@ namespace kiwix {
zim::File* Reader::getZimFileHandler() { zim::File* Reader::getZimFileHandler() {
return this->zimFileHandler; return this->zimFileHandler;
} }
/* Reset the cursor for GetNextArticle() */ /* Reset the cursor for GetNextArticle() */
void Reader::reset() { void Reader::reset() {
this->currentArticleOffset = this->firstArticleOffset; this->currentArticleOffset = this->firstArticleOffset;
@ -101,12 +134,12 @@ namespace kiwix {
return counters; return counters;
} }
/* Get the count of articles which can be indexed/displayed */ /* Get the count of articles which can be indexed/displayed */
unsigned int Reader::getArticleCount() { unsigned int Reader::getArticleCount() {
std::map<std::string, unsigned int> counterMap = this->parseCounterMetadata(); std::map<std::string, unsigned int> counterMap = this->parseCounterMetadata();
unsigned int counter = 0; unsigned int counter = 0;
if (counterMap.empty()) { if (counterMap.empty()) {
counter = this->nsACount; counter = this->nsACount;
} else { } else {
@ -114,7 +147,7 @@ namespace kiwix {
if (it != counterMap.end()) if (it != counterMap.end())
counter = it->second; counter = it->second;
} }
return counter; return counter;
} }
@ -140,10 +173,10 @@ namespace kiwix {
if (it != counterMap.end()) if (it != counterMap.end())
counter += it->second; counter += it->second;
} }
return counter; return counter;
} }
/* Get the total of all items of a ZIM file, redirects included */ /* Get the total of all items of a ZIM file, redirects included */
unsigned int Reader::getGlobalCount() { unsigned int Reader::getGlobalCount() {
return this->zimFileHandler->getCountArticles(); return this->zimFileHandler->getCountArticles();
@ -155,7 +188,7 @@ namespace kiwix {
s << this->zimFileHandler->getFileheader().getUuid(); s << this->zimFileHandler->getFileheader().getUuid();
return s.str(); return s.str();
} }
/* Return a page url from a title */ /* Return a page url from a title */
bool Reader::getPageUrlFromTitle(const string &title, string &url) { bool Reader::getPageUrlFromTitle(const string &title, string &url) {
/* Extract the content from the zim file */ /* Extract the content from the zim file */
@ -163,7 +196,7 @@ namespace kiwix {
/* Test if the article was found */ /* Test if the article was found */
if (resultPair.first == true) { if (resultPair.first == true) {
/* Get the article */ /* Get the article */
zim::Article article = *resultPair.second; zim::Article article = *resultPair.second;
@ -172,7 +205,7 @@ namespace kiwix {
while (article.isRedirect() && loopCounter++<42) { while (article.isRedirect() && loopCounter++<42) {
article = article.getRedirectArticle(); article = article.getRedirectArticle();
} }
url = article.getLongUrl(); url = article.getLongUrl();
return true; return true;
} }
@ -182,53 +215,53 @@ namespace kiwix {
/* Return an URL from a title*/ /* Return an URL from a title*/
string Reader::getRandomPageUrl() { string Reader::getRandomPageUrl() {
zim::size_type idx = this->firstArticleOffset + zim::size_type idx = this->firstArticleOffset +
(zim::size_type)((double)rand() / ((double)RAND_MAX + 1) * this->nsACount); (zim::size_type)((double)rand() / ((double)RAND_MAX + 1) * this->nsACount);
zim::Article article = zimFileHandler->getArticle(idx); zim::Article article = zimFileHandler->getArticle(idx);
return article.getLongUrl().c_str(); return article.getLongUrl().c_str();
} }
/* Return the welcome page URL */ /* Return the welcome page URL */
string Reader::getMainPageUrl() { string Reader::getMainPageUrl() {
string url = ""; string url = "";
if (this->zimFileHandler->getFileheader().hasMainPage()) { if (this->zimFileHandler->getFileheader().hasMainPage()) {
zim::Article article = zimFileHandler->getArticle(this->zimFileHandler->getFileheader().getMainPage()); zim::Article article = zimFileHandler->getArticle(this->zimFileHandler->getFileheader().getMainPage());
url = article.getLongUrl(); url = article.getLongUrl();
if (url.empty()) { if (url.empty()) {
url = getFirstPageUrl(); url = getFirstPageUrl();
} }
} else { } else {
url = getFirstPageUrl(); url = getFirstPageUrl();
} }
return url; return url;
} }
bool Reader::getFavicon(string &content, string &mimeType) { bool Reader::getFavicon(string &content, string &mimeType) {
unsigned int contentLength = 0; unsigned int contentLength = 0;
this->getContentByUrl( "/-/favicon.png", content, this->getContentByUrl( "/-/favicon.png", content,
contentLength, mimeType); contentLength, mimeType);
if (content.empty()) { if (content.empty()) {
this->getContentByUrl( "/I/favicon.png", content, this->getContentByUrl( "/I/favicon.png", content,
contentLength, mimeType); contentLength, mimeType);
if (content.empty()) { if (content.empty()) {
this->getContentByUrl( "/I/favicon", content, this->getContentByUrl( "/I/favicon", content,
contentLength, mimeType); contentLength, mimeType);
if (content.empty()) { if (content.empty()) {
this->getContentByUrl( "/-/favicon", content, this->getContentByUrl( "/-/favicon", content,
contentLength, mimeType); contentLength, mimeType);
} }
} }
} }
return content.empty() ? false : true; return content.empty() ? false : true;
} }
@ -236,11 +269,11 @@ namespace kiwix {
bool Reader::getMetatag(const string &name, string &value) { bool Reader::getMetatag(const string &name, string &value) {
unsigned int contentLength = 0; unsigned int contentLength = 0;
string contentType = ""; string contentType = "";
return this->getContentByUrl( "/M/" + name, value, return this->getContentByUrl( "/M/" + name, value,
contentLength, contentType); contentLength, contentType);
} }
string Reader::getTitle() { string Reader::getTitle() {
string value; string value;
this->getMetatag("Title", value); this->getMetatag("Title", value);
@ -256,7 +289,7 @@ namespace kiwix {
string Reader::getDescription() { string Reader::getDescription() {
string value; string value;
this->getMetatag("Description", value); this->getMetatag("Description", value);
/* Mediawiki Collection tends to use the "Subtitle" name */ /* Mediawiki Collection tends to use the "Subtitle" name */
if (value.empty()) { if (value.empty()) {
this->getMetatag("Subtitle", value); this->getMetatag("Subtitle", value);
@ -289,34 +322,61 @@ namespace kiwix {
return value; return value;
} }
/*
 * Return the identifier stored in the "startfileuid" metatag, formatted
 * as a dash-separated hexadecimal UUID, or an empty string when the
 * metatag is missing or empty.
 *
 * The metatag value is parsed as a list of decimal byte values, each one
 * terminated by a newline. Text after the last newline is ignored.
 * NOTE(review): dropping an unterminated last value mirrors the previous
 * behaviour - confirm the metatag always ends with a newline.
 */
string Reader::getOrigId() {
  string value;
  this->getMetatag("startfileuid", value);
  if (value.empty())
    return "";

  const unsigned int uuidSize = 16;
  char rawUuid[uuidSize] = ""; /* zero-filled: missing bytes stay 0 */
  unsigned int byteCount = 0;
  std::string number;

  /* Stop after 16 bytes so a malformed metatag cannot overflow rawUuid */
  for (unsigned int i = 0; i < value.size() && byteCount < uuidSize; i++) {
    if (value[i] == '\n') {
      rawUuid[byteCount++] = atoi(number.c_str());
      number.clear();
    } else {
      number += value[i];
    }
  }

  /* Pass the length explicitly: the buffer is binary data, may contain
     zero bytes and is not guaranteed to be NUL-terminated */
  return hexUUID(std::string(rawUuid, uuidSize));
}
/* Return the first page URL */ /* Return the first page URL */
string Reader::getFirstPageUrl() { string Reader::getFirstPageUrl() {
string url; string url;
zim::size_type firstPageOffset = zimFileHandler->getNamespaceBeginOffset('A'); zim::size_type firstPageOffset = zimFileHandler->getNamespaceBeginOffset('A');
zim::Article article = zimFileHandler->getArticle(firstPageOffset); zim::Article article = zimFileHandler->getArticle(firstPageOffset);
url = article.getLongUrl(); url = article.getLongUrl();
return url; return url;
} }
bool Reader::parseUrl(const string &url, char *ns, string &title) { bool Reader::parseUrl(const string &url, char *ns, string &title) {
/* Offset to visit the url */ /* Offset to visit the url */
unsigned int urlLength = url.size(); unsigned int urlLength = url.size();
unsigned int offset = 0; unsigned int offset = 0;
/* Ignore the '/' */ /* Ignore the '/' */
while ((offset < urlLength) && (url[offset] == '/')) offset++; while ((offset < urlLength) && (url[offset] == '/')) offset++;
/* Get namespace */ /* Get namespace */
while ((offset < urlLength) && (url[offset] != '/')) { while ((offset < urlLength) && (url[offset] != '/')) {
*ns= url[offset]; *ns= url[offset];
offset++; offset++;
} }
/* Ignore the '/' */ /* Ignore the '/' */
while ((offset < urlLength) && (url[offset] == '/')) offset++; while ((offset < urlLength) && (url[offset] == '/')) offset++;
/* Get content title */ /* Get content title */
unsigned int titleOffset = offset; unsigned int titleOffset = offset;
while (offset < urlLength) { while (offset < urlLength) {
@ -338,7 +398,7 @@ namespace kiwix {
contentLength = 0; contentLength = 0;
if (this->zimFileHandler != NULL) { if (this->zimFileHandler != NULL) {
/* Parse the url */ /* Parse the url */
char ns = 0; char ns = 0;
string titleStr; string titleStr;
@ -348,68 +408,72 @@ namespace kiwix {
if (titleStr.empty() && ns == 0) { if (titleStr.empty() && ns == 0) {
this->parseUrl(this->getMainPageUrl(), &ns, titleStr); this->parseUrl(this->getMainPageUrl(), &ns, titleStr);
} }
/* Extract the content from the zim file */ /* Extract the content from the zim file */
std::pair<bool, zim::File::const_iterator> resultPair = zimFileHandler->findx(ns, titleStr); std::pair<bool, zim::File::const_iterator> resultPair = zimFileHandler->findx(ns, titleStr);
/* Test if the article was found */ /* Test if the article was found */
if (resultPair.first == true) { if (resultPair.first == true) {
/* Get the article */ /* Get the article */
zim::Article article = zimFileHandler->getArticle(resultPair.second.getIndex()); zim::Article article = zimFileHandler->getArticle(resultPair.second.getIndex());
/* If redirect */ /* If redirect */
unsigned int loopCounter = 0; unsigned int loopCounter = 0;
while (article.isRedirect() && loopCounter++<42) { while (article.isRedirect() && loopCounter++<42) {
article = article.getRedirectArticle(); article = article.getRedirectArticle();
} }
/* Get the content mime-type */ /* Get the content mime-type */
contentType = string(article.getMimeType().data(), article.getMimeType().size()); contentType = string(article.getMimeType().data(), article.getMimeType().size());
/* Get the data */ /* Get the data */
content = string(article.getData().data(), article.getArticleSize()); content = string(article.getData().data(), article.getArticleSize());
/* Try to set a stub HTML header/footer if necesssary */ /* Try to set a stub HTML header/footer if necesssary */
if (contentType == "text/html" && std::string::npos == content.find("<body>")) { if (contentType == "text/html" && std::string::npos == content.find("<body>")) {
content = "<html><head><title>" + article.getTitle() + "</title><meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" /></head><body>" + content + "</body></html>"; content = "<html><head><title>" + article.getTitle() + "</title><meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" /></head><body>" + content + "</body></html>";
} }
/* Get the data length */ /* Get the data length */
contentLength = article.getArticleSize(); contentLength = article.getArticleSize();
/* Set return value */ /* Set return value */
retVal = true; retVal = true;
} }
} }
return retVal; return retVal;
} }
/* Search titles by prefix */ /* Search titles by prefix */
bool Reader::searchSuggestions(const string &prefix, unsigned int suggestionsCount, const bool reset) { bool Reader::searchSuggestions(const string &prefix, unsigned int suggestionsCount, const bool reset) {
bool retVal = false; bool retVal = false;
zim::File::const_iterator articleItr; zim::File::const_iterator articleItr;
std::vector<std::string>::iterator suggestionItr; std::vector<std::string>::iterator suggestionItr;
int result; int result;
/* Reset the suggestions */ /* Reset the suggestions otherwise check if the suggestions number is less than the suggestionsCount */
if (reset) { if (reset) {
this->suggestions.clear(); this->suggestions.clear();
} else {
if (this->suggestions.size() > suggestionsCount) {
return false;
}
} }
if (prefix.size()) { if (prefix.size()) {
for (articleItr = zimFileHandler->findByTitle('A', prefix); for (articleItr = zimFileHandler->findByTitle('A', prefix);
articleItr != zimFileHandler->end() && articleItr != zimFileHandler->end() &&
articleItr->getTitle().compare(0, prefix.size(), prefix) == 0 && articleItr->getTitle().compare(0, prefix.size(), prefix) == 0 &&
this->suggestions.size() < suggestionsCount ; this->suggestions.size() < suggestionsCount ;
++articleItr) { ++articleItr) {
if (this->suggestions.size() == 0) { if (this->suggestions.size() == 0) {
this->suggestions.push_back(articleItr->getTitle()); this->suggestions.push_back(articleItr->getTitle());
} else { } else if (this->suggestions.size() < suggestionsCount) {
for (suggestionItr = this->suggestions.begin() ; for (suggestionItr = this->suggestions.begin() ;
suggestionItr != this->suggestions.end(); suggestionItr != this->suggestions.end();
++suggestionItr) { ++suggestionItr) {
result = articleItr->getTitle().compare(*suggestionItr); result = articleItr->getTitle().compare(*suggestionItr);
@ -425,25 +489,25 @@ namespace kiwix {
this->suggestions.push_back(articleItr->getTitle()); this->suggestions.push_back(articleItr->getTitle());
} }
} }
/* Suggestions where found */ /* Suggestions where found */
retVal = true; retVal = true;
} }
} }
/* Set the cursor to the begining */ /* Set the cursor to the begining */
this->suggestionsOffset = this->suggestions.begin(); this->suggestionsOffset = this->suggestions.begin();
return retVal; return retVal;
} }
/* Try also a few variations of the prefix to have better results */ /* Try also a few variations of the prefix to have better results */
bool Reader::searchSuggestionsSmart(const string &prefix, unsigned int suggestionsCount) { bool Reader::searchSuggestionsSmart(const string &prefix, unsigned int suggestionsCount) {
std::string myPrefix = prefix; std::string myPrefix = prefix;
/* Normal suggestion request */ /* Normal suggestion request */
bool retVal = this->searchSuggestions(prefix, suggestionsCount, true); bool retVal = this->searchSuggestions(prefix, suggestionsCount, true);
/* Try with first letter uppercase */ /* Try with first letter uppercase */
myPrefix = kiwix::ucFirst(myPrefix); myPrefix = kiwix::ucFirst(myPrefix);
this->searchSuggestions(myPrefix, suggestionsCount, false); this->searchSuggestions(myPrefix, suggestionsCount, false);
@ -452,6 +516,10 @@ namespace kiwix {
myPrefix = kiwix::lcFirst(myPrefix); myPrefix = kiwix::lcFirst(myPrefix);
this->searchSuggestions(myPrefix, suggestionsCount, false); this->searchSuggestions(myPrefix, suggestionsCount, false);
/* Try with title words */
myPrefix = kiwix::toTitle(myPrefix);
this->searchSuggestions(myPrefix, suggestionsCount, false);
return retVal; return retVal;
} }
@ -460,10 +528,10 @@ namespace kiwix {
if (this->suggestionsOffset != this->suggestions.end()) { if (this->suggestionsOffset != this->suggestions.end()) {
/* title */ /* title */
title = *(this->suggestionsOffset); title = *(this->suggestionsOffset);
/* increment the cursor for the next call */ /* increment the cursor for the next call */
this->suggestionsOffset++; this->suggestionsOffset++;
return true; return true;
} }
@ -492,7 +560,7 @@ namespace kiwix {
unsigned int Reader::getFileSize() { unsigned int Reader::getFileSize() {
zim::File *file = this->getZimFileHandler(); zim::File *file = this->getZimFileHandler();
zim::offset_type size = 0; zim::offset_type size = 0;
if (file != NULL) { if (file != NULL) {
size = file->getFilesize(); size = file->getFilesize();
} }

View File

@ -38,7 +38,7 @@ using namespace std;
namespace kiwix { namespace kiwix {
class Reader { class Reader {
public: public:
Reader(const string zimFilePath); Reader(const string zimFilePath);
~Reader(); ~Reader();
@ -58,6 +58,7 @@ namespace kiwix {
string getDate(); string getDate();
string getCreator(); string getCreator();
string getPublisher(); string getPublisher();
string getOrigId();
bool getFavicon(string &content, string &mimeType); bool getFavicon(string &content, string &mimeType);
bool getPageUrlFromTitle(const string &title, string &url); bool getPageUrlFromTitle(const string &title, string &url);
bool getContentByUrl(const string &url, string &content, unsigned int &contentLength, string &contentType); bool getContentByUrl(const string &url, string &content, unsigned int &contentLength, string &contentType);
@ -69,7 +70,7 @@ namespace kiwix {
bool parseUrl(const string &url, char *ns, string &title); bool parseUrl(const string &url, char *ns, string &title);
unsigned int getFileSize(); unsigned int getFileSize();
zim::File* getZimFileHandler(); zim::File* getZimFileHandler();
protected: protected:
zim::File* zimFileHandler; zim::File* zimFileHandler;
zim::size_type firstArticleOffset; zim::size_type firstArticleOffset;
@ -77,7 +78,7 @@ namespace kiwix {
zim::size_type currentArticleOffset; zim::size_type currentArticleOffset;
zim::size_type nsACount; zim::size_type nsACount;
zim::size_type nsICount; zim::size_type nsICount;
std::vector<std::string> suggestions; std::vector<std::string> suggestions;
std::vector<std::string>::iterator suggestionsOffset; std::vector<std::string>::iterator suggestionsOffset;

View File

@ -174,36 +174,40 @@ std::string kiwix::ucFirst (const std::string &word) {
if (word.empty()) if (word.empty())
return ""; return "";
std::string ucFirstWord; std::string result;
#ifdef __ANDROID__ UnicodeString unicodeWord(word.c_str());
ucFirstWord = word; UnicodeString unicodeFirstLetter = unicodeWord.tempSubString(0, 1).toUpper();
ucFirstWord[0] = toupper(ucFirstWord[0]); unicodeWord.replace(0, 1, unicodeFirstLetter);
#else unicodeWord.toUTF8String(result);
UnicodeString firstLetter = UnicodeString(word.substr(0, 1).c_str());
UnicodeString ucFirstLetter = firstLetter.toUpper();
ucFirstLetter.toUTF8String(ucFirstWord);
ucFirstWord += word.substr(1);
#endif
return ucFirstWord; return result;
} }
std::string kiwix::lcFirst (const std::string &word) { std::string kiwix::lcFirst (const std::string &word) {
if (word.empty()) if (word.empty())
return ""; return "";
std::string ucFirstWord; std::string result;
#ifdef __ANDROID__ UnicodeString unicodeWord(word.c_str());
ucFirstWord = word; UnicodeString unicodeFirstLetter = unicodeWord.tempSubString(0, 1).toLower();
ucFirstWord[0] = tolower(ucFirstWord[0]); unicodeWord.replace(0, 1, unicodeFirstLetter);
#else unicodeWord.toUTF8String(result);
UnicodeString firstLetter = UnicodeString(word.substr(0, 1).c_str());
UnicodeString ucFirstLetter = firstLetter.toLower();
ucFirstLetter.toUTF8String(ucFirstWord);
ucFirstWord += word.substr(1);
#endif
return ucFirstWord; return result;
}
std::string kiwix::toTitle (const std::string &word) {
if (word.empty())
return "";
std::string result;
UnicodeString unicodeWord(word.c_str());
unicodeWord = unicodeWord.toTitle(0);
unicodeWord.toUTF8String(result);
return result;
} }

View File

@ -20,7 +20,6 @@
#ifndef KIWIX_STRINGTOOLS_H #ifndef KIWIX_STRINGTOOLS_H
#define KIWIX_STRINGTOOLS_H #define KIWIX_STRINGTOOLS_H
#ifndef __ANDROID__
#include <unicode/translit.h> #include <unicode/translit.h>
#include <unicode/normlzr.h> #include <unicode/normlzr.h>
#include <unicode/unistr.h> #include <unicode/unistr.h>
@ -29,7 +28,6 @@
#include <unicode/uniset.h> #include <unicode/uniset.h>
#include <unicode/ustring.h> #include <unicode/ustring.h>
#include <unicode/ucnv.h> #include <unicode/ucnv.h>
#endif
#include <iostream> #include <iostream>
#include <vector> #include <vector>
@ -58,6 +56,7 @@ namespace kiwix {
std::string ucFirst(const std::string &word); std::string ucFirst(const std::string &word);
std::string lcFirst(const std::string &word); std::string lcFirst(const std::string &word);
std::string toTitle(const std::string &word);
} }
#endif #endif