From 7fd2dce1fa00cd9d50c5163cb572bc968a956e0b Mon Sep 17 00:00:00 2001 From: Kiran Mathew Koshy Date: Fri, 6 Sep 2013 04:09:35 +0530 Subject: [PATCH 01/15] Modified Kiwix-manage to incorporate origID parameter for diff files in the kiwix library. Book class and Manager class modified. -Kiran --- src/common/kiwix/library.h | 3 +- src/common/kiwix/manager.cpp | 113 ++++++++++++++++++----------------- src/common/kiwix/manager.h | 12 ++-- 3 files changed, 67 insertions(+), 61 deletions(-) diff --git a/src/common/kiwix/library.h b/src/common/kiwix/library.h index 130890a8d..5631fe88d 100644 --- a/src/common/kiwix/library.h +++ b/src/common/kiwix/library.h @@ -67,6 +67,7 @@ namespace kiwix { string publisher; string date; string url; + string origID; string articleCount; string mediaCount; bool readOnly; @@ -76,7 +77,7 @@ namespace kiwix { }; class Library { - + public: Library(); ~Library(); diff --git a/src/common/kiwix/manager.cpp b/src/common/kiwix/manager.cpp index abf2d2868..1cd59ce97 100644 --- a/src/common/kiwix/manager.cpp +++ b/src/common/kiwix/manager.cpp @@ -25,19 +25,19 @@ namespace kiwix { Manager::Manager() : writableLibraryPath("") { } - + /* Destructor */ Manager::~Manager() { } bool Manager::parseXmlDom(const pugi::xml_document &doc, const bool readOnly, const string libraryPath) { pugi::xml_node libraryNode = doc.child("library"); - + if (strlen(libraryNode.attribute("current").value())) this->setCurrentBookId(libraryNode.attribute("current").value()); string libraryVersion = libraryNode.attribute("version").value(); - + for (pugi::xml_node bookNode = libraryNode.child("book"); bookNode; bookNode = bookNode.next_sibling("book")) { bool ok = true; kiwix::Book book; @@ -45,7 +45,7 @@ namespace kiwix { book.readOnly = readOnly; book.id = bookNode.attribute("id").value(); book.path = bookNode.attribute("path").value(); - book.last = (std::string(bookNode.attribute("last").value()) != "undefined" ? 
+ book.last = (std::string(bookNode.attribute("last").value()) != "undefined" ? bookNode.attribute("last").value() : ""); book.indexPath = bookNode.attribute("indexPath").value(); book.indexType = (std::string(bookNode.attribute("indexType").value()) == "xapian" ? XAPIAN : CLUCENE); @@ -56,14 +56,15 @@ namespace kiwix { book.creator = bookNode.attribute("creator").value(); book.publisher = bookNode.attribute("publisher").value(); book.url = bookNode.attribute("url").value(); + book.origID = bookNode.attribute("origId").value(); book.articleCount = bookNode.attribute("articleCount").value(); book.mediaCount = bookNode.attribute("mediaCount").value(); book.size = bookNode.attribute("size").value(); book.favicon = bookNode.attribute("favicon").value(); book.faviconMimeType = bookNode.attribute("faviconMimeType").value(); - + /* Check absolute and relative paths */ - this->checkAndCleanBookPaths(book, libraryPath); + this->checkAndCleanBookPaths(book, libraryPath); /* Update the book properties with the new importer */ if (libraryVersion.empty() || atoi(libraryVersion.c_str()) <= atoi(KIWIX_LIBRARY_VERSION)) { @@ -76,7 +77,7 @@ namespace kiwix { library.addBook(book); } } - + return true; } @@ -128,7 +129,7 @@ namespace kiwix { if (!library.version.empty()) libraryNode.append_attribute("version") = library.version.c_str(); - + /* Add each book */ std::vector::iterator itr; for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) { @@ -141,11 +142,11 @@ namespace kiwix { if (!itr->path.empty()) bookNode.append_attribute("path") = itr->path.c_str(); - + if (!itr->last.empty() && itr->last != "undefined") { bookNode.append_attribute("last") = itr->last.c_str(); } - + if (!itr->indexPath.empty()) bookNode.append_attribute("indexPath") = itr->indexPath.c_str(); @@ -155,31 +156,34 @@ namespace kiwix { else if (itr->indexType == CLUCENE) bookNode.append_attribute("indexType") = "clucene"; } - + if (!itr->title.empty()) bookNode.append_attribute("title") = 
itr->title.c_str(); - + if (itr->description != "") bookNode.append_attribute("description") = itr->description.c_str(); - + if (itr->language != "") bookNode.append_attribute("language") = itr->language.c_str(); - + if (itr->date != "") bookNode.append_attribute("date") = itr->date.c_str(); - + if (itr->creator != "") bookNode.append_attribute("creator") = itr->creator.c_str(); if (itr->publisher != "") bookNode.append_attribute("publisher") = itr->publisher.c_str(); - + if (itr->url != "") bookNode.append_attribute("url") = itr->url.c_str(); - + + if (itr->origID != "") + bookNode.append_attribute("origId") = itr->origID.c_str(); + if (itr->articleCount != "") bookNode.append_attribute("articleCount") = itr->articleCount.c_str(); - + if (itr->mediaCount != "") bookNode.append_attribute("mediaCount") = itr->mediaCount.c_str(); @@ -211,13 +215,13 @@ namespace kiwix { } string Manager::getCurrentBookId() { - return library.current.empty() ? + return library.current.empty() ? "" : library.current.top(); } /* Add a book to the library. 
Return empty string if failed, book id otherwise */ - string Manager::addBookFromPathAndGetId(const string pathToOpen, const string pathToSave, - const string url, const bool checkMetaData) { + string Manager::addBookFromPathAndGetId(const string pathToOpen, const string pathToSave, + const string url, const string origId, const bool checkMetaData) { kiwix::Book book; if (this->readBookFromPath(pathToOpen, &book)) { @@ -228,9 +232,10 @@ namespace kiwix { computeAbsolutePath(removeLastPathElement(writableLibraryPath, true, false), pathToSave) : pathToSave; } - if (!checkMetaData || + if (!checkMetaData || (checkMetaData && !book.title.empty() && !book.language.empty() && !book.date.empty())) { book.url = url; + book.origID=origId; library.addBook(book); return book.id; } @@ -238,16 +243,16 @@ namespace kiwix { return ""; } - + /* Wrapper over Manager::addBookFromPath which return a bool instead of a string */ - bool Manager::addBookFromPath(const string pathToOpen, const string pathToSave, const string url, const bool checkMetaData) { - return !(this->addBookFromPathAndGetId(pathToOpen, pathToSave, url, checkMetaData).empty()); + bool Manager::addBookFromPath(const string pathToOpen, const string pathToSave, const string url, const string origId, const bool checkMetaData) { + return !(this->addBookFromPathAndGetId(pathToOpen, pathToSave, url, origId, checkMetaData).empty()); } bool Manager::readBookFromPath(const string path, kiwix::Book *book) { try { kiwix::Reader *reader = new kiwix::Reader(path); - + if (book != NULL) { book->path = path; book->pathAbsolute = path; @@ -258,18 +263,18 @@ namespace kiwix { book->creator = reader->getCreator(); book->publisher = reader->getPublisher(); book->title = reader->getTitle(); - + std::ostringstream articleCountStream; articleCountStream << reader->getArticleCount(); book->articleCount = articleCountStream.str(); - + std::ostringstream mediaCountStream; mediaCountStream << reader->getMediaCount(); book->mediaCount = 
mediaCountStream.str(); - + ostringstream convert; convert << reader->getFileSize(); book->size = convert.str(); - + string favicon; string faviconMimeType; if (reader->getFavicon(favicon, faviconMimeType)) { @@ -277,7 +282,7 @@ namespace kiwix { book->faviconMimeType = faviconMimeType; } } - + delete reader; } catch (const std::exception& e) { std::cerr << e.what() << std::endl; @@ -294,7 +299,7 @@ namespace kiwix { bool Manager::removeBookById(const string id) { unsigned int bookIndex = 0; std::vector::iterator itr; - for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) { + for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) { if ( itr->id == id) { return this->library.removeBookByIndex(bookIndex); } @@ -308,14 +313,14 @@ namespace kiwix { std::vector::iterator itr; std::map booksLanguagesMap; - std::sort(library.books.begin(), library.books.end(), kiwix::Book::sortByLanguage); + std::sort(library.books.begin(), library.books.end(), kiwix::Book::sortByLanguage); for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) { if (booksLanguagesMap.find(itr->language) == booksLanguagesMap.end()) { booksLanguagesMap[itr->language] = true; booksLanguages.push_back(itr->language); } } - + return booksLanguages; } @@ -324,14 +329,14 @@ namespace kiwix { std::vector::iterator itr; std::map booksCreatorsMap; - std::sort(library.books.begin(), library.books.end(), kiwix::Book::sortByCreator); + std::sort(library.books.begin(), library.books.end(), kiwix::Book::sortByCreator); for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) { if (booksCreatorsMap.find(itr->creator) == booksCreatorsMap.end()) { booksCreatorsMap[itr->creator] = true; booksCreators.push_back(itr->creator); } } - + return booksCreators; } @@ -343,7 +348,7 @@ namespace kiwix { for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) { booksIds.push_back(itr->id); } - + return booksIds; } @@ -352,14 +357,14 @@ namespace kiwix { 
std::vector::iterator itr; std::map booksPublishersMap; - std::sort(library.books.begin(), library.books.end(), kiwix::Book::sortByPublisher); + std::sort(library.books.begin(), library.books.end(), kiwix::Book::sortByPublisher); for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) { if (booksPublishersMap.find(itr->publisher) == booksPublishersMap.end()) { booksPublishersMap[itr->publisher] = true; booksPublishers.push_back(itr->publisher); } } - + return booksPublishers; } @@ -379,7 +384,7 @@ namespace kiwix { bool Manager::getBookById(const string id, Book &book) { std::vector::iterator itr; - for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) { + for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) { if ( itr->id == id) { book = *itr; return true; @@ -390,7 +395,7 @@ namespace kiwix { bool Manager::updateBookLastOpenDateById(const string id) { std::vector::iterator itr; - for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) { + for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) { if ( itr->id == id) { char unixdate[12]; sprintf (unixdate, "%d", (int)time(NULL)); @@ -404,7 +409,7 @@ namespace kiwix { bool Manager::setBookIndex(const string id, const string path, const supportedIndexType type) { std::vector::iterator itr; - for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) { + for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) { if ( itr->id == id) { itr->indexPath = path; itr->indexPathAbsolute = isRelativePath(path) ? @@ -419,7 +424,7 @@ namespace kiwix { bool Manager::setBookPath(const string id, const string path) { std::vector::iterator itr; - for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) { + for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) { if ( itr->id == id) { itr->path = path; itr->pathAbsolute = isRelativePath(path) ? 
@@ -433,7 +438,7 @@ namespace kiwix { void Manager::removeBookPaths() { std::vector::iterator itr; - for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) { + for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) { itr->path = ""; itr->pathAbsolute = ""; } @@ -449,7 +454,7 @@ namespace kiwix { return result; } - bool Manager::listBooks(const supportedListMode mode, const supportedListSortBy sortBy, + bool Manager::listBooks(const supportedListMode mode, const supportedListSortBy sortBy, const unsigned int maxSize, const string language, const string creator, const string publisher, const string search) { this->bookIdList.clear(); @@ -457,7 +462,7 @@ namespace kiwix { /* Sort */ if (sortBy == TITLE) { - std::sort(library.books.begin(), library.books.end(), kiwix::Book::sortByTitle); + std::sort(library.books.begin(), library.books.end(), kiwix::Book::sortByTitle); } else if (sortBy == SIZE) { std::sort(library.books.begin(), library.books.end(), kiwix::Book::sortBySize); } else if (sortBy == DATE) { @@ -467,7 +472,7 @@ namespace kiwix { } else if (sortBy == PUBLISHER) { std::sort(library.books.begin(), library.books.end(), kiwix::Book::sortByPublisher); } - + /* Special sort for LASTOPEN */ if (mode == LASTOPEN) { std::sort(library.books.begin(), library.books.end(), kiwix::Book::sortByLastOpen); @@ -482,9 +487,9 @@ namespace kiwix { if (mode == LOCAL && itr->path.empty()) ok = false; - + if (ok == true && mode == REMOTE && (!itr->path.empty() || itr->url.empty())) - ok = false; + ok = false; if (ok == true && maxSize != 0 && (unsigned int)atoi(itr->size.c_str()) > maxSize * 1024 * 1024) ok = false; @@ -497,7 +502,7 @@ namespace kiwix { if (ok == true && !publisher.empty() && itr->publisher != publisher) ok = false; - + if ((ok == true && !search.empty()) && !(matchRegex(itr->title, search) || matchRegex(itr->description, search))) ok = false; @@ -506,7 +511,7 @@ namespace kiwix { } } } - + return true; } @@ -519,14 +524,14 @@ 
namespace kiwix { book.path = computeRelativePath(removeLastPathElement(libraryPath, true, false), book.pathAbsolute); } } - + if (!book.indexPath.empty()) { if (isRelativePath(book.indexPath)) { - book.indexPathAbsolute = + book.indexPathAbsolute = computeAbsolutePath(removeLastPathElement(libraryPath, true, false), book.indexPath); } else { book.indexPathAbsolute = book.indexPath; - book.indexPath = + book.indexPath = computeRelativePath(removeLastPathElement(libraryPath, true, false), book.indexPathAbsolute); } } diff --git a/src/common/kiwix/manager.h b/src/common/kiwix/manager.h index 6253eece6..063fca0bd 100644 --- a/src/common/kiwix/manager.h +++ b/src/common/kiwix/manager.h @@ -40,7 +40,7 @@ namespace kiwix { enum supportedListSortBy { TITLE, SIZE, DATE, CREATOR, PUBLISHER }; class Manager { - + public: Manager(); ~Manager(); @@ -55,9 +55,9 @@ namespace kiwix { string getCurrentBookId(); bool setBookIndex(const string id, const string path, const supportedIndexType type); bool setBookPath(const string id, const string path); - string addBookFromPathAndGetId(const string pathToOpen, const string pathToSave = "", const string url = "", + string addBookFromPathAndGetId(const string pathToOpen, const string pathToSave = "", const string url = "", const string origID="", const bool checkMetaData = false); - bool addBookFromPath(const string pathToOpen, const string pathToSave = "", const string url = "", + bool addBookFromPath(const string pathToOpen, const string pathToSave = "", const string url = "", const string origID="", const bool checkMetaData = false); Library cloneLibrary(); bool getBookById(const string id, Book &book); @@ -65,7 +65,7 @@ namespace kiwix { unsigned int getBookCount(const bool localBooks, const bool remoteBooks); bool updateBookLastOpenDateById(const string id); void removeBookPaths(); - bool listBooks(const supportedListMode mode, const supportedListSortBy sortBy, const unsigned int maxSize, + bool listBooks(const supportedListMode 
mode, const supportedListSortBy sortBy, const unsigned int maxSize, const string language, const string creator, const string publisher, const string search); vector getBooksLanguages(); vector getBooksCreators(); @@ -75,10 +75,10 @@ namespace kiwix { string writableLibraryPath; vector bookIdList; - + protected: kiwix::Library library; - + bool readBookFromPath(const string path, Book *book = NULL); bool parseXmlDom(const pugi::xml_document &doc, const bool readOnly, const string libraryPath); From 7fbf20936ec0e728683f0b507fa6755374085fa2 Mon Sep 17 00:00:00 2001 From: Kiran Mathew Koshy Date: Fri, 13 Sep 2013 14:59:44 +0530 Subject: [PATCH 02/15] Updated kiwix::manager class to prevent it from returning a diff file as a normal book. --- src/common/kiwix/manager.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/common/kiwix/manager.cpp b/src/common/kiwix/manager.cpp index 1cd59ce97..f988b8655 100644 --- a/src/common/kiwix/manager.cpp +++ b/src/common/kiwix/manager.cpp @@ -316,8 +316,10 @@ namespace kiwix { std::sort(library.books.begin(), library.books.end(), kiwix::Book::sortByLanguage); for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) { if (booksLanguagesMap.find(itr->language) == booksLanguagesMap.end()) { + if(itr->origID=="") { booksLanguagesMap[itr->language] = true; booksLanguages.push_back(itr->language); + } } } @@ -332,8 +334,10 @@ namespace kiwix { std::sort(library.books.begin(), library.books.end(), kiwix::Book::sortByCreator); for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) { if (booksCreatorsMap.find(itr->creator) == booksCreatorsMap.end()) { + if(itr->origID=="") { booksCreatorsMap[itr->creator] = true; booksCreators.push_back(itr->creator); + } } } @@ -346,7 +350,9 @@ namespace kiwix { std::vector::iterator itr; for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) { + if(itr->origID=="") { booksIds.push_back(itr->id); + } } return booksIds; @@ -360,8 +366,10 @@ 
namespace kiwix { std::sort(library.books.begin(), library.books.end(), kiwix::Book::sortByPublisher); for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) { if (booksPublishersMap.find(itr->publisher) == booksPublishersMap.end()) { + if(itr->origID=="") { booksPublishersMap[itr->publisher] = true; booksPublishers.push_back(itr->publisher); + } } } @@ -386,8 +394,10 @@ namespace kiwix { std::vector::iterator itr; for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) { if ( itr->id == id) { + if(itr->origID=="") { book = *itr; return true; + } } } return false; From c22b5a1d4cb8302f179947246a6c08b9c72fffeb Mon Sep 17 00:00:00 2001 From: Kiran Mathew Koshy Date: Wed, 18 Sep 2013 15:55:29 +0530 Subject: [PATCH 03/15] Removed title, favicon, faviconmimetype, description, author, publisher, date, language parameters from library file for diff files. --- src/common/kiwix/manager.cpp | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/common/kiwix/manager.cpp b/src/common/kiwix/manager.cpp index f988b8655..ac311c423 100644 --- a/src/common/kiwix/manager.cpp +++ b/src/common/kiwix/manager.cpp @@ -157,7 +157,9 @@ namespace kiwix { bookNode.append_attribute("indexType") = "clucene"; } - if (!itr->title.empty()) + if (itr->origID == "") + { + if (!itr->title.empty()) bookNode.append_attribute("title") = itr->title.c_str(); if (itr->description != "") @@ -175,6 +177,14 @@ namespace kiwix { if (itr->publisher != "") bookNode.append_attribute("publisher") = itr->publisher.c_str(); + if (itr->favicon != "") + bookNode.append_attribute("favicon") = itr->favicon.c_str(); + + if (itr->faviconMimeType != "") + bookNode.append_attribute("faviconMimeType") = itr->faviconMimeType.c_str(); + + } + if (itr->url != "") bookNode.append_attribute("url") = itr->url.c_str(); @@ -189,12 +199,6 @@ namespace kiwix { if (itr->size != "") bookNode.append_attribute("size") = itr->size.c_str(); - - if (itr->favicon != "") - 
bookNode.append_attribute("favicon") = itr->favicon.c_str(); - - if (itr->faviconMimeType != "") - bookNode.append_attribute("faviconMimeType") = itr->faviconMimeType.c_str(); } } From 2f899266163df5a68a561f3a40b3b75628b0c152 Mon Sep 17 00:00:00 2001 From: Kiran Mathew Koshy Date: Fri, 20 Sep 2013 02:52:29 +0530 Subject: [PATCH 04/15] Updated Kiwix::Manager class to accept origID as the last optional parameter, so as not to break existing tools which use Kiwix::Manager --- src/common/kiwix/manager.cpp | 6 +++--- src/common/kiwix/manager.h | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/common/kiwix/manager.cpp b/src/common/kiwix/manager.cpp index ac311c423..b4f017954 100644 --- a/src/common/kiwix/manager.cpp +++ b/src/common/kiwix/manager.cpp @@ -225,7 +225,7 @@ namespace kiwix { /* Add a book to the library. Return empty string if failed, book id otherwise */ string Manager::addBookFromPathAndGetId(const string pathToOpen, const string pathToSave, - const string url, const string origId, const bool checkMetaData) { + const string url, const bool checkMetaData, const string origId) { kiwix::Book book; if (this->readBookFromPath(pathToOpen, &book)) { @@ -249,8 +249,8 @@ namespace kiwix { } /* Wrapper over Manager::addBookFromPath which return a bool instead of a string */ - bool Manager::addBookFromPath(const string pathToOpen, const string pathToSave, const string url, const string origId, const bool checkMetaData) { - return !(this->addBookFromPathAndGetId(pathToOpen, pathToSave, url, origId, checkMetaData).empty()); + bool Manager::addBookFromPath(const string pathToOpen, const string pathToSave, const string url, const bool checkMetaData, const string origId) { + return !(this->addBookFromPathAndGetId(pathToOpen, pathToSave, url, checkMetaData, origId).empty()); } bool Manager::readBookFromPath(const string path, kiwix::Book *book) { diff --git a/src/common/kiwix/manager.h b/src/common/kiwix/manager.h index 063fca0bd..dc2544866 
100644 --- a/src/common/kiwix/manager.h +++ b/src/common/kiwix/manager.h @@ -55,10 +55,10 @@ namespace kiwix { string getCurrentBookId(); bool setBookIndex(const string id, const string path, const supportedIndexType type); bool setBookPath(const string id, const string path); - string addBookFromPathAndGetId(const string pathToOpen, const string pathToSave = "", const string url = "", const string origID="", - const bool checkMetaData = false); - bool addBookFromPath(const string pathToOpen, const string pathToSave = "", const string url = "", const string origID="", - const bool checkMetaData = false); + string addBookFromPathAndGetId(const string pathToOpen, const string pathToSave = "", const string url = "", + const bool checkMetaData = false, const string origID=""); + bool addBookFromPath(const string pathToOpen, const string pathToSave = "", const string url = "", + const bool checkMetaData = false, const string origID=""); Library cloneLibrary(); bool getBookById(const string id, Book &book); bool getCurrentBook(Book &book); From 14a4394f6d8f4cb5d40793ac1be80459e9b89fdd Mon Sep 17 00:00:00 2001 From: Kiran Mathew Koshy Date: Fri, 20 Sep 2013 18:35:20 +0530 Subject: [PATCH 05/15] Permanent fix for bug651. Kiwix manager class is back to original state, additional function for obtaining origID has been added to Kiwix::Reader class. Kiwix-manage and Kiwix-serve back to normal. --- src/common/kiwix/manager.cpp | 9 +- src/common/kiwix/manager.h | 4 +- src/common/kiwix/reader.cpp | 197 +++++++++++++++++++++++------------ src/common/kiwix/reader.h | 7 +- 4 files changed, 138 insertions(+), 79 deletions(-) diff --git a/src/common/kiwix/manager.cpp b/src/common/kiwix/manager.cpp index b4f017954..e0da1e423 100644 --- a/src/common/kiwix/manager.cpp +++ b/src/common/kiwix/manager.cpp @@ -225,7 +225,7 @@ namespace kiwix { /* Add a book to the library. 
Return empty string if failed, book id otherwise */ string Manager::addBookFromPathAndGetId(const string pathToOpen, const string pathToSave, - const string url, const bool checkMetaData, const string origId) { + const string url, const bool checkMetaData) { kiwix::Book book; if (this->readBookFromPath(pathToOpen, &book)) { @@ -239,7 +239,6 @@ namespace kiwix { if (!checkMetaData || (checkMetaData && !book.title.empty() && !book.language.empty() && !book.date.empty())) { book.url = url; - book.origID=origId; library.addBook(book); return book.id; } @@ -249,8 +248,8 @@ namespace kiwix { } /* Wrapper over Manager::addBookFromPath which return a bool instead of a string */ - bool Manager::addBookFromPath(const string pathToOpen, const string pathToSave, const string url, const bool checkMetaData, const string origId) { - return !(this->addBookFromPathAndGetId(pathToOpen, pathToSave, url, checkMetaData, origId).empty()); + bool Manager::addBookFromPath(const string pathToOpen, const string pathToSave, const string url, const bool checkMetaData) { + return !(this->addBookFromPathAndGetId(pathToOpen, pathToSave, url, checkMetaData).empty()); } bool Manager::readBookFromPath(const string path, kiwix::Book *book) { @@ -267,7 +266,7 @@ namespace kiwix { book->creator = reader->getCreator(); book->publisher = reader->getPublisher(); book->title = reader->getTitle(); - + book->origID=reader->getOrigID(); std::ostringstream articleCountStream; articleCountStream << reader->getArticleCount(); book->articleCount = articleCountStream.str(); diff --git a/src/common/kiwix/manager.h b/src/common/kiwix/manager.h index dc2544866..6190a1c65 100644 --- a/src/common/kiwix/manager.h +++ b/src/common/kiwix/manager.h @@ -56,9 +56,9 @@ namespace kiwix { bool setBookIndex(const string id, const string path, const supportedIndexType type); bool setBookPath(const string id, const string path); string addBookFromPathAndGetId(const string pathToOpen, const string pathToSave = "", const string url 
= "", - const bool checkMetaData = false, const string origID=""); + const bool checkMetaData = false); bool addBookFromPath(const string pathToOpen, const string pathToSave = "", const string url = "", - const bool checkMetaData = false, const string origID=""); + const bool checkMetaData = false); Library cloneLibrary(); bool getBookById(const string id, Book &book); bool getCurrentBook(Book &book); diff --git a/src/common/kiwix/reader.cpp b/src/common/kiwix/reader.cpp index 2a928c080..c355994ef 100644 --- a/src/common/kiwix/reader.cpp +++ b/src/common/kiwix/reader.cpp @@ -19,6 +19,38 @@ #include "reader.h" +inline char hi(char v) { + char hex[] = "0123456789abcdef"; + return hex[(v >> 4) & 0xf]; +} + +inline char lo(char v) { + char hex[] = "0123456789abcdef"; + return hex[v & 0xf]; +} + +std::string hexUUID (std::string in) { + std::ostringstream out; + for (unsigned n = 0; n < 4; ++n) + out << hi(in[n]) << lo(in[n]); + out << '-'; + for (unsigned n = 4; n < 6; ++n) + out << hi(in[n]) << lo(in[n]); + out << '-'; + for (unsigned n = 6; n < 8; ++n) + out << hi(in[n]) << lo(in[n]); + out << '-'; + for (unsigned n = 8; n < 10; ++n) + out << hi(in[n]) << lo(in[n]); + out << '-'; + for (unsigned n = 10; n < 16; ++n) + out << hi(in[n]) << lo(in[n]); + std::string op=out.str(); + return op; +} + + + static char charFromHex(std::string a) { std::istringstream Blat (a); int Z; @@ -28,7 +60,7 @@ static char charFromHex(std::string a) { void unescapeUrl(string &url) { std::string::size_type pos = 0; - while ((pos = url.find('%', pos + 1)) != std::string::npos && + while ((pos = url.find('%', pos + 1)) != std::string::npos && pos + 3 <= url.length()) { url.replace(pos, 3, 1, charFromHex(url.substr(pos + 1, 2))); } @@ -38,14 +70,14 @@ void unescapeUrl(string &url) { namespace kiwix { /* Constructor */ - Reader::Reader(const string zimFilePath) + Reader::Reader(const string zimFilePath) : zimFileHandler(NULL) { string tmpZimFilePath = zimFilePath; /* Remove potential trailing 
zimaa */ size_t found = tmpZimFilePath.rfind("zimaa"); - if (found != string::npos && - tmpZimFilePath.size() > 5 && + if (found != string::npos && + tmpZimFilePath.size() > 5 && found == tmpZimFilePath.size() - 5) { tmpZimFilePath.resize(tmpZimFilePath.size() - 2); } @@ -63,7 +95,7 @@ namespace kiwix { /* initialize random seed: */ srand ( time(NULL) ); } - + /* Destructor */ Reader::~Reader() { if (this->zimFileHandler != NULL) { @@ -74,7 +106,7 @@ namespace kiwix { zim::File* Reader::getZimFileHandler() { return this->zimFileHandler; } - + /* Reset the cursor for GetNextArticle() */ void Reader::reset() { this->currentArticleOffset = this->firstArticleOffset; @@ -101,12 +133,12 @@ namespace kiwix { return counters; } - + /* Get the count of articles which can be indexed/displayed */ unsigned int Reader::getArticleCount() { std::map counterMap = this->parseCounterMetadata(); unsigned int counter = 0; - + if (counterMap.empty()) { counter = this->nsACount; } else { @@ -114,7 +146,7 @@ namespace kiwix { if (it != counterMap.end()) counter = it->second; } - + return counter; } @@ -140,10 +172,10 @@ namespace kiwix { if (it != counterMap.end()) counter += it->second; } - + return counter; } - + /* Get the total of all items of a ZIM file, redirects included */ unsigned int Reader::getGlobalCount() { return this->zimFileHandler->getCountArticles(); @@ -155,7 +187,7 @@ namespace kiwix { s << this->zimFileHandler->getFileheader().getUuid(); return s.str(); } - + /* Return a page url from a title */ bool Reader::getPageUrlFromTitle(const string &title, string &url) { /* Extract the content from the zim file */ @@ -163,7 +195,7 @@ namespace kiwix { /* Test if the article was found */ if (resultPair.first == true) { - + /* Get the article */ zim::Article article = *resultPair.second; @@ -172,7 +204,7 @@ namespace kiwix { while (article.isRedirect() && loopCounter++<42) { article = article.getRedirectArticle(); } - + url = article.getLongUrl(); return true; } @@ -182,53 
+214,53 @@ namespace kiwix { /* Return an URL from a title*/ string Reader::getRandomPageUrl() { - zim::size_type idx = this->firstArticleOffset + - (zim::size_type)((double)rand() / ((double)RAND_MAX + 1) * this->nsACount); + zim::size_type idx = this->firstArticleOffset + + (zim::size_type)((double)rand() / ((double)RAND_MAX + 1) * this->nsACount); zim::Article article = zimFileHandler->getArticle(idx); return article.getLongUrl().c_str(); } - + /* Return the welcome page URL */ string Reader::getMainPageUrl() { string url = ""; - + if (this->zimFileHandler->getFileheader().hasMainPage()) { zim::Article article = zimFileHandler->getArticle(this->zimFileHandler->getFileheader().getMainPage()); url = article.getLongUrl(); if (url.empty()) { - url = getFirstPageUrl(); + url = getFirstPageUrl(); } } else { - url = getFirstPageUrl(); + url = getFirstPageUrl(); } - + return url; } - + bool Reader::getFavicon(string &content, string &mimeType) { unsigned int contentLength = 0; - - this->getContentByUrl( "/-/favicon.png", content, + + this->getContentByUrl( "/-/favicon.png", content, contentLength, mimeType); - + if (content.empty()) { - this->getContentByUrl( "/I/favicon.png", content, + this->getContentByUrl( "/I/favicon.png", content, contentLength, mimeType); if (content.empty()) { - this->getContentByUrl( "/I/favicon", content, + this->getContentByUrl( "/I/favicon", content, contentLength, mimeType); - + if (content.empty()) { - this->getContentByUrl( "/-/favicon", content, + this->getContentByUrl( "/-/favicon", content, contentLength, mimeType); } } } - + return content.empty() ? 
false : true; } @@ -236,11 +268,11 @@ namespace kiwix { bool Reader::getMetatag(const string &name, string &value) { unsigned int contentLength = 0; string contentType = ""; - - return this->getContentByUrl( "/M/" + name, value, + + return this->getContentByUrl( "/M/" + name, value, contentLength, contentType); } - + string Reader::getTitle() { string value; this->getMetatag("Title", value); @@ -256,7 +288,7 @@ namespace kiwix { string Reader::getDescription() { string value; this->getMetatag("Description", value); - + /* Mediawiki Collection tends to use the "Subtitle" name */ if (value.empty()) { this->getMetatag("Subtitle", value); @@ -289,34 +321,61 @@ namespace kiwix { return value; } + string Reader::getOrigID() { + string value; + this->getMetatag("startfileuid", value); + if(value.empty()) + return ""; + std::string id=value; + std::string origID; + std::string temp=""; + unsigned int k=0; + char tempArray[16]=""; + for(unsigned int i=0; igetNamespaceBeginOffset('A'); zim::Article article = zimFileHandler->getArticle(firstPageOffset); url = article.getLongUrl(); - + return url; } - + bool Reader::parseUrl(const string &url, char *ns, string &title) { /* Offset to visit the url */ unsigned int urlLength = url.size(); unsigned int offset = 0; - + /* Ignore the '/' */ while ((offset < urlLength) && (url[offset] == '/')) offset++; - + /* Get namespace */ while ((offset < urlLength) && (url[offset] != '/')) { *ns= url[offset]; offset++; } - + /* Ignore the '/' */ - while ((offset < urlLength) && (url[offset] == '/')) offset++; - + while ((offset < urlLength) && (url[offset] == '/')) offset++; + /* Get content title */ unsigned int titleOffset = offset; while (offset < urlLength) { @@ -338,7 +397,7 @@ namespace kiwix { contentLength = 0; if (this->zimFileHandler != NULL) { - + /* Parse the url */ char ns = 0; string titleStr; @@ -348,48 +407,48 @@ namespace kiwix { if (titleStr.empty() && ns == 0) { this->parseUrl(this->getMainPageUrl(), &ns, titleStr); } - + /* 
Extract the content from the zim file */ std::pair resultPair = zimFileHandler->findx(ns, titleStr); - + /* Test if the article was found */ if (resultPair.first == true) { - + /* Get the article */ zim::Article article = zimFileHandler->getArticle(resultPair.second.getIndex()); - + /* If redirect */ unsigned int loopCounter = 0; while (article.isRedirect() && loopCounter++<42) { article = article.getRedirectArticle(); } - + /* Get the content mime-type */ - contentType = string(article.getMimeType().data(), article.getMimeType().size()); - + contentType = string(article.getMimeType().data(), article.getMimeType().size()); + /* Get the data */ content = string(article.getData().data(), article.getArticleSize()); - + /* Try to set a stub HTML header/footer if necesssary */ if (contentType == "text/html" && std::string::npos == content.find("")) { content = "" + article.getTitle() + "" + content + ""; } - + /* Get the data length */ contentLength = article.getArticleSize(); - + /* Set return value */ retVal = true; } } - + return retVal; } - + /* Search titles by prefix */ bool Reader::searchSuggestions(const string &prefix, unsigned int suggestionsCount, const bool reset) { bool retVal = false; - zim::File::const_iterator articleItr; + zim::File::const_iterator articleItr; std::vector::iterator suggestionItr; int result; @@ -400,16 +459,16 @@ namespace kiwix { if (prefix.size()) { for (articleItr = zimFileHandler->findByTitle('A', prefix); - articleItr != zimFileHandler->end() && - articleItr->getTitle().compare(0, prefix.size(), prefix) == 0 && - this->suggestions.size() < suggestionsCount ; + articleItr != zimFileHandler->end() && + articleItr->getTitle().compare(0, prefix.size(), prefix) == 0 && + this->suggestions.size() < suggestionsCount ; ++articleItr) { if (this->suggestions.size() == 0) { this->suggestions.push_back(articleItr->getTitle()); } else { - for (suggestionItr = this->suggestions.begin() ; - suggestionItr != this->suggestions.end(); + for 
(suggestionItr = this->suggestions.begin() ; + suggestionItr != this->suggestions.end(); ++suggestionItr) { result = articleItr->getTitle().compare(*suggestionItr); @@ -425,25 +484,25 @@ namespace kiwix { this->suggestions.push_back(articleItr->getTitle()); } } - + /* Suggestions where found */ retVal = true; } } - + /* Set the cursor to the begining */ this->suggestionsOffset = this->suggestions.begin(); - + return retVal; } - + /* Try also a few variations of the prefix to have better results */ bool Reader::searchSuggestionsSmart(const string &prefix, unsigned int suggestionsCount) { std::string myPrefix = prefix; /* Normal suggestion request */ bool retVal = this->searchSuggestions(prefix, suggestionsCount, true); - + /* Try with first letter uppercase */ myPrefix = kiwix::ucFirst(myPrefix); this->searchSuggestions(myPrefix, suggestionsCount, false); @@ -460,10 +519,10 @@ namespace kiwix { if (this->suggestionsOffset != this->suggestions.end()) { /* title */ title = *(this->suggestionsOffset); - + /* increment the cursor for the next call */ this->suggestionsOffset++; - + return true; } @@ -492,7 +551,7 @@ namespace kiwix { unsigned int Reader::getFileSize() { zim::File *file = this->getZimFileHandler(); zim::offset_type size = 0; - + if (file != NULL) { size = file->getFilesize(); } diff --git a/src/common/kiwix/reader.h b/src/common/kiwix/reader.h index dec5c8be7..5325e5be0 100644 --- a/src/common/kiwix/reader.h +++ b/src/common/kiwix/reader.h @@ -38,7 +38,7 @@ using namespace std; namespace kiwix { class Reader { - + public: Reader(const string zimFilePath); ~Reader(); @@ -58,6 +58,7 @@ namespace kiwix { string getDate(); string getCreator(); string getPublisher(); + string getOrigID(); bool getFavicon(string &content, string &mimeType); bool getPageUrlFromTitle(const string &title, string &url); bool getContentByUrl(const string &url, string &content, unsigned int &contentLength, string &contentType); @@ -69,7 +70,7 @@ namespace kiwix { bool parseUrl(const 
string &url, char *ns, string &title); unsigned int getFileSize(); zim::File* getZimFileHandler(); - + protected: zim::File* zimFileHandler; zim::size_type firstArticleOffset; @@ -77,7 +78,7 @@ namespace kiwix { zim::size_type currentArticleOffset; zim::size_type nsACount; zim::size_type nsICount; - + std::vector suggestions; std::vector::iterator suggestionsOffset; From 1299c4c264ad46399a4765af13682ce961d49cf8 Mon Sep 17 00:00:00 2001 From: Kiran Mathew Koshy Date: Sun, 22 Sep 2013 15:09:23 +0530 Subject: [PATCH 06/15] wix::Manager::getBooksIDs() now returns diff files too. --- src/common/kiwix/manager.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/common/kiwix/manager.cpp b/src/common/kiwix/manager.cpp index e0da1e423..fe167c0d5 100644 --- a/src/common/kiwix/manager.cpp +++ b/src/common/kiwix/manager.cpp @@ -353,9 +353,7 @@ namespace kiwix { std::vector::iterator itr; for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) { - if(itr->origID=="") { booksIds.push_back(itr->id); - } } return booksIds; @@ -397,10 +395,8 @@ namespace kiwix { std::vector::iterator itr; for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) { if ( itr->id == id) { - if(itr->origID=="") { book = *itr; return true; - } } } return false; From c9d1f562fffad471f7a41a47bdb833e3978e5a69 Mon Sep 17 00:00:00 2001 From: kelson42 Date: Sun, 22 Sep 2013 13:11:19 +0200 Subject: [PATCH 07/15] + fix a few typo/style stuff --- src/common/kiwix/library.h | 2 +- src/common/kiwix/manager.cpp | 92 ++++++++++++++++++------------------ src/common/kiwix/reader.cpp | 10 ++-- src/common/kiwix/reader.h | 2 +- 4 files changed, 52 insertions(+), 54 deletions(-) diff --git a/src/common/kiwix/library.h b/src/common/kiwix/library.h index 5631fe88d..be82ab44d 100644 --- a/src/common/kiwix/library.h +++ b/src/common/kiwix/library.h @@ -67,7 +67,7 @@ namespace kiwix { string publisher; string date; string url; - string origID; + string origId; string articleCount; string 
mediaCount; bool readOnly; diff --git a/src/common/kiwix/manager.cpp b/src/common/kiwix/manager.cpp index fe167c0d5..1fc2c619c 100644 --- a/src/common/kiwix/manager.cpp +++ b/src/common/kiwix/manager.cpp @@ -56,7 +56,7 @@ namespace kiwix { book.creator = bookNode.attribute("creator").value(); book.publisher = bookNode.attribute("publisher").value(); book.url = bookNode.attribute("url").value(); - book.origID = bookNode.attribute("origId").value(); + book.origId = bookNode.attribute("origId").value(); book.articleCount = bookNode.attribute("articleCount").value(); book.mediaCount = bookNode.attribute("mediaCount").value(); book.size = bookNode.attribute("size").value(); @@ -157,47 +157,45 @@ namespace kiwix { bookNode.append_attribute("indexType") = "clucene"; } - if (itr->origID == "") - { - if (!itr->title.empty()) - bookNode.append_attribute("title") = itr->title.c_str(); - - if (itr->description != "") - bookNode.append_attribute("description") = itr->description.c_str(); - - if (itr->language != "") - bookNode.append_attribute("language") = itr->language.c_str(); - - if (itr->date != "") - bookNode.append_attribute("date") = itr->date.c_str(); - - if (itr->creator != "") - bookNode.append_attribute("creator") = itr->creator.c_str(); - - if (itr->publisher != "") - bookNode.append_attribute("publisher") = itr->publisher.c_str(); - - if (itr->favicon != "") - bookNode.append_attribute("favicon") = itr->favicon.c_str(); - - if (itr->faviconMimeType != "") - bookNode.append_attribute("faviconMimeType") = itr->faviconMimeType.c_str(); - + if (itr->origId.empty()) { + if (!itr->title.empty()) + bookNode.append_attribute("title") = itr->title.c_str(); + + if (!itr->description.empty()) + bookNode.append_attribute("description") = itr->description.c_str(); + + if (!itr->language.empty()) + bookNode.append_attribute("language") = itr->language.c_str(); + + if (!itr->creator.empty()) + bookNode.append_attribute("creator") = itr->creator.c_str(); + + if 
(!itr->publisher.empty()) + bookNode.append_attribute("publisher") = itr->publisher.c_str(); + + if (!itr->favicon.empty()) + bookNode.append_attribute("favicon") = itr->favicon.c_str(); + + if (itr->faviconMimeType != "") + bookNode.append_attribute("faviconMimeType") = itr->faviconMimeType.c_str(); } - if (itr->url != "") + if (!itr->date.empty()) + bookNode.append_attribute("date") = itr->date.c_str(); + + if (!itr->url.empty()) bookNode.append_attribute("url") = itr->url.c_str(); - - if (itr->origID != "") - bookNode.append_attribute("origId") = itr->origID.c_str(); - - if (itr->articleCount != "") + + if (!itr->origId.empty()) + bookNode.append_attribute("origId") = itr->origId.c_str(); + + if (!itr->articleCount.empty()) bookNode.append_attribute("articleCount") = itr->articleCount.c_str(); - if (itr->mediaCount != "") + if (!itr->mediaCount.empty()) bookNode.append_attribute("mediaCount") = itr->mediaCount.c_str(); - if (itr->size != "") + if (!itr->size.empty()) bookNode.append_attribute("size") = itr->size.c_str(); } } @@ -266,7 +264,7 @@ namespace kiwix { book->creator = reader->getCreator(); book->publisher = reader->getPublisher(); book->title = reader->getTitle(); - book->origID=reader->getOrigID(); + book->origId = reader->getOrigId(); std::ostringstream articleCountStream; articleCountStream << reader->getArticleCount(); book->articleCount = articleCountStream.str(); @@ -317,11 +315,11 @@ namespace kiwix { std::map booksLanguagesMap; std::sort(library.books.begin(), library.books.end(), kiwix::Book::sortByLanguage); - for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) { + for (itr = library.books.begin(); itr != library.books.end(); ++itr) { if (booksLanguagesMap.find(itr->language) == booksLanguagesMap.end()) { - if(itr->origID=="") { - booksLanguagesMap[itr->language] = true; - booksLanguages.push_back(itr->language); + if (itr->origId.empty()) { + booksLanguagesMap[itr->language] = true; + 
booksLanguages.push_back(itr->language); } } } @@ -335,11 +333,11 @@ namespace kiwix { std::map booksCreatorsMap; std::sort(library.books.begin(), library.books.end(), kiwix::Book::sortByCreator); - for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) { + for (itr = library.books.begin(); itr != library.books.end(); ++itr) { if (booksCreatorsMap.find(itr->creator) == booksCreatorsMap.end()) { - if(itr->origID=="") { - booksCreatorsMap[itr->creator] = true; - booksCreators.push_back(itr->creator); + if (itr->origId.empty()) { + booksCreatorsMap[itr->creator] = true; + booksCreators.push_back(itr->creator); } } } @@ -367,9 +365,9 @@ namespace kiwix { std::sort(library.books.begin(), library.books.end(), kiwix::Book::sortByPublisher); for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) { if (booksPublishersMap.find(itr->publisher) == booksPublishersMap.end()) { - if(itr->origID=="") { - booksPublishersMap[itr->publisher] = true; - booksPublishers.push_back(itr->publisher); + if (itr->origId.empty()) { + booksPublishersMap[itr->publisher] = true; + booksPublishers.push_back(itr->publisher); } } } diff --git a/src/common/kiwix/reader.cpp b/src/common/kiwix/reader.cpp index c355994ef..a8c673a3f 100644 --- a/src/common/kiwix/reader.cpp +++ b/src/common/kiwix/reader.cpp @@ -321,11 +321,11 @@ namespace kiwix { return value; } - string Reader::getOrigID() { - string value; - this->getMetatag("startfileuid", value); - if(value.empty()) - return ""; + string Reader::getOrigId() { + string origId; + this->getMetatag("startfileuid", origId); + + if (!origId.empty()) { std::string id=value; std::string origID; std::string temp=""; diff --git a/src/common/kiwix/reader.h b/src/common/kiwix/reader.h index 5325e5be0..f4458c717 100644 --- a/src/common/kiwix/reader.h +++ b/src/common/kiwix/reader.h @@ -58,7 +58,7 @@ namespace kiwix { string getDate(); string getCreator(); string getPublisher(); - string getOrigID(); + string getOrigId(); bool 
getFavicon(string &content, string &mimeType); bool getPageUrlFromTitle(const string &title, string &url); bool getContentByUrl(const string &url, string &content, unsigned int &contentLength, string &contentType); From 0bab8902036b8f1b03702a5716fb5cc6d348a75f Mon Sep 17 00:00:00 2001 From: kelson42 Date: Sun, 22 Sep 2013 13:17:11 +0200 Subject: [PATCH 08/15] + fix small regression --- src/common/kiwix/reader.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/common/kiwix/reader.cpp b/src/common/kiwix/reader.cpp index a8c673a3f..8e52f8a5e 100644 --- a/src/common/kiwix/reader.cpp +++ b/src/common/kiwix/reader.cpp @@ -322,10 +322,10 @@ namespace kiwix { } string Reader::getOrigId() { - string origId; - this->getMetatag("startfileuid", origId); - - if (!origId.empty()) { + string value; + this->getMetatag("startfileuid", value); + if(value.empty()) + return ""; std::string id=value; std::string origID; std::string temp=""; From 696cfc90c31fc8078a8f758c7c1edcf27d329790 Mon Sep 17 00:00:00 2001 From: kelson42 Date: Sun, 22 Sep 2013 16:56:16 +0200 Subject: [PATCH 09/15] + small beautifying of the code --- src/common/kiwix/manager.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/common/kiwix/manager.cpp b/src/common/kiwix/manager.cpp index 1fc2c619c..86bf71289 100644 --- a/src/common/kiwix/manager.cpp +++ b/src/common/kiwix/manager.cpp @@ -176,7 +176,7 @@ namespace kiwix { if (!itr->favicon.empty()) bookNode.append_attribute("favicon") = itr->favicon.c_str(); - if (itr->faviconMimeType != "") + if (!itr->faviconMimeType.empty()) bookNode.append_attribute("faviconMimeType") = itr->faviconMimeType.c_str(); } From 48fa26028f1e21a3c11842b76ebee9032336abf3 Mon Sep 17 00:00:00 2001 From: kelson42 Date: Sun, 6 Oct 2013 20:58:20 +0200 Subject: [PATCH 10/15] + fix small regression in the unescape_url() code --- src/common/kiwix/reader.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/src/common/kiwix/reader.cpp b/src/common/kiwix/reader.cpp index 8e52f8a5e..0f7998673 100644 --- a/src/common/kiwix/reader.cpp +++ b/src/common/kiwix/reader.cpp @@ -60,7 +60,7 @@ static char charFromHex(std::string a) { void unescapeUrl(string &url) { std::string::size_type pos = 0; - while ((pos = url.find('%', pos + 1)) != std::string::npos && + while ((pos = url.find('%', pos ? pos + 1 : pos)) != std::string::npos && pos + 3 <= url.length()) { url.replace(pos, 3, 1, charFromHex(url.substr(pos + 1, 2))); } From 835388daf9416157affaa5301e5395fe5e452143 Mon Sep 17 00:00:00 2001 From: kelson42 Date: Mon, 7 Oct 2013 13:10:37 +0200 Subject: [PATCH 11/15] cleaner unescapeUrl() --- src/common/kiwix/reader.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/common/kiwix/reader.cpp b/src/common/kiwix/reader.cpp index 0f7998673..220e12fec 100644 --- a/src/common/kiwix/reader.cpp +++ b/src/common/kiwix/reader.cpp @@ -60,9 +60,10 @@ static char charFromHex(std::string a) { void unescapeUrl(string &url) { std::string::size_type pos = 0; - while ((pos = url.find('%', pos ? 
pos + 1 : pos)) != std::string::npos && - pos + 3 <= url.length()) { + while ((pos = url.find('%', pos)) != std::string::npos && + pos + 2 < url.length()) { url.replace(pos, 3, 1, charFromHex(url.substr(pos + 1, 2))); + ++pos; } return; } From 2ad238f0956a8dee81ee5f3bab44bcade480ad44 Mon Sep 17 00:00:00 2001 From: kelson42 Date: Tue, 5 Nov 2013 12:08:43 +0100 Subject: [PATCH 12/15] + fix kiwix:ucFirst and kiwix:lcFirst --- src/common/stringTools.cpp | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/src/common/stringTools.cpp b/src/common/stringTools.cpp index cdc07c281..6352d9e88 100644 --- a/src/common/stringTools.cpp +++ b/src/common/stringTools.cpp @@ -174,36 +174,36 @@ std::string kiwix::ucFirst (const std::string &word) { if (word.empty()) return ""; - std::string ucFirstWord; + std::string result; #ifdef __ANDROID__ - ucFirstWord = word; - ucFirstWord[0] = toupper(ucFirstWord[0]); + result = word; + result[0] = toupper(result[0]); #else - UnicodeString firstLetter = UnicodeString(word.substr(0, 1).c_str()); - UnicodeString ucFirstLetter = firstLetter.toUpper(); - ucFirstLetter.toUTF8String(ucFirstWord); - ucFirstWord += word.substr(1); + UnicodeString unicodeWord(word.c_str()); + UnicodeString unicodeFirstLetter = unicodeWord.tempSubString(0, 1).toUpper(); + unicodeWord.replace(0, 1, unicodeFirstLetter); + unicodeWord.toUTF8String(result); #endif - return ucFirstWord; + return result; } std::string kiwix::lcFirst (const std::string &word) { if (word.empty()) return ""; - std::string ucFirstWord; + std::string result; #ifdef __ANDROID__ - ucFirstWord = word; - ucFirstWord[0] = tolower(ucFirstWord[0]); + result = word; + result[0] = tolower(result[0]); #else - UnicodeString firstLetter = UnicodeString(word.substr(0, 1).c_str()); - UnicodeString ucFirstLetter = firstLetter.toLower(); - ucFirstLetter.toUTF8String(ucFirstWord); - ucFirstWord += word.substr(1); + UnicodeString unicodeWord(word.c_str()); + 
UnicodeString unicodeFirstLetter = unicodeWord.tempSubString(0, 1).toLower(); + unicodeWord.replace(0, 1, unicodeFirstLetter); + unicodeWord.toUTF8String(result); #endif - return ucFirstWord; + return result; } From 987d6f672fc5d07cecec219824ace48b20f84604 Mon Sep 17 00:00:00 2001 From: kelson42 Date: Sun, 10 Nov 2013 17:42:52 +0100 Subject: [PATCH 13/15] + take care the nore than maxsuggestions is delivered --- src/common/kiwix/reader.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/common/kiwix/reader.cpp b/src/common/kiwix/reader.cpp index 220e12fec..6506807af 100644 --- a/src/common/kiwix/reader.cpp +++ b/src/common/kiwix/reader.cpp @@ -453,9 +453,13 @@ namespace kiwix { std::vector::iterator suggestionItr; int result; - /* Reset the suggestions */ + /* Reset the suggestions otherwise check if the suggestions number is less than the suggestionsCount */ if (reset) { this->suggestions.clear(); + } else { + if (this->suggestions.size() > suggestionsCount) { + return false; + } } if (prefix.size()) { @@ -467,7 +471,7 @@ namespace kiwix { if (this->suggestions.size() == 0) { this->suggestions.push_back(articleItr->getTitle()); - } else { + } else if (this->suggestions.size() < suggestionsCount) { for (suggestionItr = this->suggestions.begin() ; suggestionItr != this->suggestions.end(); ++suggestionItr) { From 39fa510af5afddfb6e20fba0d5f11b4832fd1f94 Mon Sep 17 00:00:00 2001 From: kelson42 Date: Sun, 10 Nov 2013 17:54:11 +0100 Subject: [PATCH 14/15] + improve suggestions search, search also entitled words --- src/common/kiwix/reader.cpp | 4 ++++ src/common/stringTools.cpp | 18 ++++++++++++++++++ src/common/stringTools.h | 1 + 3 files changed, 23 insertions(+) diff --git a/src/common/kiwix/reader.cpp b/src/common/kiwix/reader.cpp index 6506807af..8a6ece163 100644 --- a/src/common/kiwix/reader.cpp +++ b/src/common/kiwix/reader.cpp @@ -516,6 +516,10 @@ namespace kiwix { myPrefix = kiwix::lcFirst(myPrefix); 
this->searchSuggestions(myPrefix, suggestionsCount, false); + /* Try with title words */ + myPrefix = kiwix::toTitle(myPrefix); + this->searchSuggestions(myPrefix, suggestionsCount, false); + return retVal; } diff --git a/src/common/stringTools.cpp b/src/common/stringTools.cpp index 6352d9e88..68527ad49 100644 --- a/src/common/stringTools.cpp +++ b/src/common/stringTools.cpp @@ -207,3 +207,21 @@ std::string kiwix::lcFirst (const std::string &word) { return result; } + + +std::string kiwix::toTitle (const std::string &word) { + if (word.empty()) + return ""; + + std::string result; + +#ifdef __ANDROID__ + result = word; +#else + UnicodeString unicodeWord(word.c_str()); + unicodeWord = unicodeWord.toTitle(0); + unicodeWord.toUTF8String(result); +#endif + + return result; +} diff --git a/src/common/stringTools.h b/src/common/stringTools.h index 47cbc6f9e..8993c65f5 100644 --- a/src/common/stringTools.h +++ b/src/common/stringTools.h @@ -58,6 +58,7 @@ namespace kiwix { std::string ucFirst(const std::string &word); std::string lcFirst(const std::string &word); + std::string toTitle(const std::string &word); } #endif From ad20fde08f434944c0332eba8a48736b3a2b6ea7 Mon Sep 17 00:00:00 2001 From: kelson42 Date: Tue, 12 Nov 2013 02:03:34 +0100 Subject: [PATCH 15/15] + android integrates now libicu --- src/common/stringTools.cpp | 14 -------------- src/common/stringTools.h | 2 -- 2 files changed, 16 deletions(-) diff --git a/src/common/stringTools.cpp b/src/common/stringTools.cpp index 68527ad49..1553c5ba8 100644 --- a/src/common/stringTools.cpp +++ b/src/common/stringTools.cpp @@ -176,15 +176,10 @@ std::string kiwix::ucFirst (const std::string &word) { std::string result; -#ifdef __ANDROID__ - result = word; - result[0] = toupper(result[0]); -#else UnicodeString unicodeWord(word.c_str()); UnicodeString unicodeFirstLetter = unicodeWord.tempSubString(0, 1).toUpper(); unicodeWord.replace(0, 1, unicodeFirstLetter); unicodeWord.toUTF8String(result); -#endif return result; } @@ 
-195,15 +190,10 @@ std::string kiwix::lcFirst (const std::string &word) { std::string result; -#ifdef __ANDROID__ - result = word; - result[0] = tolower(result[0]); -#else UnicodeString unicodeWord(word.c_str()); UnicodeString unicodeFirstLetter = unicodeWord.tempSubString(0, 1).toLower(); unicodeWord.replace(0, 1, unicodeFirstLetter); unicodeWord.toUTF8String(result); -#endif return result; } @@ -215,13 +205,9 @@ std::string kiwix::toTitle (const std::string &word) { std::string result; -#ifdef __ANDROID__ - result = word; -#else UnicodeString unicodeWord(word.c_str()); unicodeWord = unicodeWord.toTitle(0); unicodeWord.toUTF8String(result); -#endif return result; } diff --git a/src/common/stringTools.h b/src/common/stringTools.h index 8993c65f5..a3f6da718 100644 --- a/src/common/stringTools.h +++ b/src/common/stringTools.h @@ -20,7 +20,6 @@ #ifndef KIWIX_STRINGTOOLS_H #define KIWIX_STRINGTOOLS_H -#ifndef __ANDROID__ #include #include #include @@ -29,7 +28,6 @@ #include #include #include -#endif #include #include