From b1508c0b98fb1785a865af30791ca290c61a4ef4 Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Wed, 24 Oct 2018 11:50:11 +0200 Subject: [PATCH 1/4] Better listBooksIds supported mode. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Having only REMOTE or LOCAL is a bit restrictive. By using flags, a user can express more complex requests. --- include/library.h | 25 +++++++++++++++++++------ src/library.cpp | 17 ++++++++++++++--- 2 files changed, 33 insertions(+), 9 deletions(-) diff --git a/include/library.h b/include/library.h index bf919ccc0..e24bf9d7e 100644 --- a/include/library.h +++ b/include/library.h @@ -33,7 +33,15 @@ class Book; class OPDSDumper; enum supportedListSortBy { UNSORTED, TITLE, SIZE, DATE, CREATOR, PUBLISHER }; -enum supportedListMode { ALL, REMOTE, LOCAL }; +enum supportedListMode { + ALL = 0, + LOCAL = 1, + REMOTE = 1 << 1, + NOLOCAL = 1 << 2, + NOREMOTE = 1 << 3, + VALID = 1 << 4, + NOVALID = 1 << 5 +}; /** * A Library store several books. */ @@ -127,10 +135,15 @@ class Library * List books in the library. * * @param mode The mode of listing : - * - ALL list all books. - * (LOCAL and REMOTE. Other filters are applied). - * - LOCAL list only local books. - * - REMOTE list only remote books. + * - LOCAL  : list only local books (with a path). + * - REMOTE : list only remote books (with an url). + * - VALID  : list only valid books (without a path or with a + * path pointing to a valid zim file). + * - NOLOCAL : list only books without valid path. + * - NOREMOTE : list only books without url. + * - NOVALID : list only books not valid. + * - ALL : Do not do any filter (LOCAL or REMOTE) + * - Flags can be combined. * @param sortBy Attribute to sort by the book list. * @param search List only books with search in the title, description. * @param language List only books in this language. @@ -141,7 +154,7 @@ class Library * @return The list of bookIds corresponding to the query. 
*/ std::vector listBooksIds( - supportedListMode = ALL, + int supportedListMode = ALL, supportedListSortBy sortBy = UNSORTED, const std::string& search = "", const std::string& language = "", diff --git a/src/library.cpp b/src/library.cpp index 9a4ec60d6..bd37e4127 100644 --- a/src/library.cpp +++ b/src/library.cpp @@ -303,7 +303,7 @@ std::string Comparator::get_keys(const std::string& id) std::vector Library::listBooksIds( - supportedListMode mode, + int mode, supportedListSortBy sortBy, const std::string& search, const std::string& language, @@ -314,9 +314,20 @@ std::vector Library::listBooksIds( std::vector bookIds; for(auto& pair:books) { auto& book = pair.second; - if (mode == LOCAL && book.getPath().empty()) + auto local = !book.getPath().empty(); + if (mode & LOCAL && !local) continue; - if (mode == REMOTE && (!book.getPath().empty() || book.getUrl().empty())) + if (mode & NOLOCAL && local) + continue; + auto valid = book.isPathValid(); + if (mode & VALID && !valid) + continue; + if (mode & NOVALID && valid) + continue; + auto remote = !book.getUrl().empty(); + if (mode & REMOTE && !remote) + continue; + if (mode & NOREMOTE && remote) continue; if (maxSize != 0 && book.getSize() > maxSize) continue; From c20ae18bff480ba84d2f6e7adae0f66d5a920607 Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Wed, 24 Oct 2018 11:51:38 +0200 Subject: [PATCH 2/4] An opds feed can also be the openSearch result. We must be able to set the correct entry in the feed for a searchResult. --- include/opds_dumper.h | 13 +++++++++++++ src/opds_dumper.cpp | 14 ++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/include/opds_dumper.h b/include/opds_dumper.h index ebfa4f8df..c4b9e9095 100644 --- a/include/opds_dumper.h +++ b/include/opds_dumper.h @@ -84,6 +84,15 @@ class OPDSDumper */ void setSearchDescriptionUrl(const std::string& searchDescriptionUrl) { this->searchDescriptionUrl = searchDescriptionUrl; } + /** + * Set some informations about the search results. 
+ * + * @param totalResult the total number of results of the search. + * @param startIndex the start index of the result. + * @param count the number of result of the current set (or page). + */ + void setOpenSearchInfo(int totalResult, int startIndex, int count); + /** * Set the library to dump. * @@ -98,6 +107,10 @@ class OPDSDumper std::string date; std::string rootLocation; std::string searchDescriptionUrl; + int m_totalResults; + int m_startIndex; + int m_count; + bool m_isSearchResult = false; private: pugi::xml_node handleBook(Book book, pugi::xml_node root_node); diff --git a/src/opds_dumper.cpp b/src/opds_dumper.cpp index 1ce76b05b..fcfde0287 100644 --- a/src/opds_dumper.cpp +++ b/src/opds_dumper.cpp @@ -50,6 +50,14 @@ std::string gen_date_str() return is.str(); } +void OPDSDumper::setOpenSearchInfo(int totalResults, int startIndex, int count) +{ + m_totalResults = totalResults; + m_startIndex = startIndex, + m_count = count; + m_isSearchResult = true; +} + #define ADD_TEXT_ENTRY(node, child, value) (node).append_child((child)).append_child(pugi::node_pcdata).set_value((value).c_str()) pugi::xml_node OPDSDumper::handleBook(Book book, pugi::xml_node root_node) { @@ -98,6 +106,12 @@ string OPDSDumper::dumpOPDSFeed(const std::vector& bookIds) ADD_TEXT_ENTRY(root_node, "title", title); ADD_TEXT_ENTRY(root_node, "updated", date); + if (m_isSearchResult) { + ADD_TEXT_ENTRY(root_node, "totalResults", to_string(m_totalResults)); + ADD_TEXT_ENTRY(root_node, "startIndex", to_string(m_startIndex)); + ADD_TEXT_ENTRY(root_node, "itemsPerPage", to_string(m_count)); + } + auto self_link_node = root_node.append_child("link"); self_link_node.append_attribute("rel") = "self"; self_link_node.append_attribute("href") = ""; From c6206edfb4feb909df9b04d61a01fe046e5fb5bc Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Wed, 24 Oct 2018 11:56:05 +0200 Subject: [PATCH 3/4] Do not always download the favicon of a book. Download as needed. 
When parsing an OPDS feed, the favicon is a URL, not a data URL. If we download the favicon every time, parsing the feed may take a long time. We store the URL and download the favicon only when needed (when displayed). --- include/book.h | 7 ++++--- src/book.cpp | 19 +++++++++++++++++-- src/manager.cpp | 18 +----------------- 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/include/book.h b/include/book.h index 434ba745e..cffe2dfc7 100644 --- a/include/book.h +++ b/include/book.h @@ -45,7 +45,7 @@ class Book bool update(const Book& other); void update(const Reader& reader); void updateFromXml(const pugi::xml_node& node, const std::string& baseDir); - void updateFromOpds(const pugi::xml_node& node); + void updateFromOpds(const pugi::xml_node& node, const std::string& urlHost); std::string getHumanReadableIdFromPath(); bool readOnly() const { return m_readOnly; } @@ -67,7 +67,7 @@ class Book const uint64_t& getArticleCount() const { return m_articleCount; } const uint64_t& getMediaCount() const { return m_mediaCount; } const uint64_t& getSize() const { return m_size; } - const std::string& getFavicon() const { return m_favicon; } + const std::string& getFavicon() const; const std::string& getFaviconMimeType() const { return m_faviconMimeType; } const std::string& getDownloadId() const { return m_downloadId; } @@ -115,7 +115,8 @@ class Book uint64_t m_mediaCount; bool m_readOnly; uint64_t m_size; - std::string m_favicon; + mutable std::string m_favicon; + std::string m_faviconUrl; std::string m_faviconMimeType; }; diff --git a/src/book.cpp b/src/book.cpp index d8241f5f0..49318a7c6 100644 --- a/src/book.cpp +++ b/src/book.cpp @@ -22,6 +22,7 @@ #include "common/base64.h" #include "common/regexTools.h" +#include "common/networkTools.h" #include @@ -131,7 +132,7 @@ void Book::updateFromXml(const pugi::xml_node& node, const std::string& baseDir) #define VALUE(name) node.child(name).child_value() -void Book::updateFromOpds(const pugi::xml_node& node) 
+void Book::updateFromOpds(const pugi::xml_node& node, const std::string& urlHost) { m_id = VALUE("id"); if (!m_id.compare(0, 9, "urn:uuid:")) { @@ -149,7 +150,10 @@ void Book::updateFromOpds(const pugi::xml_node& node) if (rel == "http://opds-spec.org/acquisition/open-access") { m_url = linkNode.attribute("href").value(); m_size = strtoull(linkNode.attribute("length").value(), 0, 0); - break; + } + if (rel == "http://opds-spec.org/image/thumbnail") { + m_faviconUrl = urlHost + linkNode.attribute("href").value(); + m_faviconMimeType = linkNode.attribute("type").value(); } } @@ -189,4 +193,15 @@ void Book::setIndexPath(const std::string& indexPath) : indexPath; } +const std::string& Book::getFavicon() const { + if (m_favicon.empty() && !m_faviconUrl.empty()) { + try { + m_favicon = download(m_faviconUrl); + } catch(...) { + std::cerr << "Cannot download favicon from " << m_faviconUrl; + } + } + return m_favicon; +} + } diff --git a/src/manager.cpp b/src/manager.cpp index ce841a146..582d8ec49 100644 --- a/src/manager.cpp +++ b/src/manager.cpp @@ -18,7 +18,6 @@ */ #include "manager.h" -#include "common/networkTools.h" #include @@ -101,22 +100,7 @@ bool Manager::parseOpdsDom(const pugi::xml_document& doc, const std::string& url kiwix::Book book; book.setReadOnly(false); - book.updateFromOpds(entryNode); - for(pugi::xml_node linkNode = entryNode.child("link"); linkNode; - linkNode = linkNode.next_sibling("link")) { - std::string rel = linkNode.attribute("rel").value(); - - if (rel == "http://opds-spec.org/image/thumbnail") { - auto faviconUrl = urlHost + linkNode.attribute("href").value(); - try { - book.setFavicon(download(faviconUrl)); - book.setFaviconMimeType(linkNode.attribute("type").value()); - } catch (...) 
{ - std::cerr << "Cannot get favicon content from " << faviconUrl << std::endl; - } - break; - } - } + book.updateFromOpds(entryNode, urlHost); /* Update the book properties with the new importer */ manipulator->addBookToLibrary(book); From ad654ead08330564bc80bf24ae78c9c1dd9a2fe2 Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Wed, 24 Oct 2018 11:56:38 +0200 Subject: [PATCH 4/4] Do not force the download port to be 80. We may want to use URLs with a port other than 80. --- src/common/networkTools.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/common/networkTools.cpp b/src/common/networkTools.cpp index 0066b3d48..d71f1c50b 100644 --- a/src/common/networkTools.cpp +++ b/src/common/networkTools.cpp @@ -191,7 +191,6 @@ std::string kiwix::download(const std::string& url) { auto curl = curl_easy_init(); std::stringstream ss; curl_easy_setopt(curl, CURLOPT_URL, url.c_str()); - curl_easy_setopt(curl, CURLOPT_PORT, 80); curl_easy_setopt(curl, CURLOPT_HTTPGET, 1L); curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, &write_callback_to_iss); curl_easy_setopt(curl, CURLOPT_WRITEDATA, &ss);