From 47ce044e3e866b9c1f05087470b94b78a89dc08e Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Tue, 27 Mar 2018 16:51:01 +0200 Subject: [PATCH] Add method to `Manager` to populate the library from an OPDS stream. The library's books are created from the metadata in the OPDS stream. As the OPDS stream is by definition a distant "library", there is no ZIM file to read to fill in missing information. This can lead to an incomplete `library.xml`. --- include/manager.h | 13 ++++++++++ src/manager.cpp | 62 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+) diff --git a/include/manager.h b/include/manager.h index 513366bd2..d1154090f 100644 --- a/include/manager.h +++ b/include/manager.h @@ -88,6 +88,17 @@ class Manager const bool readOnly = true, const string libraryPath = ""); + /** + * Load library content stored in an OPDS stream. + * + * @param content The content of the OPDS stream. + * @param urlHost The URL host prepended to the thumbnail link of each + * entry to build the address its favicon is downloaded + * from. + * @return True if the content has been properly parsed. + */ + bool readOpds(const string& content, const std::string& urlHost); + /** * Write the library to a file.
* @@ -303,6 +314,8 @@ class Manager bool parseXmlDom(const pugi::xml_document& doc, const bool readOnly, const string libraryPath); + bool parseOpdsDom(const pugi::xml_document& doc, + const std::string& urlHost); private: void checkAndCleanBookPaths(Book& book, const string& libraryPath); diff --git a/src/manager.cpp b/src/manager.cpp index 79ca21d7d..446616995 100644 --- a/src/manager.cpp +++ b/src/manager.cpp @@ -18,6 +18,7 @@ */ #include "manager.h" +#include "downloader.h" namespace kiwix { @@ -103,6 +104,67 @@ bool Manager::readXml(const string& xml, return true; } + + +bool Manager::parseOpdsDom(const pugi::xml_document& doc, const std::string& urlHost) +{ + pugi::xml_node libraryNode = doc.child("feed"); + + for (pugi::xml_node entryNode = libraryNode.child("entry"); entryNode; + entryNode = entryNode.next_sibling("entry")) { + kiwix::Book book; + + book.readOnly = false; + book.id = entryNode.child("id").child_value(); + book.title = entryNode.child("title").child_value(); + book.description = entryNode.child("summary").child_value(); + book.language = entryNode.child("language").child_value(); + book.date = entryNode.child("updated").child_value(); + book.creator = entryNode.child("author").child("name").child_value(); + for(pugi::xml_node linkNode = entryNode.child("link"); linkNode; + linkNode = linkNode.next_sibling("link")) { + std::string rel = linkNode.attribute("rel").value(); + + if (rel == "http://opds-spec.org/image/thumbnail") { + auto faviconUrl = urlHost + linkNode.attribute("href").value(); + auto downloader = Downloader(); + auto fileHandle = downloader.download(faviconUrl); + if (fileHandle.success) { + auto content = getFileContent(fileHandle.path); + book.favicon = base64_encode((const unsigned char*)content.data(), content.size()); + book.faviconMimeType = linkNode.attribute("type").value(); + } else { + std::cerr << "Cannot get favicon content from " << faviconUrl << std::endl; + } + + } else if (rel == 
"http://opds-spec.org/acquisition/open-access") { + book.url = linkNode.attribute("href").value(); + } + } + + /* Update the book properties with the new importer */ + library.addBook(book); + } + + return true; +} + + + +bool Manager::readOpds(const string& content, const std::string& urlHost) +{ + pugi::xml_document doc; + pugi::xml_parse_result result + = doc.load_buffer_inplace((void*)content.data(), content.size()); + + if (result) { + this->parseOpdsDom(doc, urlHost); + return true; + } + + return false; +} + bool Manager::readFile(const string path, const bool readOnly) { return this->readFile(path, path, readOnly);