From ee51c470b44fd8532b247d9bfc6bc803e5922289 Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Mon, 26 Mar 2018 18:24:33 +0200 Subject: [PATCH 1/8] Allow the manager to dump the opds feed of the whole library. --- include/manager.h | 4 +- include/meson.build | 1 + include/opds_dumper.h | 91 ++++++++++++++++++++++++++++++ src/manager.cpp | 3 +- src/meson.build | 1 + src/opds_dumper.cpp | 127 ++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 223 insertions(+), 4 deletions(-) create mode 100644 include/opds_dumper.h create mode 100644 src/opds_dumper.cpp diff --git a/include/manager.h b/include/manager.h index 6fb74d220..a2f4ea1c6 100644 --- a/include/manager.h +++ b/include/manager.h @@ -84,7 +84,7 @@ class Manager * @param libraryPath The library path (used to resolve relative path) * @return True if the content has been properly parsed. */ - bool readXml(const string xml, + bool readXml(const string& xml, const bool readOnly = true, const string libraryPath = ""); @@ -97,8 +97,6 @@ class Manager bool writeFile(const string path); - string write_OPDS_feed(const string& id, const string& title); - /** * Remove a book from the library. * diff --git a/include/meson.build b/include/meson.build index 4746d8373..ac79e21ee 100644 --- a/include/meson.build +++ b/include/meson.build @@ -1,6 +1,7 @@ headers = [ 'library.h', 'manager.h', + 'opds_dumper.h', 'reader.h', 'searcher.h' ] diff --git a/include/opds_dumper.h b/include/opds_dumper.h new file mode 100644 index 000000000..b76f7abdc --- /dev/null +++ b/include/opds_dumper.h @@ -0,0 +1,91 @@ +/* + * Copyright 2017 Matthieu Gautier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. + */ + +#ifndef KIWIX_OPDS_DUMPER_H +#define KIWIX_OPDS_DUMPER_H + +#include <time.h> +#include <sstream> +#include <string> + +#include <pugixml.hpp> + +#include "common/base64.h" +#include "common/pathTools.h" +#include "common/regexTools.h" +#include "library.h" +#include "reader.h" + +using namespace std; + +namespace kiwix +{ + +/** + * A tool to dump a `Library` into an OPDS stream. + * + */ +class OPDSDumper +{ + public: + OPDSDumper(Library library); + ~OPDSDumper(); + + /** + * Dump the OPDS feed. + * + * The feed is built from the id, title and library set on this dumper. + * @return The OPDS feed. + */ + std::string dumpOPDSFeed(); + + /** + * Set the id of the opds stream. + * + * @param id the id to use. + */ + void setId(const std::string& id) { this->id = id;} + + /** + * Set the title of the opds stream. + * + * @param title the title to use. + */ + void setTitle(const std::string& title) { this->title = title; } + + /** + * Set the root location used when generating url. + * + * @param rootLocation the root location to use.
+ */ + void setRootLocation(const std::string& rootLocation) { this->rootLocation = rootLocation; } + + protected: + kiwix::Library library; + std::string id; + std::string title; + std::string date; + std::string rootLocation; + + private: + pugi::xml_node handleBook(Book book, pugi::xml_node root_node); +}; +} + +#endif // KIWIX_OPDS_DUMPER_H diff --git a/src/manager.cpp b/src/manager.cpp index b0e307c53..4caa57307 100644 --- a/src/manager.cpp +++ b/src/manager.cpp @@ -88,7 +88,7 @@ bool Manager::parseXmlDom(const pugi::xml_document& doc, return true; } -bool Manager::readXml(const string xml, +bool Manager::readXml(const string& xml, const bool readOnly, const string libraryPath) { @@ -231,6 +231,7 @@ bool Manager::writeFile(const string path) return true; } + bool Manager::setCurrentBookId(const string id) { if (library.current.empty() || library.current.top() != id) { diff --git a/src/meson.build b/src/meson.build index 4a1e0266f..e977f2021 100644 --- a/src/meson.build +++ b/src/meson.build @@ -1,6 +1,7 @@ kiwix_sources = [ 'library.cpp', 'manager.cpp', + 'opds_dumper.cpp', 'reader.cpp', 'searcher.cpp', 'common/base64.cpp', diff --git a/src/opds_dumper.cpp b/src/opds_dumper.cpp new file mode 100644 index 000000000..d5db57ebe --- /dev/null +++ b/src/opds_dumper.cpp @@ -0,0 +1,127 @@ +/* + * Copyright 2017 Matthieu Gautier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. + */ + +#include "opds_dumper.h" + +namespace kiwix +{ +/* Constructor */ +OPDSDumper::OPDSDumper(Library library) + : library(library) +{ +} +/* Destructor */ +OPDSDumper::~OPDSDumper() +{ +} + +struct xml_string_writer: pugi::xml_writer +{ + std::string result; + + virtual void write(const void* data, size_t size) + { + result.append(static_cast<const char*>(data), size); + } +}; + +std::string node_to_string(pugi::xml_node node) +{ + xml_string_writer writer; + node.print(writer, " "); + + return writer.result; +} + +std::string gen_date_str() +{ + auto now = time(0); + auto tm = gmtime(&now); /* UTC: the generated string ends with "Z" */ + + std::stringstream is; + is << std::setw(2) << std::setfill('0') + << 1900+tm->tm_year << "-" + << std::setw(2) << std::setfill('0') << tm->tm_mon+1 /* tm_mon is 0-based */ << "-" + << std::setw(2) << std::setfill('0') << tm->tm_mday << "T" + << std::setw(2) << std::setfill('0') << tm->tm_hour << ":" + << std::setw(2) << std::setfill('0') << tm->tm_min << ":" + << std::setw(2) << std::setfill('0') << tm->tm_sec << "Z"; + return is.str(); +} + +#define ADD_TEXT_ENTRY(node, child, value) (node).append_child((child)).append_child(pugi::node_pcdata).set_value((value).c_str()) + +pugi::xml_node OPDSDumper::handleBook(Book book, pugi::xml_node root_node) { + auto entry_node = root_node.append_child("entry"); + ADD_TEXT_ENTRY(entry_node, "title", book.title); + ADD_TEXT_ENTRY(entry_node, "id", "urn:uuid:"+book.id); + ADD_TEXT_ENTRY(entry_node, "icon", rootLocation + "/meta?name=favicon&content=" + book.getHumanReadableIdFromPath()); + ADD_TEXT_ENTRY(entry_node, "updated", date); + ADD_TEXT_ENTRY(entry_node, "summary", book.description); + + auto content_node = entry_node.append_child("link"); + content_node.append_attribute("type") = "text/html"; + content_node.append_attribute("href") =
(rootLocation + "/" + book.getHumanReadableIdFromPath()).c_str(); + + auto author_node = entry_node.append_child("author"); + ADD_TEXT_ENTRY(author_node, "name", book.creator); + + if (! book.url.empty()) { + auto acquisition_link = entry_node.append_child("link"); + acquisition_link.append_attribute("rel") = "http://opds-spec.org/acquisition/open-access"; + acquisition_link.append_attribute("type") = "application/x-zim"; + acquisition_link.append_attribute("href") = book.url.c_str(); + } + + if (! book.faviconMimeType.empty() ) { + auto image_link = entry_node.append_child("link"); + image_link.append_attribute("rel") = "http://opds-spec.org/image/thumbnail"; + image_link.append_attribute("type") = book.faviconMimeType.c_str(); + image_link.append_attribute("href") = (rootLocation + "/meta?name=favicon&content=" + book.getHumanReadableIdFromPath()).c_str(); + } + return entry_node; +} + +string OPDSDumper::dumpOPDSFeed() +{ + date = gen_date_str(); + pugi::xml_document doc; + + auto root_node = doc.append_child("feed"); + root_node.append_attribute("xmlns") = "http://www.w3.org/2005/Atom"; + root_node.append_attribute("xmlns:opds") = "http://opds-spec.org/2010/catalog"; + + ADD_TEXT_ENTRY(root_node, "id", id); + + ADD_TEXT_ENTRY(root_node, "title", title); + ADD_TEXT_ENTRY(root_node, "updated", date); + + auto self_link_node = root_node.append_child("link"); + self_link_node.append_attribute("rel") = "self"; + self_link_node.append_attribute("href") = ""; + self_link_node.append_attribute("type") = "application/atom+xml"; + + for (auto book: library.books) { + handleBook(book, root_node); + } + + return node_to_string(root_node); +} + +} From ad92af928ba079ce62d42558d465706ac4712e92 Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Mon, 26 Mar 2018 21:25:01 +0200 Subject: [PATCH 2/8] Be able to filter a library. This generates a new library only with the corresponding books.
--- include/manager.h | 10 ++++++++++ src/manager.cpp | 18 ++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/include/manager.h b/include/manager.h index a2f4ea1c6..513366bd2 100644 --- a/include/manager.h +++ b/include/manager.h @@ -254,6 +254,16 @@ class Manager const string creator, const string publisher, const string search); + + /** + * Filter the library and generate a new one with the kept elements. + * + * @param search List only books with search in the title or description. + * @return A `Library` (empty when search is empty). + */ + Library filter(const string& search); + + /** * Get all langagues of the books in the library. * diff --git a/src/manager.cpp b/src/manager.cpp index 4caa57307..79ca21d7d 100644 --- a/src/manager.cpp +++ b/src/manager.cpp @@ -626,6 +626,24 @@ bool Manager::listBooks(const supportedListMode mode, return true; } + +Library Manager::filter(const std::string& search) { + Library library; + + if (search.empty()) { + return library; + } + + for(auto book:this->library.books) { + if (matchRegex(book.title, "\\Q" + search + "\\E") + || matchRegex(book.description, "\\Q" + search + "\\E")) { + library.addBook(book); + } + } + + return library; +} + void Manager::checkAndCleanBookPaths(Book& book, const string& libraryPath) { if (!book.path.empty()) { From b48428e4436a66dd28a9c7279eb3e6efbc681348 Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Mon, 26 Mar 2018 21:25:57 +0200 Subject: [PATCH 3/8] Be able to create a OPDSDumper without library and associate it later.
--- include/opds_dumper.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/include/opds_dumper.h b/include/opds_dumper.h index b76f7abdc..9c5c4beaa 100644 --- a/include/opds_dumper.h +++ b/include/opds_dumper.h @@ -44,6 +44,7 @@ namespace kiwix class OPDSDumper { public: + OPDSDumper() = default; OPDSDumper(Library library); ~OPDSDumper(); @@ -76,6 +77,13 @@ class OPDSDumper */ void setRootLocation(const std::string& rootLocation) { this->rootLocation = rootLocation; } + /** + * Set the library to dump. + * + * @param library The library to dump. + */ + void setLibrary(Library library) { this->library = library; } + protected: kiwix::Library library; std::string id; From 2164faba44a2f1f813275b4cb28ec9ecf9c9e087 Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Mon, 26 Mar 2018 21:28:44 +0200 Subject: [PATCH 4/8] Add a potential search description link in the opds stream. --- include/opds_dumper.h | 8 ++++++++ src/opds_dumper.cpp | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/include/opds_dumper.h b/include/opds_dumper.h index 9c5c4beaa..5c60b8c6a 100644 --- a/include/opds_dumper.h +++ b/include/opds_dumper.h @@ -77,6 +77,13 @@ class OPDSDumper */ void setRootLocation(const std::string& rootLocation) { this->rootLocation = rootLocation; } + /** + * Set the search description url. + * + * @param searchDescriptionUrl the search description url to use. + */ + void setSearchDescriptionUrl(const std::string& searchDescriptionUrl) { this->searchDescriptionUrl = searchDescriptionUrl; } + /** * Set the library to dump.
* @@ -90,6 +97,7 @@ class OPDSDumper std::string title; std::string date; std::string rootLocation; + std::string searchDescriptionUrl; private: pugi::xml_node handleBook(Book book, pugi::xml_node root_node); diff --git a/src/opds_dumper.cpp b/src/opds_dumper.cpp index d5db57ebe..63dce8542 100644 --- a/src/opds_dumper.cpp +++ b/src/opds_dumper.cpp @@ -117,6 +117,14 @@ string OPDSDumper::dumpOPDSFeed() self_link_node.append_attribute("href") = ""; self_link_node.append_attribute("type") = "application/atom+xml"; + + if (!searchDescriptionUrl.empty() ) { + auto search_link = root_node.append_child("link"); + search_link.append_attribute("rel") = "search"; + search_link.append_attribute("type") = "application/opensearchdescription+xml"; + search_link.append_attribute("href") = searchDescriptionUrl.c_str(); + } + for (auto book: library.books) { handleBook(book, root_node); } From 9f86b59d1d998aef6170b9492b0487b5379e0105 Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Tue, 27 Mar 2018 16:36:53 +0200 Subject: [PATCH 5/8] Add a function to get the content of a file. 
--- include/common/pathTools.h | 1 + src/common/pathTools.cpp | 14 ++++++++++++++ 2 files changed, 15 insertions(+) diff --git a/include/common/pathTools.h b/include/common/pathTools.h index 82f41e737..5d3c065e7 100644 --- a/include/common/pathTools.h +++ b/include/common/pathTools.h @@ -51,6 +51,7 @@ string appendToDirectory(const string& directoryPath, const string& filename); unsigned int getFileSize(const string& path); string getFileSizeAsString(const string& path); +string getFileContent(const string& path); bool fileExists(const string& path); bool makeDirectory(const string& path); bool copyFile(const string& sourcePath, const string& destPath); diff --git a/src/common/pathTools.cpp b/src/common/pathTools.cpp index 1d46bf87a..cfdc64d79 100644 --- a/src/common/pathTools.cpp +++ b/src/common/pathTools.cpp @@ -188,6 +188,20 @@ string getFileSizeAsString(const string& path) return convert.str(); } +string getFileContent(const string& path) +{ + std::ifstream f(path, std::ios::in|std::ios::binary|std::ios::ate); /* binary: bytes must be read untranslated */ + std::string content; + if (f.is_open()) { + auto size = f.tellg(); + content.reserve(size); + f.seekg(0, std::ios::beg); + content.assign((std::istreambuf_iterator<char>(f)), + std::istreambuf_iterator<char>()); + } + return content; +} + bool fileExists(const string& path) { #ifdef _WIN32 From d4fefd1a5700c2c8f9ae37cacdb0206a40e9e048 Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Tue, 27 Mar 2018 16:37:17 +0200 Subject: [PATCH 6/8] Add a function to create a temporary directory.
--- include/common/pathTools.h | 1 + src/common/pathTools.cpp | 24 ++++++++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/include/common/pathTools.h b/include/common/pathTools.h index 5d3c065e7..c63fad275 100644 --- a/include/common/pathTools.h +++ b/include/common/pathTools.h @@ -54,6 +54,7 @@ string getFileSizeAsString(const string& path); string getFileContent(const string& path); bool fileExists(const string& path); bool makeDirectory(const string& path); +string makeTmpDirectory(); bool copyFile(const string& sourcePath, const string& destPath); string getLastPathElement(const string& path); string getExecutablePath(); diff --git a/src/common/pathTools.cpp b/src/common/pathTools.cpp index cfdc64d79..876263c04 100644 --- a/src/common/pathTools.cpp +++ b/src/common/pathTools.cpp @@ -228,6 +228,30 @@ bool makeDirectory(const string& path) return status == 0; } +string makeTmpDirectory() +{ +#ifdef _WIN32 + char cbase[MAX_PATH+1]; + int base_len = GetTempPath(MAX_PATH+1, cbase); + UUID uuid; + UuidCreate(&uuid); + char* dir_name; + UuidToString(&uuid, reinterpret_cast<unsigned char**>(&dir_name)); + string dir(cbase, base_len); + dir += dir_name; + _mkdir(dir.c_str()); + RpcStringFree(reinterpret_cast<unsigned char**>(&dir_name)); +#else + string base = "/tmp"; + auto _template = base + "/kiwix-lib_XXXXXX"; + char* _template_array = new char[_template.size()+1]; + memcpy(_template_array, _template.c_str(), _template.size()+1); /* +1: copy the terminating NUL too */ + string dir = mkdtemp(_template_array) ? _template_array : ""; /* "" when the directory cannot be created */ + delete[] _template_array; +#endif + return dir; +} + /* Try to create a link and if does not work then make a copy */ bool copyFile(const string& sourcePath, const string& destPath) { From 1f091da3f47073bb4c7b0d692abc0c18576c718e Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Tue, 27 Mar 2018 16:46:59 +0200 Subject: [PATCH 7/8] Add a downloader tools to download files. The downloader is using libaria2. For now, only one download can be run at a time.
A download will start only if (and as soon as) no download is running. --- include/downloader.h | 70 +++++++++++++++++++++++++++ include/meson.build | 1 + meson.build | 5 +- src/downloader.cpp | 112 +++++++++++++++++++++++++++++++++++++++++++ src/meson.build | 1 + 5 files changed, 187 insertions(+), 2 deletions(-) create mode 100644 include/downloader.h create mode 100644 src/downloader.cpp diff --git a/include/downloader.h b/include/downloader.h new file mode 100644 index 000000000..347fb1f19 --- /dev/null +++ b/include/downloader.h @@ -0,0 +1,70 @@ +/* + * Copyright 2018 Matthieu Gautier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. + */ + +#ifndef KIWIX_DOWNLOADER_H +#define KIWIX_DOWNLOADER_H + +#include <string> +#include <pthread.h> +#include <aria2/aria2.h> + +namespace kiwix +{ + + +struct DownloadedFile { + DownloadedFile() + : success(false) {} + bool success; + std::string path; +}; + +/** + * A tool to download things. + * + */ +class Downloader +{ + public: + Downloader(); + ~Downloader(); + + /** + * Download a content. + * + * @param url the url to download + * @return the download result (success flag and path of the file).
+ */ + DownloadedFile download(const std::string& url); + + private: + static pthread_mutex_t globalLock; + + aria2::Session* session; + DownloadedFile* fileHandle; + std::string tmpDir; + + static int downloadEventCallback(aria2::Session* session, + aria2::DownloadEvent event, + aria2::A2Gid gid, + void* userData); +}; +} + +#endif diff --git a/include/meson.build b/include/meson.build index ac79e21ee..a3d6b1ea1 100644 --- a/include/meson.build +++ b/include/meson.build @@ -2,6 +2,7 @@ headers = [ 'library.h', 'manager.h', 'opds_dumper.h', + 'downloader.h', 'reader.h', 'searcher.h' ] diff --git a/meson.build b/meson.build index 43a76926d..6f875dc87 100644 --- a/meson.build +++ b/meson.build @@ -12,6 +12,7 @@ thread_dep = dependency('threads') libicu_dep = dependency('icu-i18n', static:static_deps) libzim_dep = dependency('libzim', version : '>=3.2.0', static:static_deps) pugixml_dep = dependency('pugixml', static:static_deps) +libaria2_dep = dependency('libaria2', static:static_deps) ctpp2_include_path = '' has_ctpp2_dep = false @@ -72,7 +73,7 @@ endif xapian_dep = dependency('xapian-core', required:false, static:static_deps) -all_deps = [thread_dep, libicu_dep, libzim_dep, xapian_dep, pugixml_dep] +all_deps = [thread_dep, libicu_dep, libzim_dep, xapian_dep, pugixml_dep, libaria2_dep] if has_ctpp2_dep all_deps += [ctpp2_dep] endif @@ -88,7 +89,7 @@ subdir('scripts') subdir('static') subdir('src') -pkg_requires = ['libzim', 'icu-i18n', 'pugixml'] +pkg_requires = ['libzim', 'icu-i18n', 'pugixml', 'libaria2'] if xapian_dep.found() pkg_requires += ['xapian-core'] endif diff --git a/src/downloader.cpp b/src/downloader.cpp new file mode 100644 index 000000000..b22429008 --- /dev/null +++ b/src/downloader.cpp @@ -0,0 +1,112 @@ +/* + * Copyright 2018 Matthieu Gautier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the 
License, or + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. + */ + +#include "downloader.h" +#include "common/pathTools.h" + +#include <iostream> +#include <unistd.h> + +namespace kiwix +{ + +pthread_mutex_t Downloader::globalLock = PTHREAD_MUTEX_INITIALIZER; + + +/* Constructor */ +Downloader::Downloader() : fileHandle(nullptr) /* no download in progress yet */ +{ + aria2::SessionConfig config; + config.downloadEventCallback = Downloader::downloadEventCallback; + config.userData = this; + tmpDir = makeTmpDirectory(); + aria2::KeyVals options; + options.push_back(std::pair<std::string, std::string>("dir", tmpDir)); + session = aria2::sessionNew(options, config); +} + + +/* Destructor */ +Downloader::~Downloader() +{ + aria2::sessionFinal(session); + rmdir(tmpDir.c_str()); +} + + +int Downloader::downloadEventCallback(aria2::Session* session, + aria2::DownloadEvent event, + aria2::A2Gid gid, + void* userData) +{ + Downloader* downloader = static_cast<Downloader*>(userData); + + auto fileHandle = downloader->fileHandle; + auto dh = aria2::getDownloadHandle(session, gid); + + if (!dh) { + return 0; + } + + switch (event) { + case aria2::EVENT_ON_DOWNLOAD_COMPLETE: + { + if (dh->getNumFiles() > 0) { + auto f = dh->getFile(1); + fileHandle->path = f.path; + fileHandle->success = true; + } + } + break; + case aria2::EVENT_ON_DOWNLOAD_ERROR: + { + fileHandle->success = false; + } + break; + default: + break; + } + aria2::deleteDownloadHandle(dh); + return 0; +} + +DownloadedFile Downloader::download(const std::string& url) { + pthread_mutex_lock(&globalLock); + DownloadedFile fileHandle; + try { + std::vector<std::string> uris = {url}; + aria2::KeyVals
options; + aria2::A2Gid gid; + int ret; + /* No local DownloadedFile here: it would shadow the one returned below. */ + + ret = aria2::addUri(session, &gid, uris, options); + if (ret < 0) { + std::cerr << "Failed to download" << std::endl; + } else { + this->fileHandle = &fileHandle; + aria2::run(session, aria2::RUN_DEFAULT); + } + } catch (...) {}; + this->fileHandle = nullptr; + pthread_mutex_unlock(&globalLock); + return fileHandle; +} + +} diff --git a/src/meson.build b/src/meson.build index e977f2021..d3df0f4ad 100644 --- a/src/meson.build +++ b/src/meson.build @@ -2,6 +2,7 @@ kiwix_sources = [ 'library.cpp', 'manager.cpp', 'opds_dumper.cpp', + 'downloader.cpp', 'reader.cpp', 'searcher.cpp', 'common/base64.cpp', From 47ce044e3e866b9c1f05087470b94b78a89dc08e Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Tue, 27 Mar 2018 16:51:01 +0200 Subject: [PATCH 8/8] Add method to `Manager` to populate the library from a opds stream. The library's books are created from the metadata in the opds. As the opds stream is by definition a distant "library", there is no zim to read to complete missing information. This can lead to incomplete `library.xml`. --- include/manager.h | 13 ++++++++++ src/manager.cpp | 62 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+) diff --git a/include/manager.h b/include/manager.h index 513366bd2..d1154090f 100644 --- a/include/manager.h +++ b/include/manager.h @@ -88,6 +88,17 @@ class Manager const bool readOnly = true, const string libraryPath = ""); + /** + * Load a library content stored in an OPDS stream. + * + * @param content The content of the OPDS stream. + * @param urlHost The url host prepended to the thumbnail links found in + * the stream when downloading the favicons. + * + * @return True if the content has been properly parsed. + */ + bool readOpds(const string& content, const std::string& urlHost); + /** * Write the library to a file.
* @@ -303,6 +314,8 @@ class Manager bool parseXmlDom(const pugi::xml_document& doc, const bool readOnly, const string libraryPath); + bool parseOpdsDom(const pugi::xml_document& doc, + const std::string& urlHost); private: void checkAndCleanBookPaths(Book& book, const string& libraryPath); diff --git a/src/manager.cpp b/src/manager.cpp index 79ca21d7d..446616995 100644 --- a/src/manager.cpp +++ b/src/manager.cpp @@ -18,6 +18,7 @@ */ #include "manager.h" +#include "downloader.h" namespace kiwix { @@ -103,6 +104,67 @@ bool Manager::readXml(const string& xml, return true; } + + +bool Manager::parseOpdsDom(const pugi::xml_document& doc, const std::string& urlHost) +{ + pugi::xml_node libraryNode = doc.child("feed"); + + for (pugi::xml_node entryNode = libraryNode.child("entry"); entryNode; + entryNode = entryNode.next_sibling("entry")) { + kiwix::Book book; + + book.readOnly = false; + book.id = entryNode.child("id").child_value(); if (book.id.compare(0, 9, "urn:uuid:") == 0) book.id.erase(0, 9); /* drop the prefix OPDSDumper writes */ + book.title = entryNode.child("title").child_value(); + book.description = entryNode.child("summary").child_value(); + book.language = entryNode.child("language").child_value(); + book.date = entryNode.child("updated").child_value(); + book.creator = entryNode.child("author").child("name").child_value(); + for(pugi::xml_node linkNode = entryNode.child("link"); linkNode; + linkNode = linkNode.next_sibling("link")) { + std::string rel = linkNode.attribute("rel").value(); + + if (rel == "http://opds-spec.org/image/thumbnail") { + auto faviconUrl = urlHost + linkNode.attribute("href").value(); + auto downloader = Downloader(); + auto fileHandle = downloader.download(faviconUrl); + if (fileHandle.success) { + auto content = getFileContent(fileHandle.path); + book.favicon = base64_encode((const unsigned char*)content.data(), content.size()); + book.faviconMimeType = linkNode.attribute("type").value(); + } else { + std::cerr << "Cannot get favicon content from " << faviconUrl << std::endl; + } + + } else if (rel ==
"http://opds-spec.org/acquisition/open-access") { + book.url = linkNode.attribute("href").value(); + } + } + + /* Add the book built from this OPDS entry to the library */ + library.addBook(book); + } + + return true; +} + + + +bool Manager::readOpds(const string& content, const std::string& urlHost) +{ + pugi::xml_document doc; + pugi::xml_parse_result result + = doc.load_buffer(content.data(), content.size()); /* not _inplace: content is const */ + + if (result) { + this->parseOpdsDom(doc, urlHost); + return true; + } + + return false; +} + bool Manager::readFile(const string path, const bool readOnly) { return this->readFile(path, path, readOnly);