Merge pull request #129 from kiwix/opds

Opds
This commit is contained in:
Matthieu Gautier 2018-04-19 18:02:59 +02:00 committed by GitHub
commit 6e13d44459
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 577 additions and 6 deletions

View File

@ -51,8 +51,10 @@ string appendToDirectory(const string& directoryPath, const string& filename);
unsigned int getFileSize(const string& path);
string getFileSizeAsString(const string& path);
string getFileContent(const string& path);
bool fileExists(const string& path);
bool makeDirectory(const string& path);
string makeTmpDirectory();
bool copyFile(const string& sourcePath, const string& destPath);
string getLastPathElement(const string& path);
string getExecutablePath();

70
include/downloader.h Normal file
View File

@ -0,0 +1,70 @@
/*
* Copyright 2018 Matthieu Gautier <mgautier@kymeria.fr>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#ifndef KIWIX_DOWNLOADER_H
#define KIWIX_DOWNLOADER_H
#include <string>
#include <aria2/aria2.h>
#include <pthread.h>
namespace kiwix
{
struct DownloadedFile {
DownloadedFile()
: success(false) {}
bool success;
std::string path;
};
/**
* A tool to download things.
*
*/
class Downloader
{
public:
Downloader();
~Downloader();
/**
* Download a content.
*
* @param url the url to download
* @return the content downloaded.
*/
DownloadedFile download(const std::string& url);
private:
static pthread_mutex_t globalLock;
aria2::Session* session;
DownloadedFile* fileHandle;
std::string tmpDir;
static int downloadEventCallback(aria2::Session* session,
aria2::DownloadEvent event,
aria2::A2Gid gid,
void* userData);
};
}
#endif

View File

@ -84,10 +84,21 @@ class Manager
* @param libraryPath The library path (used to resolve relative path)
* @return True if the content has been properly parsed.
*/
bool readXml(const string xml,
bool readXml(const string& xml,
const bool readOnly = true,
const string libraryPath = "");
/**
* Load a library content stored in a OPDS stream.
*
* @param content The content of the OPDS stream.
* @param readOnly Set if the library path could be overwritten later with
* updated content.
* @param libraryPath The library path (used to resolve relative path)
* @return True if the content has been properly parsed.
*/
bool readOpds(const string& content, const std::string& urlHost);
/**
* Write the library to a file.
*
@ -97,8 +108,6 @@ class Manager
bool writeFile(const string path);
string write_OPDS_feed(const string& id, const string& title);
/**
* Remove a book from the library.
*
@ -256,6 +265,16 @@ class Manager
const string creator,
const string publisher,
const string search);
/**
* Filter the library and generate a new one with the keep elements.
*
* @param search List only books with search in the title or description.
* @return A `Library`.
*/
Library filter(const string& search);
/**
* Get all langagues of the books in the library.
*
@ -295,6 +314,8 @@ class Manager
bool parseXmlDom(const pugi::xml_document& doc,
const bool readOnly,
const string libraryPath);
bool parseOpdsDom(const pugi::xml_document& doc,
const std::string& urlHost);
private:
void checkAndCleanBookPaths(Book& book, const string& libraryPath);

View File

@ -1,6 +1,8 @@
headers = [
'library.h',
'manager.h',
'opds_dumper.h',
'downloader.h',
'reader.h',
'searcher.h'
]

107
include/opds_dumper.h Normal file
View File

@ -0,0 +1,107 @@
/*
* Copyright 2017 Matthieu Gautier <mgautier@kymeria.fr>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#ifndef KIWIX_OPDS_DUMPER_H
#define KIWIX_OPDS_DUMPER_H
#include <time.h>
#include <sstream>
#include <string>
#include <pugixml.hpp>
#include "common/base64.h"
#include "common/pathTools.h"
#include "common/regexTools.h"
#include "library.h"
#include "reader.h"
using namespace std;
namespace kiwix
{
/**
* A tool to dump a `Library` into a opds stream.
*
*/
class OPDSDumper
{
public:
OPDSDumper() = default;
OPDSDumper(Library library);
~OPDSDumper();
/**
* Dump the OPDS feed.
*
* @param id The id of the library.
* @return The OPDS feed.
*/
std::string dumpOPDSFeed();
/**
* Set the id of the opds stream.
*
* @param id the id to use.
*/
void setId(const std::string& id) { this->id = id;}
/**
* Set the title oft the opds stream.
*
* @param title the title to use.
*/
void setTitle(const std::string& title) { this->title = title; }
/**
* Set the root location used when generating url.
*
* @param rootLocation the root location to use.
*/
void setRootLocation(const std::string& rootLocation) { this->rootLocation = rootLocation; }
/**
* Set the search url.
*
* @param searchUrl the search url to use.
*/
void setSearchDescriptionUrl(const std::string& searchDescriptionUrl) { this->searchDescriptionUrl = searchDescriptionUrl; }
/**
* Set the library to dump.
*
* @param library The library to dump.
*/
void setLibrary(Library library) { this->library = library; }
protected:
kiwix::Library library;
std::string id;
std::string title;
std::string date;
std::string rootLocation;
std::string searchDescriptionUrl;
private:
pugi::xml_node handleBook(Book book, pugi::xml_node root_node);
};
}
#endif // KIWIX_OPDS_DUMPER_H

View File

@ -12,6 +12,7 @@ thread_dep = dependency('threads')
libicu_dep = dependency('icu-i18n', static:static_deps)
libzim_dep = dependency('libzim', version : '>=3.2.0', static:static_deps)
pugixml_dep = dependency('pugixml', static:static_deps)
libaria2_dep = dependency('libaria2', static:static_deps)
ctpp2_include_path = ''
has_ctpp2_dep = false
@ -72,7 +73,7 @@ endif
xapian_dep = dependency('xapian-core', required:false, static:static_deps)
all_deps = [thread_dep, libicu_dep, libzim_dep, xapian_dep, pugixml_dep]
all_deps = [thread_dep, libicu_dep, libzim_dep, xapian_dep, pugixml_dep, libaria2_dep]
if has_ctpp2_dep
all_deps += [ctpp2_dep]
endif
@ -88,7 +89,7 @@ subdir('scripts')
subdir('static')
subdir('src')
pkg_requires = ['libzim', 'icu-i18n', 'pugixml']
pkg_requires = ['libzim', 'icu-i18n', 'pugixml', 'libaria2']
if xapian_dep.found()
pkg_requires += ['xapian-core']
endif

View File

@ -188,6 +188,20 @@ string getFileSizeAsString(const string& path)
return convert.str();
}
string getFileContent(const string& path)
{
std::ifstream f(path, std::ios::in|std::ios::ate);
std::string content;
if (f.is_open()) {
auto size = f.tellg();
content.reserve(size);
f.seekg(0, std::ios::beg);
content.assign((std::istreambuf_iterator<char>(f)),
std::istreambuf_iterator<char>());
}
return content;
}
bool fileExists(const string& path)
{
#ifdef _WIN32
@ -214,6 +228,30 @@ bool makeDirectory(const string& path)
return status == 0;
}
string makeTmpDirectory()
{
#ifdef _WIN32
char cbase[MAX_PATH+1];
int base_len = GetTempPath(MAX_PATH+1, cbase);
UUID uuid;
UuidCreate(&uuid);
char* dir_name;
UuidToString(&uuid, reinterpret_cast<unsigned char**>(&dir_name));
string dir(cbase, base_len);
dir += dir_name;
_mkdir(dir.c_str());
RpcStringFree(reinterpret_cast<unsigned char**>(&dir_name));
#else
string base = "/tmp";
auto _template = base + "/kiwix-lib_XXXXXX";
char* _template_array = new char[_template.size()+1];
memcpy(_template_array, _template.c_str(), _template.size());
string dir = mkdtemp(_template_array);
delete[] _template_array;
#endif
return dir;
}
/* Try to create a link and if does not work then make a copy */
bool copyFile(const string& sourcePath, const string& destPath)
{

112
src/downloader.cpp Normal file
View File

@ -0,0 +1,112 @@
/*
* Copyright 2018 Matthieu Gautier <mgautier@kymeria.fr>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#include "downloader.h"
#include "common/pathTools.h"
#include <unistd.h>
#include <iostream>
namespace kiwix
{
pthread_mutex_t Downloader::globalLock = PTHREAD_MUTEX_INITIALIZER;
/* Constructor */
Downloader::Downloader()
{
aria2::SessionConfig config;
config.downloadEventCallback = Downloader::downloadEventCallback;
config.userData = this;
tmpDir = makeTmpDirectory();
aria2::KeyVals options;
options.push_back(std::pair<std::string, std::string>("dir", tmpDir));
session = aria2::sessionNew(options, config);
}
/* Destructor */
Downloader::~Downloader()
{
aria2::sessionFinal(session);
rmdir(tmpDir.c_str());
}
int Downloader::downloadEventCallback(aria2::Session* session,
aria2::DownloadEvent event,
aria2::A2Gid gid,
void* userData)
{
Downloader* downloader = static_cast<Downloader*>(userData);
auto fileHandle = downloader->fileHandle;
auto dh = aria2::getDownloadHandle(session, gid);
if (!dh) {
return 0;
}
switch (event) {
case aria2::EVENT_ON_DOWNLOAD_COMPLETE:
{
if (dh->getNumFiles() > 0) {
auto f = dh->getFile(1);
fileHandle->path = f.path;
fileHandle->success = true;
}
}
break;
case aria2::EVENT_ON_DOWNLOAD_ERROR:
{
fileHandle->success = false;
}
break;
default:
break;
}
aria2::deleteDownloadHandle(dh);
return 0;
}
DownloadedFile Downloader::download(const std::string& url) {
pthread_mutex_lock(&globalLock);
DownloadedFile fileHandle;
try {
std::vector<std::string> uris = {url};
aria2::KeyVals options;
aria2::A2Gid gid;
int ret;
DownloadedFile fileHandle;
ret = aria2::addUri(session, &gid, uris, options);
if (ret < 0) {
std::cerr << "Failed to download" << std::endl;
} else {
this->fileHandle = &fileHandle;
aria2::run(session, aria2::RUN_DEFAULT);
}
} catch (...) {};
this->fileHandle = nullptr;
pthread_mutex_unlock(&globalLock);
return fileHandle;
}
}

View File

@ -18,6 +18,7 @@
*/
#include "manager.h"
#include "downloader.h"
namespace kiwix
{
@ -88,7 +89,7 @@ bool Manager::parseXmlDom(const pugi::xml_document& doc,
return true;
}
bool Manager::readXml(const string xml,
bool Manager::readXml(const string& xml,
const bool readOnly,
const string libraryPath)
{
@ -103,6 +104,67 @@ bool Manager::readXml(const string xml,
return true;
}
bool Manager::parseOpdsDom(const pugi::xml_document& doc, const std::string& urlHost)
{
pugi::xml_node libraryNode = doc.child("feed");
for (pugi::xml_node entryNode = libraryNode.child("entry"); entryNode;
entryNode = entryNode.next_sibling("entry")) {
kiwix::Book book;
book.readOnly = false;
book.id = entryNode.child("id").child_value();
book.title = entryNode.child("title").child_value();
book.description = entryNode.child("summary").child_value();
book.language = entryNode.child("language").child_value();
book.date = entryNode.child("updated").child_value();
book.creator = entryNode.child("author").child("name").child_value();
for(pugi::xml_node linkNode = entryNode.child("link"); linkNode;
linkNode = linkNode.next_sibling("link")) {
std::string rel = linkNode.attribute("rel").value();
if (rel == "http://opds-spec.org/image/thumbnail") {
auto faviconUrl = urlHost + linkNode.attribute("href").value();
auto downloader = Downloader();
auto fileHandle = downloader.download(faviconUrl);
if (fileHandle.success) {
auto content = getFileContent(fileHandle.path);
book.favicon = base64_encode((const unsigned char*)content.data(), content.size());
book.faviconMimeType = linkNode.attribute("type").value();
} else {
std::cerr << "Cannot get favicon content from " << faviconUrl << std::endl;
}
} else if (rel == "http://opds-spec.org/acquisition/open-access") {
book.url = linkNode.attribute("href").value();
}
}
/* Update the book properties with the new importer */
library.addBook(book);
}
return true;
}
bool Manager::readOpds(const string& content, const std::string& urlHost)
{
pugi::xml_document doc;
pugi::xml_parse_result result
= doc.load_buffer_inplace((void*)content.data(), content.size());
if (result) {
this->parseOpdsDom(doc, urlHost);
return true;
}
return false;
}
bool Manager::readFile(const string path, const bool readOnly)
{
return this->readFile(path, path, readOnly);
@ -231,6 +293,7 @@ bool Manager::writeFile(const string path)
return true;
}
bool Manager::setCurrentBookId(const string id)
{
if (library.current.empty() || library.current.top() != id) {
@ -625,6 +688,24 @@ bool Manager::listBooks(const supportedListMode mode,
return true;
}
Library Manager::filter(const std::string& search) {
Library library;
if (search.empty()) {
return library;
}
for(auto book:this->library.books) {
if (matchRegex(book.title, "\\Q" + search + "\\E")
|| matchRegex(book.description, "\\Q" + search + "\\E")) {
library.addBook(book);
}
}
return library;
}
void Manager::checkAndCleanBookPaths(Book& book, const string& libraryPath)
{
if (!book.path.empty()) {

View File

@ -1,6 +1,8 @@
kiwix_sources = [
'library.cpp',
'manager.cpp',
'opds_dumper.cpp',
'downloader.cpp',
'reader.cpp',
'searcher.cpp',
'common/base64.cpp',

135
src/opds_dumper.cpp Normal file
View File

@ -0,0 +1,135 @@
/*
* Copyright 2017 Matthieu Gautier <mgautier@kymeria.fr>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#include "opds_dumper.h"
namespace kiwix
{
/* Constructor */
OPDSDumper::OPDSDumper(Library library)
: library(library)
{
}
/* Destructor */
OPDSDumper::~OPDSDumper()
{
}
struct xml_string_writer: pugi::xml_writer
{
std::string result;
virtual void write(const void* data, size_t size)
{
result.append(static_cast<const char*>(data), size);
}
};
std::string node_to_string(pugi::xml_node node)
{
xml_string_writer writer;
node.print(writer, " ");
return writer.result;
}
std::string gen_date_str()
{
auto now = time(0);
auto tm = localtime(&now);
std::stringstream is;
is << std::setw(2) << std::setfill('0')
<< 1900+tm->tm_year << "-"
<< std::setw(2) << std::setfill('0') << tm->tm_mon << "-"
<< std::setw(2) << std::setfill('0') << tm->tm_mday << "T"
<< std::setw(2) << std::setfill('0') << tm->tm_hour << ":"
<< std::setw(2) << std::setfill('0') << tm->tm_min << ":"
<< std::setw(2) << std::setfill('0') << tm->tm_sec << "Z";
return is.str();
}
#define ADD_TEXT_ENTRY(node, child, value) (node).append_child((child)).append_child(pugi::node_pcdata).set_value((value).c_str())
pugi::xml_node OPDSDumper::handleBook(Book book, pugi::xml_node root_node) {
auto entry_node = root_node.append_child("entry");
ADD_TEXT_ENTRY(entry_node, "title", book.title);
ADD_TEXT_ENTRY(entry_node, "id", "urn:uuid:"+book.id);
ADD_TEXT_ENTRY(entry_node, "icon", rootLocation + "/meta?name=favicon&content=" + book.getHumanReadableIdFromPath());
ADD_TEXT_ENTRY(entry_node, "updated", date);
ADD_TEXT_ENTRY(entry_node, "summary", book.description);
auto content_node = entry_node.append_child("link");
content_node.append_attribute("type") = "text/html";
content_node.append_attribute("href") = (rootLocation + "/" + book.getHumanReadableIdFromPath()).c_str();
auto author_node = entry_node.append_child("author");
ADD_TEXT_ENTRY(author_node, "name", book.creator);
if (! book.url.empty()) {
auto acquisition_link = entry_node.append_child("link");
acquisition_link.append_attribute("rel") = "http://opds-spec.org/acquisition/open-access";
acquisition_link.append_attribute("type") = "application/x-zim";
acquisition_link.append_attribute("href") = book.url.c_str();
}
if (! book.faviconMimeType.empty() ) {
auto image_link = entry_node.append_child("link");
image_link.append_attribute("rel") = "http://opds-spec.org/image/thumbnail";
image_link.append_attribute("type") = book.faviconMimeType.c_str();
image_link.append_attribute("href") = (rootLocation + "/meta?name=favicon&content=" + book.getHumanReadableIdFromPath()).c_str();
}
return entry_node;
}
string OPDSDumper::dumpOPDSFeed()
{
date = gen_date_str();
pugi::xml_document doc;
auto root_node = doc.append_child("feed");
root_node.append_attribute("xmlns") = "http://www.w3.org/2005/Atom";
root_node.append_attribute("xmlns:opds") = "http://opds-spec.org/2010/catalog";
ADD_TEXT_ENTRY(root_node, "id", id);
ADD_TEXT_ENTRY(root_node, "title", title);
ADD_TEXT_ENTRY(root_node, "updated", date);
auto self_link_node = root_node.append_child("link");
self_link_node.append_attribute("rel") = "self";
self_link_node.append_attribute("href") = "";
self_link_node.append_attribute("type") = "application/atom+xml";
if (!searchDescriptionUrl.empty() ) {
auto search_link = root_node.append_child("link");
search_link.append_attribute("rel") = "search";
search_link.append_attribute("type") = "application/opensearchdescription+xml";
search_link.append_attribute("href") = searchDescriptionUrl.c_str();
}
for (auto book: library.books) {
handleBook(book, root_node);
}
return node_to_string(root_node);
}
}