From 7d68926539b73325d363fb78936e00b1ebb4cf85 Mon Sep 17 00:00:00 2001 From: Maneesh P M Date: Sat, 22 May 2021 22:40:20 +0530 Subject: [PATCH] Drop usage of Reader from InternalServer::handle_meta This is essentially a code move of meta handlers from using Reader functions to directly using Archive. --- format_code.sh | 2 + include/meson.build | 1 + include/tools/archiveTools.h | 45 +++++++++++++++ src/meson.build | 1 + src/reader.cpp | 47 ++++------------ src/server/internalServer.cpp | 99 ++++++++++++++++++++++++++++---- src/tools/archiveTools.cpp | 103 ++++++++++++++++++++++++++++++++++ 7 files changed, 249 insertions(+), 49 deletions(-) create mode 100644 include/tools/archiveTools.h create mode 100644 src/tools/archiveTools.cpp diff --git a/format_code.sh b/format_code.sh index b844eec8c..9e61dacb0 100755 --- a/format_code.sh +++ b/format_code.sh @@ -7,6 +7,7 @@ files=( "include/common/otherTools.h" "include/common/regexTools.h" "include/common/networkTools.h" +"include/common/archiveTools.h" "include/manager.h" "include/reader.h" "include/kiwix.h" @@ -22,6 +23,7 @@ files=( "src/common/pathTools.cpp" "src/common/regexTools.cpp" "src/common/otherTools.cpp" +"src/common/archiveTools.cpp" "src/common/networkTools.cpp" "src/common/stringTools.cpp" "src/xapianSearcher.cpp" diff --git a/include/meson.build b/include/meson.build index 4157970e4..6c0e46cc6 100644 --- a/include/meson.build +++ b/include/meson.build @@ -25,6 +25,7 @@ install_headers( 'tools/pathTools.h', 'tools/regexTools.h', 'tools/stringTools.h', + 'tools/archiveTools.h', subdir:'kiwix/tools' ) diff --git a/include/tools/archiveTools.h b/include/tools/archiveTools.h new file mode 100644 index 000000000..08c36de16 --- /dev/null +++ b/include/tools/archiveTools.h @@ -0,0 +1,45 @@ +/* + * Copyright 2021 Maneesh P M + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. + */ + +#ifndef KIWIX_ARCHIVETOOLS_H +#define KIWIX_ARCHIVETOOLS_H + +#include + +/** + * This file contains all the functions that would make handling data related to + * an archive easier. + **/ + +namespace kiwix +{ + std::string getMetadata(const zim::Archive* const archive, const std::string& name); + std::string getArchiveTitle(const zim::Archive* const archive); + std::string getMetaDescription(const zim::Archive* const archive); + std::string getMetaTags(const zim::Archive* const archive, bool original = false); + bool getArchiveFavicon(const zim::Archive* const archive, + std::string& content, std::string& mimeType); + std::string getMetaLanguage(const zim::Archive* const archive); + std::string getMetaName(const zim::Archive* const archive); + std::string getMetaDate(const zim::Archive* const archive); + std::string getMetaCreator(const zim::Archive* const archive); + std::string getMetaPublisher(const zim::Archive* const archive); +} + +#endif diff --git a/src/meson.build b/src/meson.build index 7d6dab9c1..43f863e07 100644 --- a/src/meson.build +++ b/src/meson.build @@ -19,6 +19,7 @@ kiwix_sources = [ 'tools/stringTools.cpp', 'tools/networkTools.cpp', 'tools/otherTools.cpp', + 'tools/archiveTools.cpp', 'kiwixserve.cpp', 'name_mapper.cpp', 'server/byte_range.cpp', diff --git a/src/reader.cpp b/src/reader.cpp index 4a2d84234..972d416ad 100644 --- a/src/reader.cpp +++ b/src/reader.cpp @@ -25,6 +25,7 @@ #include #include "tools/otherTools.h" +#include "tools/archiveTools.h" inline char hi(char v) { @@ -188,14 +189,7 @@ Entry Reader::getMainPage() const bool Reader::getFavicon(string& content, string& mimeType) const { - try { - auto item = zimArchive->getIllustrationItem(); - content = item.getData(); - mimeType = item.getMimetype(); - return true; - } catch(zim::EntryNotFound& e) {}; - - return false; + return kiwix::getArchiveFavicon(zimArchive.get(), content, mimeType); } string Reader::getZimFilePath() const @@ -217,47 +211,32 @@ bool Reader::getMetadata(const string& name, string& value) const string Reader::getName() const { - METADATA("Name") + return kiwix::getMetaName(zimArchive.get()); } string Reader::getTitle() const { - string value = zimArchive->getMetadata("Title"); - if (value.empty()) { - value = getLastPathElement(zimFilePath); - std::replace(value.begin(), value.end(), '_', ' '); - size_t pos = value.find(".zim"); - value = value.substr(0, pos); - } - return value; + return kiwix::getArchiveTitle(zimArchive.get()); } string Reader::getCreator() const { - METADATA("Creator") + return kiwix::getMetaCreator(zimArchive.get()); } string Reader::getPublisher() const { - METADATA("Publisher") + return kiwix::getMetaPublisher(zimArchive.get()); } string Reader::getDate() const { - METADATA("Date") + return kiwix::getMetaDate(zimArchive.get()); } string Reader::getDescription() const { - string value; - this->getMetadata("Description", value); - - /* Mediawiki Collection tends to use the "Subtitle" name */ - if (value.empty()) { - this->getMetadata("Subtitle", value); - } - - return value; + return kiwix::getMetaDescription(zimArchive.get()); } string Reader::getLongDescription() const @@ -267,7 +246,7 @@ string Reader::getLongDescription() const string Reader::getLanguage() const { - METADATA("Language") + return kiwix::getMetaLanguage(zimArchive.get()); } string Reader::getLicense() const @@ -277,13 +256,7 @@ string Reader::getLicense() const string Reader::getTags(bool original) const { - string tags_str; - getMetadata("Tags", tags_str); - if (original) { - return tags_str; - } - auto tags = convertTags(tags_str); - return join(tags, ";"); + return kiwix::getMetaTags(zimArchive.get(), original); } diff --git a/src/server/internalServer.cpp b/src/server/internalServer.cpp index 78d538f6e..61a9bad17 100644 --- a/src/server/internalServer.cpp +++ b/src/server/internalServer.cpp @@ -47,6 +47,7 @@ extern "C" { #include "tools/pathTools.h" #include "tools/regexTools.h" #include "tools/stringTools.h" +#include "tools/archiveTools.h" #include "library.h" #include "name_mapper.h" #include "entry.h" @@ -55,6 +56,7 @@ extern "C" { #include "opds_dumper.h" #include +#include #include @@ -323,22 +325,95 @@ std::unique_ptr InternalServer::build_homepage(const RequestContext& r return ContentResponse::build(*this, RESOURCE::templates::index_html, get_default_data(), "text/html; charset=utf-8", true); } +/** + * Archive and Zim handlers begin + **/ + +std::vector getTitleVariants(const std::string& title) +{ + std::vector variants; + variants.push_back(title); + variants.push_back(kiwix::ucFirst(title)); + variants.push_back(kiwix::lcFirst(title)); + variants.push_back(kiwix::toTitle(title)); + return variants; +} + +// TODO: retrieve searcher from caching mechanism +SuggestionsList_t getSuggestions(const zim::Archive* const archive, + const std::string& queryString, int suggestionCount) +{ + SuggestionsList_t suggestions; + if (archive->hasTitleIndex()) { + auto searcher = zim::Searcher(*archive); + zim::Query suggestionQuery; + suggestionQuery.setQuery(queryString, true); + auto suggestionSearch = searcher.search(suggestionQuery); + auto suggestionResult = suggestionSearch.getResults(0, suggestionCount); + + for (auto it = suggestionResult.begin(); it != suggestionResult.end(); it++) { + SuggestionItem suggestion(it.getTitle(), it.getPath(), + kiwix::normalize(it.getTitle()), it.getSnippet()); + suggestions.push_back(suggestion); + } + } else { + // TODO: This case should be handled by libzim + std::vector variants = getTitleVariants(queryString); + int currCount = 0; + for (auto it = variants.begin(); it != variants.end() && currCount < suggestionCount; it++) { + for (auto& entry: archive->findByTitle(*it)) { + SuggestionItem suggestion(entry.getTitle(), entry.getPath(), + kiwix::normalize(entry.getTitle())); + suggestions.push_back(suggestion); + currCount++; + } + } + } + return suggestions; +} + +zim::Entry getFinalEntry(const zim::Archive* const archive, const zim::Entry& entry) +{ + int loopCounter = 42; + auto final_entry = entry; + while (final_entry.isRedirect() && loopCounter--) { + final_entry = final_entry.getRedirectEntry(); + } + // Prevent infinite loops. + if (final_entry.isRedirect()) { + throw zim::EntryNotFound("Unable to resolve entry redirects."); + } + return final_entry; +} + +zim::Entry getEntryFromPath(const zim::Archive* const archive, const std::string& path) +{ + if (path.empty() || path == "/") { + return archive->getMainEntry(); + } + return archive->getEntryByPath(path); +} + +/** + * Archive and Zim handlers end + **/ + std::unique_ptr InternalServer::handle_meta(const RequestContext& request) { std::string bookName; std::string bookId; std::string meta_name; - std::shared_ptr reader; + std::shared_ptr archive; try { bookName = request.get_argument("content"); bookId = mp_nameMapper->getIdForName(bookName); meta_name = request.get_argument("name"); - reader = mp_library->getReaderById(bookId); + archive = mp_library->getArchiveById(bookId); } catch (const std::out_of_range& e) { return Response::build_404(*this, request, bookName, ""); } - if (reader == nullptr) { + if (archive == nullptr) { return Response::build_404(*this, request, bookName, ""); } @@ -346,23 +421,23 @@ std::unique_ptr InternalServer::handle_meta(const RequestContext& requ std::string mimeType = "text"; if (meta_name == "title") { - content = reader->getTitle(); + content = getArchiveTitle(archive.get()); } else if (meta_name == "description") { - content = reader->getDescription(); + content = getMetaDescription(archive.get()); } else if (meta_name == "language") { - content = reader->getLanguage(); + content = getMetaLanguage(archive.get()); } else if (meta_name == "name") { - content = reader->getName(); + content = getMetaName(archive.get()); } else if (meta_name == "tags") { - content = reader->getTags(); + content = getMetaTags(archive.get()); } else if (meta_name == "date") { - content = reader->getDate(); + content = getMetaDate(archive.get()); } else if (meta_name == "creator") { - content = reader->getCreator(); + content = getMetaCreator(archive.get()); } else if (meta_name == "publisher") { - content = reader->getPublisher(); + content = getMetaPublisher(archive.get()); } else if (meta_name == "favicon") { - reader->getFavicon(content, mimeType); + getArchiveFavicon(archive.get(), content, mimeType); } else { return Response::build_404(*this, request, bookName, ""); } diff --git a/src/tools/archiveTools.cpp b/src/tools/archiveTools.cpp new file mode 100644 index 000000000..930bc8916 --- /dev/null +++ b/src/tools/archiveTools.cpp @@ -0,0 +1,103 @@ +/* + * Copyright 2021 Maneesh P M + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. + */ + +#include +#include +#include +#include + +#include +#include + +namespace kiwix +{ +std::string getMetadata(const zim::Archive* const archive, const std::string& name) { + try { + return archive->getMetadata(name); + } catch (zim::EntryNotFound& e) { + return ""; + } +} + +std::string getArchiveTitle(const zim::Archive* const archive) { + std::string value = getMetadata(archive, "Title"); + if (value.empty()) { + value = getLastPathElement(archive->getFilename()); + std::replace(value.begin(), value.end(), '_', ' '); + size_t pos = value.find(".zim"); + value = value.substr(0, pos); + } + return value; +} + +std::string getMetaDescription(const zim::Archive* const archive) { + std::string value; + value = getMetadata(archive, "Description"); + + /* Mediawiki Collection tends to use the "Subtitle" name */ + if (value.empty()) { + value = getMetadata(archive, "Subtitle"); + } + + return value; +} + +std::string getMetaTags(const zim::Archive* const archive, bool original) { + std::string tags_str = getMetadata(archive, "Tags"); + if (original) { + return tags_str; + } + auto tags = convertTags(tags_str); + return join(tags, ";"); +} + +bool getArchiveFavicon(const zim::Archive* const archive, + std::string& content, std::string& mimeType){ + try { + auto entry = archive->getFaviconEntry(); + auto item = entry.getItem(true); + content = item.getData(); + mimeType = item.getMimetype(); + return true; + } catch(zim::EntryNotFound& e) {}; + + return false; +} + +std::string getMetaLanguage(const zim::Archive* const archive) { + return getMetadata(archive, "Language"); +} + +std::string getMetaName(const zim::Archive* const archive) { + return getMetadata(archive, "Name"); +} + +std::string getMetaDate(const zim::Archive* const archive) { + return getMetadata(archive, "Date"); +} + +std::string getMetaCreator(const zim::Archive* const archive) { + return getMetadata(archive, "Creator"); +} + +std::string getMetaPublisher(const zim::Archive* const archive) { + return getMetadata(archive, "Publisher"); +} + +} // kiwix