// Review notes for this patch. Text placed ahead of the first `diff --git` header is
// not part of any hunk.
//
// Files touched: include/book.h, src/book.cpp, src/reader.cpp,
// src/tools/archiveTools.cpp, src/tools/archiveTools.h.
//
// include/book.h
//   - Forward-declares zim::Archive and declares Book::update(const zim::Archive&).
//
// src/book.cpp
//   - Adds #include "tools/archiveTools.h".
//   - Book::update(const Reader&) is reduced to update(*reader.getZimArchive()).
//   - The new Book::update(const zim::Archive&) fills the Book members through the
//     archiveTools helpers (getArchiveId, getArchiveTitle, getMeta*, getArchiveMediaCount,
//     getArchiveFileSize, getArchiveFavicon).
//   - NOTE(review): the removed body assigned m_origId = reader.getOrigId(); the added
//     body contains no assignment to m_origId. Confirm this is intentional.
//   - NOTE(review): m_articleCount is now taken from archive.getArticleCount() instead of
//     reader.getArticleCount(); whether the two agree is not visible here -- verify.
//   - m_size is computed as getArchiveFileSize(archive) << 10, and getArchiveFileSize
//     returns archive.getFilesize() / 1024 (see archiveTools.cpp in this patch).
//
// src/reader.cpp
//   - Removes the hi(), lo() and hexUUID() helpers.
//   - Reader::parseCounterMetadata, getMediaCount, getId, getFlavour, getOrigId and
//     getFileSize now forward to the kiwix:: free functions, passing *zimArchive.
//
// src/tools/archiveTools.cpp and src/tools/archiveTools.h
//   - Adds getMetaFlavour, getArchiveId, getArchiveOrigId, getArchiveMediaCount,
//     getArchiveFileSize and parseArchiveCounter; the header also gains a declaration of
//     getEntryFromPath. getArchiveFavicon is moved further down with the same body.
//   - getArchiveMediaCount sums the counter entries whose key starts with "image/",
//     "video/" or "audio/".
//   - parseArchiveCounter returns {} when getMetadata("Counter") throws
//     zim::EntryNotFound.
//   - NOTE(review): getArchiveOrigId stores into tempArray[k] and increments k for every
//     '\n' in the metadata value, with no check of k against the 16-element array.
//   - NOTE(review): the removed Reader::getOrigId formatted the bytes with hexUUID(); the
//     added code uses (std::string) zim::Uuid::generate(tempArray). Whether both yield
//     the same string is not visible here -- confirm against libzim.
//
// NOTE(review): this text looks like a whitespace-collapsed extraction: line breaks
// inside hunks are gone and some tokens appear truncated (#include with no target,
// static_cast( with no type argument, std::map counterMap with no template arguments).
// Presumably the angle-bracketed parts were stripped -- compare with the original commit
// before trying to apply this patch.
diff --git a/include/book.h b/include/book.h index f9e685f5f..a9db6df2c 100644 --- a/include/book.h +++ b/include/book.h @@ -26,6 +26,10 @@ namespace pugi { class xml_node; } +namespace zim { +class Archive; +} + namespace kiwix { @@ -43,6 +47,7 @@ class Book bool update(const Book& other); void update(const Reader& reader); + void update(const zim::Archive& archive); void updateFromXml(const pugi::xml_node& node, const std::string& baseDir); void updateFromOpds(const pugi::xml_node& node, const std::string& urlHost); std::string getHumanReadableIdFromPath() const; diff --git a/src/book.cpp b/src/book.cpp index 4b2111271..da37ea042 100644 --- a/src/book.cpp +++ b/src/book.cpp @@ -27,6 +27,9 @@ #include "tools/otherTools.h" #include "tools/stringTools.h" #include "tools/pathTools.h" +#include "tools/archiveTools.h" + +#include #include @@ -80,26 +83,28 @@ bool Book::update(const kiwix::Book& other) void Book::update(const kiwix::Reader& reader) { - m_path = reader.getZimFilePath(); - m_pathValid = true; - m_id = reader.getId(); - m_title = reader.getTitle(); - m_description = reader.getDescription(); - m_language = reader.getLanguage(); - m_creator = reader.getCreator(); - m_publisher = reader.getPublisher(); - m_date = reader.getDate(); - m_name = reader.getName(); - m_flavour = reader.getFlavour(); - m_tags = reader.getTags(); - m_category = getCategoryFromTags(); - m_origId = reader.getOrigId(); - m_articleCount = reader.getArticleCount(); - m_mediaCount = reader.getMediaCount(); - m_size = static_cast(reader.getFileSize()) << 10; - m_pathValid = true; + update(*reader.getZimArchive()); +} - reader.getFavicon(m_favicon, m_faviconMimeType); +void Book::update(const zim::Archive& archive) { + m_path = archive.getFilename(); + m_pathValid = true; + m_id = getArchiveId(archive); + m_title = getArchiveTitle(archive); + m_description = getMetaDescription(archive); + m_language = getMetaLanguage(archive); + m_creator = getMetaCreator(archive); + m_publisher = 
getMetaPublisher(archive); + m_date = getMetaDate(archive); + m_name = getMetaName(archive); + m_flavour = getMetaFlavour(archive); + m_tags = getMetaTags(archive); + m_category = getCategoryFromTags(); + m_articleCount = archive.getArticleCount(); + m_mediaCount = getArchiveMediaCount(archive); + m_size = static_cast(getArchiveFileSize(archive)) << 10; + + getArchiveFavicon(archive, m_favicon, m_faviconMimeType); } #define ATTR(name) node.attribute(name).value() diff --git a/src/reader.cpp b/src/reader.cpp index 44cb312a6..76ce4f4cc 100644 --- a/src/reader.cpp +++ b/src/reader.cpp @@ -29,44 +29,6 @@ #include "tools/otherTools.h" #include "tools/archiveTools.h" -inline char hi(char v) -{ - char hex[] = "0123456789abcdef"; - return hex[(v >> 4) & 0xf]; -} - -inline char lo(char v) -{ - char hex[] = "0123456789abcdef"; - return hex[v & 0xf]; -} - -std::string hexUUID(std::string in) -{ - std::ostringstream out; - for (unsigned n = 0; n < 4; ++n) { - out << hi(in[n]) << lo(in[n]); - } - out << '-'; - for (unsigned n = 4; n < 6; ++n) { - out << hi(in[n]) << lo(in[n]); - } - out << '-'; - for (unsigned n = 6; n < 8; ++n) { - out << hi(in[n]) << lo(in[n]); - } - out << '-'; - for (unsigned n = 8; n < 10; ++n) { - out << hi(in[n]) << lo(in[n]); - } - out << '-'; - for (unsigned n = 10; n < 16; ++n) { - out << hi(in[n]) << lo(in[n]); - } - std::string op = out.str(); - return op; -} - namespace kiwix { /* Constructor */ @@ -119,12 +81,7 @@ zim::Archive* Reader::getZimArchive() const MimeCounterType Reader::parseCounterMetadata() const { - try { - auto counterContent = zimArchive->getMetadata("Counter"); - return parseMimetypeCounter(counterContent); - } catch (zim::EntryNotFound& e) { - return {}; - } + return kiwix::parseArchiveCounter(*zimArchive); } /* Get the count of articles which can be indexed/displayed */ @@ -146,19 +103,7 @@ unsigned int Reader::getArticleCount() const /* Get the count of medias content in the ZIM file */ unsigned int Reader::getMediaCount() 
const { - std::map counterMap - = this->parseCounterMetadata(); - unsigned int counter = 0; - - for (auto &pair:counterMap) { - if (startsWith(pair.first, "image/") || - startsWith(pair.first, "video/") || - startsWith(pair.first, "audio/")) { - counter += pair.second; - } - } - - return counter; + return kiwix::getArchiveMediaCount(*zimArchive); } /* Get the total of all items of a ZIM file, redirects included */ @@ -170,9 +115,7 @@ unsigned int Reader::getGlobalCount() const /* Return the UID of the ZIM file */ string Reader::getId() const { - std::ostringstream s; - s << zimArchive->getUuid(); - return s.str(); + return kiwix::getArchiveId(*zimArchive); } Entry Reader::getRandomPage() const @@ -281,7 +224,7 @@ string Reader::getRelation() const string Reader::getFlavour() const { - METADATA("Flavour") + return kiwix::getMetaFlavour(*zimArchive); } string Reader::getSource() const @@ -297,27 +240,7 @@ string Reader::getScraper() const string Reader::getOrigId() const { - string value; - this->getMetadata("startfileuid", value); - if (value.empty()) { - return ""; - } - std::string id = value; - std::string origID; - std::string temp = ""; - unsigned int k = 0; - char tempArray[16] = ""; - for (unsigned int i = 0; i < id.size(); i++) { - if (id[i] == '\n') { - tempArray[k] = atoi(temp.c_str()); - temp = ""; - k++; - } else { - temp += id[i]; - } - } - origID = hexUUID(tempArray); - return origID; + return kiwix::getArchiveOrigId(*zimArchive); } Entry Reader::getEntryFromPath(const std::string& path) const @@ -546,7 +469,7 @@ bool Reader::isCorrupted() const /* Return the file size, works also for splitted files */ unsigned int Reader::getFileSize() const { - return zimArchive->getFilesize() / 1024; + return kiwix::getArchiveFileSize(*zimArchive); } } diff --git a/src/tools/archiveTools.cpp b/src/tools/archiveTools.cpp index 5d22b8ef2..6a2c06696 100644 --- a/src/tools/archiveTools.cpp +++ b/src/tools/archiveTools.cpp @@ -69,18 +69,6 @@ std::string getMetaTags(const 
zim::Archive& archive, bool original) { return join(tags, ";"); } -bool getArchiveFavicon(const zim::Archive& archive, - std::string& content, std::string& mimeType){ - try { - auto item = archive.getIllustrationItem(); - content = item.getData(); - mimeType = item.getMimetype(); - return true; - } catch(zim::EntryNotFound& e) {}; - - return false; -} - std::string getMetaLanguage(const zim::Archive& archive) { return getMetadata(archive, "Language"); } @@ -101,6 +89,71 @@ std::string getMetaPublisher(const zim::Archive& archive) { return getMetadata(archive, "Publisher"); } +std::string getMetaFlavour(const zim::Archive& archive) { + return getMetadata(archive, "Flavour"); +} + +std::string getArchiveId(const zim::Archive& archive) { + std::ostringstream s; + s << archive.getUuid(); + return s.str(); +} + +std::string getArchiveOrigId(const zim::Archive& archive) { + std::string value = getMetadata(archive, "startfileuid"); + if (value.empty()) { + return ""; + } + std::string id = value; + std::string origID; + std::string temp = ""; + unsigned int k = 0; + char tempArray[16] = ""; + for (unsigned int i = 0; i < id.size(); i++) { + if (id[i] == '\n') { + tempArray[k] = atoi(temp.c_str()); + temp = ""; + k++; + } else { + temp += id[i]; + } + } + origID = (std::string) zim::Uuid::generate(tempArray); + return origID; +} + +bool getArchiveFavicon(const zim::Archive& archive, + std::string& content, std::string& mimeType){ + try { + auto item = archive.getIllustrationItem(); + content = item.getData(); + mimeType = item.getMimetype(); + return true; + } catch(zim::EntryNotFound& e) {}; + + return false; +} + +// should this be in libzim +unsigned int getArchiveMediaCount(const zim::Archive& archive) { + std::map counterMap = parseArchiveCounter(archive); + unsigned int counter = 0; + + for (auto &pair:counterMap) { + if (startsWith(pair.first, "image/") || + startsWith(pair.first, "video/") || + startsWith(pair.first, "audio/")) { + counter += pair.second; + } + } + 
+ return counter; +} + +unsigned int getArchiveFileSize(const zim::Archive& archive) { + return archive.getFilesize() / 1024; +} + zim::Item getFinalItem(const zim::Archive& archive, const zim::Entry& entry) { return entry.getItem(true); @@ -118,4 +171,13 @@ zim::Entry getEntryFromPath(const zim::Archive& archive, const std::string& path throw zim::EntryNotFound("Cannot find entry for non empty path"); } +MimeCounterType parseArchiveCounter(const zim::Archive& archive) { + try { + auto counterContent = archive.getMetadata("Counter"); + return parseMimetypeCounter(counterContent); + } catch (zim::EntryNotFound& e) { + return {}; + } +} + } // kiwix diff --git a/src/tools/archiveTools.h b/src/tools/archiveTools.h index 1429457ba..0a6638cd4 100644 --- a/src/tools/archiveTools.h +++ b/src/tools/archiveTools.h @@ -21,6 +21,7 @@ #define KIWIX_ARCHIVETOOLS_H #include +#include /** * This file contains all the functions that would make handling data related to @@ -33,15 +34,27 @@ namespace kiwix std::string getArchiveTitle(const zim::Archive& archive); std::string getMetaDescription(const zim::Archive& archive); std::string getMetaTags(const zim::Archive& archive, bool original = false); - bool getArchiveFavicon(const zim::Archive& archive, - std::string& content, std::string& mimeType); std::string getMetaLanguage(const zim::Archive& archive); std::string getMetaName(const zim::Archive& archive); std::string getMetaDate(const zim::Archive& archive); std::string getMetaCreator(const zim::Archive& archive); std::string getMetaPublisher(const zim::Archive& archive); + std::string getMetaFlavour(const zim::Archive& archive); + std::string getArchiveId(const zim::Archive& archive); + std::string getArchiveOrigId(const zim::Archive& archive); + + bool getArchiveFavicon(const zim::Archive& archive, + std::string& content, std::string& mimeType); + + unsigned int getArchiveMediaCount(const zim::Archive& archive); + unsigned int getArchiveFileSize(const zim::Archive& archive); + 
zim::Item getFinalItem(const zim::Archive& archive, const zim::Entry& entry); + zim::Entry getEntryFromPath(const zim::Archive& archive, const std::string& path); + + MimeCounterType parseArchiveCounter(const zim::Archive& archive); + } #endif