diff --git a/include/book.h b/include/book.h
index f9e685f5f..a9db6df2c 100644
--- a/include/book.h
+++ b/include/book.h
@@ -26,6 +26,10 @@ namespace pugi {
 class xml_node;
 }
 
+namespace zim {
+class Archive;
+}
+
 namespace kiwix
 {
 
@@ -43,6 +47,7 @@ class Book
 
   bool update(const Book& other);
   void update(const Reader& reader);
+  void update(const zim::Archive& archive);
   void updateFromXml(const pugi::xml_node& node, const std::string& baseDir);
   void updateFromOpds(const pugi::xml_node& node, const std::string& urlHost);
   std::string getHumanReadableIdFromPath() const;
diff --git a/include/reader.h b/include/reader.h
index 9630252c2..24d8d02fc 100644
--- a/include/reader.h
+++ b/include/reader.h
@@ -292,16 +292,6 @@ class Reader
    */
   string getScraper() const;
 
-  /**
-   * Get the origId of the zim file.
-   *
-   * The origId is only used in the case of patch zim file and is the Id
-   * of the original zim file.
-   *
-   * @return The origId of the zim file as specified in the zim metadata.
-   */
-  string getOrigId() const;
-
   /**
    * Get the favicon of the zim file.
    *
diff --git a/src/book.cpp b/src/book.cpp
index 4b2111271..da37ea042 100644
--- a/src/book.cpp
+++ b/src/book.cpp
@@ -27,6 +27,9 @@
 #include "tools/otherTools.h"
 #include "tools/stringTools.h"
 #include "tools/pathTools.h"
+#include "tools/archiveTools.h"
+
+#include <zim/archive.h>
 
 #include
 
@@ -80,26 +83,35 @@ bool Book::update(const kiwix::Book& other)
 
 void Book::update(const kiwix::Reader& reader)
 {
-  m_path = reader.getZimFilePath();
-  m_pathValid = true;
-  m_id = reader.getId();
-  m_title = reader.getTitle();
-  m_description = reader.getDescription();
-  m_language = reader.getLanguage();
-  m_creator = reader.getCreator();
-  m_publisher = reader.getPublisher();
-  m_date = reader.getDate();
-  m_name = reader.getName();
-  m_flavour = reader.getFlavour();
-  m_tags = reader.getTags();
-  m_category = getCategoryFromTags();
-  m_origId = reader.getOrigId();
-  m_articleCount = reader.getArticleCount();
-  m_mediaCount = reader.getMediaCount();
-  m_size = static_cast(reader.getFileSize()) << 10;
-  m_pathValid = true;
+  // Every field is read from the archive held by the reader.
+  update(*reader.getZimArchive());
+}
 
-  reader.getFavicon(m_favicon, m_faviconMimeType);
+/* Fill this book's path, id, metadata fields, category, article and media
+ * counts, size and favicon from `archive`, and mark the path as valid.
+ * m_origId is not touched by this overload. */
+void Book::update(const zim::Archive& archive) {
+  m_path = archive.getFilename();
+  m_pathValid = true;
+  m_id = getArchiveId(archive);
+  m_title = getArchiveTitle(archive);
+  m_description = getMetaDescription(archive);
+  m_language = getMetaLanguage(archive);
+  m_creator = getMetaCreator(archive);
+  m_publisher = getMetaPublisher(archive);
+  m_date = getMetaDate(archive);
+  m_name = getMetaName(archive);
+  m_flavour = getMetaFlavour(archive);
+  m_tags = getMetaTags(archive);
+  m_category = getCategoryFromTags();
+  // NOTE(review): the Reader-based code called reader.getArticleCount() here;
+  // confirm zim::Archive::getArticleCount() reports the same figure.
+  m_articleCount = archive.getArticleCount();
+  m_mediaCount = getArchiveMediaCount(archive);
+  // getArchiveFileSize() divides by 1024; the shift multiplies it back.
+  m_size = static_cast<uint64_t>(getArchiveFileSize(archive)) << 10;
+
+  getArchiveFavicon(archive, m_favicon, m_faviconMimeType);
 }
 
 #define ATTR(name) node.attribute(name).value()
diff --git a/src/manager.cpp b/src/manager.cpp
index c632744b2..550842cd0 100644
--- a/src/manager.cpp
+++ b/src/manager.cpp
@@ -215,8 +215,8 @@ bool Manager::readBookFromPath(const std::string& path, kiwix::Book* book)
     tmp_path = computeAbsolutePath(getCurrentDirectory(), path);
   }
   try {
-    kiwix::Reader reader(tmp_path);
-    book->update(reader);
+    zim::Archive archive(tmp_path);
+    book->update(archive);
     book->setPathValid(true);
   } catch (const std::exception& e) {
     book->setPathValid(false);
diff --git a/src/reader.cpp b/src/reader.cpp
index 44cb312a6..4f0ed922f 100644
--- a/src/reader.cpp
+++ b/src/reader.cpp
@@ -29,44 +29,6 @@
 #include "tools/otherTools.h"
 #include "tools/archiveTools.h"
 
-inline char hi(char v)
-{
-  char hex[] = "0123456789abcdef";
-  return hex[(v >> 4) & 0xf];
-}
-
-inline char lo(char v)
-{
-  char hex[] = "0123456789abcdef";
-  return hex[v & 0xf];
-}
-
-std::string hexUUID(std::string in)
-{
-  std::ostringstream out;
-  for (unsigned n = 0; n < 4; ++n) {
-    out << hi(in[n]) << lo(in[n]);
-  }
-  out << '-';
-  for (unsigned n = 4; n < 6; ++n) {
-    out << hi(in[n]) << lo(in[n]);
-  }
-  out << '-';
-  for (unsigned n = 6; n < 8; ++n) {
-    out << hi(in[n]) << lo(in[n]);
-  }
-  out << '-';
-  for (unsigned n = 8; n < 10; ++n) {
-    out << hi(in[n]) << lo(in[n]);
-  }
-  out << '-';
-  for (unsigned n = 10; n < 16; ++n) {
-    out << hi(in[n]) << lo(in[n]);
-  }
-  std::string op = out.str();
-  return op;
-}
-
 namespace kiwix
 {
 /* Constructor */
@@ -119,12 +81,7 @@ zim::Archive* Reader::getZimArchive() const
 
 MimeCounterType Reader::parseCounterMetadata() const
 {
-  try {
-    auto counterContent = zimArchive->getMetadata("Counter");
-    return parseMimetypeCounter(counterContent);
-  } catch (zim::EntryNotFound& e) {
-    return {};
-  }
+  return kiwix::parseArchiveCounter(*zimArchive);
 }
 
 /* Get the count of articles which can be indexed/displayed */
@@ -146,19 +103,7 @@ unsigned int Reader::getArticleCount() const
 /* Get the count of medias content in the ZIM file */
 unsigned int Reader::getMediaCount() const
 {
-  std::map counterMap
-      = this->parseCounterMetadata();
-  unsigned int counter = 0;
-
-  for (auto &pair:counterMap) {
-    if (startsWith(pair.first, "image/") ||
-        startsWith(pair.first, "video/") ||
-        startsWith(pair.first, "audio/")) {
-      counter += pair.second;
-    }
-  }
-
-  return counter;
+  return kiwix::getArchiveMediaCount(*zimArchive);
 }
 
 /* Get the total of all items of a ZIM file, redirects included */
@@ -170,9 +115,7 @@ unsigned int Reader::getGlobalCount() const
 /* Return the UID of the ZIM file */
 string Reader::getId() const
 {
-  std::ostringstream s;
-  s << zimArchive->getUuid();
-  return s.str();
+  return kiwix::getArchiveId(*zimArchive);
 }
 
 Entry Reader::getRandomPage() const
@@ -281,7 +224,7 @@ string Reader::getRelation() const
 
 string Reader::getFlavour() const
 {
-  METADATA("Flavour")
+  return kiwix::getMetaFlavour(*zimArchive);
 }
 
 string Reader::getSource() const
@@ -295,31 +238,6 @@ string Reader::getScraper() const
 }
 #undef METADATA
 
-string Reader::getOrigId() const
-{
-  string value;
-  this->getMetadata("startfileuid", value);
-  if (value.empty()) {
-    return "";
-  }
-  std::string id = value;
-  std::string origID;
-  std::string temp = "";
-  unsigned int k = 0;
-  char tempArray[16] = "";
-  for (unsigned int i = 0; i < id.size(); i++) {
-    if (id[i] == '\n') {
-      tempArray[k] = atoi(temp.c_str());
-      temp = "";
-      k++;
-    } else {
-      temp += id[i];
-    }
-  }
-  origID = hexUUID(tempArray);
-  return origID;
-}
-
 Entry Reader::getEntryFromPath(const std::string& path) const
 {
   try {
@@ -546,7 +464,7 @@ bool Reader::isCorrupted() const
 /* Return the file size, works also for splitted files */
 unsigned int Reader::getFileSize() const
 {
-  return zimArchive->getFilesize() / 1024;
+  return kiwix::getArchiveFileSize(*zimArchive);
 }
 
 }
diff --git a/src/tools/archiveTools.cpp b/src/tools/archiveTools.cpp
index 5d22b8ef2..ebd136557 100644
--- a/src/tools/archiveTools.cpp
+++ b/src/tools/archiveTools.cpp
@@ -69,18 +69,6 @@ std::string getMetaTags(const zim::Archive& archive, bool original) {
   return join(tags, ";");
 }
 
-bool getArchiveFavicon(const zim::Archive& archive,
-    std::string& content, std::string& mimeType){
-  try {
-    auto item = archive.getIllustrationItem();
-    content = item.getData();
-    mimeType = item.getMimetype();
-    return true;
-  } catch(zim::EntryNotFound& e) {};
-
-  return false;
-}
-
 std::string getMetaLanguage(const zim::Archive& archive) {
   return getMetadata(archive, "Language");
 }
@@ -101,6 +89,53 @@ std::string getMetaPublisher(const zim::Archive& archive) {
   return getMetadata(archive, "Publisher");
 }
 
+/* Return the "Flavour" metadata of the archive, as given by getMetadata(). */
+std::string getMetaFlavour(const zim::Archive& archive) {
+  return getMetadata(archive, "Flavour");
+}
+
+/* Return the archive UUID converted to a std::string. */
+std::string getArchiveId(const zim::Archive& archive) {
+  return static_cast<std::string>(archive.getUuid());
+}
+
+/* Copy the archive's illustration item into `content` and `mimeType`.
+ * Returns true on success and false if zim::EntryNotFound is thrown. */
+bool getArchiveFavicon(const zim::Archive& archive,
+    std::string& content, std::string& mimeType){
+  try {
+    auto item = archive.getIllustrationItem();
+    content = item.getData();
+    mimeType = item.getMimetype();
+    return true;
+  } catch (const zim::EntryNotFound&) {}
+
+  return false;
+}
+
+/* Sum the "Counter" metadata entries whose mimetype starts with "image/",
+ * "video/" or "audio/".
+ * TODO(review): should this live in libzim? */
+unsigned int getArchiveMediaCount(const zim::Archive& archive) {
+  const MimeCounterType counterMap = parseArchiveCounter(archive);
+  unsigned int counter = 0;
+
+  for (const auto& pair : counterMap) {
+    if (startsWith(pair.first, "image/") ||
+        startsWith(pair.first, "video/") ||
+        startsWith(pair.first, "audio/")) {
+      counter += pair.second;
+    }
+  }
+
+  return counter;
+}
+
+/* Return archive.getFilesize() divided by 1024 (integer division). */
+unsigned int getArchiveFileSize(const zim::Archive& archive) {
+  return archive.getFilesize() / 1024;
+}
+
 zim::Item getFinalItem(const zim::Archive& archive, const zim::Entry& entry)
 {
   return entry.getItem(true);
@@ -118,4 +153,15 @@ zim::Entry getEntryFromPath(const zim::Archive& archive, const std::string& path
   throw zim::EntryNotFound("Cannot find entry for non empty path");
 }
 
+/* Parse the archive's "Counter" metadata with parseMimetypeCounter().
+ * Returns an empty counter when the metadata entry is not found. */
+MimeCounterType parseArchiveCounter(const zim::Archive& archive) {
+  try {
+    auto counterContent = archive.getMetadata("Counter");
+    return parseMimetypeCounter(counterContent);
+  } catch (const zim::EntryNotFound&) {
+    return {};
+  }
+}
+
 } // kiwix
diff --git a/src/tools/archiveTools.h b/src/tools/archiveTools.h
index 1429457ba..456e749e7 100644
--- a/src/tools/archiveTools.h
+++ b/src/tools/archiveTools.h
@@ -21,6 +21,7 @@
 #define KIWIX_ARCHIVETOOLS_H
 
 #include
+#include <tools/otherTools.h>
 
 /**
  * This file contains all the functions that would make handling data related to
@@ -33,15 +34,26 @@ namespace kiwix
     std::string getArchiveTitle(const zim::Archive& archive);
     std::string getMetaDescription(const zim::Archive& archive);
     std::string getMetaTags(const zim::Archive& archive, bool original = false);
-    bool getArchiveFavicon(const zim::Archive& archive,
-                           std::string& content, std::string& mimeType);
     std::string getMetaLanguage(const zim::Archive& archive);
     std::string getMetaName(const zim::Archive& archive);
     std::string getMetaDate(const zim::Archive& archive);
     std::string getMetaCreator(const zim::Archive& archive);
     std::string getMetaPublisher(const zim::Archive& archive);
+    std::string getMetaFlavour(const zim::Archive& archive);
+    std::string getArchiveId(const zim::Archive& archive);
+
+    bool getArchiveFavicon(const zim::Archive& archive,
+                           std::string& content, std::string& mimeType);
+
+    unsigned int getArchiveMediaCount(const zim::Archive& archive);
+    unsigned int getArchiveFileSize(const zim::Archive& archive);
+
     zim::Item getFinalItem(const zim::Archive& archive, const zim::Entry& entry);
+
     zim::Entry getEntryFromPath(const zim::Archive& archive, const std::string& path);
+
+    MimeCounterType parseArchiveCounter(const zim::Archive& archive);
+
 }
 
 #endif