From 8b12434ff2165e0ad5b2fe4c1a6bbc2d056d46ab Mon Sep 17 00:00:00 2001 From: Maneesh P M Date: Sat, 3 Jul 2021 15:16:42 +0530 Subject: [PATCH 1/3] Update kiwix::book to use libzim structure Some methods in kiwix::Book use the wrapper structure Reader. They should also accept the native libzim structure zim::Archive directly. --- include/book.h | 5 +++ src/book.cpp | 43 ++++++++++-------- src/reader.cpp | 89 +++----------------------------------- src/tools/archiveTools.cpp | 86 +++++++++++++++++++++++++++++++----- src/tools/archiveTools.h | 17 +++++++- 5 files changed, 124 insertions(+), 116 deletions(-) diff --git a/include/book.h b/include/book.h index f9e685f5f..a9db6df2c 100644 --- a/include/book.h +++ b/include/book.h @@ -26,6 +26,10 @@ namespace pugi { class xml_node; } +namespace zim { +class Archive; +} + namespace kiwix { @@ -43,6 +47,7 @@ class Book bool update(const Book& other); void update(const Reader& reader); + void update(const zim::Archive& archive); void updateFromXml(const pugi::xml_node& node, const std::string& baseDir); void updateFromOpds(const pugi::xml_node& node, const std::string& urlHost); std::string getHumanReadableIdFromPath() const; diff --git a/src/book.cpp b/src/book.cpp index 4b2111271..da37ea042 100644 --- a/src/book.cpp +++ b/src/book.cpp @@ -27,6 +27,9 @@ #include "tools/otherTools.h" #include "tools/stringTools.h" #include "tools/pathTools.h" +#include "tools/archiveTools.h" + +#include #include @@ -80,26 +83,28 @@ bool Book::update(const kiwix::Book& other) void Book::update(const kiwix::Reader& reader) { - m_path = reader.getZimFilePath(); - m_pathValid = true; - m_id = reader.getId(); - m_title = reader.getTitle(); - m_description = reader.getDescription(); - m_language = reader.getLanguage(); - m_creator = reader.getCreator(); - m_publisher = reader.getPublisher(); - m_date = reader.getDate(); - m_name = reader.getName(); - m_flavour = reader.getFlavour(); - m_tags = reader.getTags(); - m_category = getCategoryFromTags(); - 
m_origId = reader.getOrigId(); - m_articleCount = reader.getArticleCount(); - m_mediaCount = reader.getMediaCount(); - m_size = static_cast(reader.getFileSize()) << 10; - m_pathValid = true; + update(*reader.getZimArchive()); +} - reader.getFavicon(m_favicon, m_faviconMimeType); +void Book::update(const zim::Archive& archive) { + m_path = archive.getFilename(); + m_pathValid = true; + m_id = getArchiveId(archive); + m_title = getArchiveTitle(archive); + m_description = getMetaDescription(archive); + m_language = getMetaLanguage(archive); + m_creator = getMetaCreator(archive); + m_publisher = getMetaPublisher(archive); + m_date = getMetaDate(archive); + m_name = getMetaName(archive); + m_flavour = getMetaFlavour(archive); + m_tags = getMetaTags(archive); + m_category = getCategoryFromTags(); + m_articleCount = archive.getArticleCount(); + m_mediaCount = getArchiveMediaCount(archive); + m_size = static_cast(getArchiveFileSize(archive)) << 10; + + getArchiveFavicon(archive, m_favicon, m_faviconMimeType); } #define ATTR(name) node.attribute(name).value() diff --git a/src/reader.cpp b/src/reader.cpp index 44cb312a6..76ce4f4cc 100644 --- a/src/reader.cpp +++ b/src/reader.cpp @@ -29,44 +29,6 @@ #include "tools/otherTools.h" #include "tools/archiveTools.h" -inline char hi(char v) -{ - char hex[] = "0123456789abcdef"; - return hex[(v >> 4) & 0xf]; -} - -inline char lo(char v) -{ - char hex[] = "0123456789abcdef"; - return hex[v & 0xf]; -} - -std::string hexUUID(std::string in) -{ - std::ostringstream out; - for (unsigned n = 0; n < 4; ++n) { - out << hi(in[n]) << lo(in[n]); - } - out << '-'; - for (unsigned n = 4; n < 6; ++n) { - out << hi(in[n]) << lo(in[n]); - } - out << '-'; - for (unsigned n = 6; n < 8; ++n) { - out << hi(in[n]) << lo(in[n]); - } - out << '-'; - for (unsigned n = 8; n < 10; ++n) { - out << hi(in[n]) << lo(in[n]); - } - out << '-'; - for (unsigned n = 10; n < 16; ++n) { - out << hi(in[n]) << lo(in[n]); - } - std::string op = out.str(); - return op; -} - 
namespace kiwix { /* Constructor */ @@ -119,12 +81,7 @@ zim::Archive* Reader::getZimArchive() const MimeCounterType Reader::parseCounterMetadata() const { - try { - auto counterContent = zimArchive->getMetadata("Counter"); - return parseMimetypeCounter(counterContent); - } catch (zim::EntryNotFound& e) { - return {}; - } + return kiwix::parseArchiveCounter(*zimArchive); } /* Get the count of articles which can be indexed/displayed */ @@ -146,19 +103,7 @@ unsigned int Reader::getArticleCount() const /* Get the count of medias content in the ZIM file */ unsigned int Reader::getMediaCount() const { - std::map counterMap - = this->parseCounterMetadata(); - unsigned int counter = 0; - - for (auto &pair:counterMap) { - if (startsWith(pair.first, "image/") || - startsWith(pair.first, "video/") || - startsWith(pair.first, "audio/")) { - counter += pair.second; - } - } - - return counter; + return kiwix::getArchiveMediaCount(*zimArchive); } /* Get the total of all items of a ZIM file, redirects included */ @@ -170,9 +115,7 @@ unsigned int Reader::getGlobalCount() const /* Return the UID of the ZIM file */ string Reader::getId() const { - std::ostringstream s; - s << zimArchive->getUuid(); - return s.str(); + return kiwix::getArchiveId(*zimArchive); } Entry Reader::getRandomPage() const @@ -281,7 +224,7 @@ string Reader::getRelation() const string Reader::getFlavour() const { - METADATA("Flavour") + return kiwix::getMetaFlavour(*zimArchive); } string Reader::getSource() const @@ -297,27 +240,7 @@ string Reader::getScraper() const string Reader::getOrigId() const { - string value; - this->getMetadata("startfileuid", value); - if (value.empty()) { - return ""; - } - std::string id = value; - std::string origID; - std::string temp = ""; - unsigned int k = 0; - char tempArray[16] = ""; - for (unsigned int i = 0; i < id.size(); i++) { - if (id[i] == '\n') { - tempArray[k] = atoi(temp.c_str()); - temp = ""; - k++; - } else { - temp += id[i]; - } - } - origID = hexUUID(tempArray); 
- return origID; + return kiwix::getArchiveOrigId(*zimArchive); } Entry Reader::getEntryFromPath(const std::string& path) const @@ -546,7 +469,7 @@ bool Reader::isCorrupted() const /* Return the file size, works also for splitted files */ unsigned int Reader::getFileSize() const { - return zimArchive->getFilesize() / 1024; + return kiwix::getArchiveFileSize(*zimArchive); } } diff --git a/src/tools/archiveTools.cpp b/src/tools/archiveTools.cpp index 5d22b8ef2..6a2c06696 100644 --- a/src/tools/archiveTools.cpp +++ b/src/tools/archiveTools.cpp @@ -69,18 +69,6 @@ std::string getMetaTags(const zim::Archive& archive, bool original) { return join(tags, ";"); } -bool getArchiveFavicon(const zim::Archive& archive, - std::string& content, std::string& mimeType){ - try { - auto item = archive.getIllustrationItem(); - content = item.getData(); - mimeType = item.getMimetype(); - return true; - } catch(zim::EntryNotFound& e) {}; - - return false; -} - std::string getMetaLanguage(const zim::Archive& archive) { return getMetadata(archive, "Language"); } @@ -101,6 +89,71 @@ std::string getMetaPublisher(const zim::Archive& archive) { return getMetadata(archive, "Publisher"); } +std::string getMetaFlavour(const zim::Archive& archive) { + return getMetadata(archive, "Flavour"); +} + +std::string getArchiveId(const zim::Archive& archive) { + std::ostringstream s; + s << archive.getUuid(); + return s.str(); +} + +std::string getArchiveOrigId(const zim::Archive& archive) { + std::string value = getMetadata(archive, "startfileuid"); + if (value.empty()) { + return ""; + } + std::string id = value; + std::string origID; + std::string temp = ""; + unsigned int k = 0; + char tempArray[16] = ""; + for (unsigned int i = 0; i < id.size(); i++) { + if (id[i] == '\n') { + tempArray[k] = atoi(temp.c_str()); + temp = ""; + k++; + } else { + temp += id[i]; + } + } + origID = (std::string) zim::Uuid::generate(tempArray); + return origID; +} + +bool getArchiveFavicon(const zim::Archive& archive, + 
std::string& content, std::string& mimeType){ + try { + auto item = archive.getIllustrationItem(); + content = item.getData(); + mimeType = item.getMimetype(); + return true; + } catch(zim::EntryNotFound& e) {}; + + return false; +} + +// should this be in libzim +unsigned int getArchiveMediaCount(const zim::Archive& archive) { + std::map counterMap = parseArchiveCounter(archive); + unsigned int counter = 0; + + for (auto &pair:counterMap) { + if (startsWith(pair.first, "image/") || + startsWith(pair.first, "video/") || + startsWith(pair.first, "audio/")) { + counter += pair.second; + } + } + + return counter; +} + +unsigned int getArchiveFileSize(const zim::Archive& archive) { + return archive.getFilesize() / 1024; +} + zim::Item getFinalItem(const zim::Archive& archive, const zim::Entry& entry) { return entry.getItem(true); @@ -118,4 +171,13 @@ zim::Entry getEntryFromPath(const zim::Archive& archive, const std::string& path throw zim::EntryNotFound("Cannot find entry for non empty path"); } +MimeCounterType parseArchiveCounter(const zim::Archive& archive) { + try { + auto counterContent = archive.getMetadata("Counter"); + return parseMimetypeCounter(counterContent); + } catch (zim::EntryNotFound& e) { + return {}; + } +} + } // kiwix diff --git a/src/tools/archiveTools.h b/src/tools/archiveTools.h index 1429457ba..0a6638cd4 100644 --- a/src/tools/archiveTools.h +++ b/src/tools/archiveTools.h @@ -21,6 +21,7 @@ #define KIWIX_ARCHIVETOOLS_H #include +#include /** * This file contains all the functions that would make handling data related to @@ -33,15 +34,27 @@ namespace kiwix std::string getArchiveTitle(const zim::Archive& archive); std::string getMetaDescription(const zim::Archive& archive); std::string getMetaTags(const zim::Archive& archive, bool original = false); - bool getArchiveFavicon(const zim::Archive& archive, - std::string& content, std::string& mimeType); std::string getMetaLanguage(const zim::Archive& archive); std::string getMetaName(const 
zim::Archive& archive); std::string getMetaDate(const zim::Archive& archive); std::string getMetaCreator(const zim::Archive& archive); std::string getMetaPublisher(const zim::Archive& archive); + std::string getMetaFlavour(const zim::Archive& archive); + std::string getArchiveId(const zim::Archive& archive); + std::string getArchiveOrigId(const zim::Archive& archive); + + bool getArchiveFavicon(const zim::Archive& archive, + std::string& content, std::string& mimeType); + + unsigned int getArchiveMediaCount(const zim::Archive& archive); + unsigned int getArchiveFileSize(const zim::Archive& archive); + zim::Item getFinalItem(const zim::Archive& archive, const zim::Entry& entry); + zim::Entry getEntryFromPath(const zim::Archive& archive, const std::string& path); + + MimeCounterType parseArchiveCounter(const zim::Archive& archive); + } #endif From a3ba7619df52fb485e536e6e6e1735ebc60f72df Mon Sep 17 00:00:00 2001 From: Maneesh P M Date: Sat, 3 Jul 2021 15:19:00 +0530 Subject: [PATCH 2/3] Update Manager to use Archive instead of Reader kiwix::Manager uses Reader to import a ZIM file; it should use zim::Archive directly. 
--- src/manager.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/manager.cpp b/src/manager.cpp index c632744b2..550842cd0 100644 --- a/src/manager.cpp +++ b/src/manager.cpp @@ -215,8 +215,8 @@ bool Manager::readBookFromPath(const std::string& path, kiwix::Book* book) tmp_path = computeAbsolutePath(getCurrentDirectory(), path); } try { - kiwix::Reader reader(tmp_path); - book->update(reader); + zim::Archive archive(tmp_path); + book->update(archive); book->setPathValid(true); } catch (const std::exception& e) { book->setPathValid(false); From 19afe9442f68c84c12112675ad1fc20961c338db Mon Sep 17 00:00:00 2001 From: Maneesh P M Date: Wed, 7 Jul 2021 23:44:45 +0530 Subject: [PATCH 3/3] Remove OriginId functions since they are not useful right now --- include/reader.h | 10 ---------- src/reader.cpp | 5 ----- src/tools/archiveTools.cpp | 27 +-------------------------- src/tools/archiveTools.h | 1 - 4 files changed, 1 insertion(+), 42 deletions(-) diff --git a/include/reader.h b/include/reader.h index 9630252c2..24d8d02fc 100644 --- a/include/reader.h +++ b/include/reader.h @@ -292,16 +292,6 @@ class Reader */ string getScraper() const; - /** - * Get the origId of the zim file. - * - * The origId is only used in the case of patch zim file and is the Id - * of the original zim file. - * - * @return The origId of the zim file as specified in the zim metadata. - */ - string getOrigId() const; - /** * Get the favicon of the zim file. 
* diff --git a/src/reader.cpp b/src/reader.cpp index 76ce4f4cc..4f0ed922f 100644 --- a/src/reader.cpp +++ b/src/reader.cpp @@ -238,11 +238,6 @@ string Reader::getScraper() const } #undef METADATA -string Reader::getOrigId() const -{ - return kiwix::getArchiveOrigId(*zimArchive); -} - Entry Reader::getEntryFromPath(const std::string& path) const { try { diff --git a/src/tools/archiveTools.cpp b/src/tools/archiveTools.cpp index 6a2c06696..ebd136557 100644 --- a/src/tools/archiveTools.cpp +++ b/src/tools/archiveTools.cpp @@ -94,32 +94,7 @@ std::string getMetaFlavour(const zim::Archive& archive) { } std::string getArchiveId(const zim::Archive& archive) { - std::ostringstream s; - s << archive.getUuid(); - return s.str(); -} - -std::string getArchiveOrigId(const zim::Archive& archive) { - std::string value = getMetadata(archive, "startfileuid"); - if (value.empty()) { - return ""; - } - std::string id = value; - std::string origID; - std::string temp = ""; - unsigned int k = 0; - char tempArray[16] = ""; - for (unsigned int i = 0; i < id.size(); i++) { - if (id[i] == '\n') { - tempArray[k] = atoi(temp.c_str()); - temp = ""; - k++; - } else { - temp += id[i]; - } - } - origID = (std::string) zim::Uuid::generate(tempArray); - return origID; + return (std::string) archive.getUuid(); } bool getArchiveFavicon(const zim::Archive& archive, diff --git a/src/tools/archiveTools.h b/src/tools/archiveTools.h index 0a6638cd4..456e749e7 100644 --- a/src/tools/archiveTools.h +++ b/src/tools/archiveTools.h @@ -41,7 +41,6 @@ namespace kiwix std::string getMetaPublisher(const zim::Archive& archive); std::string getMetaFlavour(const zim::Archive& archive); std::string getArchiveId(const zim::Archive& archive); - std::string getArchiveOrigId(const zim::Archive& archive); bool getArchiveFavicon(const zim::Archive& archive, std::string& content, std::string& mimeType);