Update kiwix::book to use libzim structure

Some methods in kiwix::Book uses wrapper structure reader. This usage should
be extended from the native libzim structure zim::Archive
This commit is contained in:
Maneesh P M 2021-07-03 15:16:42 +05:30 committed by Matthieu Gautier
parent b4f7dfa5a2
commit 8b12434ff2
5 changed files with 124 additions and 116 deletions

View File

@ -26,6 +26,10 @@ namespace pugi {
class xml_node;
}
namespace zim {
class Archive;
}
namespace kiwix
{
@ -43,6 +47,7 @@ class Book
bool update(const Book& other);
void update(const Reader& reader);
void update(const zim::Archive& archive);
void updateFromXml(const pugi::xml_node& node, const std::string& baseDir);
void updateFromOpds(const pugi::xml_node& node, const std::string& urlHost);
std::string getHumanReadableIdFromPath() const;

View File

@ -27,6 +27,9 @@
#include "tools/otherTools.h"
#include "tools/stringTools.h"
#include "tools/pathTools.h"
#include "tools/archiveTools.h"
#include <zim/archive.h>
#include <pugixml.hpp>
@ -80,26 +83,28 @@ bool Book::update(const kiwix::Book& other)
void Book::update(const kiwix::Reader& reader)
{
m_path = reader.getZimFilePath();
m_pathValid = true;
m_id = reader.getId();
m_title = reader.getTitle();
m_description = reader.getDescription();
m_language = reader.getLanguage();
m_creator = reader.getCreator();
m_publisher = reader.getPublisher();
m_date = reader.getDate();
m_name = reader.getName();
m_flavour = reader.getFlavour();
m_tags = reader.getTags();
m_category = getCategoryFromTags();
m_origId = reader.getOrigId();
m_articleCount = reader.getArticleCount();
m_mediaCount = reader.getMediaCount();
m_size = static_cast<uint64_t>(reader.getFileSize()) << 10;
m_pathValid = true;
update(*reader.getZimArchive());
}
reader.getFavicon(m_favicon, m_faviconMimeType);
void Book::update(const zim::Archive& archive) {
m_path = archive.getFilename();
m_pathValid = true;
m_id = getArchiveId(archive);
m_title = getArchiveTitle(archive);
m_description = getMetaDescription(archive);
m_language = getMetaLanguage(archive);
m_creator = getMetaCreator(archive);
m_publisher = getMetaPublisher(archive);
m_date = getMetaDate(archive);
m_name = getMetaName(archive);
m_flavour = getMetaFlavour(archive);
m_tags = getMetaTags(archive);
m_category = getCategoryFromTags();
m_articleCount = archive.getArticleCount();
m_mediaCount = getArchiveMediaCount(archive);
m_size = static_cast<uint64_t>(getArchiveFileSize(archive)) << 10;
getArchiveFavicon(archive, m_favicon, m_faviconMimeType);
}
#define ATTR(name) node.attribute(name).value()

View File

@ -29,44 +29,6 @@
#include "tools/otherTools.h"
#include "tools/archiveTools.h"
inline char hi(char v)
{
char hex[] = "0123456789abcdef";
return hex[(v >> 4) & 0xf];
}
inline char lo(char v)
{
char hex[] = "0123456789abcdef";
return hex[v & 0xf];
}
std::string hexUUID(std::string in)
{
std::ostringstream out;
for (unsigned n = 0; n < 4; ++n) {
out << hi(in[n]) << lo(in[n]);
}
out << '-';
for (unsigned n = 4; n < 6; ++n) {
out << hi(in[n]) << lo(in[n]);
}
out << '-';
for (unsigned n = 6; n < 8; ++n) {
out << hi(in[n]) << lo(in[n]);
}
out << '-';
for (unsigned n = 8; n < 10; ++n) {
out << hi(in[n]) << lo(in[n]);
}
out << '-';
for (unsigned n = 10; n < 16; ++n) {
out << hi(in[n]) << lo(in[n]);
}
std::string op = out.str();
return op;
}
namespace kiwix
{
/* Constructor */
@ -119,12 +81,7 @@ zim::Archive* Reader::getZimArchive() const
MimeCounterType Reader::parseCounterMetadata() const
{
try {
auto counterContent = zimArchive->getMetadata("Counter");
return parseMimetypeCounter(counterContent);
} catch (zim::EntryNotFound& e) {
return {};
}
return kiwix::parseArchiveCounter(*zimArchive);
}
/* Get the count of articles which can be indexed/displayed */
@ -146,19 +103,7 @@ unsigned int Reader::getArticleCount() const
/* Get the count of medias content in the ZIM file */
unsigned int Reader::getMediaCount() const
{
std::map<const std::string, unsigned int> counterMap
= this->parseCounterMetadata();
unsigned int counter = 0;
for (auto &pair:counterMap) {
if (startsWith(pair.first, "image/") ||
startsWith(pair.first, "video/") ||
startsWith(pair.first, "audio/")) {
counter += pair.second;
}
}
return counter;
return kiwix::getArchiveMediaCount(*zimArchive);
}
/* Get the total of all items of a ZIM file, redirects included */
@ -170,9 +115,7 @@ unsigned int Reader::getGlobalCount() const
/* Return the UID of the ZIM file */
string Reader::getId() const
{
std::ostringstream s;
s << zimArchive->getUuid();
return s.str();
return kiwix::getArchiveId(*zimArchive);
}
Entry Reader::getRandomPage() const
@ -281,7 +224,7 @@ string Reader::getRelation() const
string Reader::getFlavour() const
{
METADATA("Flavour")
return kiwix::getMetaFlavour(*zimArchive);
}
string Reader::getSource() const
@ -297,27 +240,7 @@ string Reader::getScraper() const
string Reader::getOrigId() const
{
string value;
this->getMetadata("startfileuid", value);
if (value.empty()) {
return "";
}
std::string id = value;
std::string origID;
std::string temp = "";
unsigned int k = 0;
char tempArray[16] = "";
for (unsigned int i = 0; i < id.size(); i++) {
if (id[i] == '\n') {
tempArray[k] = atoi(temp.c_str());
temp = "";
k++;
} else {
temp += id[i];
}
}
origID = hexUUID(tempArray);
return origID;
return kiwix::getArchiveOrigId(*zimArchive);
}
Entry Reader::getEntryFromPath(const std::string& path) const
@ -546,7 +469,7 @@ bool Reader::isCorrupted() const
/* Return the file size, works also for splitted files */
unsigned int Reader::getFileSize() const
{
return zimArchive->getFilesize() / 1024;
return kiwix::getArchiveFileSize(*zimArchive);
}
}

View File

@ -69,18 +69,6 @@ std::string getMetaTags(const zim::Archive& archive, bool original) {
return join(tags, ";");
}
bool getArchiveFavicon(const zim::Archive& archive,
std::string& content, std::string& mimeType){
try {
auto item = archive.getIllustrationItem();
content = item.getData();
mimeType = item.getMimetype();
return true;
} catch(zim::EntryNotFound& e) {};
return false;
}
std::string getMetaLanguage(const zim::Archive& archive) {
return getMetadata(archive, "Language");
}
@ -101,6 +89,71 @@ std::string getMetaPublisher(const zim::Archive& archive) {
return getMetadata(archive, "Publisher");
}
std::string getMetaFlavour(const zim::Archive& archive) {
return getMetadata(archive, "Flavour");
}
std::string getArchiveId(const zim::Archive& archive) {
std::ostringstream s;
s << archive.getUuid();
return s.str();
}
std::string getArchiveOrigId(const zim::Archive& archive) {
std::string value = getMetadata(archive, "startfileuid");
if (value.empty()) {
return "";
}
std::string id = value;
std::string origID;
std::string temp = "";
unsigned int k = 0;
char tempArray[16] = "";
for (unsigned int i = 0; i < id.size(); i++) {
if (id[i] == '\n') {
tempArray[k] = atoi(temp.c_str());
temp = "";
k++;
} else {
temp += id[i];
}
}
origID = (std::string) zim::Uuid::generate(tempArray);
return origID;
}
bool getArchiveFavicon(const zim::Archive& archive,
std::string& content, std::string& mimeType){
try {
auto item = archive.getIllustrationItem();
content = item.getData();
mimeType = item.getMimetype();
return true;
} catch(zim::EntryNotFound& e) {};
return false;
}
// should this be in libzim
unsigned int getArchiveMediaCount(const zim::Archive& archive) {
std::map<const std::string, unsigned int> counterMap = parseArchiveCounter(archive);
unsigned int counter = 0;
for (auto &pair:counterMap) {
if (startsWith(pair.first, "image/") ||
startsWith(pair.first, "video/") ||
startsWith(pair.first, "audio/")) {
counter += pair.second;
}
}
return counter;
}
unsigned int getArchiveFileSize(const zim::Archive& archive) {
return archive.getFilesize() / 1024;
}
zim::Item getFinalItem(const zim::Archive& archive, const zim::Entry& entry)
{
return entry.getItem(true);
@ -118,4 +171,13 @@ zim::Entry getEntryFromPath(const zim::Archive& archive, const std::string& path
throw zim::EntryNotFound("Cannot find entry for non empty path");
}
MimeCounterType parseArchiveCounter(const zim::Archive& archive) {
try {
auto counterContent = archive.getMetadata("Counter");
return parseMimetypeCounter(counterContent);
} catch (zim::EntryNotFound& e) {
return {};
}
}
} // kiwix

View File

@ -21,6 +21,7 @@
#define KIWIX_ARCHIVETOOLS_H
#include <zim/archive.h>
#include <tools/otherTools.h>
/**
* This file contains all the functions that would make handling data related to
@ -33,15 +34,27 @@ namespace kiwix
std::string getArchiveTitle(const zim::Archive& archive);
std::string getMetaDescription(const zim::Archive& archive);
std::string getMetaTags(const zim::Archive& archive, bool original = false);
bool getArchiveFavicon(const zim::Archive& archive,
std::string& content, std::string& mimeType);
std::string getMetaLanguage(const zim::Archive& archive);
std::string getMetaName(const zim::Archive& archive);
std::string getMetaDate(const zim::Archive& archive);
std::string getMetaCreator(const zim::Archive& archive);
std::string getMetaPublisher(const zim::Archive& archive);
std::string getMetaFlavour(const zim::Archive& archive);
std::string getArchiveId(const zim::Archive& archive);
std::string getArchiveOrigId(const zim::Archive& archive);
bool getArchiveFavicon(const zim::Archive& archive,
std::string& content, std::string& mimeType);
unsigned int getArchiveMediaCount(const zim::Archive& archive);
unsigned int getArchiveFileSize(const zim::Archive& archive);
zim::Item getFinalItem(const zim::Archive& archive, const zim::Entry& entry);
zim::Entry getEntryFromPath(const zim::Archive& archive, const std::string& path);
MimeCounterType parseArchiveCounter(const zim::Archive& archive);
}
#endif