mirror of https://github.com/kiwix/libkiwix.git
Merge pull request #836 from kiwix/media_count_libzim
Use new `zim::Archive::getMediaCount` from libzim.
This commit is contained in:
commit
ec31882e94
|
@ -77,8 +77,8 @@ void Book::update(const zim::Archive& archive) {
|
||||||
m_flavour = getMetaFlavour(archive);
|
m_flavour = getMetaFlavour(archive);
|
||||||
m_tags = getMetaTags(archive);
|
m_tags = getMetaTags(archive);
|
||||||
m_category = getCategoryFromTags();
|
m_category = getCategoryFromTags();
|
||||||
m_articleCount = getArchiveArticleCount(archive);
|
m_articleCount = archive.getArticleCount();
|
||||||
m_mediaCount = getArchiveMediaCount(archive);
|
m_mediaCount = archive.getMediaCount();
|
||||||
m_size = static_cast<uint64_t>(getArchiveFileSize(archive)) << 10;
|
m_size = static_cast<uint64_t>(getArchiveFileSize(archive)) << 10;
|
||||||
|
|
||||||
m_illustrations.clear();
|
m_illustrations.clear();
|
||||||
|
|
|
@ -105,46 +105,6 @@ bool getArchiveFavicon(const zim::Archive& archive, unsigned size,
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Should this be in libzim?
|
|
||||||
/**
 * Counts the media entries of an archive.
 *
 * Sums the per-mimetype counters returned by parseArchiveCounter() for every
 * mimetype that starts with "image/", "video/" or "audio/".
 */
unsigned int getArchiveMediaCount(const zim::Archive& archive) {
  const std::map<const std::string, unsigned int> mimeCounters = parseArchiveCounter(archive);

  unsigned int mediaTotal = 0;
  for (const auto& mimeAndCount : mimeCounters) {
    const std::string& mimeType = mimeAndCount.first;
    const bool isMedia = startsWith(mimeType, "image/")
                      || startsWith(mimeType, "video/")
                      || startsWith(mimeType, "audio/");
    if (!isMedia) {
      continue;
    }
    mediaTotal += mimeAndCount.second;
  }

  return mediaTotal;
}
|
|
||||||
|
|
||||||
unsigned int getArchiveArticleCount(const zim::Archive& archive) {
  // [HACK]
  // What getArticleCount() reports depends on the "version" of the zim:
  //  - old zim (<=6): the number of entries in the `A` namespace;
  //  - recent zim (>=7): the number of front articles when a
  //    frontArticleIndex is present, otherwise the number of entries in the
  //    `C` namespace (==getEntryCount).
  // A zim >=7 without frontArticleIndex is rare enough that we don't care.
  // A zim <=6 is detected by the absence of the new namespace scheme.
  if (!archive.hasNewNamespaceScheme()) {
    // Old zim: we have to parse the `M/Counter` metadata.
    unsigned int htmlTotal = 0;
    const auto mimeCounters = parseArchiveCounter(archive);
    for (const auto& mimeAndCount : mimeCounters) {
      if (startsWith(mimeAndCount.first, "text/html")) {
        htmlTotal += mimeAndCount.second;
      }
    }
    return htmlTotal;
  }

  // Recent zim: the articleCount is "good".
  return archive.getArticleCount();
}
|
|
||||||
|
|
||||||
/**
 * Returns the value reported by archive.getFilesize() divided by 1024
 * (integer division, the remainder is discarded).
 */
unsigned int getArchiveFileSize(const zim::Archive& archive) {
  const auto reportedSize = archive.getFilesize();
  return reportedSize / 1024;
}
|
||||||
|
@ -165,14 +125,4 @@ zim::Entry getEntryFromPath(const zim::Archive& archive, const std::string& path
|
||||||
}
|
}
|
||||||
throw zim::EntryNotFound("Cannot find entry for non empty path");
|
throw zim::EntryNotFound("Cannot find entry for non empty path");
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Reads the "Counter" metadata of an archive and parses it into a
 * mimetype -> count map.
 *
 * @param archive the archive whose metadata is read.
 * @return the result of parseMimetypeCounter() on the metadata content, or an
 *         empty map when zim::EntryNotFound is thrown while doing so.
 */
MimeCounterType parseArchiveCounter(const zim::Archive& archive) {
  try {
    const auto counterContent = archive.getMetadata("Counter");
    return parseMimetypeCounter(counterContent);
  } catch (const zim::EntryNotFound&) {
    // No "Counter" entry could be found: report no counters instead of
    // propagating the exception to the caller.
    return {};
  }
}
|
|
||||||
|
|
||||||
} // kiwix
|
} // kiwix
|
||||||
|
|
|
@ -51,9 +51,6 @@ namespace kiwix
|
||||||
zim::Item getFinalItem(const zim::Archive& archive, const zim::Entry& entry);
|
zim::Item getFinalItem(const zim::Archive& archive, const zim::Entry& entry);
|
||||||
|
|
||||||
zim::Entry getEntryFromPath(const zim::Archive& archive, const std::string& path);
|
zim::Entry getEntryFromPath(const zim::Archive& archive, const std::string& path);
|
||||||
|
|
||||||
MimeCounterType parseArchiveCounter(const zim::Archive& archive);
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -288,67 +288,6 @@ bool kiwix::convertStrToBool(const std::string& value)
|
||||||
throw std::domain_error(ss.str());
|
throw std::domain_error(ss.str());
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace
|
|
||||||
{
|
|
||||||
// The counter metadata format is a list of items separated by `;`:
|
|
||||||
// item0;item1;item2
|
|
||||||
// Each item is a "tuple" mimetype=number.
|
|
||||||
// However, the mimetype may contain parameters:
|
|
||||||
// text/html;raw=true;foo=bar
|
|
||||||
// So the final format may be complex to parse:
|
|
||||||
// key0=value0;key1;foo=bar=value1;key2=value2
|
|
||||||
|
|
||||||
typedef kiwix::MimeCounterType::value_type MimetypeAndCounter;
|
|
||||||
|
|
||||||
// Extracts the next complete "mimetype=counter" item from `in`.
//
// Items are separated by ';', but a mimetype may itself carry ';'-separated
// parameters (e.g. "text/html;raw=true=50"). When the first ';'-delimited
// piece contains no '=', it is treated as a bare mimetype whose parameters
// follow: pieces keep being appended until one containing exactly two '=' is
// read (the last "param=value" fused with "=counter").
//
// Returns the item text, or an empty string when the input runs out before
// such a closing piece is found.
std::string readFullMimetypeAndCounterString(std::istream& in)
{
  std::string mtcStr;
  std::getline(in, mtcStr, ';');
  if ( mtcStr.find('=') != std::string::npos )
    return mtcStr;

  std::string params;
  do
  {
    if ( !std::getline(in, params, ';') )
      return std::string();
    // Append in place rather than building a temporary ";" + params string.
    mtcStr += ';';
    mtcStr += params;
  }
  while ( std::count(params.begin(), params.end(), '=') != 2 );

  return mtcStr;
}
|
|
||||||
|
|
||||||
// Splits a "mimetype=counter" item at its last '=' and parses the counter.
//
// Returns {mimetype, counter} on success. Returns {"", 0} when `s` has no
// '=', when the text after the last '=' is not entirely consumed as an
// unsigned number, or when that text contains a minus sign.
MimetypeAndCounter parseASingleMimetypeCounter(const std::string& s)
{
  const std::string::size_type k = s.rfind('=');
  if ( k == std::string::npos )
    return MimetypeAndCounter{"", 0};

  const std::string counterStr = s.substr(k+1);
  // Stream extraction into an unsigned type accepts a leading '-' on common
  // standard library implementations and wraps the value around (so "-1"
  // would become a huge count). A count can't be negative: reject it.
  if ( counterStr.find('-') != std::string::npos )
    return MimetypeAndCounter{"", 0};

  std::istringstream counterSS(counterStr);
  unsigned int counter = 0;
  // eof() must hold after the extraction so that trailing garbage such as
  // "123foo" is rejected.
  if ( counterSS >> counter && counterSS.eof() )
    return MimetypeAndCounter{s.substr(0, k), counter};

  return MimetypeAndCounter{"", 0};
}
|
|
||||||
|
|
||||||
} // unnamed namespace
|
|
||||||
|
|
||||||
// Parses a whole counter string into a mimetype -> count map.
//
// Items are read one at a time with readFullMimetypeAndCounterString() and
// decoded with parseASingleMimetypeCounter(); items decoded with an empty
// mimetype are skipped.
kiwix::MimeCounterType kiwix::parseMimetypeCounter(const std::string& counterData)
{
  std::istringstream input(counterData);
  kiwix::MimeCounterType result;

  while ( !input.fail() )
  {
    const MimetypeAndCounter item =
        parseASingleMimetypeCounter(readFullMimetypeAndCounterString(input));
    if ( item.first.empty() )
      continue;
    result.insert(item);
  }

  return result;
}
|
|
||||||
|
|
||||||
std::string kiwix::gen_date_str()
|
std::string kiwix::gen_date_str()
|
||||||
{
|
{
|
||||||
auto now = std::time(0);
|
auto now = std::time(0);
|
||||||
|
|
|
@ -45,9 +45,6 @@ namespace kiwix
|
||||||
const std::string& tagName);
|
const std::string& tagName);
|
||||||
bool convertStrToBool(const std::string& value);
|
bool convertStrToBool(const std::string& value);
|
||||||
|
|
||||||
using MimeCounterType = std::map<const std::string, zim::entry_index_type>;
|
|
||||||
MimeCounterType parseMimetypeCounter(const std::string& counterData);
|
|
||||||
|
|
||||||
std::string gen_date_str();
|
std::string gen_date_str();
|
||||||
std::string gen_uuid(const std::string& s);
|
std::string gen_uuid(const std::string& s);
|
||||||
|
|
||||||
|
|
|
@ -1,143 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (C) 2019 Matthieu Gautier
|
|
||||||
*
|
|
||||||
* This program is free software; you can redistribute it and/or
|
|
||||||
* modify it under the terms of the GNU General Public License as
|
|
||||||
* published by the Free Software Foundation; either version 2 of the
|
|
||||||
* License, or (at your option) any later version.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful, but
|
|
||||||
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
|
|
||||||
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
|
|
||||||
* NON-INFRINGEMENT. See the GNU General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License
|
|
||||||
* along with this program; if not, write to the Free Software
|
|
||||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "gtest/gtest.h"
|
|
||||||
#include <string>
|
|
||||||
#include <vector>
|
|
||||||
#include <map>
|
|
||||||
#include <zim/zim.h>
|
|
||||||
|
|
||||||
namespace kiwix {
|
|
||||||
using CounterType = std::map<const std::string, zim::entry_index_type>;
|
|
||||||
CounterType parseMimetypeCounter(const std::string& counterData);
|
|
||||||
};
|
|
||||||
|
|
||||||
using namespace kiwix;
|
|
||||||
#define parse parseMimetypeCounter
|
|
||||||
|
|
||||||
namespace
|
|
||||||
{
|
|
||||||
// Plain "mimetype=count" items: empty input, a single item, and two items
// followed by a trailing ';'.
TEST(ParseCounterTest, simpleMimeType)
{
  using Case = std::pair<std::string, CounterType>;
  const std::vector<Case> cases = {
    Case{"",                    CounterType{}},
    Case{"foo=1",               CounterType{{"foo", 1}}},
    Case{"foo=1;text/html=50;", CounterType{{"foo", 1}, {"text/html", 50}}},
  };

  for (const Case& testCase : cases)
  {
    ASSERT_EQ(parse(testCase.first), testCase.second) << testCase.first;
  }
}
|
|
||||||
|
|
||||||
// Mimetypes carrying ';'-separated parameters (e.g. "text/html;raw=true"),
// alone and mixed with plain items.
TEST(ParseCounterTest, paramMimeType)
{
  {
    std::string counterStr = "text/html;raw=true=1";
    CounterType counterMap = {{"text/html;raw=true", 1}};
    ASSERT_EQ(parse(counterStr), counterMap) << counterStr;
  }
  {
    std::string counterStr = "foo=1;text/html;raw=true=50;bar=2";
    CounterType counterMap = {{"foo", 1}, {"text/html;raw=true", 50}, {"bar", 2}};
    ASSERT_EQ(parse(counterStr), counterMap) << counterStr;
  }
  {
    std::string counterStr = "foo=1;text/html;raw=true;param=value=50;bar=2";
    CounterType counterMap = {{"foo", 1}, {"text/html;raw=true;param=value", 50}, {"bar", 2}};
    ASSERT_EQ(parse(counterStr), counterMap) << counterStr;
  }
  {
    // Same items as the second case, followed by a trailing ';' (this slot
    // used to hold an exact duplicate of that case).
    std::string counterStr = "foo=1;text/html;raw=true=50;bar=2;";
    CounterType counterMap = {{"foo", 1}, {"text/html;raw=true", 50}, {"bar", 2}};
    ASSERT_EQ(parse(counterStr), counterMap) << counterStr;
  }
  {
    // A long counter string mixing many plain mimetypes with one
    // parameterised mimetype.
    std::string counterStr = "application/javascript=8;text/html=3;application/warc-headers=28364;text/html;raw=true=6336;text/css=47;text/javascript=98;image/png=968;image/webp=24;application/json=3694;image/gif=10274;image/jpeg=1582;font/woff2=25;text/plain=284;application/atom+xml=247;application/x-www-form-urlencoded=9;video/mp4=9;application/x-javascript=7;application/xml=1;image/svg+xml=5";
    CounterType counterMap = {
      {"application/javascript", 8},
      {"text/html", 3},
      {"application/warc-headers", 28364},
      {"text/html;raw=true", 6336},
      {"text/css", 47},
      {"text/javascript", 98},
      {"image/png", 968},
      {"image/webp", 24},
      {"application/json", 3694},
      {"image/gif", 10274},
      {"image/jpeg", 1582},
      {"font/woff2", 25},
      {"text/plain", 284},
      {"application/atom+xml", 247},
      {"application/x-www-form-urlencoded", 9},
      {"video/mp4", 9},
      {"application/x-javascript", 7},
      {"application/xml", 1},
      {"image/svg+xml", 5}
    };
    ASSERT_EQ(parse(counterStr), counterMap) << counterStr;
  }
}
|
|
||||||
|
|
||||||
// Malformed counter strings: items that cannot be parsed are dropped, while
// well-formed items preceding them are still reported.
TEST(ParseCounterTest, wrongType)
{
  using Case = std::pair<std::string, CounterType>;
  const CounterType none = {};
  const CounterType html50 = {{"text/html", 50}};

  const std::vector<Case> cases = {
    Case{"text/html",         none},
    Case{"text/html=",        none},
    Case{"text/html=foo",     none},
    Case{"text/html=123foo",  none},
    Case{"text/html=50;foo",  html50},
    Case{"text/html;foo=20",  none},
    Case{"text/html;foo=20;", none},
    Case{"text/html=50;;foo", html50},
  };

  for (const Case& testCase : cases)
  {
    ASSERT_EQ(parse(testCase.first), testCase.second) << testCase.first;
  }
}
|
|
||||||
|
|
||||||
};
|
|
|
@ -2,7 +2,6 @@ tests = [
|
||||||
'library',
|
'library',
|
||||||
'regex',
|
'regex',
|
||||||
'tagParsing',
|
'tagParsing',
|
||||||
'counterParsing',
|
|
||||||
'stringTools',
|
'stringTools',
|
||||||
'pathTools',
|
'pathTools',
|
||||||
'kiwixserve',
|
'kiwixserve',
|
||||||
|
|
Loading…
Reference in New Issue