mirror of https://github.com/kiwix/libkiwix.git
Merge pull request #836 from kiwix/media_count_libzim
Use new `zim::Archive::getMediaCount` from libzim.
This commit is contained in:
commit
ec31882e94
|
@ -77,8 +77,8 @@ void Book::update(const zim::Archive& archive) {
|
|||
m_flavour = getMetaFlavour(archive);
|
||||
m_tags = getMetaTags(archive);
|
||||
m_category = getCategoryFromTags();
|
||||
m_articleCount = getArchiveArticleCount(archive);
|
||||
m_mediaCount = getArchiveMediaCount(archive);
|
||||
m_articleCount = archive.getArticleCount();
|
||||
m_mediaCount = archive.getMediaCount();
|
||||
m_size = static_cast<uint64_t>(getArchiveFileSize(archive)) << 10;
|
||||
|
||||
m_illustrations.clear();
|
||||
|
|
|
@ -105,46 +105,6 @@ bool getArchiveFavicon(const zim::Archive& archive, unsigned size,
|
|||
return false;
|
||||
}
|
||||
|
||||
// Should this be in libzim?
|
||||
// Returns the number of media entries of `archive`.
//
// The count is the sum of the values returned by parseArchiveCounter() for
// every mimetype starting with "image/", "video/" or "audio/".
unsigned int getArchiveMediaCount(const zim::Archive& archive) {
  const std::map<const std::string, unsigned int> mimeCounts = parseArchiveCounter(archive);

  unsigned int mediaTotal = 0;
  for (const auto& entry : mimeCounts) {
    const bool isMedia = startsWith(entry.first, "image/")
                      || startsWith(entry.first, "video/")
                      || startsWith(entry.first, "audio/");
    if (!isMedia) {
      continue;
    }
    mediaTotal += entry.second;
  }

  return mediaTotal;
}
|
||||
|
||||
// Returns the number of articles of `archive`.
//
// [HACK]
// What getArticleCount() returns depends on the "version" of the zim:
//  - old zim (<=6): the number of entries in the `A` namespace;
//  - recent zim (>=7):
//     - the number of front articles if a frontArticleIndex is present,
//     - otherwise the number of entries in the `C` namespace (==getEntryCount).
// A zim >=7 without frontArticleIndex is pretty rare, so that case is ignored.
// A zim <=6 is detected by the absence of the new namespace scheme.
unsigned int getArchiveArticleCount(const zim::Archive& archive) {
  if (!archive.hasNewNamespaceScheme()) {
    // Old zim: the `M/Counter` metadata has to be parsed, summing every
    // mimetype that starts with "text/html".
    const auto mimeCounts = parseArchiveCounter(archive);
    unsigned int htmlTotal = 0;
    for (const auto& entry : mimeCounts) {
      if (!startsWith(entry.first, "text/html")) {
        continue;
      }
      htmlTotal += entry.second;
    }
    return htmlTotal;
  }

  // Recent zim: the articleCount is "good".
  return archive.getArticleCount();
}
|
||||
|
||||
// Returns the value reported by archive.getFilesize() divided by 1024
// (integer division), converted to unsigned int.
unsigned int getArchiveFileSize(const zim::Archive& archive) {
  const auto rawSize = archive.getFilesize();
  return rawSize / 1024;
}
|
||||
|
@ -165,14 +125,4 @@ zim::Entry getEntryFromPath(const zim::Archive& archive, const std::string& path
|
|||
}
|
||||
throw zim::EntryNotFound("Cannot find entry for non empty path");
|
||||
}
|
||||
|
||||
// Reads the "Counter" metadata of `archive` and parses it with
// parseMimetypeCounter().
//
// Returns an empty map when zim::EntryNotFound is thrown (e.g. the archive
// has no "Counter" metadata).
MimeCounterType parseArchiveCounter(const zim::Archive& archive) {
  MimeCounterType counters;
  try {
    counters = parseMimetypeCounter(archive.getMetadata("Counter"));
  } catch (const zim::EntryNotFound&) {
    // Nothing to parse: keep the map empty.
  }
  return counters;
}
|
||||
|
||||
} // kiwix
|
||||
|
|
|
@ -51,9 +51,6 @@ namespace kiwix
|
|||
zim::Item getFinalItem(const zim::Archive& archive, const zim::Entry& entry);
|
||||
|
||||
zim::Entry getEntryFromPath(const zim::Archive& archive, const std::string& path);
|
||||
|
||||
MimeCounterType parseArchiveCounter(const zim::Archive& archive);
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -288,67 +288,6 @@ bool kiwix::convertStrToBool(const std::string& value)
|
|||
throw std::domain_error(ss.str());
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
// The counter metadata format is a list of items separated by `;`:
|
||||
// item0;item1;item2
|
||||
// Each item is a "tuple" mimetype=number.
|
||||
// However, the mimetype may contain parameters:
|
||||
// text/html;raw=true;foo=bar
|
||||
// So the final format may be complex to parse:
|
||||
// key0=value0;key1;foo=bar=value1;key2=value2
|
||||
|
||||
typedef kiwix::MimeCounterType::value_type MimetypeAndCounter;
|
||||
|
||||
// Extracts the next complete "mimetype=number" item from `in`.
//
// Items are separated by ';', but a mimetype may itself carry ';'-separated
// parameters (e.g. "text/html;raw=true=50"). When the first chunk read has
// no '=', further chunks are appended (re-joined with ';') until one of them
// contains exactly two '=' (i.e. "param=value=number").
//
// Returns an empty string if the stream runs out before such a chunk is
// found.
std::string readFullMimetypeAndCounterString(std::istream& in)
{
  std::string item;
  std::getline(in, item, ';');

  // Simple case: the chunk already looks like "mimetype=number".
  if ( item.find('=') != std::string::npos )
    return item;

  // Parameterised mimetype: keep gluing chunks until the closing one.
  for (;;)
  {
    std::string chunk;
    if ( !std::getline(in, chunk, ';') )
      return std::string();

    item += ';';
    item += chunk;

    if ( std::count(chunk.begin(), chunk.end(), '=') == 2 )
      return item;
  }
}
|
||||
|
||||
// Parses one "mimetype=number" item.
//
// The string is split at its LAST '=' so that mimetype parameters
// ("text/html;raw=true=50") stay part of the mimetype. The text after that
// '=' must be entirely consumed as an unsigned integer.
//
// Returns {mimetype, counter} on success and {"", 0} when `s` has no '=' or
// when the counter part is not a valid non-negative number.
MimetypeAndCounter parseASingleMimetypeCounter(const std::string& s)
{
  const std::string::size_type k = s.rfind('=');
  if ( k != std::string::npos )
  {
    const std::string counterStr = s.substr(k+1);
    // Stream extraction into an unsigned type accepts a leading '-' and
    // stores the wrapped-around value, so "foo=-1" would otherwise be
    // reported as a huge count. Reject such input explicitly.
    if ( counterStr.find('-') == std::string::npos )
    {
      std::istringstream counterSS(counterStr);
      unsigned int counter = 0;
      // eof() ensures there is no trailing garbage after the number.
      if (counterSS >> counter && counterSS.eof())
        return MimetypeAndCounter{s.substr(0, k), counter};
    }
  }
  return MimetypeAndCounter{"", 0};
}
|
||||
|
||||
} // unnamed namespace
|
||||
|
||||
// Parses the whole content of a counter metadata string
// ("mimetype0=count0;mimetype1=count1;...") into a mimetype -> count map.
//
// Items are read one by one with readFullMimetypeAndCounterString() and
// decoded with parseASingleMimetypeCounter(); items decoded with an empty
// mimetype are skipped.
kiwix::MimeCounterType kiwix::parseMimetypeCounter(const std::string& counterData)
{
  kiwix::MimeCounterType result;
  std::istringstream input(counterData);

  // Loop until a read fails, i.e. there is nothing left to extract.
  while ( !input.fail() )
  {
    const MimetypeAndCounter item =
        parseASingleMimetypeCounter(readFullMimetypeAndCounterString(input));
    if ( item.first.empty() )
      continue;
    result.insert(item);
  }

  return result;
}
|
||||
|
||||
std::string kiwix::gen_date_str()
|
||||
{
|
||||
auto now = std::time(0);
|
||||
|
|
|
@ -45,9 +45,6 @@ namespace kiwix
|
|||
const std::string& tagName);
|
||||
bool convertStrToBool(const std::string& value);
|
||||
|
||||
using MimeCounterType = std::map<const std::string, zim::entry_index_type>;
|
||||
MimeCounterType parseMimetypeCounter(const std::string& counterData);
|
||||
|
||||
std::string gen_date_str();
|
||||
std::string gen_uuid(const std::string& s);
|
||||
|
||||
|
|
|
@ -1,143 +0,0 @@
|
|||
/*
|
||||
* Copyright (C) 2019 Matthieu Gautier
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation; either version 2 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
|
||||
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
|
||||
* NON-INFRINGEMENT. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <zim/zim.h>
|
||||
|
||||
namespace kiwix {
|
||||
using CounterType = std::map<const std::string, zim::entry_index_type>;
|
||||
CounterType parseMimetypeCounter(const std::string& counterData);
|
||||
};
|
||||
|
||||
using namespace kiwix;
|
||||
#define parse parseMimetypeCounter
|
||||
|
||||
namespace
|
||||
{
|
||||
// Counter strings made only of plain (parameter-less) mimetypes.
TEST(ParseCounterTest, simpleMimeType)
{
  // Each case pairs a counter string with the map it must be parsed into.
  const std::vector<std::pair<std::string, CounterType>> cases = {
    {"", {}},
    {"foo=1", {{"foo", 1}}},
    {"foo=1;text/html=50;", {{"foo", 1}, {"text/html", 50}}},
  };

  for (const auto& testCase : cases) {
    const std::string& counterStr = testCase.first;
    const CounterType& counterMap = testCase.second;
    ASSERT_EQ(parse(counterStr), counterMap) << counterStr;
  }
}
|
||||
|
||||
// Counter strings whose mimetypes carry ';'-separated parameters.
TEST(ParseCounterTest, paramMimeType)
{
  // Each case pairs a counter string with the map it must be parsed into.
  const std::vector<std::pair<std::string, CounterType>> cases = {
    {"text/html;raw=true=1",
     {{"text/html;raw=true", 1}}},
    {"foo=1;text/html;raw=true=50;bar=2",
     {{"foo", 1}, {"text/html;raw=true", 50}, {"bar", 2}}},
    {"foo=1;text/html;raw=true;param=value=50;bar=2",
     {{"foo", 1}, {"text/html;raw=true;param=value", 50}, {"bar", 2}}},
    // Same input as the second case; kept as it is checked twice.
    {"foo=1;text/html;raw=true=50;bar=2",
     {{"foo", 1}, {"text/html;raw=true", 50}, {"bar", 2}}},
    // A long counter string mixing plain and parameterised mimetypes.
    {"application/javascript=8;text/html=3;application/warc-headers=28364;text/html;raw=true=6336;text/css=47;text/javascript=98;image/png=968;image/webp=24;application/json=3694;image/gif=10274;image/jpeg=1582;font/woff2=25;text/plain=284;application/atom+xml=247;application/x-www-form-urlencoded=9;video/mp4=9;application/x-javascript=7;application/xml=1;image/svg+xml=5",
     {
       {"application/javascript", 8},
       {"text/html", 3},
       {"application/warc-headers", 28364},
       {"text/html;raw=true", 6336},
       {"text/css", 47},
       {"text/javascript", 98},
       {"image/png", 968},
       {"image/webp", 24},
       {"application/json", 3694},
       {"image/gif", 10274},
       {"image/jpeg", 1582},
       {"font/woff2", 25},
       {"text/plain", 284},
       {"application/atom+xml", 247},
       {"application/x-www-form-urlencoded", 9},
       {"video/mp4", 9},
       {"application/x-javascript", 7},
       {"application/xml", 1},
       {"image/svg+xml", 5}
     }},
  };

  for (const auto& testCase : cases) {
    const std::string& counterStr = testCase.first;
    const CounterType& counterMap = testCase.second;
    ASSERT_EQ(parse(counterStr), counterMap) << counterStr;
  }
}
|
||||
|
||||
// Malformed counter strings: invalid items are dropped, valid ones are kept.
TEST(ParseCounterTest, wrongType)
{
  const CounterType empty = {};
  const CounterType onlyHtml = {{"text/html", 50}};

  // Each case pairs a counter string with the map it must be parsed into.
  const std::vector<std::pair<std::string, CounterType>> cases = {
    {"text/html", empty},
    {"text/html=", empty},
    {"text/html=foo", empty},
    {"text/html=123foo", empty},
    {"text/html=50;foo", onlyHtml},
    {"text/html;foo=20", empty},
    {"text/html;foo=20;", empty},
    {"text/html=50;;foo", onlyHtml},
  };

  for (const auto& testCase : cases) {
    const std::string& counterStr = testCase.first;
    const CounterType& counterMap = testCase.second;
    ASSERT_EQ(parse(counterStr), counterMap) << counterStr;
  }
}
|
||||
|
||||
};
|
|
@ -2,7 +2,6 @@ tests = [
|
|||
'library',
|
||||
'regex',
|
||||
'tagParsing',
|
||||
'counterParsing',
|
||||
'stringTools',
|
||||
'pathTools',
|
||||
'kiwixserve',
|
||||
|
|
Loading…
Reference in New Issue