mirror of https://github.com/kiwix/libkiwix.git
commit
15d5b4ed58
|
@ -1,2 +1,3 @@
|
|||
.idea/
|
||||
*.swp
|
||||
subprojects/googletest-release*
|
||||
|
|
135
include/reader.h
135
include/reader.h
|
@ -158,31 +158,7 @@ class Reader
|
|||
* @param[out] value The value will be set to the content of the metadata.
|
||||
* @return True if it was possible to get the content of the metadata.
|
||||
*/
|
||||
bool getMetatag(const string& name, string& value) const;
|
||||
|
||||
/**
|
||||
* Get the title of the zim file.
|
||||
*
|
||||
* @return The title of zim file as specified in the zim metadata.
|
||||
* If no title has been set, return a title computed from the
|
||||
* file path.
|
||||
*/
|
||||
string getTitle() const;
|
||||
|
||||
/**
|
||||
* Get the description of the zim file.
|
||||
*
|
||||
* @return The description of the zim file as specified in the zim metadata.
|
||||
* If no description has been set, return the subtitle.
|
||||
*/
|
||||
string getDescription() const;
|
||||
|
||||
/**
|
||||
* Get the language of the zim file.
|
||||
*
|
||||
* @return The language of the zim file as specified in the zim metadata.
|
||||
*/
|
||||
string getLanguage() const;
|
||||
bool getMetadata(const string& name, string& value) const;
|
||||
|
||||
/**
|
||||
* Get the name of the zim file.
|
||||
|
@ -192,18 +168,13 @@ class Reader
|
|||
string getName() const;
|
||||
|
||||
/**
|
||||
* Get the tags of the zim file.
|
||||
* Get the title of the zim file.
|
||||
*
|
||||
* @return The tags of the zim file as specified in the zim metadata.
|
||||
* @return The title of zim file as specified in the zim metadata.
|
||||
* If no title has been set, return a title computed from the
|
||||
* file path.
|
||||
*/
|
||||
string getTags() const;
|
||||
|
||||
/**
|
||||
* Get the date of the zim file.
|
||||
*
|
||||
* @return The date of the zim file as specified in the zim metadata.
|
||||
*/
|
||||
string getDate() const;
|
||||
string getTitle() const;
|
||||
|
||||
/**
|
||||
* Get the creator of the zim file.
|
||||
|
@ -219,6 +190,100 @@ class Reader
|
|||
*/
|
||||
string getPublisher() const;
|
||||
|
||||
/**
|
||||
* Get the date of the zim file.
|
||||
*
|
||||
* @return The date of the zim file as specified in the zim metadata.
|
||||
*/
|
||||
string getDate() const;
|
||||
|
||||
/**
|
||||
* Get the description of the zim file.
|
||||
*
|
||||
* @return The description of the zim file as specified in the zim metadata.
|
||||
* If no description has been set, return the subtitle.
|
||||
*/
|
||||
string getDescription() const;
|
||||
|
||||
/**
|
||||
* Get the long description of the zim file.
|
||||
*
|
||||
* @return The long description of the zim file as specified in the zim metadata.
|
||||
*/
|
||||
string getLongDescription() const;
|
||||
|
||||
/**
|
||||
* Get the language of the zim file.
|
||||
*
|
||||
* @return The language of the zim file as specified in the zim metadata.
|
||||
*/
|
||||
string getLanguage() const;
|
||||
|
||||
/**
|
||||
* Get the license of the zim file.
|
||||
*
|
||||
* @return The license of the zim file as specified in the zim metadata.
|
||||
*/
|
||||
string getLicense() const;
|
||||
|
||||
/**
|
||||
* Get the tags of the zim file.
|
||||
*
|
||||
* @param original If true, return the original tags as specified in the zim metadata.
|
||||
* Else, try to convert it to the new 'normalized' format.
|
||||
* @return The tags of the zim file.
|
||||
*/
|
||||
string getTags(bool original=false) const;
|
||||
|
||||
/**
|
||||
* Get the value (as a string) of a specific tag.
|
||||
*
|
||||
* According to https://wiki.openzim.org/wiki/Tags
|
||||
*
|
||||
* @return The value of the specified tag.
|
||||
* @throw std::out_of_range if the specified tag is not found.
|
||||
*/
|
||||
string getTagStr(const std::string& tagName) const;
|
||||
|
||||
/**
|
||||
* Get the boolean value of a specific tag.
|
||||
*
|
||||
* According to https://wiki.openzim.org/wiki/Tags
|
||||
*
|
||||
* @return The boolean value of the specified tag.
|
||||
* @throw std::out_of_range if the specified tag is not found.
|
||||
* std::domain_error if the value of the tag cannot be converted to bool.
|
||||
*/
|
||||
bool getTagBool(const std::string& tagName) const;
|
||||
|
||||
/**
|
||||
* Get the relations of the zim file.
|
||||
*
|
||||
* @return The relation of the zim file as specified in the zim metadata.
|
||||
*/
|
||||
string getRelation() const;
|
||||
|
||||
/**
|
||||
* Get the flavour of the zim file.
|
||||
*
|
||||
* @return The flavour of the zim file as specified in the zim metadata.
|
||||
*/
|
||||
string getFlavour() const;
|
||||
|
||||
/**
|
||||
* Get the source of the zim file.
|
||||
*
|
||||
* @return The source of the zim file as specified in the zim metadata.
|
||||
*/
|
||||
string getSource() const;
|
||||
|
||||
/**
|
||||
* Get the scraper of the zim file.
|
||||
*
|
||||
* @return The scraper of the zim file as specified in the zim metadata.
|
||||
*/
|
||||
string getScraper() const;
|
||||
|
||||
/**
|
||||
* Get the origId of the zim file.
|
||||
*
|
||||
|
|
|
@ -47,6 +47,7 @@ std::vector<std::string> split(const std::string&, const std::string&);
|
|||
std::vector<std::string> split(const char*, const char*);
|
||||
std::vector<std::string> split(const std::string&, const char*);
|
||||
std::vector<std::string> split(const char*, const std::string&);
|
||||
std::string join(const std::vector<std::string>& list, const std::string& sep);
|
||||
|
||||
std::string ucAll(const std::string& word);
|
||||
std::string lcAll(const std::string& word);
|
||||
|
|
162
src/reader.cpp
162
src/reader.cpp
|
@ -278,7 +278,7 @@ string Reader::getZimFilePath() const
|
|||
return this->zimFilePath;
|
||||
}
|
||||
/* Return a metatag value */
|
||||
bool Reader::getMetatag(const string& name, string& value) const
|
||||
bool Reader::getMetadata(const string& name, string& value) const
|
||||
{
|
||||
try {
|
||||
auto entry = getEntryFromPath("M/"+name);
|
||||
|
@ -289,10 +289,17 @@ bool Reader::getMetatag(const string& name, string& value) const
|
|||
}
|
||||
}
|
||||
|
||||
#define METADATA(NAME) std::string v; getMetadata(NAME, v); return v;
|
||||
|
||||
string Reader::getName() const
|
||||
{
|
||||
METADATA("Name")
|
||||
}
|
||||
|
||||
string Reader::getTitle() const
|
||||
{
|
||||
string value;
|
||||
this->getMetatag("Title", value);
|
||||
this->getMetadata("Title", value);
|
||||
if (value.empty()) {
|
||||
value = getLastPathElement(zimFileHandler->getFilename());
|
||||
std::replace(value.begin(), value.end(), '_', ' ');
|
||||
|
@ -302,65 +309,164 @@ string Reader::getTitle() const
|
|||
return value;
|
||||
}
|
||||
|
||||
string Reader::getName() const
|
||||
string Reader::getCreator() const
|
||||
{
|
||||
string value;
|
||||
this->getMetatag("Name", value);
|
||||
return value;
|
||||
METADATA("Creator")
|
||||
}
|
||||
|
||||
string Reader::getTags() const
|
||||
string Reader::getPublisher() const
|
||||
{
|
||||
string value;
|
||||
this->getMetatag("Tags", value);
|
||||
return value;
|
||||
METADATA("Publisher")
|
||||
}
|
||||
|
||||
string Reader::getDate() const
|
||||
{
|
||||
METADATA("Date")
|
||||
}
|
||||
|
||||
string Reader::getDescription() const
|
||||
{
|
||||
string value;
|
||||
this->getMetatag("Description", value);
|
||||
this->getMetadata("Description", value);
|
||||
|
||||
/* Mediawiki Collection tends to use the "Subtitle" name */
|
||||
if (value.empty()) {
|
||||
this->getMetatag("Subtitle", value);
|
||||
this->getMetadata("Subtitle", value);
|
||||
}
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
string Reader::getLongDescription() const
|
||||
{
|
||||
METADATA("LongDescription")
|
||||
}
|
||||
|
||||
string Reader::getLanguage() const
|
||||
{
|
||||
string value;
|
||||
this->getMetatag("Language", value);
|
||||
return value;
|
||||
METADATA("Language")
|
||||
}
|
||||
|
||||
string Reader::getDate() const
|
||||
string Reader::getLicense() const
|
||||
{
|
||||
string value;
|
||||
this->getMetatag("Date", value);
|
||||
return value;
|
||||
METADATA("License")
|
||||
}
|
||||
|
||||
string Reader::getCreator() const
|
||||
std::vector<std::string> convertTags(const std::string& tags_str)
|
||||
{
|
||||
string value;
|
||||
this->getMetatag("Creator", value);
|
||||
return value;
|
||||
auto tags = split(tags_str, ";");
|
||||
std::vector<std::string> tagsList;
|
||||
bool picSeen(false), vidSeen(false), detSeen(false), indexSeen(false);
|
||||
for (auto tag: tags) {
|
||||
picSeen |= (tag == "nopic" || startsWith(tag, "_pictures:"));
|
||||
vidSeen |= (tag == "novid" || startsWith(tag, "_videos:"));
|
||||
detSeen |= (tag == "nodet" || startsWith(tag, "_details:"));
|
||||
indexSeen |= startsWith(tag, "_ftindex");
|
||||
if (tag == "nopic") {
|
||||
tagsList.push_back("_pictures:no");
|
||||
} else if (tag == "novid") {
|
||||
tagsList.push_back("_videos:no");
|
||||
} else if (tag == "nodet") {
|
||||
tagsList.push_back("_details:no");
|
||||
} else if (tag == "_ftindex") {
|
||||
tagsList.push_back("_ftindex:yes");
|
||||
} else {
|
||||
tagsList.push_back(tag);
|
||||
}
|
||||
}
|
||||
if (!indexSeen) {
|
||||
tagsList.push_back("_ftindex:no");
|
||||
}
|
||||
if (!picSeen) {
|
||||
tagsList.push_back("_pictures:yes");
|
||||
}
|
||||
if (!vidSeen) {
|
||||
tagsList.push_back("_videos:yes");
|
||||
}
|
||||
if (!detSeen) {
|
||||
tagsList.push_back("_details:yes");
|
||||
}
|
||||
return tagsList;
|
||||
}
|
||||
|
||||
string Reader::getPublisher() const
|
||||
string Reader::getTags(bool original) const
|
||||
{
|
||||
string value;
|
||||
this->getMetatag("Publisher", value);
|
||||
return value;
|
||||
string tags_str;
|
||||
getMetadata("Tags", tags_str);
|
||||
if (original) {
|
||||
return tags_str;
|
||||
}
|
||||
auto tags = convertTags(tags_str);
|
||||
return join(tags, ";");
|
||||
}
|
||||
|
||||
/**
 * Look up the value of a "_name:value" tag in a list of tags.
 *
 * Only tags that start with '_' and contain a ':' are considered. The name is
 * the text between the leading '_' and the first ':'; the value is everything
 * after that ':'. The first tag whose name matches wins.
 *
 * @param tagList The tags to search.
 * @param tagName The name to look for, without the leading '_'.
 * @return The value of the first matching tag (possibly an empty string).
 * @throw std::out_of_range if no tag with that name is found.
 */
std::string getTagValueFromTagList(const std::vector<std::string>& tagList, const std::string& tagName)
{
  // Iterate by const reference: the tags are only inspected, never modified.
  for (const auto& tag : tagList) {
    if (tag.empty() || tag[0] != '_') {
      continue;
    }
    const auto delimPos = tag.find(':');
    if (delimPos == std::string::npos) {
      // Special tag without a delimiter carries no value: ignore it.
      continue;
    }
    // Compare the name in place (characters between '_' and ':') so that a
    // substring is only allocated for the tag that is actually returned.
    if (tag.compare(1, delimPos - 1, tagName) == 0) {
      return tag.substr(delimPos + 1);
    }
  }
  std::stringstream ss;
  ss << tagName << " cannot be found";
  throw std::out_of_range(ss.str());
}
|
||||
|
||||
/* Return the value of the tag `tagName`, looked up in the converted "Tags" metadata.
 * The lookup throws std::out_of_range when no such tag exists. */
string Reader::getTagStr(const std::string& tagName) const
{
  string rawTags;
  getMetadata("Tags", rawTags);
  const auto convertedTags = convertTags(rawTags);
  return getTagValueFromTagList(convertedTags, tagName);
}
|
||||
|
||||
bool Reader::getTagBool(const std::string& tagName) const
|
||||
{
|
||||
auto tagValue = getTagStr(tagName);
|
||||
if (tagValue == "yes") {
|
||||
return true;
|
||||
} else if (tagValue == "no") {
|
||||
return false;
|
||||
} else {
|
||||
std::stringstream ss;
|
||||
ss << "Tag value '" << tagValue << "' for " << tagName << " cannot be converted to bool.";
|
||||
throw std::domain_error(ss.str());
|
||||
}
|
||||
}
|
||||
|
||||
string Reader::getRelation() const
|
||||
{
|
||||
METADATA("Relation")
|
||||
}
|
||||
|
||||
string Reader::getFlavour() const
|
||||
{
|
||||
METADATA("Flavour")
|
||||
}
|
||||
|
||||
string Reader::getSource() const
|
||||
{
|
||||
METADATA("Source")
|
||||
}
|
||||
|
||||
string Reader::getScraper() const
|
||||
{
|
||||
METADATA("Scraper")
|
||||
}
|
||||
#undef METADATA
|
||||
|
||||
string Reader::getOrigId() const
|
||||
{
|
||||
string value;
|
||||
this->getMetatag("startfileuid", value);
|
||||
this->getMetadata("startfileuid", value);
|
||||
if (value.empty()) {
|
||||
return "";
|
||||
}
|
||||
|
|
|
@ -298,6 +298,21 @@ std::vector<std::string> kiwix::split(const std::string& lhs, const char* rhs)
|
|||
return split(lhs.c_str(), rhs);
|
||||
}
|
||||
|
||||
/**
 * Concatenate the elements of a list, inserting a separator between
 * consecutive elements.
 *
 * @param list The strings to concatenate, in order.
 * @param sep  The separator written between two consecutive elements
 *             (never before the first or after the last one).
 * @return The joined string; empty if the list is empty.
 */
std::string kiwix::join(const std::vector<std::string>& list, const std::string& sep)
{
  std::stringstream ss;
  bool first = true;
  for (const auto& s : list) {
    // The separator goes before every element except the first one.
    if (!first) {
      ss << sep;
    }
    first = false;
    ss << s;
  }
  return ss.str();
}
|
||||
|
||||
|
||||
std::string kiwix::ucFirst(const std::string& word)
|
||||
{
|
||||
if (word.empty()) {
|
||||
|
|
|
@ -3,7 +3,8 @@
|
|||
tests = [
|
||||
'parseUrl',
|
||||
'library',
|
||||
'regex'
|
||||
'regex',
|
||||
'tagParsing'
|
||||
]
|
||||
|
||||
|
||||
|
|
|
@ -0,0 +1,102 @@
|
|||
/*
|
||||
* Copyright (C) 2019 Matthieu Gautier
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation; either version 2 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
|
||||
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
|
||||
* NON-INFRINGEMENT. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace kiwix {
|
||||
std::vector<std::string> convertTags(const std::string& tags);
|
||||
std::string getTagValueFromTagList(const std::vector<std::string>& tagList, const std::string& tagName);
|
||||
};
|
||||
|
||||
using namespace kiwix;
|
||||
#define parse_tag getTagValueFromTagList
|
||||
|
||||
namespace
|
||||
{
|
||||
TEST(ParseTagTest, convert)
|
||||
{
|
||||
{
|
||||
std::string tagStr = "";
|
||||
std::vector<std::string> tagList = {"_ftindex:no", "_pictures:yes", "_videos:yes", "_details:yes"};
|
||||
ASSERT_EQ(convertTags(tagStr), tagList);
|
||||
}
|
||||
{
|
||||
std::string tagStr = "_category:foo;bar";
|
||||
std::vector<std::string> tagList = {"_category:foo", "bar", "_ftindex:no", "_pictures:yes", "_videos:yes", "_details:yes"};
|
||||
ASSERT_EQ(convertTags(tagStr), tagList);
|
||||
}
|
||||
{
|
||||
std::string tagStr = "_ftindex:no;_pictures:yes;_videos:yes;_details:yes;_category:foo;bar";
|
||||
std::vector<std::string> tagList = {"_ftindex:no", "_pictures:yes", "_videos:yes", "_details:yes", "_category:foo", "bar"};
|
||||
ASSERT_EQ(convertTags(tagStr), tagList);
|
||||
}
|
||||
{
|
||||
std::string tagStr = "_ftindex:yes;_pictures:no;_videos:no;_details:no;_category:foo;bar";
|
||||
std::vector<std::string> tagList = {"_ftindex:yes", "_pictures:no", "_videos:no", "_details:no", "_category:foo", "bar"};
|
||||
ASSERT_EQ(convertTags(tagStr), tagList);
|
||||
}
|
||||
{
|
||||
std::string tagStr = "_ftindex;nopic;novid;nodet;foo;bar";
|
||||
std::vector<std::string> tagList = {"_ftindex:yes", "_pictures:no", "_videos:no", "_details:no", "foo", "bar"};
|
||||
ASSERT_EQ(convertTags(tagStr), tagList);
|
||||
}
|
||||
}
|
||||
|
||||
TEST(ParseTagTest, valid)
|
||||
{
|
||||
std::string tagStr = "_ftindex:yes;_pictures:no;_videos:no;_details:yes;_category:foo;bar";
|
||||
auto tagList = convertTags(tagStr);
|
||||
|
||||
ASSERT_EQ(parse_tag(tagList, "ftindex"), "yes");
|
||||
ASSERT_EQ(parse_tag(tagList, "pictures"), "no");
|
||||
ASSERT_EQ(parse_tag(tagList, "category"), "foo");
|
||||
ASSERT_EQ(parse_tag(tagList, "details"), "yes");
|
||||
ASSERT_THROW(parse_tag(tagList, "detail"), std::out_of_range);
|
||||
}
|
||||
|
||||
TEST(ParseTagTest, compat)
|
||||
{
|
||||
std::string tagStr = "_ftindex;nopic;foo;bar";
|
||||
auto tagList = convertTags(tagStr);
|
||||
|
||||
ASSERT_EQ(parse_tag(tagList, "ftindex"), "yes");
|
||||
ASSERT_EQ(parse_tag(tagList, "pictures"), "no");
|
||||
ASSERT_EQ(parse_tag(tagList, "videos"), "yes");
|
||||
ASSERT_EQ(parse_tag(tagList, "details"), "yes");
|
||||
}
|
||||
|
||||
TEST(ParseTagTest, invalid)
|
||||
{
|
||||
std::string tagStr = "_ftindex:y;_pictures;_videos:;_details:yes;_details:no;_category:foo;bar";
|
||||
auto tagList = convertTags(tagStr);
|
||||
|
||||
ASSERT_EQ(parse_tag(tagList, "ftindex"), "y");
|
||||
ASSERT_EQ(parse_tag(tagList, "pictures"), "yes");
|
||||
ASSERT_EQ(parse_tag(tagList, "videos"), "");
|
||||
ASSERT_EQ(parse_tag(tagList, "details"), "yes");
|
||||
}
|
||||
|
||||
};
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
::testing::InitGoogleTest(&argc, argv);
|
||||
return RUN_ALL_TESTS();
|
||||
}
|
Loading…
Reference in New Issue