diff --git a/include/reader.h b/include/reader.h
index 77d4e9793..3dcdc441d 100644
--- a/include/reader.h
+++ b/include/reader.h
@@ -229,9 +229,11 @@ class Reader
   /**
    * Get the tags of the zim file.
    *
-   * @return The tags of the zim file as specified in the zim metadata.
+   * @param original If true, return the original tags as specified in the zim metadata.
+   *                 Else, try to convert it to the new 'normalized' format.
+   * @return The tags of the zim file.
    */
-  string getTags() const;
+  string getTags(bool original=false) const;
 
   /**
    * Get the relations of the zim file.
diff --git a/src/reader.cpp b/src/reader.cpp
index 0a02985e7..43b7a1491 100644
--- a/src/reader.cpp
+++ b/src/reader.cpp
@@ -352,9 +352,65 @@ string Reader::getLicense() const
   METADATA("License")
 }
 
-string Reader::getTags() const
+/**
+ * Convert a ';'-separated tag string into the 'normalized' tag list.
+ *
+ * Legacy tags are rewritten ("nopic" -> "_pictures:no", "novid" ->
+ * "_videos:no", "nodet" -> "_details:no", "_ftindex" -> "_ftindex:yes");
+ * any other tag is kept as is. A default entry is then appended for each
+ * of _ftindex, _pictures, _videos and _details the input did not mention.
+ *
+ * @param tags_str The tags as stored in the zim metadata.
+ * @return The normalized tags: converted input tags, then the defaults.
+ */
+std::vector<std::string> convertTags(const std::string& tags_str)
 {
-  METADATA("Tags")
+  auto tags = split(tags_str, ";");
+  std::vector<std::string> tagsList;
+  bool picSeen(false), vidSeen(false), detSeen(false), indexSeen(false);
+  // Tags are only read here, so bind by const reference instead of copying.
+  for (const auto& tag : tags) {
+    picSeen |= (tag == "nopic" || startsWith(tag, "_pictures:"));
+    vidSeen |= (tag == "novid" || startsWith(tag, "_videos:"));
+    detSeen |= (tag == "nodet" || startsWith(tag, "_details:"));
+    indexSeen |= startsWith(tag, "_ftindex");
+    if (tag == "nopic") {
+      tagsList.push_back("_pictures:no");
+    } else if (tag == "novid") {
+      tagsList.push_back("_videos:no");
+    } else if (tag == "nodet") {
+      tagsList.push_back("_details:no");
+    } else if (tag == "_ftindex") {
+      tagsList.push_back("_ftindex:yes");
+    } else {
+      tagsList.push_back(tag);
+    }
+  }
+  // Append a default for every tag family that was seen in neither spelling.
+  if (!indexSeen) {
+    tagsList.push_back("_ftindex:no");
+  }
+  if (!picSeen) {
+    tagsList.push_back("_pictures:yes");
+  }
+  if (!vidSeen) {
+    tagsList.push_back("_videos:yes");
+  }
+  if (!detSeen) {
+    tagsList.push_back("_details:yes");
+  }
+  return tagsList;
+}
+
+string Reader::getTags(bool original) const
+{
+  string tags_str;
+  getMetadata("Tags", tags_str);
+  if (original) {
+    return tags_str;
+  }
+  auto tags = convertTags(tags_str);
+  return join(tags, ";");
 }
 
 string Reader::getRelation() const
diff --git a/test/meson.build b/test/meson.build
index ad537ad3f..2777d1ea5 100644
--- a/test/meson.build
+++ b/test/meson.build
@@ -3,7 +3,8 @@ tests = [
     'parseUrl',
     'library',
-    'regex'
+    'regex',
+    'tagParsing'
 ]
 
 
 
diff --git a/test/tagParsing.cpp b/test/tagParsing.cpp
new file mode 100644
index 000000000..c58c1dbe4
--- /dev/null
+++ b/test/tagParsing.cpp
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2019 Matthieu Gautier
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+#include "gtest/gtest.h"
+#include <string>
+#include <vector>
+
+namespace kiwix {
+std::vector<std::string> convertTags(const std::string& tags);
+std::string getTagValueFromTagList(const std::vector<std::string>& tagList, const std::string& tagName);
+};
+
+using namespace kiwix;
+#define parse_tag getTagValueFromTagList
+
+namespace
+{
+TEST(ParseTagTest, convert)
+{
+  {
+    std::string tagStr = "";
+    std::vector<std::string> tagList = {"_ftindex:no", "_pictures:yes", "_videos:yes", "_details:yes"};
+    ASSERT_EQ(convertTags(tagStr), tagList);
+  }
+  {
+    std::string tagStr = "_category:foo;bar";
+    std::vector<std::string> tagList = {"_category:foo", "bar", "_ftindex:no", "_pictures:yes", "_videos:yes", "_details:yes"};
+    ASSERT_EQ(convertTags(tagStr), tagList);
+  }
+  {
+    std::string tagStr = "_ftindex:no;_pictures:yes;_videos:yes;_details:yes;_category:foo;bar";
+    std::vector<std::string> tagList = {"_ftindex:no", "_pictures:yes", "_videos:yes", "_details:yes", "_category:foo", "bar"};
+    ASSERT_EQ(convertTags(tagStr), tagList);
+  }
+  {
+    std::string tagStr = "_ftindex:yes;_pictures:no;_videos:no;_details:no;_category:foo;bar";
+    std::vector<std::string> tagList = {"_ftindex:yes", "_pictures:no", "_videos:no", "_details:no", "_category:foo", "bar"};
+    ASSERT_EQ(convertTags(tagStr), tagList);
+  }
+  {
+    std::string tagStr = "_ftindex;nopic;novid;nodet;foo;bar";
+    std::vector<std::string> tagList = {"_ftindex:yes", "_pictures:no", "_videos:no", "_details:no", "foo", "bar"};
+    ASSERT_EQ(convertTags(tagStr), tagList);
+  }
+}
+
+};
+int main(int argc, char** argv)
+{
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}