Merge pull request #316 from kiwix/tagging_system

Tagging system
This commit is contained in:
Matthieu Gautier 2020-01-30 18:21:28 +01:00 committed by GitHub
commit 8f990feabb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 131 additions and 71 deletions

View File

@ -60,6 +60,9 @@ class Book
const std::string& getUrl() const { return m_url; }
const std::string& getName() const { return m_name; }
const std::string& getTags() const { return m_tags; }
std::string getTagStr(const std::string& tagName) const;
bool getTagBool(const std::string& tagName) const;
const std::string& getFlavour() const { return m_flavour; }
const std::string& getOrigId() const { return m_origId; }
const uint64_t& getArticleCount() const { return m_articleCount; }
const uint64_t& getMediaCount() const { return m_mediaCount; }
@ -81,6 +84,7 @@ class Book
void setDate(const std::string& date) { m_date = date; }
void setUrl(const std::string& url) { m_url = url; }
void setName(const std::string& name) { m_name = name; }
void setFlavour(const std::string& flavour) { m_flavour = flavour; }
void setTags(const std::string& tags) { m_tags = tags; }
void setOrigId(const std::string& origId) { m_origId = origId; }
void setArticleCount(uint64_t articleCount) { m_articleCount = articleCount; }
@ -103,6 +107,7 @@ class Book
std::string m_date;
std::string m_url;
std::string m_name;
std::string m_flavour;
std::string m_tags;
std::string m_origId;
uint64_t m_articleCount;

View File

@ -21,6 +21,7 @@
#define KIWIX_OTHERTOOLS_H
#include <string>
#include <vector>
namespace pugi {
class xml_node;
@ -31,6 +32,15 @@ namespace kiwix
void sleep(unsigned int milliseconds);
std::string nodeToString(const pugi::xml_node& node);
std::string converta2toa3(const std::string& a2code);
/*
* Convert all format tag string to new format
*/
std::vector<std::string> convertTags(const std::string& tags_str);
std::string getTagValueFromTagList(const std::vector<std::string>& tagList,
const std::string& tagName);
bool convertStrToBool(const std::string& value);
}
#endif

View File

@ -23,6 +23,7 @@
#include "tools/base64.h"
#include "tools/regexTools.h"
#include "tools/networkTools.h"
#include "tools/otherTools.h"
#include <pugixml.hpp>
@ -58,6 +59,7 @@ bool Book::update(const kiwix::Book& other)
m_date = other.m_date;
m_url = other.m_url;
m_name = other.m_name;
m_flavour = other.m_flavour;
m_tags = other.m_tags;
m_origId = other.m_origId;
m_articleCount = other.m_articleCount;
@ -84,6 +86,7 @@ void Book::update(const kiwix::Reader& reader)
m_publisher = reader.getPublisher();
m_date = reader.getDate();
m_name = reader.getName();
m_flavour = reader.getFlavour();
m_tags = reader.getTags();
m_origId = reader.getOrigId();
m_articleCount = reader.getArticleCount();
@ -111,6 +114,7 @@ void Book::updateFromXml(const pugi::xml_node& node, const std::string& baseDir)
m_date = ATTR("date");
m_url = ATTR("url");
m_name = ATTR("name");
m_flavour = ATTR("flavour");
m_tags = ATTR("tags");
m_origId = ATTR("origId");
m_articleCount = strtoull(ATTR("articleCount"), 0, 0);
@ -141,6 +145,7 @@ void Book::updateFromOpds(const pugi::xml_node& node, const std::string& urlHost
if (!m_id.compare(0, 9, "urn:uuid:")) {
m_id.erase(0, 9);
}
// No path on opds.
m_title = VALUE("title");
m_description = VALUE("summary");
m_language = VALUE("language");
@ -148,6 +153,7 @@ void Book::updateFromOpds(const pugi::xml_node& node, const std::string& urlHost
m_publisher = node.child("publisher").child("name").child_value();
m_date = fromOpdsDate(VALUE("updated"));
m_name = VALUE("name");
m_flavour = VALUE("flavour");
m_tags = VALUE("tags");
for(auto linkNode = node.child("link"); linkNode;
linkNode = linkNode.next_sibling("link")) {
@ -203,4 +209,12 @@ const std::string& Book::getFavicon() const {
return m_favicon;
}
std::string Book::getTagStr(const std::string& tagName) const {
return getTagValueFromTagList(convertTags(m_tags), tagName);
}
bool Book::getTagBool(const std::string& tagName) const {
return convertStrToBool(getTagStr(tagName));
}
}

View File

@ -58,6 +58,7 @@ void LibXMLDumper::handleBook(Book book, pugi::xml_node root_node) {
ADD_ATTR_NOT_EMPTY(entry_node, "creator", book.getCreator());
ADD_ATTR_NOT_EMPTY(entry_node, "publisher", book.getPublisher());
ADD_ATTR_NOT_EMPTY(entry_node, "name", book.getName());
ADD_ATTR_NOT_EMPTY(entry_node, "flavour", book.getFlavour());
ADD_ATTR_NOT_EMPTY(entry_node, "tags", book.getTags());
ADD_ATTR_NOT_EMPTY(entry_node, "faviconMimeType", book.getFaviconMimeType());
ADD_ATTR_NOT_EMPTY(entry_node, "faviconUrl", book.getFaviconUrl());

View File

@ -76,6 +76,7 @@ pugi::xml_node OPDSDumper::handleBook(Book book, pugi::xml_node root_node) {
ADD_TEXT_ENTRY(entry_node, "language", book.getLanguage());
ADD_TEXT_ENTRY(entry_node, "updated", gen_date_from_yyyy_mm_dd(book.getDate()));
ADD_TEXT_ENTRY(entry_node, "name", book.getName());
ADD_TEXT_ENTRY(entry_node, "flavour", book.getFlavour());
ADD_TEXT_ENTRY(entry_node, "tags", book.getTags());
ADD_TEXT_ENTRY(entry_node, "icon", rootLocation + "/meta?name=favicon&content=" + book.getHumanReadableIdFromPath());

View File

@ -22,6 +22,8 @@
#include <zim/search.h>
#include "tools/otherTools.h"
inline char hi(char v)
{
char hex[] = "0123456789abcdef";
@ -352,43 +354,6 @@ string Reader::getLicense() const
METADATA("License")
}
std::vector<std::string> convertTags(const std::string& tags_str)
{
auto tags = split(tags_str, ";");
std::vector<std::string> tagsList;
bool picSeen(false), vidSeen(false), detSeen(false), indexSeen(false);
for (auto tag: tags) {
picSeen |= (tag == "nopic" || startsWith(tag, "_pictures:"));
vidSeen |= (tag == "novid" || startsWith(tag, "_videos:"));
detSeen |= (tag == "nodet" || startsWith(tag, "_details:"));
indexSeen |= startsWith(tag, "_ftindex");
if (tag == "nopic") {
tagsList.push_back("_pictures:no");
} else if (tag == "novid") {
tagsList.push_back("_videos:no");
} else if (tag == "nodet") {
tagsList.push_back("_details:no");
} else if (tag == "_ftindex") {
tagsList.push_back("_ftindex:yes");
} else {
tagsList.push_back(tag);
}
}
if (!indexSeen) {
tagsList.push_back("_ftindex:no");
}
if (!picSeen) {
tagsList.push_back("_pictures:yes");
}
if (!vidSeen) {
tagsList.push_back("_videos:yes");
}
if (!detSeen) {
tagsList.push_back("_details:yes");
}
return tagsList;
}
string Reader::getTags(bool original) const
{
string tags_str;
@ -400,26 +365,6 @@ string Reader::getTags(bool original) const
return join(tags, ";");
}
string getTagValueFromTagList(const std::vector<std::string>& tagList, const std::string& tagName)
{
for (auto tag: tagList) {
if (tag[0] == '_') {
auto delimPos = tag.find(':');
if (delimPos == string::npos) {
// No delimiter... what to do ?
continue;
}
auto cTagName = tag.substr(1, delimPos-1);
auto cTagValue = tag.substr(delimPos+1);
if (cTagName == tagName) {
return cTagValue;
}
}
}
std::stringstream ss;
ss << tagName << " cannot be found";
throw std::out_of_range(ss.str());
}
string Reader::getTagStr(const std::string& tagName) const
{
@ -430,16 +375,7 @@ string Reader::getTagStr(const std::string& tagName) const
bool Reader::getTagBool(const std::string& tagName) const
{
auto tagValue = getTagStr(tagName);
if (tagValue == "yes") {
return true;
} else if (tagValue == "no") {
return false;
} else {
std::stringstream ss;
ss << "Tag value '" << tagValue << "' for " << tagName << " cannot be converted to bool.";
throw std::domain_error(ss.str());
}
return convertStrToBool(getTagStr(tagName));
}
string Reader::getRelation() const

View File

@ -25,7 +25,10 @@
#include <unistd.h>
#endif
#include "tools/stringTools.h"
#include <map>
#include <sstream>
#include <pugixml.hpp>
@ -204,3 +207,76 @@ std::string kiwix::nodeToString(const pugi::xml_node& node)
std::string kiwix::converta2toa3(const std::string& a2code){
return codeisomapping.at(a2code);
}
std::vector<std::string> kiwix::convertTags(const std::string& tags_str)
{
auto tags = kiwix::split(tags_str, ";");
std::vector<std::string> tagsList;
bool picSeen(false), vidSeen(false), detSeen(false), indexSeen(false);
for (auto tag: tags) {
picSeen |= (tag == "nopic" || startsWith(tag, "_pictures:"));
vidSeen |= (tag == "novid" || startsWith(tag, "_videos:"));
detSeen |= (tag == "nodet" || startsWith(tag, "_details:"));
indexSeen |= kiwix::startsWith(tag, "_ftindex");
if (tag == "nopic") {
tagsList.push_back("_pictures:no");
} else if (tag == "novid") {
tagsList.push_back("_videos:no");
} else if (tag == "nodet") {
tagsList.push_back("_details:no");
} else if (tag == "_ftindex") {
tagsList.push_back("_ftindex:yes");
} else {
tagsList.push_back(tag);
}
}
if (!indexSeen) {
tagsList.push_back("_ftindex:no");
}
if (!picSeen) {
tagsList.push_back("_pictures:yes");
}
if (!vidSeen) {
tagsList.push_back("_videos:yes");
}
if (!detSeen) {
tagsList.push_back("_details:yes");
}
return tagsList;
}
std::string kiwix::getTagValueFromTagList(
const std::vector<std::string>& tagList, const std::string& tagName)
{
for (auto tag: tagList) {
if (tag[0] == '_') {
auto delimPos = tag.find(':');
if (delimPos == std::string::npos) {
// No delimiter... what to do ?
continue;
}
auto cTagName = tag.substr(1, delimPos-1);
auto cTagValue = tag.substr(delimPos+1);
if (cTagName == tagName) {
return cTagValue;
}
}
}
std::stringstream ss;
ss << tagName << " cannot be found";
throw std::out_of_range(ss.str());
}
bool kiwix::convertStrToBool(const std::string& value)
{
if (value == "yes") {
return true;
} else if (value == "no") {
return false;
}
std::stringstream ss;
ss << "Tag value '" << value << "' cannot be converted to bool.";
throw std::domain_error(ss.str());
}

View File

@ -58,6 +58,7 @@ GETTER(jstring, getPublisher)
GETTER(jstring, getDate)
GETTER(jstring, getUrl)
GETTER(jstring, getName)
GETTER(jstring, getFlavour)
GETTER(jstring, getTags)
GETTER(jlong, getArticleCount)
GETTER(jlong, getMediaCount)
@ -66,4 +67,11 @@ GETTER(jstring, getFavicon)
GETTER(jstring, getFaviconUrl)
GETTER(jstring, getFaviconMimeType)
METHOD(jstring, Book, getTagStr, jstring tagName) try {
auto cRet = Book->getTagStr(jni2c(tagName, env));
return c2jni(cRet, env);
} catch(...) {
return c2jni<std::string>("", env);
}
#undef GETTER

View File

@ -19,7 +19,15 @@ public class Book
public native String getDate();
public native String getUrl();
public native String getName();
public native String getFlavour();
public native String getTags();
/**
* Return the value associated to the tag tagName
*
* @param tagName the tag name to search for.
* @return The value of the tag. If the tag is not found, return empty string.
*/
public native String getTagStr(String tagName);
public native long getArticleCount();
public native long getMediaCount();

View File

@ -4,6 +4,7 @@
<Description>Search zim files in the catalog.</Description>
<Url type="application/atom+xml;profile=opds-catalog"
xmlns:atom="http://www.w3.org/2005/Atom"
xmlns:k="http://kiwix.org/opensearchextension/1.0"
indexOffset="0"
template="/{{root}}/catalog/search?q={searchTerms}&lang={language}&count={count}&start={startIndex}"/>
template="/{{root}}/catalog/search?q={searchTerms?}&lang={language?}&name={k:name?}&tag={k:tag?}&notag={k:notag?}&maxsize={k:maxsize?}&count={count?}&start={startIndex?}"/>
</OpenSearchDescription>