Merge pull request #316 from kiwix/tagging_system

Tagging system
This commit is contained in:
Matthieu Gautier 2020-01-30 18:21:28 +01:00 committed by GitHub
commit 8f990feabb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 131 additions and 71 deletions

View File

@ -60,6 +60,9 @@ class Book
const std::string& getUrl() const { return m_url; } const std::string& getUrl() const { return m_url; }
const std::string& getName() const { return m_name; } const std::string& getName() const { return m_name; }
const std::string& getTags() const { return m_tags; } const std::string& getTags() const { return m_tags; }
std::string getTagStr(const std::string& tagName) const;
bool getTagBool(const std::string& tagName) const;
const std::string& getFlavour() const { return m_flavour; }
const std::string& getOrigId() const { return m_origId; } const std::string& getOrigId() const { return m_origId; }
const uint64_t& getArticleCount() const { return m_articleCount; } const uint64_t& getArticleCount() const { return m_articleCount; }
const uint64_t& getMediaCount() const { return m_mediaCount; } const uint64_t& getMediaCount() const { return m_mediaCount; }
@ -81,6 +84,7 @@ class Book
void setDate(const std::string& date) { m_date = date; } void setDate(const std::string& date) { m_date = date; }
void setUrl(const std::string& url) { m_url = url; } void setUrl(const std::string& url) { m_url = url; }
void setName(const std::string& name) { m_name = name; } void setName(const std::string& name) { m_name = name; }
void setFlavour(const std::string& flavour) { m_flavour = flavour; }
void setTags(const std::string& tags) { m_tags = tags; } void setTags(const std::string& tags) { m_tags = tags; }
void setOrigId(const std::string& origId) { m_origId = origId; } void setOrigId(const std::string& origId) { m_origId = origId; }
void setArticleCount(uint64_t articleCount) { m_articleCount = articleCount; } void setArticleCount(uint64_t articleCount) { m_articleCount = articleCount; }
@ -103,6 +107,7 @@ class Book
std::string m_date; std::string m_date;
std::string m_url; std::string m_url;
std::string m_name; std::string m_name;
std::string m_flavour;
std::string m_tags; std::string m_tags;
std::string m_origId; std::string m_origId;
uint64_t m_articleCount; uint64_t m_articleCount;

View File

@ -21,6 +21,7 @@
#define KIWIX_OTHERTOOLS_H #define KIWIX_OTHERTOOLS_H
#include <string> #include <string>
#include <vector>
namespace pugi { namespace pugi {
class xml_node; class xml_node;
@ -28,9 +29,18 @@ namespace pugi {
namespace kiwix namespace kiwix
{ {
void sleep(unsigned int milliseconds); void sleep(unsigned int milliseconds);
std::string nodeToString(const pugi::xml_node& node); std::string nodeToString(const pugi::xml_node& node);
std::string converta2toa3(const std::string& a2code); std::string converta2toa3(const std::string& a2code);
/*
* Convert a tag string in any format (old or new) to a list of tags in the new format.
*/
std::vector<std::string> convertTags(const std::string& tags_str);
std::string getTagValueFromTagList(const std::vector<std::string>& tagList,
const std::string& tagName);
bool convertStrToBool(const std::string& value);
} }
#endif #endif

View File

@ -23,6 +23,7 @@
#include "tools/base64.h" #include "tools/base64.h"
#include "tools/regexTools.h" #include "tools/regexTools.h"
#include "tools/networkTools.h" #include "tools/networkTools.h"
#include "tools/otherTools.h"
#include <pugixml.hpp> #include <pugixml.hpp>
@ -58,6 +59,7 @@ bool Book::update(const kiwix::Book& other)
m_date = other.m_date; m_date = other.m_date;
m_url = other.m_url; m_url = other.m_url;
m_name = other.m_name; m_name = other.m_name;
m_flavour = other.m_flavour;
m_tags = other.m_tags; m_tags = other.m_tags;
m_origId = other.m_origId; m_origId = other.m_origId;
m_articleCount = other.m_articleCount; m_articleCount = other.m_articleCount;
@ -84,6 +86,7 @@ void Book::update(const kiwix::Reader& reader)
m_publisher = reader.getPublisher(); m_publisher = reader.getPublisher();
m_date = reader.getDate(); m_date = reader.getDate();
m_name = reader.getName(); m_name = reader.getName();
m_flavour = reader.getFlavour();
m_tags = reader.getTags(); m_tags = reader.getTags();
m_origId = reader.getOrigId(); m_origId = reader.getOrigId();
m_articleCount = reader.getArticleCount(); m_articleCount = reader.getArticleCount();
@ -111,6 +114,7 @@ void Book::updateFromXml(const pugi::xml_node& node, const std::string& baseDir)
m_date = ATTR("date"); m_date = ATTR("date");
m_url = ATTR("url"); m_url = ATTR("url");
m_name = ATTR("name"); m_name = ATTR("name");
m_flavour = ATTR("flavour");
m_tags = ATTR("tags"); m_tags = ATTR("tags");
m_origId = ATTR("origId"); m_origId = ATTR("origId");
m_articleCount = strtoull(ATTR("articleCount"), 0, 0); m_articleCount = strtoull(ATTR("articleCount"), 0, 0);
@ -141,6 +145,7 @@ void Book::updateFromOpds(const pugi::xml_node& node, const std::string& urlHost
if (!m_id.compare(0, 9, "urn:uuid:")) { if (!m_id.compare(0, 9, "urn:uuid:")) {
m_id.erase(0, 9); m_id.erase(0, 9);
} }
// No path on opds.
m_title = VALUE("title"); m_title = VALUE("title");
m_description = VALUE("summary"); m_description = VALUE("summary");
m_language = VALUE("language"); m_language = VALUE("language");
@ -148,6 +153,7 @@ void Book::updateFromOpds(const pugi::xml_node& node, const std::string& urlHost
m_publisher = node.child("publisher").child("name").child_value(); m_publisher = node.child("publisher").child("name").child_value();
m_date = fromOpdsDate(VALUE("updated")); m_date = fromOpdsDate(VALUE("updated"));
m_name = VALUE("name"); m_name = VALUE("name");
m_flavour = VALUE("flavour");
m_tags = VALUE("tags"); m_tags = VALUE("tags");
for(auto linkNode = node.child("link"); linkNode; for(auto linkNode = node.child("link"); linkNode;
linkNode = linkNode.next_sibling("link")) { linkNode = linkNode.next_sibling("link")) {
@ -203,4 +209,12 @@ const std::string& Book::getFavicon() const {
return m_favicon; return m_favicon;
} }
/*
 * Return the value of the tag named tagName for this book.
 *
 * The stored tag string (m_tags) is first run through convertTags(), then
 * the resulting list is searched with getTagValueFromTagList(). Whatever
 * that lookup returns or throws is passed on to the caller unchanged.
 */
std::string Book::getTagStr(const std::string& tagName) const {
  const auto tagList = convertTags(m_tags);
  return getTagValueFromTagList(tagList, tagName);
}
/*
 * Return the value of the tag named tagName interpreted as a boolean.
 *
 * The textual value is fetched with getTagStr() and handed to
 * convertStrToBool(); exceptions from either call propagate to the caller.
 */
bool Book::getTagBool(const std::string& tagName) const {
  const std::string tagValue = getTagStr(tagName);
  return convertStrToBool(tagValue);
}
} }

View File

@ -58,6 +58,7 @@ void LibXMLDumper::handleBook(Book book, pugi::xml_node root_node) {
ADD_ATTR_NOT_EMPTY(entry_node, "creator", book.getCreator()); ADD_ATTR_NOT_EMPTY(entry_node, "creator", book.getCreator());
ADD_ATTR_NOT_EMPTY(entry_node, "publisher", book.getPublisher()); ADD_ATTR_NOT_EMPTY(entry_node, "publisher", book.getPublisher());
ADD_ATTR_NOT_EMPTY(entry_node, "name", book.getName()); ADD_ATTR_NOT_EMPTY(entry_node, "name", book.getName());
ADD_ATTR_NOT_EMPTY(entry_node, "flavour", book.getFlavour());
ADD_ATTR_NOT_EMPTY(entry_node, "tags", book.getTags()); ADD_ATTR_NOT_EMPTY(entry_node, "tags", book.getTags());
ADD_ATTR_NOT_EMPTY(entry_node, "faviconMimeType", book.getFaviconMimeType()); ADD_ATTR_NOT_EMPTY(entry_node, "faviconMimeType", book.getFaviconMimeType());
ADD_ATTR_NOT_EMPTY(entry_node, "faviconUrl", book.getFaviconUrl()); ADD_ATTR_NOT_EMPTY(entry_node, "faviconUrl", book.getFaviconUrl());

View File

@ -76,6 +76,7 @@ pugi::xml_node OPDSDumper::handleBook(Book book, pugi::xml_node root_node) {
ADD_TEXT_ENTRY(entry_node, "language", book.getLanguage()); ADD_TEXT_ENTRY(entry_node, "language", book.getLanguage());
ADD_TEXT_ENTRY(entry_node, "updated", gen_date_from_yyyy_mm_dd(book.getDate())); ADD_TEXT_ENTRY(entry_node, "updated", gen_date_from_yyyy_mm_dd(book.getDate()));
ADD_TEXT_ENTRY(entry_node, "name", book.getName()); ADD_TEXT_ENTRY(entry_node, "name", book.getName());
ADD_TEXT_ENTRY(entry_node, "flavour", book.getFlavour());
ADD_TEXT_ENTRY(entry_node, "tags", book.getTags()); ADD_TEXT_ENTRY(entry_node, "tags", book.getTags());
ADD_TEXT_ENTRY(entry_node, "icon", rootLocation + "/meta?name=favicon&content=" + book.getHumanReadableIdFromPath()); ADD_TEXT_ENTRY(entry_node, "icon", rootLocation + "/meta?name=favicon&content=" + book.getHumanReadableIdFromPath());

View File

@ -22,6 +22,8 @@
#include <zim/search.h> #include <zim/search.h>
#include "tools/otherTools.h"
inline char hi(char v) inline char hi(char v)
{ {
char hex[] = "0123456789abcdef"; char hex[] = "0123456789abcdef";
@ -352,43 +354,6 @@ string Reader::getLicense() const
METADATA("License") METADATA("License")
} }
/*
 * Turn a ";"-separated tag string into a list of tags in the new
 * "_name:value" format.
 *
 * Legacy tags are rewritten ("nopic" -> "_pictures:no", "novid" ->
 * "_videos:no", "nodet" -> "_details:no", "_ftindex" -> "_ftindex:yes");
 * all other tags are kept as they are. For each of ftindex, pictures,
 * videos and details that no input tag mentioned, a default entry is
 * appended ("_ftindex:no", "_pictures:yes", "_videos:yes", "_details:yes").
 */
std::vector<std::string> convertTags(const std::string& tags_str)
{
  // Map one input tag to its new-format spelling.
  const auto modernize = [](const std::string& tag) -> std::string {
    if (tag == "nopic") {
      return "_pictures:no";
    }
    if (tag == "novid") {
      return "_videos:no";
    }
    if (tag == "nodet") {
      return "_details:no";
    }
    if (tag == "_ftindex") {
      return "_ftindex:yes";
    }
    return tag;
  };

  bool hasPictures = false;
  bool hasVideos = false;
  bool hasDetails = false;
  bool hasIndex = false;
  std::vector<std::string> converted;

  const auto pieces = split(tags_str, ";");
  for (const auto& piece : pieces) {
    if (piece == "nopic" || startsWith(piece, "_pictures:")) {
      hasPictures = true;
    }
    if (piece == "novid" || startsWith(piece, "_videos:")) {
      hasVideos = true;
    }
    if (piece == "nodet" || startsWith(piece, "_details:")) {
      hasDetails = true;
    }
    if (startsWith(piece, "_ftindex")) {
      hasIndex = true;
    }
    converted.push_back(modernize(piece));
  }

  // Defaults for everything the input did not mention, in a fixed order.
  if (!hasIndex) {
    converted.push_back("_ftindex:no");
  }
  if (!hasPictures) {
    converted.push_back("_pictures:yes");
  }
  if (!hasVideos) {
    converted.push_back("_videos:yes");
  }
  if (!hasDetails) {
    converted.push_back("_details:yes");
  }
  return converted;
}
string Reader::getTags(bool original) const string Reader::getTags(bool original) const
{ {
string tags_str; string tags_str;
@ -400,26 +365,6 @@ string Reader::getTags(bool original) const
return join(tags, ";"); return join(tags, ";");
} }
/*
 * Find the value of the tag tagName in a list of "_name:value" tags.
 *
 * Only entries starting with '_' and containing a ':' are considered; the
 * name is the text between the leading '_' and the first ':', the value is
 * everything after that ':'. The first matching entry wins.
 *
 * Throws std::out_of_range when no entry matches.
 */
string getTagValueFromTagList(const std::vector<std::string>& tagList, const std::string& tagName)
{
  for (auto it = tagList.begin(); it != tagList.end(); ++it) {
    const std::string& entry = *it;
    if (entry[0] != '_') {
      continue;
    }
    const auto colonPos = entry.find(':');
    if (colonPos == string::npos) {
      // An underscore tag without delimiter carries no value: skip it.
      continue;
    }
    if (entry.substr(1, colonPos - 1) == tagName) {
      return entry.substr(colonPos + 1);
    }
  }
  throw std::out_of_range(tagName + " cannot be found");
}
string Reader::getTagStr(const std::string& tagName) const string Reader::getTagStr(const std::string& tagName) const
{ {
@ -430,16 +375,7 @@ string Reader::getTagStr(const std::string& tagName) const
bool Reader::getTagBool(const std::string& tagName) const bool Reader::getTagBool(const std::string& tagName) const
{ {
auto tagValue = getTagStr(tagName); return convertStrToBool(getTagStr(tagName));
if (tagValue == "yes") {
return true;
} else if (tagValue == "no") {
return false;
} else {
std::stringstream ss;
ss << "Tag value '" << tagValue << "' for " << tagName << " cannot be converted to bool.";
throw std::domain_error(ss.str());
}
} }
string Reader::getRelation() const string Reader::getRelation() const

View File

@ -25,7 +25,10 @@
#include <unistd.h> #include <unistd.h>
#endif #endif
#include "tools/stringTools.h"
#include <map> #include <map>
#include <sstream>
#include <pugixml.hpp> #include <pugixml.hpp>
@ -204,3 +207,76 @@ std::string kiwix::nodeToString(const pugi::xml_node& node)
std::string kiwix::converta2toa3(const std::string& a2code){ std::string kiwix::converta2toa3(const std::string& a2code){
return codeisomapping.at(a2code); return codeisomapping.at(a2code);
} }
/*
 * Convert a ";"-separated tag string (old or new format) to a list of tags
 * in the new "_name:value" format.
 *
 * Legacy tags are translated:
 *   "nopic"    -> "_pictures:no"
 *   "novid"    -> "_videos:no"
 *   "nodet"    -> "_details:no"
 *   "_ftindex" -> "_ftindex:yes"
 * Every other tag is copied unchanged.
 *
 * When the input says nothing about the full-text index, pictures, videos
 * or details, the matching default is appended, in this order:
 * "_ftindex:no", "_pictures:yes", "_videos:yes", "_details:yes".
 */
std::vector<std::string> kiwix::convertTags(const std::string& tags_str)
{
  const auto tags = kiwix::split(tags_str, ";");
  std::vector<std::string> tagsList;
  bool picSeen(false), vidSeen(false), detSeen(false), indexSeen(false);
  // Iterate by const reference: the tags are only read, no copy is needed.
  for (const auto& tag : tags) {
    picSeen |= (tag == "nopic" || kiwix::startsWith(tag, "_pictures:"));
    vidSeen |= (tag == "novid" || kiwix::startsWith(tag, "_videos:"));
    detSeen |= (tag == "nodet" || kiwix::startsWith(tag, "_details:"));
    // Matches both the legacy bare "_ftindex" and the new "_ftindex:<value>".
    indexSeen |= kiwix::startsWith(tag, "_ftindex");
    if (tag == "nopic") {
      tagsList.push_back("_pictures:no");
    } else if (tag == "novid") {
      tagsList.push_back("_videos:no");
    } else if (tag == "nodet") {
      tagsList.push_back("_details:no");
    } else if (tag == "_ftindex") {
      tagsList.push_back("_ftindex:yes");
    } else {
      tagsList.push_back(tag);
    }
  }
  if (!indexSeen) {
    tagsList.push_back("_ftindex:no");
  }
  if (!picSeen) {
    tagsList.push_back("_pictures:yes");
  }
  if (!vidSeen) {
    tagsList.push_back("_videos:yes");
  }
  if (!detSeen) {
    tagsList.push_back("_details:yes");
  }
  return tagsList;
}
/*
 * Return the value of the tag tagName from a list of "_name:value" tags.
 *
 * Only entries that start with '_' and contain a ':' are considered. The
 * name is the text between the leading '_' and the first ':'; the value is
 * everything after that ':' (it may itself contain ':'). The first entry
 * whose name equals tagName wins.
 *
 * Throws std::out_of_range ("<tagName> cannot be found") when no entry
 * matches.
 */
std::string kiwix::getTagValueFromTagList(
  const std::vector<std::string>& tagList, const std::string& tagName)
{
  // Iterate by const reference and compare in place: no per-entry copies.
  for (const auto& tag : tagList) {
    if (tag.empty() || tag.front() != '_') {
      continue;
    }
    const auto delimPos = tag.find(':');
    if (delimPos == std::string::npos) {
      // No delimiter, so the entry carries no value: ignore it.
      continue;
    }
    // The name occupies [1, delimPos); delimPos >= 1 because tag[0] is '_'.
    if (tag.compare(1, delimPos - 1, tagName) == 0) {
      return tag.substr(delimPos + 1);
    }
  }
  throw std::out_of_range(tagName + " cannot be found");
}
/*
 * Interpret a tag value as a boolean.
 *
 * "yes" maps to true and "no" maps to false (exact, case-sensitive match).
 * Any other value raises std::domain_error.
 */
bool kiwix::convertStrToBool(const std::string& value)
{
  const bool isYes = (value == "yes");
  if (!isYes && value != "no") {
    throw std::domain_error(
      "Tag value '" + value + "' cannot be converted to bool.");
  }
  return isYes;
}

View File

@ -58,6 +58,7 @@ GETTER(jstring, getPublisher)
GETTER(jstring, getDate) GETTER(jstring, getDate)
GETTER(jstring, getUrl) GETTER(jstring, getUrl)
GETTER(jstring, getName) GETTER(jstring, getName)
GETTER(jstring, getFlavour)
GETTER(jstring, getTags) GETTER(jstring, getTags)
GETTER(jlong, getArticleCount) GETTER(jlong, getArticleCount)
GETTER(jlong, getMediaCount) GETTER(jlong, getMediaCount)
@ -66,4 +67,11 @@ GETTER(jstring, getFavicon)
GETTER(jstring, getFaviconUrl) GETTER(jstring, getFaviconUrl)
GETTER(jstring, getFaviconMimeType) GETTER(jstring, getFaviconMimeType)
// JNI entry point backing the Java method Book.getTagStr(String).
// The METHOD macro is defined outside this view; presumably it generates the
// native function signature and provides `env` and the wrapped `Book` pointer.
// The whole body is a function-try-block so that no C++ exception leaves it.
METHOD(jstring, Book, getTagStr, jstring tagName) try {
// Forward the tag name (passed through jni2c) to the wrapped book.
auto cRet = Book->getTagStr(jni2c(tagName, env));
return c2jni(cRet, env);
} catch(...) {
// Any exception is swallowed and an empty string is returned to Java instead.
return c2jni<std::string>("", env);
}
#undef GETTER #undef GETTER

View File

@ -19,7 +19,15 @@ public class Book
public native String getDate(); public native String getDate();
public native String getUrl(); public native String getUrl();
public native String getName(); public native String getName();
public native String getFlavour();
public native String getTags(); public native String getTags();
/**
* Return the value associated with the tag tagName.
*
* @param tagName the name of the tag to search for.
* @return The value of the tag, or an empty string if the tag is not found.
*/
public native String getTagStr(String tagName);
public native long getArticleCount(); public native long getArticleCount();
public native long getMediaCount(); public native long getMediaCount();

View File

@ -4,6 +4,7 @@
<Description>Search zim files in the catalog.</Description> <Description>Search zim files in the catalog.</Description>
<Url type="application/atom+xml;profile=opds-catalog" <Url type="application/atom+xml;profile=opds-catalog"
xmlns:atom="http://www.w3.org/2005/Atom" xmlns:atom="http://www.w3.org/2005/Atom"
xmlns:k="http://kiwix.org/opensearchextension/1.0"
indexOffset="0" indexOffset="0"
template="/{{root}}/catalog/search?q={searchTerms}&lang={language}&count={count}&start={startIndex}"/> template="/{{root}}/catalog/search?q={searchTerms?}&lang={language?}&name={k:name?}&tag={k:tag?}&notag={k:notag?}&maxsize={k:maxsize?}&count={count?}&start={startIndex?}"/>
</OpenSearchDescription> </OpenSearchDescription>