Slight enhancement of escapeForJSON()

- More familiar escape sequences for tab, newline and carriage return
  symbols.

- The quote symbol is now escaped by default too; however, that behaviour
  can be disabled for use in HTML-related contexts, where quotes should
  instead be replaced with the character entity &quot;
This commit is contained in:
Veloman Yunkan 2024-01-06 16:29:41 +04:00
parent f3d3ab13cb
commit 8b8a2eede7
3 changed files with 29 additions and 16 deletions

View File

@ -327,14 +327,27 @@ std::string kiwix::render_template(const std::string& template_str, kainjow::mus
return ss.str(); return ss.str();
} }
std::string kiwix::escapeForJSON(const std::string& s) // The escapeQuote parameter of escapeForJSON() defaults to true.
// This constant makes the calls to escapeForJSON() where the quote symbol
// should not be escaped (as it is later replaced with the HTML character entity
// &quot;) more readable.
static const bool DONT_ESCAPE_QUOTE = false;
std::string kiwix::escapeForJSON(const std::string& s, bool escapeQuote)
{ {
std::ostringstream oss; std::ostringstream oss;
for (char c : s) { for (char c : s) {
if ( c == '\\' ) { if ( c == '\\' ) {
oss << "\\\\"; oss << "\\\\";
} else if ( unsigned(c) < 0x20U ) { } else if ( unsigned(c) < 0x20U ) {
oss << "\\u" << std::setw(4) << std::setfill('0') << unsigned(c); switch ( c ) {
case '\n': oss << "\\n"; break;
case '\r': oss << "\\r"; break;
case '\t': oss << "\\t"; break;
default: oss << "\\u" << std::setw(4) << std::setfill('0') << unsigned(c);
}
} else if ( c == '"' && escapeQuote ) {
oss << "\\\"";
} else { } else {
oss << c; oss << c;
} }
@ -370,10 +383,10 @@ void kiwix::Suggestions::add(const zim::SuggestionItem& suggestion)
? suggestion.getSnippet() ? suggestion.getSnippet()
: suggestion.getTitle(); : suggestion.getTitle();
result.set("label", escapeForJSON(label)); result.set("label", escapeForJSON(label, DONT_ESCAPE_QUOTE));
result.set("value", escapeForJSON(suggestion.getTitle())); result.set("value", escapeForJSON(suggestion.getTitle(), DONT_ESCAPE_QUOTE));
result.set("kind", "path"); result.set("kind", "path");
result.set("path", escapeForJSON(suggestion.getPath())); result.set("path", escapeForJSON(suggestion.getPath(), DONT_ESCAPE_QUOTE));
result.set("first", m_data.is_empty_list()); result.set("first", m_data.is_empty_list());
m_data.push_back(result); m_data.push_back(result);
} }
@ -383,8 +396,8 @@ void kiwix::Suggestions::addFTSearchSuggestion(const std::string& uiLang,
{ {
kainjow::mustache::data result; kainjow::mustache::data result;
const std::string label = makeFulltextSearchSuggestion(uiLang, queryString); const std::string label = makeFulltextSearchSuggestion(uiLang, queryString);
result.set("label", escapeForJSON(label)); result.set("label", escapeForJSON(label, DONT_ESCAPE_QUOTE));
result.set("value", escapeForJSON(queryString + " ")); result.set("value", escapeForJSON(queryString + " ", DONT_ESCAPE_QUOTE));
result.set("kind", "pattern"); result.set("kind", "pattern");
result.set("first", m_data.is_empty_list()); result.set("first", m_data.is_empty_list());
m_data.push_back(result); m_data.push_back(result);

View File

@ -53,7 +53,7 @@ private:
const icu::Locale locale; const icu::Locale locale;
}; };
std::string escapeForJSON(const std::string& s); std::string escapeForJSON(const std::string& s, bool escapeQuote = true);
/* urlEncode() is the equivalent of JS encodeURIComponent(), with the only /* urlEncode() is the equivalent of JS encodeURIComponent(), with the only
* difference that the slash (/) symbol is NOT encoded. */ * difference that the slash (/) symbol is NOT encoded. */

View File

@ -110,10 +110,10 @@ TEST(Suggestions, specialCharHandling)
CHECK_SUGGESTIONS(s.getJSON(), CHECK_SUGGESTIONS(s.getJSON(),
R"EXPECTEDJSON([ R"EXPECTEDJSON([
{ {
"value" : "Title with \u0009\u0010\u0013\\&lt;&gt;&amp;&apos;&quot;~!@#$%^*()_+`-=[]{}|:;,.?", "value" : "Title with \t\n\r\\&lt;&gt;&amp;&apos;&quot;~!@#$%^*()_+`-=[]{}|:;,.?",
"label" : "Snippet with \u0009\u0010\u0013\\&lt;&gt;&amp;&apos;&quot;~!@#$%^*()_+`-=[]{}|:;,.?", "label" : "Snippet with \t\n\r\\&lt;&gt;&amp;&apos;&quot;~!@#$%^*()_+`-=[]{}|:;,.?",
"kind" : "path" "kind" : "path"
, "path" : "Path with \u0009\u0010\u0013\\&lt;&gt;&amp;&apos;&quot;~!@#$%^*()_+`-=[]{}|:;,.?" , "path" : "Path with \t\n\r\\&lt;&gt;&amp;&apos;&quot;~!@#$%^*()_+`-=[]{}|:;,.?"
} }
] ]
)EXPECTEDJSON" )EXPECTEDJSON"
@ -128,10 +128,10 @@ R"EXPECTEDJSON([
CHECK_SUGGESTIONS(s.getJSON(), CHECK_SUGGESTIONS(s.getJSON(),
R"EXPECTEDJSON([ R"EXPECTEDJSON([
{ {
"value" : "Snippetless title with \u0009\u0010\u0013\\&lt;&gt;&amp;&apos;&quot;~!@#$%^*()_+`-=[]{}|:;,.?", "value" : "Snippetless title with \t\n\r\\&lt;&gt;&amp;&apos;&quot;~!@#$%^*()_+`-=[]{}|:;,.?",
"label" : "Snippetless title with \u0009\u0010\u0013\\&lt;&gt;&amp;&apos;&quot;~!@#$%^*()_+`-=[]{}|:;,.?", "label" : "Snippetless title with \t\n\r\\&lt;&gt;&amp;&apos;&quot;~!@#$%^*()_+`-=[]{}|:;,.?",
"kind" : "path" "kind" : "path"
, "path" : "Path with \u0009\u0010\u0013\\&lt;&gt;&amp;&apos;&quot;~!@#$%^*()_+`-=[]{}|:;,.?" , "path" : "Path with \t\n\r\\&lt;&gt;&amp;&apos;&quot;~!@#$%^*()_+`-=[]{}|:;,.?"
} }
] ]
)EXPECTEDJSON" )EXPECTEDJSON"
@ -145,8 +145,8 @@ R"EXPECTEDJSON([
CHECK_SUGGESTIONS(s.getJSON(), CHECK_SUGGESTIONS(s.getJSON(),
R"EXPECTEDJSON([ R"EXPECTEDJSON([
{ {
"value" : "text with \u0009\u0010\u0013\\&lt;&gt;&amp;&apos;&quot;~!@#$%^*()_+`-=[]{}|:;,.? ", "value" : "text with \t\n\r\\&lt;&gt;&amp;&apos;&quot;~!@#$%^*()_+`-=[]{}|:;,.? ",
"label" : "containing &apos;text with \u0009\u0010\u0013\\&lt;&gt;&amp;&apos;&quot;~!@#$%^*()_+`-=[]{}|:;,.?&apos;...", "label" : "containing &apos;text with \t\n\r\\&lt;&gt;&amp;&apos;&quot;~!@#$%^*()_+`-=[]{}|:;,.?&apos;...",
"kind" : "pattern" "kind" : "pattern"
//EOLWHITESPACEMARKER //EOLWHITESPACEMARKER
} }