From 8b8a2eede7e9a813d540588ca79525ddb8650835 Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Sat, 6 Jan 2024 16:29:41 +0400 Subject: [PATCH] Slight enhancement of escapeForJSON() - More familiar escape sequences for tab, newline and carriage return symbols. - Quote symbol is escaped by default too, however that behaviour can be disabled for uses in HTML-related contexts where quotes should then be replaced with the character entity &quot; --- src/tools/otherTools.cpp | 27 ++++++++++++++++++++------- src/tools/stringTools.h | 2 +- test/otherTools.cpp | 16 ++++++++-------- 3 files changed, 29 insertions(+), 16 deletions(-) diff --git a/src/tools/otherTools.cpp b/src/tools/otherTools.cpp index 97d1567f7..0c6eb36ca 100644 --- a/src/tools/otherTools.cpp +++ b/src/tools/otherTools.cpp @@ -327,14 +327,27 @@ std::string kiwix::render_template(const std::string& template_str, kainjow::mus return ss.str(); } -std::string kiwix::escapeForJSON(const std::string& s) +// The escapeQuote parameter of escapeForJSON() defaults to true. +// This constant makes the calls to escapeForJSON() where the quote symbol +// should not be escaped (as it is later replaced with the HTML character entity +// &quot;) more readable. +static const bool DONT_ESCAPE_QUOTE = false; + +std::string kiwix::escapeForJSON(const std::string& s, bool escapeQuote) { std::ostringstream oss; for (char c : s) { if ( c == '\\' ) { oss << "\\\\"; } else if ( unsigned(c) < 0x20U ) { - oss << "\\u" << std::setw(4) << std::setfill('0') << unsigned(c); + switch ( c ) { + case '\n': oss << "\\n"; break; + case '\r': oss << "\\r"; break; + case '\t': oss << "\\t"; break; + default: oss << "\\u" << std::setw(4) << std::setfill('0') << unsigned(c); + } + } else if ( c == '"' && escapeQuote ) { + oss << "\\\""; } else { oss << c; } @@ -370,10 +383,10 @@ void kiwix::Suggestions::add(const zim::SuggestionItem& suggestion) ? 
suggestion.getSnippet() : suggestion.getTitle(); - result.set("label", escapeForJSON(label)); - result.set("value", escapeForJSON(suggestion.getTitle())); + result.set("label", escapeForJSON(label, DONT_ESCAPE_QUOTE)); + result.set("value", escapeForJSON(suggestion.getTitle(), DONT_ESCAPE_QUOTE)); result.set("kind", "path"); - result.set("path", escapeForJSON(suggestion.getPath())); + result.set("path", escapeForJSON(suggestion.getPath(), DONT_ESCAPE_QUOTE)); result.set("first", m_data.is_empty_list()); m_data.push_back(result); } @@ -383,8 +396,8 @@ void kiwix::Suggestions::addFTSearchSuggestion(const std::string& uiLang, { kainjow::mustache::data result; const std::string label = makeFulltextSearchSuggestion(uiLang, queryString); - result.set("label", escapeForJSON(label)); - result.set("value", escapeForJSON(queryString + " ")); + result.set("label", escapeForJSON(label, DONT_ESCAPE_QUOTE)); + result.set("value", escapeForJSON(queryString + " ", DONT_ESCAPE_QUOTE)); result.set("kind", "pattern"); result.set("first", m_data.is_empty_list()); m_data.push_back(result); diff --git a/src/tools/stringTools.h b/src/tools/stringTools.h index 890254283..97fa34738 100644 --- a/src/tools/stringTools.h +++ b/src/tools/stringTools.h @@ -53,7 +53,7 @@ private: const icu::Locale locale; }; -std::string escapeForJSON(const std::string& s); +std::string escapeForJSON(const std::string& s, bool escapeQuote = true); /* urlEncode() is the equivalent of JS encodeURIComponent(), with the only * difference that the slash (/) symbol is NOT encoded. 
*/ diff --git a/test/otherTools.cpp b/test/otherTools.cpp index d437e188d..3a3eb0477 100644 --- a/test/otherTools.cpp +++ b/test/otherTools.cpp @@ -110,10 +110,10 @@ TEST(Suggestions, specialCharHandling) CHECK_SUGGESTIONS(s.getJSON(), R"EXPECTEDJSON([ { - "value" : "Title with \u0009\u0010\u0013\\<>&'"~!@#$%^*()_+`-=[]{}|:;,.?", - "label" : "Snippet with \u0009\u0010\u0013\\<>&'"~!@#$%^*()_+`-=[]{}|:;,.?", + "value" : "Title with \t\n\r\\<>&'"~!@#$%^*()_+`-=[]{}|:;,.?", + "label" : "Snippet with \t\n\r\\<>&'"~!@#$%^*()_+`-=[]{}|:;,.?", "kind" : "path" - , "path" : "Path with \u0009\u0010\u0013\\<>&'"~!@#$%^*()_+`-=[]{}|:;,.?" + , "path" : "Path with \t\n\r\\<>&'"~!@#$%^*()_+`-=[]{}|:;,.?" } ] )EXPECTEDJSON" @@ -128,10 +128,10 @@ R"EXPECTEDJSON([ CHECK_SUGGESTIONS(s.getJSON(), R"EXPECTEDJSON([ { - "value" : "Snippetless title with \u0009\u0010\u0013\\<>&'"~!@#$%^*()_+`-=[]{}|:;,.?", - "label" : "Snippetless title with \u0009\u0010\u0013\\<>&'"~!@#$%^*()_+`-=[]{}|:;,.?", + "value" : "Snippetless title with \t\n\r\\<>&'"~!@#$%^*()_+`-=[]{}|:;,.?", + "label" : "Snippetless title with \t\n\r\\<>&'"~!@#$%^*()_+`-=[]{}|:;,.?", "kind" : "path" - , "path" : "Path with \u0009\u0010\u0013\\<>&'"~!@#$%^*()_+`-=[]{}|:;,.?" + , "path" : "Path with \t\n\r\\<>&'"~!@#$%^*()_+`-=[]{}|:;,.?" } ] )EXPECTEDJSON" @@ -145,8 +145,8 @@ R"EXPECTEDJSON([ CHECK_SUGGESTIONS(s.getJSON(), R"EXPECTEDJSON([ { - "value" : "text with \u0009\u0010\u0013\\<>&'"~!@#$%^*()_+`-=[]{}|:;,.? ", - "label" : "containing 'text with \u0009\u0010\u0013\\<>&'"~!@#$%^*()_+`-=[]{}|:;,.?'...", + "value" : "text with \t\n\r\\<>&'"~!@#$%^*()_+`-=[]{}|:;,.? ", + "label" : "containing 'text with \t\n\r\\<>&'"~!@#$%^*()_+`-=[]{}|:;,.?'...", "kind" : "pattern" //EOLWHITESPACEMARKER }