Merge pull request #843 from kiwix/backslash_handling_in_suggestions

Backslash handling in suggestions
This commit is contained in:
Kelson 2022-11-17 11:48:37 +01:00 committed by GitHub
commit 3568ccd511
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 275 additions and 41 deletions

View File

@ -12,14 +12,14 @@ jobs:
steps:
- name: Checkout code
uses: actions/checkout@v2
- name: Setup python 3.10
uses: actions/setup-python@v2
- name: Setup python 3.9
uses: actions/setup-python@v1
with:
python-version: '3.10'
python-version: '3.9'
- name: Install packages
run: |
brew update
brew install gcovr pkg-config ninja
brew install gcovr pkg-config ninja || brew link --overwrite python
- name: Install python modules
run: pip3 install meson==0.49.2 pytest
- name: Install deps

View File

@ -138,15 +138,6 @@ std::string renderUrl(const std::string& root, const std::string& urlTemplate)
return url;
}
std::string makeFulltextSearchSuggestion(const std::string& lang, const std::string& queryString)
{
return i18n::expandParameterizedString(lang, "suggest-full-text-search",
{
{"SEARCH_TERMS", queryString}
}
);
}
ParameterizedMessage noSuchBookErrorMsg(const std::string& bookName)
{
return ParameterizedMessage("no-such-book", { {"BOOK_NAME", bookName} });
@ -700,9 +691,7 @@ std::unique_ptr<Response> InternalServer::handle_suggest(const RequestContext& r
printf("Searching suggestions for: \"%s\"\n", queryString.c_str());
}
MustacheData results{MustacheData::type::list};
bool first = true;
Suggestions results;
/* Get the suggestions */
auto searcher = suggestionSearcherCache.getOrPut(bookId,
@ -713,38 +702,16 @@ std::unique_ptr<Response> InternalServer::handle_suggest(const RequestContext& r
auto srs = search.getResults(start, count);
for(auto& suggestion: srs) {
MustacheData result;
result.set("label", suggestion.getTitle());
if (suggestion.hasSnippet()) {
result.set("label", suggestion.getSnippet());
}
result.set("value", suggestion.getTitle());
result.set("kind", "path");
result.set("path", suggestion.getPath());
result.set("first", first);
first = false;
results.push_back(result);
results.add(suggestion);
}
/* Propose the fulltext search if possible */
if (archive->hasFulltextIndex()) {
MustacheData result;
const auto lang = request.get_user_language();
result.set("label", makeFulltextSearchSuggestion(lang, queryString));
result.set("value", queryString + " ");
result.set("kind", "pattern");
result.set("first", first);
results.push_back(result);
results.addFTSearchSuggestion(request.get_user_language(), queryString);
}
auto data = get_default_data();
data.set("suggestions", results);
auto response = ContentResponse::build(*this, RESOURCE::templates::suggestion_json, data, "application/json; charset=utf-8");
return std::move(response);
return ContentResponse::build(*this, results.getJSON(), "application/json; charset=utf-8");
}
std::unique_ptr<Response> InternalServer::handle_viewer_settings(const RequestContext& request)

View File

@ -32,12 +32,15 @@
#endif
#include "tools/stringTools.h"
#include "server/i18n.h"
#include "libkiwix-resources.h"
#include <map>
#include <sstream>
#include <pugixml.hpp>
#include <zim/uuid.h>
#include <zim/suggestion_iterator.h>
static std::map<std::string, std::string> codeisomapping {
@ -326,3 +329,72 @@ std::string kiwix::render_template(const std::string& template_str, kainjow::mus
tmpl.render(data, [&ss](const std::string& str) { ss << str; });
return ss.str();
}
namespace
{
std::string escapeBackslashes(const std::string& s)
{
std::string es;
es.reserve(s.size());
for (char c : s) {
if ( c == '\\' ) {
es.push_back('\\');
}
es.push_back(c);
}
return es;
}
std::string makeFulltextSearchSuggestion(const std::string& lang,
const std::string& queryString)
{
return kiwix::i18n::expandParameterizedString(lang, "suggest-full-text-search",
{
{"SEARCH_TERMS", queryString}
}
);
}
} // unnamed namespace
kiwix::Suggestions::Suggestions()
: m_data(kainjow::mustache::data::type::list)
{
}
void kiwix::Suggestions::add(const zim::SuggestionItem& suggestion)
{
kainjow::mustache::data result;
const std::string label = suggestion.hasSnippet()
? suggestion.getSnippet()
: suggestion.getTitle();
result.set("label", escapeBackslashes(label));
result.set("value", escapeBackslashes(suggestion.getTitle()));
result.set("kind", "path");
result.set("path", escapeBackslashes(suggestion.getPath()));
result.set("first", m_data.is_empty_list());
m_data.push_back(result);
}
void kiwix::Suggestions::addFTSearchSuggestion(const std::string& uiLang,
const std::string& queryString)
{
kainjow::mustache::data result;
const std::string label = makeFulltextSearchSuggestion(uiLang, queryString);
result.set("label", escapeBackslashes(label));
result.set("value", escapeBackslashes(queryString + " "));
result.set("kind", "pattern");
result.set("first", m_data.is_empty_list());
m_data.push_back(result);
}
std::string kiwix::Suggestions::getJSON() const
{
kainjow::mustache::data data;
data.set("suggestions", m_data);
return render_template(RESOURCE::templates::suggestion_json, data);
}

View File

@ -33,6 +33,10 @@ namespace pugi {
class xml_node;
}
namespace zim {
class SuggestionItem;
}
namespace kiwix
{
std::string nodeToString(const pugi::xml_node& node);
@ -67,6 +71,22 @@ namespace kiwix
return defaultValue;
}
class Suggestions
{
public:
Suggestions();
void add(const zim::SuggestionItem& suggestion);
void addFTSearchSuggestion(const std::string& uiLang,
const std::string& query);
std::string getJSON() const;
private:
kainjow::mustache::data m_data;
};
}
#endif

View File

@ -4,6 +4,7 @@ tests = [
'tagParsing',
'stringTools',
'pathTools',
'otherTools',
'kiwixserve',
'book',
'manager',

174
test/otherTools.cpp Normal file
View File

@ -0,0 +1,174 @@
/*
* Copyright (C) 2022 Veloman Yunkan
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
* NON-INFRINGEMENT. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
#include "gtest/gtest.h"
#include "../src/tools/otherTools.h"
#include "zim/suggestion_iterator.h"
#include <regex>
namespace
{
// Output generated via mustache templates sometimes contains end-of-line
// whitespace. This complicates representing the expected output of a unit-test
// as C++ raw strings in editors that are configured to delete EOL whitespace.
// A workaround is to put special markers (//EOLWHITESPACEMARKER) at the end
// of such lines in the expected output string and remove them at runtime.
// This is exactly what this function is for.
std::string removeEOLWhitespaceMarkers(const std::string& s)
{
const std::regex pattern("//EOLWHITESPACEMARKER");
return std::regex_replace(s, pattern, "");
}
} // unnamed namespace
#define CHECK_SUGGESTIONS(actual, expected) \
EXPECT_EQ(actual, removeEOLWhitespaceMarkers(expected))
TEST(Suggestions, basicTest)
{
kiwix::Suggestions s;
CHECK_SUGGESTIONS(s.getJSON(),
R"EXPECTEDJSON([
//EOLWHITESPACEMARKER
]
)EXPECTEDJSON"
);
s.add(zim::SuggestionItem("Title", "/PATH", "Snippet"));
CHECK_SUGGESTIONS(s.getJSON(),
R"EXPECTEDJSON([
{
"value" : "Title",
"label" : "Snippet",
"kind" : "path"
, "path" : "/PATH"
}
]
)EXPECTEDJSON"
);
s.add(zim::SuggestionItem("Title Without Snippet", "/P/a/t/h"));
s.addFTSearchSuggestion("en", "kiwi");
CHECK_SUGGESTIONS(s.getJSON(),
R"EXPECTEDJSON([
{
"value" : "Title",
"label" : "Snippet",
"kind" : "path"
, "path" : "/PATH"
},
{
"value" : "Title Without Snippet",
"label" : "Title Without Snippet",
"kind" : "path"
, "path" : "/P/a/t/h"
},
{
"value" : "kiwi ",
"label" : "containing &apos;kiwi&apos;...",
"kind" : "pattern"
//EOLWHITESPACEMARKER
}
]
)EXPECTEDJSON"
);
}
TEST(Suggestions, specialCharHandling)
{
// HTML special symbols (<, >, &, ", and ') must be HTML-escaped
// Backslash symbols (\) must be duplicated.
const std::string SYMBOLS(R"(\<>&'"~!@#$%^*()_+`-=[]{}|:;,.?)");
{
kiwix::Suggestions s;
s.add(zim::SuggestionItem("Title with " + SYMBOLS,
"Path with " + SYMBOLS,
"Snippet with " + SYMBOLS));
CHECK_SUGGESTIONS(s.getJSON(),
R"EXPECTEDJSON([
{
"value" : "Title with \\&lt;&gt;&amp;&apos;&quot;~!@#$%^*()_+`-=[]{}|:;,.?",
"label" : "Snippet with \\&lt;&gt;&amp;&apos;&quot;~!@#$%^*()_+`-=[]{}|:;,.?",
"kind" : "path"
, "path" : "Path with \\&lt;&gt;&amp;&apos;&quot;~!@#$%^*()_+`-=[]{}|:;,.?"
}
]
)EXPECTEDJSON"
);
}
{
kiwix::Suggestions s;
s.add(zim::SuggestionItem("Snippetless title with " + SYMBOLS,
"Path with " + SYMBOLS));
CHECK_SUGGESTIONS(s.getJSON(),
R"EXPECTEDJSON([
{
"value" : "Snippetless title with \\&lt;&gt;&amp;&apos;&quot;~!@#$%^*()_+`-=[]{}|:;,.?",
"label" : "Snippetless title with \\&lt;&gt;&amp;&apos;&quot;~!@#$%^*()_+`-=[]{}|:;,.?",
"kind" : "path"
, "path" : "Path with \\&lt;&gt;&amp;&apos;&quot;~!@#$%^*()_+`-=[]{}|:;,.?"
}
]
)EXPECTEDJSON"
);
}
{
kiwix::Suggestions s;
s.addFTSearchSuggestion("eng", "text with " + SYMBOLS);
CHECK_SUGGESTIONS(s.getJSON(),
R"EXPECTEDJSON([
{
"value" : "text with \\&lt;&gt;&amp;&apos;&quot;~!@#$%^*()_+`-=[]{}|:;,.? ",
"label" : "containing &apos;text with \\&lt;&gt;&amp;&apos;&quot;~!@#$%^*()_+`-=[]{}|:;,.?&apos;...",
"kind" : "pattern"
//EOLWHITESPACEMARKER
}
]
)EXPECTEDJSON"
);
}
}
TEST(Suggestions, fulltextSearchSuggestionIsTranslated)
{
kiwix::Suggestions s;
s.addFTSearchSuggestion("it", "kiwi");
CHECK_SUGGESTIONS(s.getJSON(),
R"EXPECTEDJSON([
{
"value" : "kiwi ",
"label" : "contenente &apos;kiwi&apos;...",
"kind" : "pattern"
//EOLWHITESPACEMARKER
}
]
)EXPECTEDJSON"
);
}