Merge pull request #780 from kiwix/deduping_searchResults_unittests

This commit is contained in:
Matthieu Gautier 2022-06-10 15:47:50 +02:00 committed by GitHub
commit 83a9e54399
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 328 additions and 1195 deletions

View File

@ -17,8 +17,7 @@ tests = [
if build_machine.system() != 'windows'
tests += [
'server',
'server_html_search',
'server_xml_search'
'server_search'
]
endif

View File

@ -137,15 +137,63 @@ std::string makeSearchResultsHtml(const std::string& pattern,
return html;
}
#define SEARCH_RESULT(LINK, TITLE, SNIPPET, BOOK_TITLE, WORDCOUNT) \
"\n <a href=\"" LINK "\">\n"\
" " TITLE "\n"\
" </a>\n"\
" <cite>" SNIPPET "</cite>\n"\
" <div class=\"book-title\">from " BOOK_TITLE "</div>\n"\
" <div class=\"informations\">" WORDCOUNT " words</div>\n"
std::string makeSearchResultsXml(const std::string& header,
const std::string& results)
{
const char SEARCHRESULTS_XML_TEMPLATE[] = R"XML(<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
xmlns:opensearch="http://a9.com/-/spec/opensearch/1.1/"
xmlns:atom="http://www.w3.org/2005/Atom">
<channel>
%HEADER%%RESULTS%
</channel>
</rss>
)XML";
const std::vector<std::string> LARGE_SEARCH_RESULTS = {
std::string html = removeEOLWhitespaceMarkers(SEARCHRESULTS_XML_TEMPLATE);
html = replace(html, "%HEADER%", header);
html = replace(html, "%RESULTS%", results);
return html;
}
// One expected search hit, stored as plain strings so that the same test
// data can be rendered both as an HTML fragment and as an XML fragment.
// The member order matters: instances are brace-initialised positionally.
struct SearchResult
{
  std::string link;       // target of the result's hyperlink
  std::string title;      // title of the matching entry
  std::string snippet;    // text excerpt shown for the match
  std::string bookTitle;  // title of the book containing the entry
  std::string wordCount;  // word count, already formatted as text

  // Renders this result as the HTML fragment expected inside a result item.
  std::string getHtml() const
  {
    std::string out;
    out += "\n <a href=\"" + link + "\">\n";
    out += " " + title + "\n";
    out += " </a>\n";
    out += " <cite>" + snippet + "</cite>\n";
    out += " <div class=\"book-title\">from " + bookTitle + "</div>\n";
    out += " <div class=\"informations\">" + wordCount + " words</div>\n";
    return out;
  }

  // Renders this result as the XML fragment expected inside an <item>.
  // Apostrophes in the link are written as &apos;.
  std::string getXml() const
  {
    const std::string escapedLink = replace(link, "'", "&apos;");

    std::string out;
    out += " <title>" + title + "</title>\n";
    out += " <link>" + escapedLink + "</link>\n";
    out += " <description>" + snippet + "</description>\n";
    out += " <book>\n";
    out += " <title>" + bookTitle + "</title>\n";
    out += " </book>\n";
    out += " <wordCount>" + wordCount + "</wordCount>";
    return out;
  }
};
// Convenience wrapper for writing test data: expands to a brace-initialised
// SearchResult whose members are filled positionally, in the order
// link, title, snippet, book title, word count.
#define SEARCH_RESULT(LINK, TITLE, SNIPPET, BOOK_TITLE, WORDCOUNT) \
SearchResult{LINK, TITLE, SNIPPET, BOOK_TITLE, WORDCOUNT}
const std::vector<SearchResult> LARGE_SEARCH_RESULTS = {
SEARCH_RESULT(
/*link*/ "/ROOT/zimfile/A/Genius_+_Soul_=_Jazz",
/*title*/ "Genius + Soul = Jazz",
@ -507,11 +555,11 @@ const std::vector<std::string> LARGE_SEARCH_RESULTS = {
//
// In order to be able to share the same expected output data
// LARGE_SEARCH_RESULTS between multiple build platforms and test-points
// of the TaskbarlessServerTest.searchResults test-case
// of the ServerTest.searchResults test-case
//
// 1. Snippets are excluded from the plain-text comparison of actual and
// expected HTML strings. This is done with the help of the
// function maskSnippetsInSearchResults()
// function maskSnippetsInHtmlSearchResults()
//
// 2. Snippets are checked separately. If a plain-text comparison fails
// then a weaker comparison is attempted. Currently it works by testing
@ -529,9 +577,48 @@ const std::vector<std::string> LARGE_SEARCH_RESULTS = {
// - Non-overlapping snippets can be joined with a " ... " in between.
//
std::string maskSnippetsInSearchResults(std::string s)
typedef std::vector<std::string> Snippets;
const char SNIPPET_REGEX_FOR_HTML[] = "<cite>(.+)</cite>";
std::string maskSnippetsInHtmlSearchResults(std::string s)
{
return replace(s, "<cite>.+</cite>", "<cite>SNIPPET TEXT WAS MASKED</cite>");
return replace(s, SNIPPET_REGEX_FOR_HTML, "<cite>SNIPPET TEXT WAS MASKED</cite>");
}
// Collects, in order of appearance, the text captured by the first group of
// SNIPPET_REGEX_FOR_HTML for every match found in the given HTML.
Snippets extractSearchResultSnippetsFromHtml(const std::string& html)
{
  const std::regex citeRegex(SNIPPET_REGEX_FOR_HTML);

  Snippets found;
  // A default-constructed sregex_iterator is the end-of-sequence marker.
  for ( std::sregex_iterator it(html.begin(), html.end(), citeRegex), last;
        it != last;
        ++it )
  {
    found.push_back((*it)[1].str());
  }
  return found;
}
// Matches a <description> element and captures its content, except for
// elements whose content starts with "Search result for" (negative lookahead).
const char SNIPPET_REGEX_FOR_XML[] = "<description>(?!Search result for)(.+)</description>";

// Returns s with every match of SNIPPET_REGEX_FOR_XML replaced by a
// <description> element holding a fixed placeholder text.
std::string maskSnippetsInXmlSearchResults(std::string s)
{
  const char maskedElement[] = "<description>SNIPPET TEXT WAS MASKED</description>";
  return replace(s, SNIPPET_REGEX_FOR_XML, maskedElement);
}
// Collects, in order of appearance, the text captured by the first group of
// SNIPPET_REGEX_FOR_XML for every match found in the given XML.
Snippets extractSearchResultSnippetsFromXml(const std::string& xml)
{
  const std::regex descriptionRegex(SNIPPET_REGEX_FOR_XML);
  const std::sregex_iterator noMoreMatches;

  Snippets found;
  std::sregex_iterator cursor(xml.begin(), xml.end(), descriptionRegex);
  while ( cursor != noMoreMatches )
  {
    found.push_back((*cursor)[1].str());
    ++cursor;
  }
  return found;
}
bool isValidSnippet(const std::string& s)
@ -583,8 +670,6 @@ bool isSubSnippet(std::string subSnippet, const std::string& superSnippet)
#define RAYCHARLESZIMID "6f1d19d0-633f-087b-fb55-7ac324ff9baf"
#define EXAMPLEZIMID "5dc0b3af-5df2-0925-f0ca-d2bf75e78af6"
TEST_F(TaskbarlessServerTest, searchResults)
{
struct TestData
{
struct PaginationEntry
@ -599,7 +684,7 @@ TEST_F(TaskbarlessServerTest, searchResults)
size_t resultsPerPage;
size_t totalResultCount;
size_t firstResultIndex;
std::vector<std::string> results;
std::vector<SearchResult> results;
std::vector<PaginationEntry> pagination;
static std::string makeUrl(const std::string& query, int start, size_t resultsPerPage)
@ -617,20 +702,38 @@ TEST_F(TaskbarlessServerTest, searchResults)
return url;
}
std::string getPattern() const
// Returns the value of the query parameter named `key` in `query`,
// i.e. the text between "key=" and the next '&' (or the end of the query).
// Returns an empty string if the parameter is absent.
//
// The parameter name is only accepted at the start of the query or right
// after a '&', so that a key which is merely the tail of another parameter
// name (e.g. "lang" vs "books.filter.lang") is not matched by mistake.
std::string extractQueryValue(const std::string& key) const
{
  const std::string p = key + "=";
  size_t i = 0;
  while ( (i = query.find(p, i)) != std::string::npos ) {
    if ( i == 0 || query[i-1] == '&' ) {
      const size_t valueStart = i + p.size();
      // If there is no further '&', find() yields npos and substr() clamps
      // the (huge) count to the end of the string.
      return query.substr(valueStart, query.find('&', valueStart) - valueStart);
    }
    ++i;
  }
  return "";
}
// Value of the "pattern" parameter of the query (empty if absent).
std::string getPattern() const
{
  const char key[] = "pattern";
  return extractQueryValue(key);
}
// Value of the "books.filter.lang" parameter of the query (empty if absent).
std::string getLang() const
{
  const char key[] = "books.filter.lang";
  return extractQueryValue(key);
}
// URL of the search request for this test point, built by makeUrl() from
// the query, the start index and the page size.
std::string url() const
{
  const std::string searchUrl = makeUrl(query, start, resultsPerPage);
  return searchUrl;
}
std::string expectedHeader() const
// Same request as url(), with "&format=xml" appended.
std::string xmlSearchUrl() const
{
  std::string searchUrl = url();
  searchUrl += "&format=xml";
  return searchUrl;
}
std::string expectedHtmlHeader() const
{
if ( totalResultCount == 0 ) {
return "\n No results were found for <b>\"" + getPattern() + "\"</b>";
@ -654,7 +757,7 @@ TEST_F(TaskbarlessServerTest, searchResults)
return header;
}
std::string expectedResultsString() const
std::string expectedHtmlResultsString() const
{
if ( results.empty() ) {
return "\n ";
@ -663,13 +766,13 @@ TEST_F(TaskbarlessServerTest, searchResults)
std::string s;
for ( const auto& r : results ) {
s += "\n <li>";
s += maskSnippetsInSearchResults(r);
s += maskSnippetsInHtmlSearchResults(r.getHtml());
s += " </li>";
}
return s;
}
std::string expectedFooter() const
std::string expectedHtmlFooter() const
{
if ( pagination.empty() ) {
return "\n ";
@ -697,9 +800,63 @@ TEST_F(TaskbarlessServerTest, searchResults)
{
return makeSearchResultsHtml(
getPattern(),
expectedHeader(),
expectedResultsString(),
expectedFooter()
expectedHtmlHeader(),
expectedHtmlResultsString(),
expectedHtmlFooter()
);
}
std::string expectedXmlHeader() const
{
std::string header = R"(<title>Search: PATTERN</title>
<link>URL</link>
<description>Search result for PATTERN</description>
<opensearch:totalResults>RESULTCOUNT</opensearch:totalResults>
<opensearch:startIndex>FIRSTRESULT</opensearch:startIndex>
<opensearch:itemsPerPage>ITEMCOUNT</opensearch:itemsPerPage>
<atom:link rel="search" type="application/opensearchdescription+xml" href="/ROOT/search/searchdescription.xml"/>
<opensearch:Query role="request"
searchTerms="PATTERN"LANGQUERY
startIndex="FIRSTRESULT"
count="ITEMCOUNT"
/>)";
const auto realResultsPerPage = resultsPerPage?resultsPerPage:25;
const auto url = makeUrl(query + "&format=xml", firstResultIndex, realResultsPerPage);
header = replace(header, "URL", replace(url, "&", "&amp;"));
header = replace(header, "FIRSTRESULT", to_string(firstResultIndex));
header = replace(header, "ITEMCOUNT", to_string(realResultsPerPage));
header = replace(header, "RESULTCOUNT", to_string(totalResultCount));
header = replace(header, "PATTERN", getPattern());
auto queryLang = getLang();
if (queryLang.empty()) {
header = replace(header, "LANGQUERY", "");
} else {
header = replace(header, "LANGQUERY", "\n language=\""+queryLang+"\"");
}
return header;
}
// Builds the results part of the expected XML response: one <item> element
// per expected result, with the snippet text masked. With no expected
// results a fixed whitespace-only string is returned.
std::string expectedXmlResultsString() const
{
  if ( results.empty() )
    return "\n ";

  std::string items;
  for ( size_t i = 0; i < results.size(); ++i ) {
    items.append("\n <item>\n")
         .append(maskSnippetsInXmlSearchResults(results[i].getXml()))
         .append("\n </item>");
  }
  return items;
}
// Full expected XML response for this test point: the expected header and
// the expected (snippet-masked) results placed into the XML template.
std::string expectedXml() const
{
  const std::string header = expectedXmlHeader();
  const std::string items = expectedXmlResultsString();
  return makeSearchResultsXml(header, items);
}
@ -708,28 +865,25 @@ TEST_F(TaskbarlessServerTest, searchResults)
return TestContext{ { "url", url() } };
}
void check(const std::string& html) const
TestContext xmlTestContext() const
{
EXPECT_EQ(maskSnippetsInSearchResults(html), expectedHtml())
return TestContext{ { "url", xmlSearchUrl() } };
}
void checkHtml(const std::string& html) const
{
EXPECT_EQ(maskSnippetsInHtmlSearchResults(html), expectedHtml())
<< testContext();
checkSnippets(extractSearchResultSnippets(html));
checkSnippets(extractSearchResultSnippetsFromHtml(html));
}
typedef std::vector<std::string> Snippets;
void checkXml(const std::string& xml) const
{
EXPECT_EQ(maskSnippetsInXmlSearchResults(xml), expectedXml())
<< xmlTestContext();
static Snippets extractSearchResultSnippets(const std::string& html)
{
Snippets snippets;
const std::regex snippetRegex("<cite>(.*)</cite>");
std::sregex_iterator snippetIt(html.begin(), html.end(), snippetRegex);
const std::sregex_iterator end;
for ( ; snippetIt != end; ++snippetIt)
{
const std::smatch snippetMatch = *snippetIt;
snippets.push_back(snippetMatch[1].str());
}
return snippets;
checkSnippets(extractSearchResultSnippetsFromXml(xml));
}
void checkSnippets(const Snippets& snippets) const
@ -738,14 +892,9 @@ TEST_F(TaskbarlessServerTest, searchResults)
for ( size_t i = 0; i < results.size(); ++i )
{
const auto& r = results[i];
const auto expectedSnippet = extractSearchResultSnippets(r);
ASSERT_EQ(1u, expectedSnippet.size())
<< "Multiple snippets in test data:"
<< "\n" << r;
if ( snippets[i] != expectedSnippet[0] ) {
if ( snippets[i] != r.snippet ) {
std::cout << "Trying a weaker check for a mismatching snippet...\n";
checkMismatchingSnippet(snippets[i], expectedSnippet[0]);
checkMismatchingSnippet(snippets[i], r.snippet);
}
}
}
@ -767,6 +916,8 @@ TEST_F(TaskbarlessServerTest, searchResults)
}
};
TEST_F(ServerTest, searchResults)
{
const TestData testData[] = {
{
/* query */ "pattern=velomanyunkan&books.id=" RAYCHARLESZIMID,
@ -1301,8 +1452,17 @@ TEST_F(TaskbarlessServerTest, searchResults)
};
for ( const auto& t : testData ) {
const auto r = zfs1_->GET(t.url().c_str());
EXPECT_EQ(r->status, 200);
t.check(r->body);
const std::string htmlSearchUrl = t.url();
const auto htmlRes = taskbarlessZimFileServer().GET(htmlSearchUrl.c_str());
EXPECT_EQ(htmlRes->status, 200);
t.checkHtml(htmlRes->body);
const std::string xmlSearchUrl = t.xmlSearchUrl();
const auto xmlRes1 = zfs1_->GET(xmlSearchUrl.c_str());
const auto xmlRes2 = taskbarlessZimFileServer().GET(xmlSearchUrl.c_str());
EXPECT_EQ(xmlRes1->status, 200);
EXPECT_EQ(xmlRes2->status, 200);
EXPECT_EQ(xmlRes1->body, xmlRes2->body);
t.checkXml(xmlRes1->body);
}
}

View File

@ -134,6 +134,9 @@ ZimFileServer::~ZimFileServer()
class ServerTest : public ::testing::Test
{
private:
std::unique_ptr<ZimFileServer> taskbarlessZfs_;
protected:
std::unique_ptr<ZimFileServer> zfs1_;
@ -149,16 +152,16 @@ protected:
zfs1_.reset(new ZimFileServer(SERVER_PORT, /*withTaskbar=*/true, ZIMFILES));
}
// Returns the server instance running without the taskbar. It is created
// on first use, on the port following SERVER_PORT, and then reused until
// taskbarlessZfs_ is reset.
ZimFileServer& taskbarlessZimFileServer()
{
  if ( taskbarlessZfs_ == nullptr ) {
    taskbarlessZfs_ = std::unique_ptr<ZimFileServer>(
        new ZimFileServer(SERVER_PORT+1, /*withTaskbar=*/false, ZIMFILES));
  }
  return *taskbarlessZfs_;
}
// Releases both server instances after each test: first the main one, then
// the taskbarless one (a no-op if it was never created).
void TearDown() override
{
  zfs1_ = nullptr;
  taskbarlessZfs_ = nullptr;
}
};
// Fixture derived from ServerTest whose SetUp() starts the server held in
// zfs1_ with the taskbar disabled.
class TaskbarlessServerTest : public ServerTest
{
protected:
// Replaces the base-class setup: zfs1_ is created on SERVER_PORT with
// withTaskbar=false.
void SetUp() override {
zfs1_.reset(new ZimFileServer(SERVER_PORT, /*withTaskbar=*/false, ZIMFILES));
}
};

File diff suppressed because it is too large Load Diff