mirror of https://github.com/kiwix/libkiwix.git
Merge pull request #780 from kiwix/deduping_searchResults_unittests
This commit is contained in:
commit
83a9e54399
|
@ -17,8 +17,7 @@ tests = [
|
|||
if build_machine.system() != 'windows'
|
||||
tests += [
|
||||
'server',
|
||||
'server_html_search',
|
||||
'server_xml_search'
|
||||
'server_search'
|
||||
]
|
||||
endif
|
||||
|
||||
|
|
|
@ -137,15 +137,63 @@ std::string makeSearchResultsHtml(const std::string& pattern,
|
|||
return html;
|
||||
}
|
||||
|
||||
#define SEARCH_RESULT(LINK, TITLE, SNIPPET, BOOK_TITLE, WORDCOUNT) \
|
||||
"\n <a href=\"" LINK "\">\n"\
|
||||
" " TITLE "\n"\
|
||||
" </a>\n"\
|
||||
" <cite>" SNIPPET "</cite>\n"\
|
||||
" <div class=\"book-title\">from " BOOK_TITLE "</div>\n"\
|
||||
" <div class=\"informations\">" WORDCOUNT " words</div>\n"
|
||||
std::string makeSearchResultsXml(const std::string& header,
|
||||
const std::string& results)
|
||||
{
|
||||
const char SEARCHRESULTS_XML_TEMPLATE[] = R"XML(<?xml version="1.0" encoding="UTF-8"?>
|
||||
<rss version="2.0"
|
||||
xmlns:opensearch="http://a9.com/-/spec/opensearch/1.1/"
|
||||
xmlns:atom="http://www.w3.org/2005/Atom">
|
||||
<channel>
|
||||
%HEADER%%RESULTS%
|
||||
</channel>
|
||||
</rss>
|
||||
)XML";
|
||||
|
||||
const std::vector<std::string> LARGE_SEARCH_RESULTS = {
|
||||
std::string html = removeEOLWhitespaceMarkers(SEARCHRESULTS_XML_TEMPLATE);
|
||||
html = replace(html, "%HEADER%", header);
|
||||
html = replace(html, "%RESULTS%", results);
|
||||
return html;
|
||||
}
|
||||
|
||||
struct SearchResult
|
||||
{
|
||||
std::string link;
|
||||
std::string title;
|
||||
std::string snippet;
|
||||
std::string bookTitle;
|
||||
std::string wordCount;
|
||||
|
||||
std::string getHtml() const
|
||||
{
|
||||
return std::string()
|
||||
+ "\n <a href=\"" + link + "\">\n"
|
||||
+ " " + title + "\n"
|
||||
+ " </a>\n"
|
||||
+ " <cite>" + snippet + "</cite>\n"
|
||||
+ " <div class=\"book-title\">from " + bookTitle + "</div>\n"
|
||||
+ " <div class=\"informations\">" + wordCount + " words</div>\n";
|
||||
}
|
||||
|
||||
std::string getXml() const
|
||||
{
|
||||
return std::string()
|
||||
+ " <title>" + title + "</title>\n"
|
||||
+ " <link>" + replace(link, "'", "'") + "</link>\n"
|
||||
+ " <description>" + snippet + "</description>\n"
|
||||
+ " <book>\n"
|
||||
+ " <title>" + bookTitle + "</title>\n"
|
||||
+ " </book>\n"
|
||||
+ " <wordCount>" + wordCount + "</wordCount>";
|
||||
}
|
||||
};
|
||||
|
||||
#define SEARCH_RESULT(LINK, TITLE, SNIPPET, BOOK_TITLE, WORDCOUNT) \
|
||||
SearchResult{LINK, TITLE, SNIPPET, BOOK_TITLE, WORDCOUNT}
|
||||
|
||||
|
||||
|
||||
const std::vector<SearchResult> LARGE_SEARCH_RESULTS = {
|
||||
SEARCH_RESULT(
|
||||
/*link*/ "/ROOT/zimfile/A/Genius_+_Soul_=_Jazz",
|
||||
/*title*/ "Genius + Soul = Jazz",
|
||||
|
@ -507,11 +555,11 @@ const std::vector<std::string> LARGE_SEARCH_RESULTS = {
|
|||
//
|
||||
// In order to be able to share the same expected output data
|
||||
// LARGE_SEARCH_RESULTS between multiple build platforms and test-points
|
||||
// of the TaskbarlessServerTest.searchResults test-case
|
||||
// of the ServerTest.searchResults test-case
|
||||
//
|
||||
// 1. Snippets are excluded from the plain-text comparison of actual and
|
||||
// expected HTML strings. This is done with the help of the
|
||||
// function maskSnippetsInSearchResults()
|
||||
// function maskSnippetsInHtmlSearchResults()
|
||||
//
|
||||
// 2. Snippets are checked separately. If a plain-text comparison fails
|
||||
// then a weaker comparison is attempted. Currently it works by testing
|
||||
|
@ -529,9 +577,48 @@ const std::vector<std::string> LARGE_SEARCH_RESULTS = {
|
|||
// - Non-overlapping snippets can be joined with a " ... " in between.
|
||||
//
|
||||
|
||||
std::string maskSnippetsInSearchResults(std::string s)
|
||||
typedef std::vector<std::string> Snippets;
|
||||
|
||||
const char SNIPPET_REGEX_FOR_HTML[] = "<cite>(.+)</cite>";
|
||||
|
||||
std::string maskSnippetsInHtmlSearchResults(std::string s)
|
||||
{
|
||||
return replace(s, "<cite>.+</cite>", "<cite>SNIPPET TEXT WAS MASKED</cite>");
|
||||
return replace(s, SNIPPET_REGEX_FOR_HTML, "<cite>SNIPPET TEXT WAS MASKED</cite>");
|
||||
}
|
||||
|
||||
// Collects, in order of appearance, the first capture group of every match
// of SNIPPET_REGEX_FOR_HTML found in html.
Snippets extractSearchResultSnippetsFromHtml(const std::string& html)
{
  const std::regex citeRegex(SNIPPET_REGEX_FOR_HTML);
  const std::sregex_iterator noMoreMatches;

  Snippets found;
  std::sregex_iterator match(html.begin(), html.end(), citeRegex);
  while ( match != noMoreMatches ) {
    found.push_back((*match)[1].str());
    ++match;
  }
  return found;
}
|
||||
|
||||
const char SNIPPET_REGEX_FOR_XML[] = "<description>(?!Search result for)(.+)</description>";
|
||||
|
||||
std::string maskSnippetsInXmlSearchResults(std::string s)
|
||||
{
|
||||
return replace(s, SNIPPET_REGEX_FOR_XML, "<description>SNIPPET TEXT WAS MASKED</description>");
|
||||
}
|
||||
|
||||
// Collects, in order of appearance, the first capture group of every match
// of SNIPPET_REGEX_FOR_XML found in xml.
Snippets extractSearchResultSnippetsFromXml(const std::string& xml)
{
  const std::regex descriptionRegex(SNIPPET_REGEX_FOR_XML);
  const std::sregex_iterator noMoreMatches;

  Snippets found;
  std::sregex_iterator match(xml.begin(), xml.end(), descriptionRegex);
  while ( match != noMoreMatches ) {
    found.push_back((*match)[1].str());
    ++match;
  }
  return found;
}
|
||||
|
||||
bool isValidSnippet(const std::string& s)
|
||||
|
@ -583,8 +670,6 @@ bool isSubSnippet(std::string subSnippet, const std::string& superSnippet)
|
|||
#define RAYCHARLESZIMID "6f1d19d0-633f-087b-fb55-7ac324ff9baf"
|
||||
#define EXAMPLEZIMID "5dc0b3af-5df2-0925-f0ca-d2bf75e78af6"
|
||||
|
||||
TEST_F(TaskbarlessServerTest, searchResults)
|
||||
{
|
||||
struct TestData
|
||||
{
|
||||
struct PaginationEntry
|
||||
|
@ -599,7 +684,7 @@ TEST_F(TaskbarlessServerTest, searchResults)
|
|||
size_t resultsPerPage;
|
||||
size_t totalResultCount;
|
||||
size_t firstResultIndex;
|
||||
std::vector<std::string> results;
|
||||
std::vector<SearchResult> results;
|
||||
std::vector<PaginationEntry> pagination;
|
||||
|
||||
static std::string makeUrl(const std::string& query, int start, size_t resultsPerPage)
|
||||
|
@ -617,20 +702,38 @@ TEST_F(TaskbarlessServerTest, searchResults)
|
|||
return url;
|
||||
}
|
||||
|
||||
std::string getPattern() const
|
||||
std::string extractQueryValue(const std::string& key) const
|
||||
{
|
||||
const std::string p = "pattern=";
|
||||
const std::string p = key + "=";
|
||||
const size_t i = query.find(p);
|
||||
if (i == std::string::npos) {
|
||||
return "";
|
||||
}
|
||||
std::string r = query.substr(i + p.size());
|
||||
return r.substr(0, r.find("&"));
|
||||
}
|
||||
|
||||
std::string getPattern() const
|
||||
{
|
||||
return extractQueryValue("pattern");
|
||||
}
|
||||
|
||||
std::string getLang() const
|
||||
{
|
||||
return extractQueryValue("books.filter.lang");
|
||||
}
|
||||
|
||||
std::string url() const
|
||||
{
|
||||
return makeUrl(query, start, resultsPerPage);
|
||||
}
|
||||
|
||||
std::string expectedHeader() const
|
||||
std::string xmlSearchUrl() const
|
||||
{
|
||||
return url() + "&format=xml";
|
||||
}
|
||||
|
||||
std::string expectedHtmlHeader() const
|
||||
{
|
||||
if ( totalResultCount == 0 ) {
|
||||
return "\n No results were found for <b>\"" + getPattern() + "\"</b>";
|
||||
|
@ -654,7 +757,7 @@ TEST_F(TaskbarlessServerTest, searchResults)
|
|||
return header;
|
||||
}
|
||||
|
||||
std::string expectedResultsString() const
|
||||
std::string expectedHtmlResultsString() const
|
||||
{
|
||||
if ( results.empty() ) {
|
||||
return "\n ";
|
||||
|
@ -663,13 +766,13 @@ TEST_F(TaskbarlessServerTest, searchResults)
|
|||
std::string s;
|
||||
for ( const auto& r : results ) {
|
||||
s += "\n <li>";
|
||||
s += maskSnippetsInSearchResults(r);
|
||||
s += maskSnippetsInHtmlSearchResults(r.getHtml());
|
||||
s += " </li>";
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
std::string expectedFooter() const
|
||||
std::string expectedHtmlFooter() const
|
||||
{
|
||||
if ( pagination.empty() ) {
|
||||
return "\n ";
|
||||
|
@ -697,9 +800,63 @@ TEST_F(TaskbarlessServerTest, searchResults)
|
|||
{
|
||||
return makeSearchResultsHtml(
|
||||
getPattern(),
|
||||
expectedHeader(),
|
||||
expectedResultsString(),
|
||||
expectedFooter()
|
||||
expectedHtmlHeader(),
|
||||
expectedHtmlResultsString(),
|
||||
expectedHtmlFooter()
|
||||
);
|
||||
}
|
||||
|
||||
std::string expectedXmlHeader() const
|
||||
{
|
||||
std::string header = R"(<title>Search: PATTERN</title>
|
||||
<link>URL</link>
|
||||
<description>Search result for PATTERN</description>
|
||||
<opensearch:totalResults>RESULTCOUNT</opensearch:totalResults>
|
||||
<opensearch:startIndex>FIRSTRESULT</opensearch:startIndex>
|
||||
<opensearch:itemsPerPage>ITEMCOUNT</opensearch:itemsPerPage>
|
||||
<atom:link rel="search" type="application/opensearchdescription+xml" href="/ROOT/search/searchdescription.xml"/>
|
||||
<opensearch:Query role="request"
|
||||
searchTerms="PATTERN"LANGQUERY
|
||||
startIndex="FIRSTRESULT"
|
||||
count="ITEMCOUNT"
|
||||
/>)";
|
||||
|
||||
const auto realResultsPerPage = resultsPerPage?resultsPerPage:25;
|
||||
const auto url = makeUrl(query + "&format=xml", firstResultIndex, realResultsPerPage);
|
||||
header = replace(header, "URL", replace(url, "&", "&"));
|
||||
header = replace(header, "FIRSTRESULT", to_string(firstResultIndex));
|
||||
header = replace(header, "ITEMCOUNT", to_string(realResultsPerPage));
|
||||
header = replace(header, "RESULTCOUNT", to_string(totalResultCount));
|
||||
header = replace(header, "PATTERN", getPattern());
|
||||
auto queryLang = getLang();
|
||||
if (queryLang.empty()) {
|
||||
header = replace(header, "LANGQUERY", "");
|
||||
} else {
|
||||
header = replace(header, "LANGQUERY", "\n language=\""+queryLang+"\"");
|
||||
}
|
||||
return header;
|
||||
}
|
||||
|
||||
std::string expectedXmlResultsString() const
|
||||
{
|
||||
if ( results.empty() ) {
|
||||
return "\n ";
|
||||
}
|
||||
|
||||
std::string s;
|
||||
for ( const auto& r : results ) {
|
||||
s += "\n <item>\n";
|
||||
s += maskSnippetsInXmlSearchResults(r.getXml());
|
||||
s += "\n </item>";
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
std::string expectedXml() const
|
||||
{
|
||||
return makeSearchResultsXml(
|
||||
expectedXmlHeader(),
|
||||
expectedXmlResultsString()
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -708,28 +865,25 @@ TEST_F(TaskbarlessServerTest, searchResults)
|
|||
return TestContext{ { "url", url() } };
|
||||
}
|
||||
|
||||
void check(const std::string& html) const
|
||||
// Returns the context attached to XML check failures: the URL of the XML
// search request of this entry.
TestContext xmlTestContext() const
{
  return TestContext{ { "url", xmlSearchUrl() } };
}
|
||||
|
||||
void checkHtml(const std::string& html) const
|
||||
{
|
||||
EXPECT_EQ(maskSnippetsInHtmlSearchResults(html), expectedHtml())
|
||||
<< testContext();
|
||||
|
||||
checkSnippets(extractSearchResultSnippets(html));
|
||||
checkSnippets(extractSearchResultSnippetsFromHtml(html));
|
||||
}
|
||||
|
||||
typedef std::vector<std::string> Snippets;
|
||||
void checkXml(const std::string& xml) const
|
||||
{
|
||||
EXPECT_EQ(maskSnippetsInXmlSearchResults(xml), expectedXml())
|
||||
<< xmlTestContext();
|
||||
|
||||
static Snippets extractSearchResultSnippets(const std::string& html)
|
||||
{
|
||||
Snippets snippets;
|
||||
const std::regex snippetRegex("<cite>(.*)</cite>");
|
||||
std::sregex_iterator snippetIt(html.begin(), html.end(), snippetRegex);
|
||||
const std::sregex_iterator end;
|
||||
for ( ; snippetIt != end; ++snippetIt)
|
||||
{
|
||||
const std::smatch snippetMatch = *snippetIt;
|
||||
snippets.push_back(snippetMatch[1].str());
|
||||
}
|
||||
return snippets;
|
||||
checkSnippets(extractSearchResultSnippetsFromXml(xml));
|
||||
}
|
||||
|
||||
void checkSnippets(const Snippets& snippets) const
|
||||
|
@ -738,14 +892,9 @@ TEST_F(TaskbarlessServerTest, searchResults)
|
|||
for ( size_t i = 0; i < results.size(); ++i )
|
||||
{
|
||||
const auto& r = results[i];
|
||||
const auto expectedSnippet = extractSearchResultSnippets(r);
|
||||
ASSERT_EQ(1u, expectedSnippet.size())
|
||||
<< "Multiple snippets in test data:"
|
||||
<< "\n" << r;
|
||||
|
||||
if ( snippets[i] != expectedSnippet[0] ) {
|
||||
if ( snippets[i] != r.snippet ) {
|
||||
std::cout << "Trying a weaker check for a mismatching snippet...\n";
|
||||
checkMismatchingSnippet(snippets[i], expectedSnippet[0]);
|
||||
checkMismatchingSnippet(snippets[i], r.snippet);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -767,6 +916,8 @@ TEST_F(TaskbarlessServerTest, searchResults)
|
|||
}
|
||||
};
|
||||
|
||||
TEST_F(ServerTest, searchResults)
|
||||
{
|
||||
const TestData testData[] = {
|
||||
{
|
||||
/* query */ "pattern=velomanyunkan&books.id=" RAYCHARLESZIMID,
|
||||
|
@ -1301,8 +1452,17 @@ TEST_F(TaskbarlessServerTest, searchResults)
|
|||
};
|
||||
|
||||
for ( const auto& t : testData ) {
|
||||
const auto r = zfs1_->GET(t.url().c_str());
|
||||
EXPECT_EQ(r->status, 200);
|
||||
t.check(r->body);
|
||||
const std::string htmlSearchUrl = t.url();
|
||||
const auto htmlRes = taskbarlessZimFileServer().GET(htmlSearchUrl.c_str());
|
||||
EXPECT_EQ(htmlRes->status, 200);
|
||||
t.checkHtml(htmlRes->body);
|
||||
|
||||
const std::string xmlSearchUrl = t.xmlSearchUrl();
|
||||
const auto xmlRes1 = zfs1_->GET(xmlSearchUrl.c_str());
|
||||
const auto xmlRes2 = taskbarlessZimFileServer().GET(xmlSearchUrl.c_str());
|
||||
EXPECT_EQ(xmlRes1->status, 200);
|
||||
EXPECT_EQ(xmlRes2->status, 200);
|
||||
EXPECT_EQ(xmlRes1->body, xmlRes2->body);
|
||||
t.checkXml(xmlRes1->body);
|
||||
}
|
||||
}
|
|
@ -134,6 +134,9 @@ ZimFileServer::~ZimFileServer()
|
|||
|
||||
class ServerTest : public ::testing::Test
|
||||
{
|
||||
private:
|
||||
std::unique_ptr<ZimFileServer> taskbarlessZfs_;
|
||||
|
||||
protected:
|
||||
std::unique_ptr<ZimFileServer> zfs1_;
|
||||
|
||||
|
@ -149,16 +152,16 @@ protected:
|
|||
zfs1_.reset(new ZimFileServer(SERVER_PORT, /*withTaskbar=*/true, ZIMFILES));
|
||||
}
|
||||
|
||||
// Returns the server started without the taskbar, creating it on first use
// on port SERVER_PORT+1.
ZimFileServer& taskbarlessZimFileServer()
{
  if ( taskbarlessZfs_ == nullptr ) {
    taskbarlessZfs_.reset(
        new ZimFileServer(SERVER_PORT+1, /*withTaskbar=*/false, ZIMFILES));
  }
  return *taskbarlessZfs_;
}
|
||||
|
||||
// Destroys both servers (if any), the main one first.
void TearDown() override
{
  zfs1_ = nullptr;
  taskbarlessZfs_ = nullptr;
}
|
||||
};
|
||||
|
||||
class TaskbarlessServerTest : public ServerTest
|
||||
{
|
||||
protected:
|
||||
void SetUp() override {
|
||||
zfs1_.reset(new ZimFileServer(SERVER_PORT, /*withTaskbar=*/false, ZIMFILES));
|
||||
}
|
||||
};
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue