mirror of https://github.com/kiwix/libkiwix.git
Merge pull request #724 from kiwix/search_improvement
This commit is contained in:
commit
95d4dd63ac
|
@ -109,6 +109,54 @@ unsigned int getCacheLength(const char* name, unsigned int defaultVal) {
|
|||
}
|
||||
} // unnamed namespace
|
||||
|
||||
SearchInfo::SearchInfo(const std::string& pattern)
|
||||
: pattern(pattern),
|
||||
geoQuery()
|
||||
{}
|
||||
|
||||
SearchInfo::SearchInfo(const std::string& pattern, GeoQuery geoQuery)
|
||||
: pattern(pattern),
|
||||
geoQuery(geoQuery)
|
||||
{}
|
||||
|
||||
SearchInfo::SearchInfo(const RequestContext& request)
|
||||
: pattern(request.get_optional_param<std::string>("pattern", "")),
|
||||
geoQuery(),
|
||||
bookName(request.get_optional_param<std::string>("content", ""))
|
||||
{
|
||||
/* Retrive geo search */
|
||||
try {
|
||||
auto latitude = request.get_argument<float>("latitude");
|
||||
auto longitude = request.get_argument<float>("longitude");
|
||||
auto distance = request.get_argument<float>("distance");
|
||||
geoQuery = GeoQuery(latitude, longitude, distance);
|
||||
} catch(const std::out_of_range&) {}
|
||||
catch(const std::invalid_argument&) {}
|
||||
|
||||
if (!geoQuery && pattern.empty()) {
|
||||
throw std::invalid_argument("No query provided.");
|
||||
}
|
||||
}
|
||||
|
||||
zim::Query SearchInfo::getZimQuery(bool verbose) const {
|
||||
zim::Query query;
|
||||
if (verbose) {
|
||||
std::cout << "Performing query '" << pattern<< "'";
|
||||
}
|
||||
query.setQuery(pattern);
|
||||
if (geoQuery) {
|
||||
if (verbose) {
|
||||
std::cout << " with geo query '" << geoQuery.distance << "&(" << geoQuery.latitude << ";" << geoQuery.longitude << ")'";
|
||||
}
|
||||
query.setGeorange(geoQuery.latitude, geoQuery.longitude, geoQuery.distance);
|
||||
}
|
||||
if (verbose) {
|
||||
std::cout << std::endl;
|
||||
}
|
||||
return query;
|
||||
}
|
||||
|
||||
|
||||
static IdNameMapper defaultNameMapper;
|
||||
|
||||
static MHD_Result staticHandlerCallback(void* cls,
|
||||
|
@ -499,43 +547,27 @@ std::unique_ptr<Response> InternalServer::handle_search(const RequestContext& re
|
|||
printf("** running handle_search\n");
|
||||
}
|
||||
|
||||
std::string patternString;
|
||||
try {
|
||||
patternString = request.get_argument("pattern");
|
||||
} catch (const std::out_of_range&) {}
|
||||
auto searchInfo = SearchInfo(request);
|
||||
|
||||
/* Retrive geo search */
|
||||
bool has_geo_query = false;
|
||||
float latitude = 0;
|
||||
float longitude = 0;
|
||||
float distance = 0;
|
||||
try {
|
||||
latitude = request.get_argument<float>("latitude");
|
||||
longitude = request.get_argument<float>("longitude");
|
||||
distance = request.get_argument<float>("distance");
|
||||
has_geo_query = true;
|
||||
} catch(const std::out_of_range&) {}
|
||||
catch(const std::invalid_argument&) {}
|
||||
|
||||
std::string bookName, bookId;
|
||||
std::string bookId;
|
||||
std::shared_ptr<zim::Archive> archive;
|
||||
if (!searchInfo.bookName.empty()) {
|
||||
try {
|
||||
bookName = request.get_argument("content");
|
||||
bookId = mp_nameMapper->getIdForName(bookName);
|
||||
bookId = mp_nameMapper->getIdForName(searchInfo.bookName);
|
||||
archive = mp_library->getArchiveById(bookId);
|
||||
} catch (const std::out_of_range&) {}
|
||||
|
||||
/* Make the search */
|
||||
if ( (!archive && !bookName.empty())
|
||||
|| (patternString.empty() && ! has_geo_query) ) {
|
||||
auto data = get_default_data();
|
||||
data.set("pattern", encodeDiples(patternString));
|
||||
data.set("root", m_root);
|
||||
auto response = ContentResponse::build(*this, RESOURCE::templates::no_search_result_html, data, "text/html; charset=utf-8");
|
||||
response->set_code(MHD_HTTP_NOT_FOUND);
|
||||
return withTaskbarInfo(bookName, archive.get(), std::move(response));
|
||||
} catch (const std::out_of_range&) {
|
||||
throw std::invalid_argument("The requested book doesn't exist.");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Make the search */
|
||||
// Try to get a search from the searchInfo, else build it
|
||||
std::shared_ptr<zim::Search> search;
|
||||
try {
|
||||
search = searchCache.getOrPut(searchInfo,
|
||||
[=](){
|
||||
std::shared_ptr<zim::Searcher> searcher;
|
||||
if (archive) {
|
||||
searcher = searcherCache.getOrPut(bookId, [=](){ return std::make_shared<zim::Searcher>(*archive);});
|
||||
|
@ -551,6 +583,19 @@ std::unique_ptr<Response> InternalServer::handle_search(const RequestContext& re
|
|||
}
|
||||
}
|
||||
}
|
||||
return make_shared<zim::Search>(searcher->search(searchInfo.getZimQuery(m_verbose.load())));
|
||||
}
|
||||
);
|
||||
} catch(std::runtime_error& e) {
|
||||
// Searcher->search will throw a runtime error if there is no valid xapian database to do the search.
|
||||
// (in case of zim file not containing a index)
|
||||
auto data = get_default_data();
|
||||
data.set("pattern", encodeDiples(searchInfo.pattern));
|
||||
auto response = ContentResponse::build(*this, RESOURCE::templates::no_search_result_html, data, "text/html; charset=utf-8");
|
||||
response->set_code(MHD_HTTP_NOT_FOUND);
|
||||
return withTaskbarInfo(searchInfo.bookName, archive.get(), std::move(response));
|
||||
}
|
||||
|
||||
|
||||
auto start = 0;
|
||||
try {
|
||||
|
@ -569,41 +614,19 @@ std::unique_ptr<Response> InternalServer::handle_search(const RequestContext& re
|
|||
}
|
||||
|
||||
/* Get the results */
|
||||
std::string queryString;
|
||||
try {
|
||||
zim::Query query;
|
||||
if (patternString.empty()) {
|
||||
// Execute geo-search
|
||||
if (m_verbose.load()) {
|
||||
cout << "Performing geo query `" << distance << "&(" << latitude << ";" << longitude << ")'" << endl;
|
||||
}
|
||||
|
||||
query.setQuery("");
|
||||
queryString = "GEO:" + to_string(latitude) + to_string(longitude) + to_string(distance);
|
||||
query.setGeorange(latitude, longitude, distance);
|
||||
} else {
|
||||
// Execute Ft search
|
||||
if (m_verbose.load()) {
|
||||
cout << "Performing query `" << patternString << "'" << endl;
|
||||
}
|
||||
|
||||
queryString = "FT:" + removeAccents(patternString);
|
||||
query.setQuery(queryString);
|
||||
}
|
||||
queryString = bookId + queryString;
|
||||
|
||||
std::shared_ptr<zim::Search> search;
|
||||
search = searchCache.getOrPut(queryString, [=](){ return make_shared<zim::Search>(searcher->search(query));});
|
||||
|
||||
SearchRenderer renderer(search->getResults(start, pageLength), mp_nameMapper, mp_library, start,
|
||||
search->getEstimatedMatches());
|
||||
renderer.setSearchPattern(patternString);
|
||||
renderer.setSearchContent(bookName);
|
||||
renderer.setSearchPattern(searchInfo.pattern);
|
||||
renderer.setSearchContent(searchInfo.bookName);
|
||||
renderer.setProtocolPrefix(m_root + "/");
|
||||
renderer.setSearchProtocolPrefix(m_root + "/search?");
|
||||
renderer.setPageLength(pageLength);
|
||||
auto response = ContentResponse::build(*this, renderer.getHtml(), "text/html; charset=utf-8");
|
||||
return withTaskbarInfo(bookName, archive.get(), std::move(response));
|
||||
return withTaskbarInfo(searchInfo.bookName, archive.get(), std::move(response));
|
||||
} catch (const std::invalid_argument& e) {
|
||||
return HTTP400HtmlResponse(*this, request)
|
||||
+ invalidUrlMsg
|
||||
+ std::string(e.what());
|
||||
} catch (const std::exception& e) {
|
||||
std::cerr << e.what() << std::endl;
|
||||
return Response::build_500(*this, e.what());
|
||||
|
|
|
@ -43,9 +43,53 @@ extern "C" {
|
|||
|
||||
namespace kiwix {
|
||||
|
||||
struct GeoQuery {
|
||||
GeoQuery()
|
||||
: GeoQuery(0, 0, -1)
|
||||
{}
|
||||
|
||||
GeoQuery(float latitude, float longitude, float distance)
|
||||
: latitude(latitude), longitude(longitude), distance(distance)
|
||||
{}
|
||||
float latitude;
|
||||
float longitude;
|
||||
float distance;
|
||||
|
||||
explicit operator bool() const {
|
||||
return distance >= 0;
|
||||
}
|
||||
|
||||
friend bool operator<(const GeoQuery& l, const GeoQuery& r)
|
||||
{
|
||||
return std::tie(l.latitude, l.longitude, l.distance)
|
||||
< std::tie(r.latitude, r.longitude, r.distance); // keep the same order
|
||||
}
|
||||
};
|
||||
|
||||
class SearchInfo {
|
||||
public:
|
||||
SearchInfo(const std::string& pattern);
|
||||
SearchInfo(const std::string& pattern, GeoQuery geoQuery);
|
||||
SearchInfo(const RequestContext& request);
|
||||
|
||||
zim::Query getZimQuery(bool verbose) const;
|
||||
|
||||
friend bool operator<(const SearchInfo& l, const SearchInfo& r)
|
||||
{
|
||||
return std::tie(l.bookName, l.pattern, l.geoQuery)
|
||||
< std::tie(r.bookName, r.pattern, r.geoQuery); // keep the same order
|
||||
}
|
||||
|
||||
public: //data
|
||||
std::string pattern;
|
||||
GeoQuery geoQuery;
|
||||
std::string bookName;
|
||||
};
|
||||
|
||||
|
||||
typedef kainjow::mustache::data MustacheData;
|
||||
typedef ConcurrentCache<string, std::shared_ptr<zim::Searcher>> SearcherCache;
|
||||
typedef ConcurrentCache<string, std::shared_ptr<zim::Search>> SearchCache;
|
||||
typedef ConcurrentCache<SearchInfo, std::shared_ptr<zim::Search>> SearchCache;
|
||||
typedef ConcurrentCache<string, std::shared_ptr<zim::SuggestionSearcher>> SuggestionSearcherCache;
|
||||
|
||||
class Entry;
|
||||
|
|
|
@ -110,6 +110,7 @@ std::unique_ptr<ContentResponse> Response::build_404(const InternalServer& serve
|
|||
}
|
||||
|
||||
extern const UrlNotFoundMsg urlNotFoundMsg;
|
||||
extern const InvalidUrlMsg invalidUrlMsg;
|
||||
|
||||
std::unique_ptr<ContentResponse> ContentResponseBlueprint::generateResponseObject() const
|
||||
{
|
||||
|
@ -145,6 +146,36 @@ HTTP404HtmlResponse& HTTP404HtmlResponse::operator+(const std::string& msg)
|
|||
return *this;
|
||||
}
|
||||
|
||||
HTTP400HtmlResponse::HTTP400HtmlResponse(const InternalServer& server,
|
||||
const RequestContext& request)
|
||||
: ContentResponseBlueprint(&server,
|
||||
&request,
|
||||
MHD_HTTP_BAD_REQUEST,
|
||||
"text/html",
|
||||
RESOURCE::templates::_400_html)
|
||||
{
|
||||
kainjow::mustache::list emptyList;
|
||||
this->m_data = kainjow::mustache::object{{"details", emptyList}};
|
||||
}
|
||||
|
||||
HTTP400HtmlResponse& HTTP400HtmlResponse::operator+(InvalidUrlMsg /*unused*/)
|
||||
{
|
||||
std::string requestUrl = m_request.get_full_url();
|
||||
const auto query = m_request.get_query();
|
||||
if (!query.empty()) {
|
||||
requestUrl += "?" + encodeDiples(query);
|
||||
}
|
||||
kainjow::mustache::mustache msgTmpl(R"(The requested URL "{{{url}}}" is not a valid request.)");
|
||||
return *this + msgTmpl.render({"url", requestUrl});
|
||||
}
|
||||
|
||||
HTTP400HtmlResponse& HTTP400HtmlResponse::operator+(const std::string& msg)
|
||||
{
|
||||
m_data["details"].push_back({"p", msg});
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
||||
ContentResponseBlueprint& ContentResponseBlueprint::operator+(const TaskbarInfo& taskbarInfo)
|
||||
{
|
||||
this->m_taskbarInfo.reset(new TaskbarInfo(taskbarInfo));
|
||||
|
|
|
@ -207,6 +207,20 @@ struct HTTP404HtmlResponse : ContentResponseBlueprint
|
|||
HTTP404HtmlResponse& operator+(const std::string& errorDetails);
|
||||
};
|
||||
|
||||
class InvalidUrlMsg {};
|
||||
|
||||
extern const InvalidUrlMsg invalidUrlMsg;
|
||||
|
||||
struct HTTP400HtmlResponse : ContentResponseBlueprint
|
||||
{
|
||||
HTTP400HtmlResponse(const InternalServer& server,
|
||||
const RequestContext& request);
|
||||
|
||||
using ContentResponseBlueprint::operator+;
|
||||
HTTP400HtmlResponse& operator+(InvalidUrlMsg /*unused*/);
|
||||
HTTP400HtmlResponse& operator+(const std::string& errorDetails);
|
||||
};
|
||||
|
||||
class ItemResponse : public Response {
|
||||
public:
|
||||
ItemResponse(bool verbose, const zim::Item& item, const std::string& mimetype, const ByteRange& byterange);
|
||||
|
|
|
@ -35,6 +35,7 @@ skin/block_external.js
|
|||
skin/search_results.css
|
||||
templates/search_result.html
|
||||
templates/no_search_result.html
|
||||
templates/400.html
|
||||
templates/404.html
|
||||
templates/500.html
|
||||
templates/index.html
|
||||
|
|
|
@ -0,0 +1,15 @@
|
|||
<!DOCTYPE html>
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head>
|
||||
<meta content="text/html;charset=UTF-8" http-equiv="content-type" />
|
||||
<title>Invalid request</title>
|
||||
</head>
|
||||
<body>
|
||||
<h1>Invalid request</h1>
|
||||
{{#details}}
|
||||
<p>
|
||||
{{{p}}}
|
||||
</p>
|
||||
{{/details}}
|
||||
</body>
|
||||
</html>
|
|
@ -107,11 +107,11 @@
|
|||
</b> of <b>
|
||||
{{count}}
|
||||
</b> for <b>
|
||||
{{searchPattern}}
|
||||
"{{{searchPattern}}}"
|
||||
</b>
|
||||
{{/hasResults}}
|
||||
{{^hasResults}}
|
||||
No results were found for <b>{{searchPattern}}</b>
|
||||
No results were found for <b>"{{{searchPattern}}}"</b>
|
||||
{{/hasResults}}
|
||||
</div>
|
||||
|
||||
|
|
125
test/server.cpp
125
test/server.cpp
|
@ -287,6 +287,22 @@ TEST_F(ServerTest, UncompressibleContentIsNotCompressed)
|
|||
}
|
||||
}
|
||||
|
||||
const char* urls400[] = {
|
||||
"/ROOT/search",
|
||||
"/ROOT/search?content=zimfile",
|
||||
"/ROOT/search?content=non-existing-book&pattern=asdfqwerty",
|
||||
"/ROOT/search?content=non-existing-book&pattern=asd<qwerty",
|
||||
"/ROOT/search?pattern"
|
||||
};
|
||||
|
||||
|
||||
TEST_F(ServerTest, 400)
|
||||
{
|
||||
for (const char* url: urls400 ) {
|
||||
EXPECT_EQ(400, zfs1_->GET(url)->status) << "url: " << url;
|
||||
}
|
||||
}
|
||||
|
||||
const char* urls404[] = {
|
||||
"/",
|
||||
"/zimfile",
|
||||
|
@ -302,8 +318,6 @@ const char* urls404[] = {
|
|||
"/ROOT/meta?content=non-existent-book&name=title",
|
||||
"/ROOT/random",
|
||||
"/ROOT/random?content=non-existent-book",
|
||||
"/ROOT/search",
|
||||
"/ROOT/search?content=non-existing-book&pattern=asdfqwerty",
|
||||
"/ROOT/suggest",
|
||||
"/ROOT/suggest?content=non-existent-book&term=abcd",
|
||||
"/ROOT/catch/external",
|
||||
|
@ -319,8 +333,9 @@ const char* urls404[] = {
|
|||
|
||||
TEST_F(ServerTest, 404)
|
||||
{
|
||||
for ( const char* url : urls404 )
|
||||
for ( const char* url : urls404 ) {
|
||||
EXPECT_EQ(404, zfs1_->GET(url)->status) << "url: " << url;
|
||||
}
|
||||
}
|
||||
|
||||
namespace TestingOfHtmlResponses
|
||||
|
@ -388,13 +403,14 @@ public:
|
|||
: ExpectedResponseData(erd)
|
||||
, url(url)
|
||||
{}
|
||||
virtual ~TestContentIn404HtmlResponse() = default;
|
||||
|
||||
const std::string url;
|
||||
|
||||
std::string expectedResponse() const;
|
||||
|
||||
private:
|
||||
std::string pageTitle() const;
|
||||
virtual std::string pageTitle() const;
|
||||
std::string pageCssLink() const;
|
||||
std::string hiddenBookNameInput() const;
|
||||
std::string searchPatternInput() const;
|
||||
|
@ -521,6 +537,25 @@ std::string TestContentIn404HtmlResponse::taskbarLinks() const
|
|||
+ R"("><button>🎲</button></a>)";
|
||||
}
|
||||
|
||||
class TestContentIn400HtmlResponse : public TestContentIn404HtmlResponse
|
||||
{
|
||||
public:
|
||||
TestContentIn400HtmlResponse(const std::string& url,
|
||||
const ExpectedResponseData& erd)
|
||||
: TestContentIn404HtmlResponse(url, erd)
|
||||
{}
|
||||
|
||||
private:
|
||||
std::string pageTitle() const;
|
||||
};
|
||||
|
||||
std::string TestContentIn400HtmlResponse::pageTitle() const {
|
||||
return expectedPageTitle.empty()
|
||||
? "Invalid request"
|
||||
: expectedPageTitle;
|
||||
}
|
||||
|
||||
|
||||
} // namespace TestingOfHtmlResponses
|
||||
|
||||
TEST_F(ServerTest, 404WithBodyTesting)
|
||||
|
@ -650,28 +685,70 @@ TEST_F(ServerTest, 404WithBodyTesting)
|
|||
Cannot find content entry invalid-article
|
||||
</p>
|
||||
)" },
|
||||
};
|
||||
|
||||
{ /* url */ "/ROOT/search?content=zimfile",
|
||||
expected_page_title=="Fulltext search unavailable" &&
|
||||
expected_css_url=="/ROOT/skin/search_results.css" &&
|
||||
book_name=="zimfile" &&
|
||||
book_title=="Ray Charles" &&
|
||||
expected_body==R"(
|
||||
<div class="header">Not found</div>
|
||||
for ( const auto& t : testData ) {
|
||||
const TestContext ctx{ {"url", t.url} };
|
||||
const auto r = zfs1_->GET(t.url.c_str());
|
||||
EXPECT_EQ(r->status, 404) << ctx;
|
||||
EXPECT_EQ(r->body, t.expectedResponse()) << ctx;
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(ServerTest, 400WithBodyTesting)
|
||||
{
|
||||
using namespace TestingOfHtmlResponses;
|
||||
const std::vector<TestContentIn400HtmlResponse> testData{
|
||||
{ /* url */ "/ROOT/search",
|
||||
expected_body== R"(
|
||||
<h1>Invalid request</h1>
|
||||
<p>
|
||||
There is no article with the title <b> ""</b>
|
||||
and the fulltext search engine is not available for this content.
|
||||
The requested URL "/ROOT/search" is not a valid request.
|
||||
</p>
|
||||
<p>
|
||||
No query provided.
|
||||
</p>
|
||||
)" },
|
||||
|
||||
{ /* url */ "/ROOT/search?content=non-existent-book&pattern=asdfqwerty",
|
||||
expected_page_title=="Fulltext search unavailable" &&
|
||||
expected_css_url=="/ROOT/skin/search_results.css" &&
|
||||
{ /* url */ "/ROOT/search?content=zimfile",
|
||||
expected_body==R"(
|
||||
<div class="header">Not found</div>
|
||||
<h1>Invalid request</h1>
|
||||
<p>
|
||||
There is no article with the title <b> "asdfqwerty"</b>
|
||||
and the fulltext search engine is not available for this content.
|
||||
The requested URL "/ROOT/search?content=zimfile" is not a valid request.
|
||||
</p>
|
||||
<p>
|
||||
No query provided.
|
||||
</p>
|
||||
)" },
|
||||
{ /* url */ "/ROOT/search?content=non-existing-book&pattern=asdfqwerty",
|
||||
expected_body==R"(
|
||||
<h1>Invalid request</h1>
|
||||
<p>
|
||||
The requested URL "/ROOT/search?content=non-existing-book&pattern=asdfqwerty" is not a valid request.
|
||||
</p>
|
||||
<p>
|
||||
The requested book doesn't exist.
|
||||
</p>
|
||||
)" },
|
||||
{ /* url */ "/ROOT/search?content=non-existing-book&pattern=a\"<script foo>",
|
||||
expected_body==R"(
|
||||
<h1>Invalid request</h1>
|
||||
<p>
|
||||
The requested URL "/ROOT/search?content=non-existing-book&pattern=a"<script foo>" is not a valid request.
|
||||
</p>
|
||||
<p>
|
||||
The requested book doesn't exist.
|
||||
</p>
|
||||
)" },
|
||||
// There is a flaw in our way to handle query string, we cannot differenciate
|
||||
// between `pattern` and `pattern=`
|
||||
{ /* url */ "/ROOT/search?pattern",
|
||||
expected_body==R"(
|
||||
<h1>Invalid request</h1>
|
||||
<p>
|
||||
The requested URL "/ROOT/search?pattern=" is not a valid request.
|
||||
</p>
|
||||
<p>
|
||||
No query provided.
|
||||
</p>
|
||||
)" },
|
||||
};
|
||||
|
@ -679,7 +756,7 @@ TEST_F(ServerTest, 404WithBodyTesting)
|
|||
for ( const auto& t : testData ) {
|
||||
const TestContext ctx{ {"url", t.url} };
|
||||
const auto r = zfs1_->GET(t.url.c_str());
|
||||
EXPECT_EQ(r->status, 404) << ctx;
|
||||
EXPECT_EQ(r->status, 400) << ctx;
|
||||
EXPECT_EQ(r->body, t.expectedResponse()) << ctx;
|
||||
}
|
||||
}
|
||||
|
@ -728,14 +805,16 @@ TEST_F(ServerTest, RawEntry)
|
|||
|
||||
TEST_F(ServerTest, HeadMethodIsSupported)
|
||||
{
|
||||
for ( const Resource& res : all200Resources() )
|
||||
for ( const Resource& res : all200Resources() ) {
|
||||
EXPECT_EQ(200, zfs1_->HEAD(res.url)->status) << res;
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(ServerTest, TheResponseToHeadRequestHasNoBody)
|
||||
{
|
||||
for ( const Resource& res : all200Resources() )
|
||||
for ( const Resource& res : all200Resources() ) {
|
||||
EXPECT_TRUE(zfs1_->HEAD(res.url)->body.empty()) << res;
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(ServerTest, HeadersAreTheSameInResponsesToHeadAndGetRequests)
|
||||
|
|
Loading…
Reference in New Issue