Merge pull request #536 from kiwix/internally_drop_reader_searcher

This commit is contained in:
Matthieu Gautier 2021-07-06 16:18:10 +02:00 committed by GitHub
commit 1c0b4502cd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
20 changed files with 510 additions and 152 deletions

View File

@ -7,6 +7,7 @@ files=(
"include/common/otherTools.h"
"include/common/regexTools.h"
"include/common/networkTools.h"
"include/common/archiveTools.h"
"include/manager.h"
"include/reader.h"
"include/kiwix.h"
@ -22,6 +23,7 @@ files=(
"src/common/pathTools.cpp"
"src/common/regexTools.cpp"
"src/common/otherTools.cpp"
"src/common/archiveTools.cpp"
"src/common/networkTools.cpp"
"src/common/stringTools.cpp"
"src/xapianSearcher.cpp"

View File

@ -24,6 +24,7 @@
#include <vector>
#include <map>
#include <memory>
#include <zim/archive.h>
#include "book.h"
#include "bookmark.h"
@ -146,6 +147,7 @@ class Library
{
std::map<std::string, kiwix::Book> m_books;
std::map<std::string, std::shared_ptr<Reader>> m_readers;
std::map<std::string, std::shared_ptr<zim::Archive>> m_archives;
std::vector<kiwix::Bookmark> m_bookmarks;
class BookDB;
std::unique_ptr<BookDB> m_bookDB;
@ -198,6 +200,7 @@ class Library
const Book& getBookByPath(const std::string& path) const;
Book& getBookByPath(const std::string& path);
std::shared_ptr<Reader> getReaderById(const std::string& id);
std::shared_ptr<zim::Archive> getArchiveById(const std::string& id);
/**
* Remove a book from the library.

View File

@ -25,6 +25,7 @@ install_headers(
'tools/pathTools.h',
'tools/regexTools.h',
'tools/stringTools.h',
'tools/archiveTools.h',
subdir:'kiwix/tools'
)

View File

@ -41,11 +41,11 @@ namespace kiwix
* The SuggestionItem is a helper class that contains the info about a single
* suggestion item.
*/
class SuggestionItem
{
// Functions
private:
// Temporarily making the constructor public until the code move is complete
public:
// Create a sugggestion item.
explicit SuggestionItem(std::string title, std::string normalizedTitle,
std::string path, std::string snippet = "") :
@ -91,6 +91,13 @@ class Reader
* (.zim extesion).
*/
explicit Reader(const string zimFilePath);
/**
* Create a Reader to read a zim file given by the Archive.
*
* @param archive The shared pointer to the Archive object.
*/
explicit Reader(const std::shared_ptr<zim::Archive> archive);
#ifndef _WIN32
explicit Reader(int fd);
Reader(int fd, zim::offset_type offset, zim::size_type size);
@ -488,7 +495,7 @@ class Reader
zim::Archive* getZimArchive() const;
protected:
std::unique_ptr<zim::Archive> zimArchive;
std::shared_ptr<zim::Archive> zimArchive;
std::string zimFilePath;
SuggestionsList_t suggestions;

View File

@ -21,6 +21,7 @@
#define KIWIX_SEARCH_RENDERER_H
#include <string>
#include <zim/search.h>
namespace kiwix
{
@ -40,6 +41,8 @@ class SearchRenderer
* Used to generate pagination links.
*/
SearchRenderer(Searcher* searcher, NameMapper* mapper);
SearchRenderer(zim::SearchResultSet srs, NameMapper* mapper,
unsigned int start, unsigned int estimatedResultCount);
~SearchRenderer();
@ -74,7 +77,7 @@ class SearchRenderer
protected:
std::string beautifyInteger(const unsigned int number);
Searcher* mp_searcher;
zim::SearchResultSet m_srs;
NameMapper* mp_nameMapper;
std::string searchContent;
std::string searchPattern;

View File

@ -32,6 +32,8 @@
#include "tools/pathTools.h"
#include "tools/stringTools.h"
#include <zim/search.h>
using namespace std;
namespace kiwix
@ -142,6 +144,11 @@ class Searcher
*/
unsigned int getEstimatedResultCount();
/**
* Get a SearchResultSet object for current search
*/
zim::SearchResultSet getSearchResultSet();
unsigned int getResultStart() { return resultStart; }
unsigned int getResultEnd() { return resultEnd; }

View File

@ -0,0 +1,47 @@
/*
* Copyright 2021 Maneesh P M <manu.pm55@gmail.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#ifndef KIWIX_ARCHIVETOOLS_H
#define KIWIX_ARCHIVETOOLS_H
#include <zim/archive.h>
/**
* This file contains all the functions that would make handling data related to
* an archive easier.
**/
namespace kiwix
{
std::string getMetadata(const zim::Archive& archive, const std::string& name);
std::string getArchiveTitle(const zim::Archive& archive);
std::string getMetaDescription(const zim::Archive& archive);
std::string getMetaTags(const zim::Archive& archive, bool original = false);
bool getArchiveFavicon(const zim::Archive& archive,
std::string& content, std::string& mimeType);
std::string getMetaLanguage(const zim::Archive& archive);
std::string getMetaName(const zim::Archive& archive);
std::string getMetaDate(const zim::Archive& archive);
std::string getMetaCreator(const zim::Archive& archive);
std::string getMetaPublisher(const zim::Archive& archive);
zim::Item getFinalItem(const zim::Archive& archive, const zim::Entry& entry);
zim::Entry getEntryFromPath(const zim::Archive& archive, const std::string& path);
}
#endif

View File

@ -70,5 +70,7 @@ T extractFromString(const std::string& str) {
}
bool startsWith(const std::string& base, const std::string& start);
std::vector<std::string> getTitleVariants(const std::string& title);
} //namespace kiwix
#endif

View File

@ -108,6 +108,7 @@ bool Library::removeBookById(const std::string& id)
{
m_bookDB->delete_document("Q" + id);
m_readers.erase(id);
m_archives.erase(id);
return m_books.erase(id) == 1;
}
@ -146,11 +147,35 @@ std::shared_ptr<Reader> Library::getReaderById(const std::string& id)
return m_readers.at(id);
} catch (std::out_of_range& e) {}
try {
auto reader = make_shared<Reader>(m_archives.at(id));
m_readers[id] = reader;
return reader;
} catch (std::out_of_range& e) {}
auto book = getBookById(id);
if (!book.isPathValid())
return nullptr;
auto sptr = make_shared<Reader>(book.getPath());
m_readers[id] = sptr;
auto archive = make_shared<zim::Archive>(book.getPath());
m_archives[id] = archive;
auto reader = make_shared<Reader>(archive);
m_readers[id] = reader;
return reader;
}
std::shared_ptr<zim::Archive> Library::getArchiveById(const std::string& id)
{
try {
return m_archives.at(id);
} catch (std::out_of_range& e) {}
auto book = getBookById(id);
if (!book.isPathValid())
return nullptr;
auto sptr = make_shared<zim::Archive>(book.getPath());
m_archives[id] = sptr;
return sptr;
}

View File

@ -19,6 +19,7 @@ kiwix_sources = [
'tools/stringTools.cpp',
'tools/networkTools.cpp',
'tools/otherTools.cpp',
'tools/archiveTools.cpp',
'kiwixserve.cpp',
'name_mapper.cpp',
'server/byte_range.cpp',

View File

@ -25,6 +25,7 @@
#include <zim/error.h>
#include "tools/otherTools.h"
#include "tools/archiveTools.h"
inline char hi(char v)
{
@ -86,6 +87,11 @@ Reader::Reader(const string zimFilePath)
srand(time(nullptr));
}
Reader::Reader(const std::shared_ptr<zim::Archive> archive)
: zimArchive(archive),
zimFilePath(archive->getFilename())
{}
#ifndef _WIN32
Reader::Reader(int fd)
: zimArchive(new zim::Archive(fd)),
@ -183,14 +189,7 @@ Entry Reader::getMainPage() const
bool Reader::getFavicon(string& content, string& mimeType) const
{
try {
auto item = zimArchive->getIllustrationItem();
content = item.getData();
mimeType = item.getMimetype();
return true;
} catch(zim::EntryNotFound& e) {};
return false;
return kiwix::getArchiveFavicon(*zimArchive, content, mimeType);
}
string Reader::getZimFilePath() const
@ -212,47 +211,32 @@ bool Reader::getMetadata(const string& name, string& value) const
string Reader::getName() const
{
METADATA("Name")
return kiwix::getMetaName(*zimArchive);
}
string Reader::getTitle() const
{
string value = zimArchive->getMetadata("Title");
if (value.empty()) {
value = getLastPathElement(zimFilePath);
std::replace(value.begin(), value.end(), '_', ' ');
size_t pos = value.find(".zim");
value = value.substr(0, pos);
}
return value;
return kiwix::getArchiveTitle(*zimArchive);
}
string Reader::getCreator() const
{
METADATA("Creator")
return kiwix::getMetaCreator(*zimArchive);
}
string Reader::getPublisher() const
{
METADATA("Publisher")
return kiwix::getMetaPublisher(*zimArchive);
}
string Reader::getDate() const
{
METADATA("Date")
return kiwix::getMetaDate(*zimArchive);
}
string Reader::getDescription() const
{
string value;
this->getMetadata("Description", value);
/* Mediawiki Collection tends to use the "Subtitle" name */
if (value.empty()) {
this->getMetadata("Subtitle", value);
}
return value;
return kiwix::getMetaDescription(*zimArchive);
}
string Reader::getLongDescription() const
@ -262,7 +246,7 @@ string Reader::getLongDescription() const
string Reader::getLanguage() const
{
METADATA("Language")
return kiwix::getMetaLanguage(*zimArchive);
}
string Reader::getLicense() const
@ -272,13 +256,7 @@ string Reader::getLicense() const
string Reader::getTags(bool original) const
{
string tags_str;
getMetadata("Tags", tags_str);
if (original) {
return tags_str;
}
auto tags = convertTags(tags_str);
return join(tags, ";");
return kiwix::getMetaTags(*zimArchive, original);
}
@ -342,12 +320,8 @@ string Reader::getOrigId() const
Entry Reader::getEntryFromPath(const std::string& path) const
{
if (path.empty() || path == "/") {
return getMainPage();
}
try {
return zimArchive->getEntryByPath(path);
return kiwix::getEntryFromPath(*zimArchive, path);
} catch (zim::EntryNotFound& e) {
throw NoEntry();
}
@ -460,12 +434,7 @@ bool Reader::searchSuggestions(const string& prefix,
std::vector<std::string> Reader::getTitleVariants(
const std::string& title) const
{
std::vector<std::string> variants;
variants.push_back(title);
variants.push_back(kiwix::ucFirst(title));
variants.push_back(kiwix::lcFirst(title));
variants.push_back(kiwix::toTitle(title));
return variants;
return kiwix::getTitleVariants(title);
}

View File

@ -37,10 +37,22 @@ namespace kiwix
/* Constructor */
SearchRenderer::SearchRenderer(Searcher* searcher, NameMapper* mapper)
: mp_searcher(searcher),
: m_srs(searcher->getSearchResultSet()),
mp_nameMapper(mapper),
protocolPrefix("zim://"),
searchProtocolPrefix("search://?")
searchProtocolPrefix("search://?"),
estimatedResultCount(searcher->getEstimatedResultCount()),
resultStart(searcher->getResultStart())
{}
SearchRenderer::SearchRenderer(zim::SearchResultSet srs, NameMapper* mapper,
unsigned int start, unsigned int estimatedResultCount)
: m_srs(srs),
mp_nameMapper(mapper),
protocolPrefix("zim://"),
searchProtocolPrefix("search://?"),
estimatedResultCount(estimatedResultCount),
resultStart(start)
{}
/* Destructor */
@ -70,29 +82,26 @@ std::string SearchRenderer::getHtml()
{
kainjow::mustache::data results{kainjow::mustache::data::type::list};
mp_searcher->restart_search();
Result* p_result = NULL;
while ((p_result = mp_searcher->getNextResult())) {
for (auto it = m_srs.begin(); it != m_srs.end(); it++) {
kainjow::mustache::data result;
result.set("title", p_result->get_title());
result.set("url", p_result->get_url());
result.set("snippet", p_result->get_snippet());
result.set("resultContentId", mp_nameMapper->getNameForId(p_result->get_zimId()));
result.set("title", it.getTitle());
result.set("url", it.getPath());
result.set("snippet", it.getSnippet());
std::ostringstream s;
s << it.getZimId();
result.set("resultContentId", mp_nameMapper->getNameForId(s.str()));
if (p_result->get_wordCount() >= 0) {
result.set("wordCount", kiwix::beautifyInteger(p_result->get_wordCount()));
if (it.getWordCount() >= 0) {
result.set("wordCount", kiwix::beautifyInteger(it.getWordCount()));
}
results.push_back(result);
delete p_result;
}
// pages
kainjow::mustache::data pages{kainjow::mustache::data::type::list};
auto resultStart = mp_searcher->getResultStart();
auto resultEnd = 0U;
auto estimatedResultCount = mp_searcher->getEstimatedResultCount();
auto currentPage = 0U;
auto pageStart = 0U;
auto pageEnd = 0U;

View File

@ -228,6 +228,11 @@ unsigned int Searcher::getEstimatedResultCount()
return this->estimatedResultCount;
}
zim::SearchResultSet Searcher::getSearchResultSet()
{
return *(this->internal);
}
_Result::_Result(zim::SearchResultSet::iterator iterator)
: iterator(iterator)
{

View File

@ -47,6 +47,7 @@ extern "C" {
#include "tools/pathTools.h"
#include "tools/regexTools.h"
#include "tools/stringTools.h"
#include "tools/archiveTools.h"
#include "library.h"
#include "name_mapper.h"
#include "entry.h"
@ -55,6 +56,10 @@ extern "C" {
#include "opds_dumper.h"
#include <zim/uuid.h>
#include <zim/error.h>
#include <zim/search.h>
#include <zim/entry.h>
#include <zim/item.h>
#include <mustache.hpp>
@ -323,22 +328,63 @@ std::unique_ptr<Response> InternalServer::build_homepage(const RequestContext& r
return ContentResponse::build(*this, RESOURCE::templates::index_html, get_default_data(), "text/html; charset=utf-8", true);
}
/**
* Archive and Zim handlers begin
**/
// TODO: retrieve searcher from caching mechanism
SuggestionsList_t getSuggestions(const zim::Archive* const archive,
const std::string& queryString, int suggestionCount)
{
SuggestionsList_t suggestions;
if (archive->hasTitleIndex()) {
auto searcher = zim::Searcher(*archive);
zim::Query suggestionQuery;
suggestionQuery.setQuery(queryString, true);
auto suggestionSearch = searcher.search(suggestionQuery);
auto suggestionResult = suggestionSearch.getResults(0, suggestionCount);
for (auto it = suggestionResult.begin(); it != suggestionResult.end(); it++) {
SuggestionItem suggestion(it.getTitle(), kiwix::normalize(it.getTitle()),
it.getPath(), it.getSnippet());
suggestions.push_back(suggestion);
}
} else {
// TODO: This case should be handled by libzim
std::vector<std::string> variants = getTitleVariants(queryString);
int currCount = 0;
for (auto it = variants.begin(); it != variants.end() && currCount < suggestionCount; it++) {
for (auto& entry: archive->findByTitle(*it)) {
SuggestionItem suggestion(entry.getTitle(), kiwix::normalize(entry.getTitle()),
entry.getPath());
suggestions.push_back(suggestion);
currCount++;
}
}
}
return suggestions;
}
/**
* Archive and Zim handlers end
**/
std::unique_ptr<Response> InternalServer::handle_meta(const RequestContext& request)
{
std::string bookName;
std::string bookId;
std::string meta_name;
std::shared_ptr<Reader> reader;
std::shared_ptr<zim::Archive> archive;
try {
bookName = request.get_argument("content");
bookId = mp_nameMapper->getIdForName(bookName);
meta_name = request.get_argument("name");
reader = mp_library->getReaderById(bookId);
archive = mp_library->getArchiveById(bookId);
} catch (const std::out_of_range& e) {
return Response::build_404(*this, request, bookName, "");
}
if (reader == nullptr) {
if (archive == nullptr) {
return Response::build_404(*this, request, bookName, "");
}
@ -346,23 +392,23 @@ std::unique_ptr<Response> InternalServer::handle_meta(const RequestContext& requ
std::string mimeType = "text";
if (meta_name == "title") {
content = reader->getTitle();
content = getArchiveTitle(*archive);
} else if (meta_name == "description") {
content = reader->getDescription();
content = getMetaDescription(*archive);
} else if (meta_name == "language") {
content = reader->getLanguage();
content = getMetaLanguage(*archive);
} else if (meta_name == "name") {
content = reader->getName();
content = getMetaName(*archive);
} else if (meta_name == "tags") {
content = reader->getTags();
content = getMetaTags(*archive);
} else if (meta_name == "date") {
content = reader->getDate();
content = getMetaDate(*archive);
} else if (meta_name == "creator") {
content = reader->getCreator();
content = getMetaCreator(*archive);
} else if (meta_name == "publisher") {
content = reader->getPublisher();
content = getMetaPublisher(*archive);
} else if (meta_name == "favicon") {
reader->getFavicon(content, mimeType);
getArchiveFavicon(*archive, content, mimeType);
} else {
return Response::build_404(*this, request, bookName, "");
}
@ -385,28 +431,31 @@ std::unique_ptr<Response> InternalServer::handle_suggest(const RequestContext& r
std::string bookName;
std::string bookId;
std::string term;
std::shared_ptr<Reader> reader;
std::string queryString;
std::shared_ptr<zim::Archive> archive;
try {
bookName = request.get_argument("content");
bookId = mp_nameMapper->getIdForName(bookName);
term = request.get_argument("term");
reader = mp_library->getReaderById(bookId);
queryString = request.get_argument("term");
archive = mp_library->getArchiveById(bookId);
} catch (const std::out_of_range&) {
return Response::build_404(*this, request, bookName, "");
}
if (archive == nullptr) {
return Response::build_404(*this, request, bookName, "");
}
if (m_verbose.load()) {
printf("Searching suggestions for: \"%s\"\n", term.c_str());
printf("Searching suggestions for: \"%s\"\n", queryString.c_str());
}
MustacheData results{MustacheData::type::list};
bool first = true;
if (reader != nullptr) {
/* Get the suggestions */
SuggestionsList_t suggestions;
reader->searchSuggestionsSmart(term, maxSuggestionCount, suggestions);
SuggestionsList_t suggestions = getSuggestions(archive.get(), queryString, maxSuggestionCount);
for(auto& suggestion:suggestions) {
MustacheData result;
result.set("label", suggestion.getTitle());
@ -423,13 +472,13 @@ std::unique_ptr<Response> InternalServer::handle_suggest(const RequestContext& r
results.push_back(result);
suggestionCount++;
}
}
/* Propose the fulltext search if possible */
if (reader->hasFulltextIndex()) {
if (archive->hasFulltextIndex()) {
MustacheData result;
result.set("label", "containing '" + term + "'...");
result.set("value", term + " ");
result.set("label", "containing '" + queryString + "'...");
result.set("value", queryString + " ");
result.set("kind", "pattern");
result.set("first", first);
results.push_back(result);
@ -492,30 +541,34 @@ std::unique_ptr<Response> InternalServer::handle_search(const RequestContext& re
} catch(const std::out_of_range&) {}
catch(const std::invalid_argument&) {}
std::shared_ptr<Reader> reader(nullptr);
std::shared_ptr<zim::Archive> archive;
try {
reader = mp_library->getReaderById(bookId);
archive = mp_library->getArchiveById(bookId);
} catch (const std::out_of_range&) {}
/* Make the search */
if ( (!reader && !bookName.empty())
if ( (!archive && !bookName.empty())
|| (patternString.empty() && ! has_geo_query) ) {
auto data = get_default_data();
data.set("pattern", encodeDiples(patternString));
auto response = ContentResponse::build(*this, RESOURCE::templates::no_search_result_html, data, "text/html; charset=utf-8");
response->set_taskbar(bookName, reader ? reader->getTitle() : "");
response->set_taskbar(bookName, archive ? getArchiveTitle(*archive) : "");
response->set_code(MHD_HTTP_NOT_FOUND);
return std::move(response);
}
Searcher searcher;
if (reader) {
searcher.add_reader(reader.get());
std::shared_ptr<zim::Searcher> searcher;
if (archive) {
searcher = std::make_shared<zim::Searcher>(*archive);
} else {
for (auto& bookId: mp_library->filter(kiwix::Filter().local(true).valid(true))) {
auto currentReader = mp_library->getReaderById(bookId);
if (currentReader) {
searcher.add_reader(currentReader.get());
auto currentArchive = mp_library->getArchiveById(bookId);
if (currentArchive) {
if (! searcher) {
searcher = std::make_shared<zim::Searcher>(*currentArchive);
} else {
searcher->add_archive(*currentArchive);
}
}
}
}
@ -540,21 +593,37 @@ std::unique_ptr<Response> InternalServer::handle_search(const RequestContext& re
/* Get the results */
try {
zim::Query query;
if (patternString.empty()) {
searcher.geo_search(latitude, longitude, distance,
start, end, m_verbose.load());
} else {
searcher.search(patternString,
start, end, m_verbose.load());
// Execute geo-search
if (m_verbose.load()) {
cout << "Performing geo query `" << distance << "&(" << latitude << ";" << longitude << ")'" << endl;
}
SearchRenderer renderer(&searcher, mp_nameMapper);
query.setVerbose(m_verbose.load());
query.setQuery("", false);
query.setGeorange(latitude, longitude, distance);
} else {
// Execute Ft search
if (m_verbose.load()) {
cout << "Performing query `" << patternString << "'" << endl;
}
std::string queryString = removeAccents(patternString);
query.setQuery(queryString, false);
query.setVerbose(m_verbose.load());
}
zim::Search search = searcher->search(query);
SearchRenderer renderer(search.getResults(start, end), mp_nameMapper, start,
search.getEstimatedMatches());
renderer.setSearchPattern(patternString);
renderer.setSearchContent(bookName);
renderer.setProtocolPrefix(m_root + "/");
renderer.setSearchProtocolPrefix(m_root + "/search?");
renderer.setPageLength(pageLength);
auto response = ContentResponse::build(*this, renderer.getHtml(), "text/html; charset=utf-8");
response->set_taskbar(bookName, reader ? reader->getTitle() : "");
response->set_taskbar(bookName, archive ? getArchiveTitle(*archive) : "");
return std::move(response);
} catch (const std::exception& e) {
@ -571,23 +640,23 @@ std::unique_ptr<Response> InternalServer::handle_random(const RequestContext& re
std::string bookName;
std::string bookId;
std::shared_ptr<Reader> reader;
std::shared_ptr<zim::Archive> archive;
try {
bookName = request.get_argument("content");
bookId = mp_nameMapper->getIdForName(bookName);
reader = mp_library->getReaderById(bookId);
archive = mp_library->getArchiveById(bookId);
} catch (const std::out_of_range&) {
return Response::build_404(*this, request, bookName, "");
}
if (reader == nullptr) {
if (archive == nullptr) {
return Response::build_404(*this, request, bookName, "");
}
try {
auto entry = reader->getRandomPage();
return build_redirect(bookName, entry.getFinalEntry());
} catch(kiwix::NoEntry& e) {
auto entry = archive->getRandomEntry();
return build_redirect(bookName, getFinalItem(*archive, entry));
} catch(zim::EntryNotFound& e) {
return Response::build_404(*this, request, bookName, "");
}
}
@ -734,22 +803,10 @@ std::string searchSuggestionHTML(const std::string& searchURL, const std::string
} // unnamed namespace
std::shared_ptr<Reader>
InternalServer::get_reader(const std::string& bookName) const
{
std::shared_ptr<Reader> reader;
try {
const std::string bookId = mp_nameMapper->getIdForName(bookName);
reader = mp_library->getReaderById(bookId);
} catch (const std::out_of_range& e) {
}
return reader;
}
std::unique_ptr<Response>
InternalServer::build_redirect(const std::string& bookName, const kiwix::Entry& entry) const
InternalServer::build_redirect(const std::string& bookName, const zim::Item& item) const
{
auto redirectUrl = m_root + "/" + bookName + "/" + kiwix::urlEncode(entry.getPath());
auto redirectUrl = m_root + "/" + bookName + "/" + kiwix::urlEncode(item.getPath());
return Response::build_redirect(*this, redirectUrl);
}
@ -765,8 +822,13 @@ std::unique_ptr<Response> InternalServer::handle_content(const RequestContext& r
if (bookName.empty())
return build_homepage(request);
const std::shared_ptr<Reader> reader = get_reader(bookName);
if (reader == nullptr) {
std::shared_ptr<zim::Archive> archive;
try {
const std::string bookId = mp_nameMapper->getIdForName(bookName);
archive = mp_library->getArchiveById(bookId);
} catch (const std::out_of_range& e) {}
if (archive == nullptr) {
std::string searchURL = m_root+"/search?pattern="+pattern; // Make a full search on the entire library.
const std::string details = searchSuggestionHTML(searchURL, kiwix::urlDecode(pattern));
@ -779,31 +841,31 @@ std::unique_ptr<Response> InternalServer::handle_content(const RequestContext& r
}
try {
auto entry = reader->getEntryFromPath(urlStr);
auto entry = getEntryFromPath(*archive, urlStr);
if (entry.isRedirect() || urlStr.empty()) {
// If urlStr is empty, we want to mainPage.
// We must do a redirection to the real page.
return build_redirect(bookName, entry.getFinalEntry());
return build_redirect(bookName, getFinalItem(*archive, entry));
}
auto response = ItemResponse::build(*this, request, entry.getZimEntry().getItem());
auto response = ItemResponse::build(*this, request, entry.getItem());
try {
dynamic_cast<ContentResponse&>(*response).set_taskbar(bookName, reader->getTitle());
dynamic_cast<ContentResponse&>(*response).set_taskbar(bookName, getArchiveTitle(*archive));
} catch (std::bad_cast& e) {}
if (m_verbose.load()) {
printf("Found %s\n", entry.getPath().c_str());
printf("mimeType: %s\n", entry.getMimetype().c_str());
printf("mimeType: %s\n", entry.getItem(true).getMimetype().c_str());
}
return response;
} catch(kiwix::NoEntry& e) {
} catch(zim::EntryNotFound& e) {
if (m_verbose.load())
printf("Failed to find %s\n", urlStr.c_str());
std::string searchURL = m_root+"/search?content="+bookName+"&pattern="+pattern; // Make a search on this specific book only.
const std::string details = searchSuggestionHTML(searchURL, kiwix::urlDecode(pattern));
return Response::build_404(*this, request, bookName, reader->getTitle(), details);
return Response::build_404(*this, request, bookName, getArchiveTitle(*archive), details);
}
}

View File

@ -69,7 +69,7 @@ class InternalServer {
private: // functions
std::unique_ptr<Response> handle_request(const RequestContext& request);
std::unique_ptr<Response> build_redirect(const std::string& bookName, const kiwix::Entry& entry) const;
std::unique_ptr<Response> build_redirect(const std::string& bookName, const zim::Item& item) const;
std::unique_ptr<Response> build_homepage(const RequestContext& request);
std::unique_ptr<Response> handle_skin(const RequestContext& request);
std::unique_ptr<Response> handle_catalog(const RequestContext& request);
@ -89,7 +89,6 @@ class InternalServer {
MustacheData get_default_data() const;
std::shared_ptr<Reader> get_reader(const std::string& bookName) const;
bool etag_not_needed(const RequestContext& r) const;
ETag get_matching_if_none_match_etag(const RequestContext& request) const;

119
src/tools/archiveTools.cpp Normal file
View File

@ -0,0 +1,119 @@
/*
* Copyright 2021 Maneesh P M <manu.pm55@gmail.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#include <tools/archiveTools.h>
#include <tools/pathTools.h>
#include <tools/otherTools.h>
#include <tools/stringTools.h>
#include <zim/error.h>
#include <zim/item.h>
namespace kiwix
{
std::string getMetadata(const zim::Archive& archive, const std::string& name) {
try {
return archive.getMetadata(name);
} catch (zim::EntryNotFound& e) {
return "";
}
}
std::string getArchiveTitle(const zim::Archive& archive) {
std::string value = getMetadata(archive, "Title");
if (value.empty()) {
value = getLastPathElement(archive.getFilename());
std::replace(value.begin(), value.end(), '_', ' ');
size_t pos = value.find(".zim");
value = value.substr(0, pos);
}
return value;
}
std::string getMetaDescription(const zim::Archive& archive) {
std::string value;
value = getMetadata(archive, "Description");
/* Mediawiki Collection tends to use the "Subtitle" name */
if (value.empty()) {
value = getMetadata(archive, "Subtitle");
}
return value;
}
std::string getMetaTags(const zim::Archive& archive, bool original) {
std::string tags_str = getMetadata(archive, "Tags");
if (original) {
return tags_str;
}
auto tags = convertTags(tags_str);
return join(tags, ";");
}
bool getArchiveFavicon(const zim::Archive& archive,
std::string& content, std::string& mimeType){
try {
auto item = archive.getIllustrationItem();
content = item.getData();
mimeType = item.getMimetype();
return true;
} catch(zim::EntryNotFound& e) {};
return false;
}
std::string getMetaLanguage(const zim::Archive& archive) {
return getMetadata(archive, "Language");
}
std::string getMetaName(const zim::Archive& archive) {
return getMetadata(archive, "Name");
}
std::string getMetaDate(const zim::Archive& archive) {
return getMetadata(archive, "Date");
}
std::string getMetaCreator(const zim::Archive& archive) {
return getMetadata(archive, "Creator");
}
std::string getMetaPublisher(const zim::Archive& archive) {
return getMetadata(archive, "Publisher");
}
zim::Item getFinalItem(const zim::Archive& archive, const zim::Entry& entry)
{
return entry.getItem(true);
}
zim::Entry getEntryFromPath(const zim::Archive& archive, const std::string& path)
{
try {
return archive.getEntryByPath(path);
} catch (zim::EntryNotFound& e) {
if (path.empty() || path == "/") {
return archive.getMainEntry();
}
}
throw zim::EntryNotFound("Cannot find entry for non empty path");
}
} // kiwix

View File

@ -395,3 +395,11 @@ bool kiwix::startsWith(const std::string& base, const std::string& start)
&& std::equal(start.begin(), start.end(), base.begin());
}
std::vector<std::string> kiwix::getTitleVariants(const std::string& title) {
std::vector<std::string> variants;
variants.push_back(title);
variants.push_back(kiwix::ucFirst(title));
variants.push_back(kiwix::lcFirst(title));
variants.push_back(kiwix::toTitle(title));
return variants;
}

View File

@ -9,6 +9,8 @@ tests = [
'book',
'manager',
'opds_catalog',
'reader',
'searcher'
]
if build_machine.system() != 'windows'

62
test/reader.cpp Normal file
View File

@ -0,0 +1,62 @@
#include "gtest/gtest.h"
#include "../include/reader.h"
#include "zim/archive.h"
namespace kiwix
{
/**
* This test file is written primarily to demonstrate how Reader is simply a
* wrapper over an archive. We will be dropping this wrapper soon.
**/
TEST (Reader, archiveWrapper) {
Reader reader("./test/zimfile.zim");
zim::Archive archive = *reader.getZimArchive();
std::ostringstream s;
s << archive.getUuid();
ASSERT_EQ(reader.getId(), s.str());
ASSERT_EQ(reader.getGlobalCount(), archive.getEntryCount());
ASSERT_EQ(reader.getMainPage().getTitle(), archive.getMainEntry().getTitle());
ASSERT_EQ(reader.hasFulltextIndex(), archive.hasFulltextIndex());
ASSERT_NO_THROW(reader.getRandomPage());
}
TEST (Reader, getFunctions) {
zim::Archive archive("./test/zimfile.zim");
Reader reader("./test/zimfile.zim");
auto archiveEntry = archive.getRandomEntry();
ASSERT_TRUE(reader.pathExists(archiveEntry.getPath()));
auto readerEntry = reader.getEntryFromPath(archiveEntry.getPath());
ASSERT_EQ(readerEntry.getTitle(), archiveEntry.getTitle());
ASSERT_FALSE(reader.pathExists("invalidEntryPath"));
ASSERT_THROW(reader.getEntryFromPath("invalidEntryPath"), NoEntry);
readerEntry = reader.getEntryFromTitle(archiveEntry.getTitle());
ASSERT_EQ(readerEntry.getTitle(), archiveEntry.getTitle());
}
TEST (Reader, suggestions) {
Reader reader("./test/zimfile.zim");
SuggestionsList_t suggestions;
reader.searchSuggestionsSmart("The Genius", 4, suggestions);
std::vector<std::string> suggestionResult, expectedResult;
std::string suggestionTitle;
for (auto it = suggestions.begin(); it != suggestions.end(); it++) {
suggestionResult.push_back(it->getTitle());
}
expectedResult = {
"The Genius After Hours",
"The Genius Hits the Road",
"The Genius Sings the Blues",
"The Genius of Ray Charles"
};
ASSERT_EQ(suggestionResult, expectedResult);
}
}

25
test/searcher.cpp Normal file
View File

@ -0,0 +1,25 @@
#include "gtest/gtest.h"
#include "../include/searcher.h"
#include "../include/reader.h"
namespace kiwix
{
TEST(Searcher, search) {
Reader reader("./test/example.zim");
Searcher searcher;
searcher.add_reader(&reader);
ASSERT_EQ(searcher.get_reader(0)->getTitle(), reader.getTitle());
searcher.search("wiki", 0, 2);
searcher.restart_search();
ASSERT_EQ(searcher.getEstimatedResultCount(), (unsigned int)2);
auto result = searcher.getNextResult();
ASSERT_EQ(result->get_title(), "FreedomBox for Communities/Offline Wikipedia - Wikibooks, open books for an open world");
result = searcher.getNextResult();
ASSERT_EQ(result->get_title(), "Wikibooks");
}
}