Merge pull request #536 from kiwix/internally_drop_reader_searcher

This commit is contained in:
Matthieu Gautier 2021-07-06 16:18:10 +02:00 committed by GitHub
commit 1c0b4502cd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
20 changed files with 510 additions and 152 deletions

View File

@ -7,6 +7,7 @@ files=(
"include/common/otherTools.h" "include/common/otherTools.h"
"include/common/regexTools.h" "include/common/regexTools.h"
"include/common/networkTools.h" "include/common/networkTools.h"
"include/common/archiveTools.h"
"include/manager.h" "include/manager.h"
"include/reader.h" "include/reader.h"
"include/kiwix.h" "include/kiwix.h"
@ -22,6 +23,7 @@ files=(
"src/common/pathTools.cpp" "src/common/pathTools.cpp"
"src/common/regexTools.cpp" "src/common/regexTools.cpp"
"src/common/otherTools.cpp" "src/common/otherTools.cpp"
"src/common/archiveTools.cpp"
"src/common/networkTools.cpp" "src/common/networkTools.cpp"
"src/common/stringTools.cpp" "src/common/stringTools.cpp"
"src/xapianSearcher.cpp" "src/xapianSearcher.cpp"

View File

@ -24,6 +24,7 @@
#include <vector> #include <vector>
#include <map> #include <map>
#include <memory> #include <memory>
#include <zim/archive.h>
#include "book.h" #include "book.h"
#include "bookmark.h" #include "bookmark.h"
@ -146,6 +147,7 @@ class Library
{ {
std::map<std::string, kiwix::Book> m_books; std::map<std::string, kiwix::Book> m_books;
std::map<std::string, std::shared_ptr<Reader>> m_readers; std::map<std::string, std::shared_ptr<Reader>> m_readers;
std::map<std::string, std::shared_ptr<zim::Archive>> m_archives;
std::vector<kiwix::Bookmark> m_bookmarks; std::vector<kiwix::Bookmark> m_bookmarks;
class BookDB; class BookDB;
std::unique_ptr<BookDB> m_bookDB; std::unique_ptr<BookDB> m_bookDB;
@ -198,6 +200,7 @@ class Library
const Book& getBookByPath(const std::string& path) const; const Book& getBookByPath(const std::string& path) const;
Book& getBookByPath(const std::string& path); Book& getBookByPath(const std::string& path);
std::shared_ptr<Reader> getReaderById(const std::string& id); std::shared_ptr<Reader> getReaderById(const std::string& id);
std::shared_ptr<zim::Archive> getArchiveById(const std::string& id);
/** /**
* Remove a book from the library. * Remove a book from the library.

View File

@ -25,6 +25,7 @@ install_headers(
'tools/pathTools.h', 'tools/pathTools.h',
'tools/regexTools.h', 'tools/regexTools.h',
'tools/stringTools.h', 'tools/stringTools.h',
'tools/archiveTools.h',
subdir:'kiwix/tools' subdir:'kiwix/tools'
) )

View File

@ -41,11 +41,11 @@ namespace kiwix
* The SuggestionItem is a helper class that contains the info about a single * The SuggestionItem is a helper class that contains the info about a single
* suggestion item. * suggestion item.
*/ */
class SuggestionItem class SuggestionItem
{ {
// Functions // Functions
private: // Temporarily making the constructor public until the code move is complete
public:
// Create a suggestion item. // Create a suggestion item.
explicit SuggestionItem(std::string title, std::string normalizedTitle, explicit SuggestionItem(std::string title, std::string normalizedTitle,
std::string path, std::string snippet = "") : std::string path, std::string snippet = "") :
@ -91,6 +91,13 @@ class Reader
* (.zim extension). * (.zim extension).
*/ */
explicit Reader(const string zimFilePath); explicit Reader(const string zimFilePath);
/**
* Create a Reader to read a zim file given by the Archive.
*
* @param archive The shared pointer to the Archive object.
*/
explicit Reader(const std::shared_ptr<zim::Archive> archive);
#ifndef _WIN32 #ifndef _WIN32
explicit Reader(int fd); explicit Reader(int fd);
Reader(int fd, zim::offset_type offset, zim::size_type size); Reader(int fd, zim::offset_type offset, zim::size_type size);
@ -488,7 +495,7 @@ class Reader
zim::Archive* getZimArchive() const; zim::Archive* getZimArchive() const;
protected: protected:
std::unique_ptr<zim::Archive> zimArchive; std::shared_ptr<zim::Archive> zimArchive;
std::string zimFilePath; std::string zimFilePath;
SuggestionsList_t suggestions; SuggestionsList_t suggestions;

View File

@ -21,6 +21,7 @@
#define KIWIX_SEARCH_RENDERER_H #define KIWIX_SEARCH_RENDERER_H
#include <string> #include <string>
#include <zim/search.h>
namespace kiwix namespace kiwix
{ {
@ -40,6 +41,8 @@ class SearchRenderer
* Used to generate pagination links. * Used to generate pagination links.
*/ */
SearchRenderer(Searcher* searcher, NameMapper* mapper); SearchRenderer(Searcher* searcher, NameMapper* mapper);
SearchRenderer(zim::SearchResultSet srs, NameMapper* mapper,
unsigned int start, unsigned int estimatedResultCount);
~SearchRenderer(); ~SearchRenderer();
@ -74,7 +77,7 @@ class SearchRenderer
protected: protected:
std::string beautifyInteger(const unsigned int number); std::string beautifyInteger(const unsigned int number);
Searcher* mp_searcher; zim::SearchResultSet m_srs;
NameMapper* mp_nameMapper; NameMapper* mp_nameMapper;
std::string searchContent; std::string searchContent;
std::string searchPattern; std::string searchPattern;

View File

@ -32,6 +32,8 @@
#include "tools/pathTools.h" #include "tools/pathTools.h"
#include "tools/stringTools.h" #include "tools/stringTools.h"
#include <zim/search.h>
using namespace std; using namespace std;
namespace kiwix namespace kiwix
@ -142,6 +144,11 @@ class Searcher
*/ */
unsigned int getEstimatedResultCount(); unsigned int getEstimatedResultCount();
/**
* Get a SearchResultSet object for current search
*/
zim::SearchResultSet getSearchResultSet();
unsigned int getResultStart() { return resultStart; } unsigned int getResultStart() { return resultStart; }
unsigned int getResultEnd() { return resultEnd; } unsigned int getResultEnd() { return resultEnd; }

View File

@ -0,0 +1,47 @@
/*
* Copyright 2021 Maneesh P M <manu.pm55@gmail.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#ifndef KIWIX_ARCHIVETOOLS_H
#define KIWIX_ARCHIVETOOLS_H

// std::string is used by every declaration below; include it explicitly so
// this header does not depend on <zim/archive.h> pulling it in.
#include <string>

#include <zim/archive.h>

/**
 * This file contains all the functions that would make handling data related to
 * an archive easier.
 *
 * The helpers operate directly on a zim::Archive. They are the free-function
 * counterparts of the kiwix::Reader accessors, which forward to them.
 * NOTE(review): the implementations live in archiveTools.cpp; the behaviour
 * notes below describe the intended contract and should be confirmed there.
 **/

namespace kiwix
{
    /** Metadata value stored under `name` in `archive` (presumably empty when absent — confirm). */
    std::string getMetadata(const zim::Archive& archive, const std::string& name);

    /** Title of the archive; counterpart of Reader::getTitle(). */
    std::string getArchiveTitle(const zim::Archive& archive);

    /** Description of the archive; counterpart of Reader::getDescription(). */
    std::string getMetaDescription(const zim::Archive& archive);

    /**
     * Tags of the archive; counterpart of Reader::getTags().
     *
     * @param original presumably selects the raw "Tags" value instead of the
     *                 converted one — confirm against the implementation.
     */
    std::string getMetaTags(const zim::Archive& archive, bool original = false);

    /**
     * Fetch the favicon of the archive; counterpart of Reader::getFavicon().
     *
     * @param content  output: receives the image data.
     * @param mimeType output: receives the image mime type.
     * @return presumably true when a favicon was found — confirm.
     */
    bool getArchiveFavicon(const zim::Archive& archive,
                           std::string& content, std::string& mimeType);

    /** Language of the archive; counterpart of Reader::getLanguage(). */
    std::string getMetaLanguage(const zim::Archive& archive);

    /** Name of the archive; counterpart of Reader::getName(). */
    std::string getMetaName(const zim::Archive& archive);

    /** Date of the archive; counterpart of Reader::getDate(). */
    std::string getMetaDate(const zim::Archive& archive);

    /** Creator of the archive; counterpart of Reader::getCreator(). */
    std::string getMetaCreator(const zim::Archive& archive);

    /** Publisher of the archive; counterpart of Reader::getPublisher(). */
    std::string getMetaPublisher(const zim::Archive& archive);

    /**
     * Item to serve for `entry`; the server uses it to build redirect targets.
     * NOTE(review): presumably follows redirections to the final item — confirm.
     */
    zim::Item getFinalItem(const zim::Archive& archive, const zim::Entry& entry);

    /**
     * Entry located at `path` in `archive`.
     * Callers in this code base catch zim::EntryNotFound around this call.
     * NOTE(review): an empty path or "/" presumably resolves to the main
     * page, as Reader::getEntryFromPath() used to do — confirm.
     */
    zim::Entry getEntryFromPath(const zim::Archive& archive, const std::string& path);
}

#endif

View File

@ -70,5 +70,7 @@ T extractFromString(const std::string& str) {
} }
bool startsWith(const std::string& base, const std::string& start); bool startsWith(const std::string& base, const std::string& start);
std::vector<std::string> getTitleVariants(const std::string& title);
} //namespace kiwix } //namespace kiwix
#endif #endif

View File

@ -108,6 +108,7 @@ bool Library::removeBookById(const std::string& id)
{ {
m_bookDB->delete_document("Q" + id); m_bookDB->delete_document("Q" + id);
m_readers.erase(id); m_readers.erase(id);
m_archives.erase(id);
return m_books.erase(id) == 1; return m_books.erase(id) == 1;
} }
@ -146,11 +147,35 @@ std::shared_ptr<Reader> Library::getReaderById(const std::string& id)
return m_readers.at(id); return m_readers.at(id);
} catch (std::out_of_range& e) {} } catch (std::out_of_range& e) {}
try {
auto reader = make_shared<Reader>(m_archives.at(id));
m_readers[id] = reader;
return reader;
} catch (std::out_of_range& e) {}
auto book = getBookById(id); auto book = getBookById(id);
if (!book.isPathValid()) if (!book.isPathValid())
return nullptr; return nullptr;
auto sptr = make_shared<Reader>(book.getPath());
m_readers[id] = sptr; auto archive = make_shared<zim::Archive>(book.getPath());
m_archives[id] = archive;
auto reader = make_shared<Reader>(archive);
m_readers[id] = reader;
return reader;
}
std::shared_ptr<zim::Archive> Library::getArchiveById(const std::string& id)
{
try {
return m_archives.at(id);
} catch (std::out_of_range& e) {}
auto book = getBookById(id);
if (!book.isPathValid())
return nullptr;
auto sptr = make_shared<zim::Archive>(book.getPath());
m_archives[id] = sptr;
return sptr; return sptr;
} }

View File

@ -19,6 +19,7 @@ kiwix_sources = [
'tools/stringTools.cpp', 'tools/stringTools.cpp',
'tools/networkTools.cpp', 'tools/networkTools.cpp',
'tools/otherTools.cpp', 'tools/otherTools.cpp',
'tools/archiveTools.cpp',
'kiwixserve.cpp', 'kiwixserve.cpp',
'name_mapper.cpp', 'name_mapper.cpp',
'server/byte_range.cpp', 'server/byte_range.cpp',

View File

@ -25,6 +25,7 @@
#include <zim/error.h> #include <zim/error.h>
#include "tools/otherTools.h" #include "tools/otherTools.h"
#include "tools/archiveTools.h"
inline char hi(char v) inline char hi(char v)
{ {
@ -86,6 +87,11 @@ Reader::Reader(const string zimFilePath)
srand(time(nullptr)); srand(time(nullptr));
} }
Reader::Reader(const std::shared_ptr<zim::Archive> archive)
: zimArchive(archive),
zimFilePath(archive->getFilename())
{}
#ifndef _WIN32 #ifndef _WIN32
Reader::Reader(int fd) Reader::Reader(int fd)
: zimArchive(new zim::Archive(fd)), : zimArchive(new zim::Archive(fd)),
@ -183,14 +189,7 @@ Entry Reader::getMainPage() const
bool Reader::getFavicon(string& content, string& mimeType) const bool Reader::getFavicon(string& content, string& mimeType) const
{ {
try { return kiwix::getArchiveFavicon(*zimArchive, content, mimeType);
auto item = zimArchive->getIllustrationItem();
content = item.getData();
mimeType = item.getMimetype();
return true;
} catch(zim::EntryNotFound& e) {};
return false;
} }
string Reader::getZimFilePath() const string Reader::getZimFilePath() const
@ -212,47 +211,32 @@ bool Reader::getMetadata(const string& name, string& value) const
string Reader::getName() const string Reader::getName() const
{ {
METADATA("Name") return kiwix::getMetaName(*zimArchive);
} }
string Reader::getTitle() const string Reader::getTitle() const
{ {
string value = zimArchive->getMetadata("Title"); return kiwix::getArchiveTitle(*zimArchive);
if (value.empty()) {
value = getLastPathElement(zimFilePath);
std::replace(value.begin(), value.end(), '_', ' ');
size_t pos = value.find(".zim");
value = value.substr(0, pos);
}
return value;
} }
string Reader::getCreator() const string Reader::getCreator() const
{ {
METADATA("Creator") return kiwix::getMetaCreator(*zimArchive);
} }
string Reader::getPublisher() const string Reader::getPublisher() const
{ {
METADATA("Publisher") return kiwix::getMetaPublisher(*zimArchive);
} }
string Reader::getDate() const string Reader::getDate() const
{ {
METADATA("Date") return kiwix::getMetaDate(*zimArchive);
} }
string Reader::getDescription() const string Reader::getDescription() const
{ {
string value; return kiwix::getMetaDescription(*zimArchive);
this->getMetadata("Description", value);
/* Mediawiki Collection tends to use the "Subtitle" name */
if (value.empty()) {
this->getMetadata("Subtitle", value);
}
return value;
} }
string Reader::getLongDescription() const string Reader::getLongDescription() const
@ -262,7 +246,7 @@ string Reader::getLongDescription() const
string Reader::getLanguage() const string Reader::getLanguage() const
{ {
METADATA("Language") return kiwix::getMetaLanguage(*zimArchive);
} }
string Reader::getLicense() const string Reader::getLicense() const
@ -272,13 +256,7 @@ string Reader::getLicense() const
string Reader::getTags(bool original) const string Reader::getTags(bool original) const
{ {
string tags_str; return kiwix::getMetaTags(*zimArchive, original);
getMetadata("Tags", tags_str);
if (original) {
return tags_str;
}
auto tags = convertTags(tags_str);
return join(tags, ";");
} }
@ -342,12 +320,8 @@ string Reader::getOrigId() const
Entry Reader::getEntryFromPath(const std::string& path) const Entry Reader::getEntryFromPath(const std::string& path) const
{ {
if (path.empty() || path == "/") {
return getMainPage();
}
try { try {
return zimArchive->getEntryByPath(path); return kiwix::getEntryFromPath(*zimArchive, path);
} catch (zim::EntryNotFound& e) { } catch (zim::EntryNotFound& e) {
throw NoEntry(); throw NoEntry();
} }
@ -460,12 +434,7 @@ bool Reader::searchSuggestions(const string& prefix,
std::vector<std::string> Reader::getTitleVariants( std::vector<std::string> Reader::getTitleVariants(
const std::string& title) const const std::string& title) const
{ {
std::vector<std::string> variants; return kiwix::getTitleVariants(title);
variants.push_back(title);
variants.push_back(kiwix::ucFirst(title));
variants.push_back(kiwix::lcFirst(title));
variants.push_back(kiwix::toTitle(title));
return variants;
} }

View File

@ -37,10 +37,22 @@ namespace kiwix
/* Constructor */ /* Constructor */
SearchRenderer::SearchRenderer(Searcher* searcher, NameMapper* mapper) SearchRenderer::SearchRenderer(Searcher* searcher, NameMapper* mapper)
: mp_searcher(searcher), : m_srs(searcher->getSearchResultSet()),
mp_nameMapper(mapper), mp_nameMapper(mapper),
protocolPrefix("zim://"), protocolPrefix("zim://"),
searchProtocolPrefix("search://?") searchProtocolPrefix("search://?"),
estimatedResultCount(searcher->getEstimatedResultCount()),
resultStart(searcher->getResultStart())
{}
/**
 * Build a renderer from an already computed result set, without going
 * through a kiwix::Searcher.
 *
 * @param srs                  results to render (stored by value).
 * @param mapper               used to turn zim ids into book names.
 * @param start                stored as `resultStart`.
 * @param estimatedResultCount stored as `estimatedResultCount`.
 */
SearchRenderer::SearchRenderer(zim::SearchResultSet srs, NameMapper* mapper,
                               unsigned int start, unsigned int estimatedResultCount)
  : m_srs(srs)
  , mp_nameMapper(mapper)
  , protocolPrefix("zim://")            // default prefix, see setProtocolPrefix()
  , searchProtocolPrefix("search://?")  // default prefix, see setSearchProtocolPrefix()
  , estimatedResultCount(estimatedResultCount)
  , resultStart(start)
{
}
/* Destructor */ /* Destructor */
@ -70,29 +82,26 @@ std::string SearchRenderer::getHtml()
{ {
kainjow::mustache::data results{kainjow::mustache::data::type::list}; kainjow::mustache::data results{kainjow::mustache::data::type::list};
mp_searcher->restart_search(); for (auto it = m_srs.begin(); it != m_srs.end(); it++) {
Result* p_result = NULL;
while ((p_result = mp_searcher->getNextResult())) {
kainjow::mustache::data result; kainjow::mustache::data result;
result.set("title", p_result->get_title()); result.set("title", it.getTitle());
result.set("url", p_result->get_url()); result.set("url", it.getPath());
result.set("snippet", p_result->get_snippet()); result.set("snippet", it.getSnippet());
result.set("resultContentId", mp_nameMapper->getNameForId(p_result->get_zimId())); std::ostringstream s;
s << it.getZimId();
result.set("resultContentId", mp_nameMapper->getNameForId(s.str()));
if (p_result->get_wordCount() >= 0) { if (it.getWordCount() >= 0) {
result.set("wordCount", kiwix::beautifyInteger(p_result->get_wordCount())); result.set("wordCount", kiwix::beautifyInteger(it.getWordCount()));
} }
results.push_back(result); results.push_back(result);
delete p_result;
} }
// pages // pages
kainjow::mustache::data pages{kainjow::mustache::data::type::list}; kainjow::mustache::data pages{kainjow::mustache::data::type::list};
auto resultStart = mp_searcher->getResultStart();
auto resultEnd = 0U; auto resultEnd = 0U;
auto estimatedResultCount = mp_searcher->getEstimatedResultCount();
auto currentPage = 0U; auto currentPage = 0U;
auto pageStart = 0U; auto pageStart = 0U;
auto pageEnd = 0U; auto pageEnd = 0U;

View File

@ -228,6 +228,11 @@ unsigned int Searcher::getEstimatedResultCount()
return this->estimatedResultCount; return this->estimatedResultCount;
} }
/*
 * Hand out, by value, the result set held by `internal`.
 * NOTE(review): `internal` is dereferenced unconditionally; presumably a
 * search must have been run before this is called — confirm.
 */
zim::SearchResultSet Searcher::getSearchResultSet()
{
  return *internal;
}
_Result::_Result(zim::SearchResultSet::iterator iterator) _Result::_Result(zim::SearchResultSet::iterator iterator)
: iterator(iterator) : iterator(iterator)
{ {

View File

@ -47,6 +47,7 @@ extern "C" {
#include "tools/pathTools.h" #include "tools/pathTools.h"
#include "tools/regexTools.h" #include "tools/regexTools.h"
#include "tools/stringTools.h" #include "tools/stringTools.h"
#include "tools/archiveTools.h"
#include "library.h" #include "library.h"
#include "name_mapper.h" #include "name_mapper.h"
#include "entry.h" #include "entry.h"
@ -55,6 +56,10 @@ extern "C" {
#include "opds_dumper.h" #include "opds_dumper.h"
#include <zim/uuid.h> #include <zim/uuid.h>
#include <zim/error.h>
#include <zim/search.h>
#include <zim/entry.h>
#include <zim/item.h>
#include <mustache.hpp> #include <mustache.hpp>
@ -323,22 +328,63 @@ std::unique_ptr<Response> InternalServer::build_homepage(const RequestContext& r
return ContentResponse::build(*this, RESOURCE::templates::index_html, get_default_data(), "text/html; charset=utf-8", true); return ContentResponse::build(*this, RESOURCE::templates::index_html, get_default_data(), "text/html; charset=utf-8", true);
} }
/**
* Archive and Zim handlers begin
**/
/**
 * Collect title suggestions for `queryString` from `archive`.
 *
 * When the archive has a title index, the suggestions come from a
 * zim::Searcher query whose results are requested in the range
 * [0, suggestionCount). Otherwise the archive is scanned with findByTitle()
 * for each title variant of the query.
 *
 * @param archive         archive to query; it is dereferenced
 *                        unconditionally, so it must not be null.
 * @param queryString     text to find suggestions for.
 * @param suggestionCount maximum number of suggestions wanted; a value <= 0
 *                        yields an empty list.
 * @return the suggestions found; the fallback branch never returns more
 *         than `suggestionCount` items.
 */
// TODO: retrieve searcher from caching mechanism
SuggestionsList_t getSuggestions(const zim::Archive* const archive,
                  const std::string& queryString, int suggestionCount)
{
  SuggestionsList_t suggestions;

  // Nothing was asked for; also keeps a negative count from being handed
  // to getResults() below.
  if (suggestionCount <= 0) {
    return suggestions;
  }

  if (archive->hasTitleIndex()) {
    auto searcher = zim::Searcher(*archive);
    zim::Query suggestionQuery;
    suggestionQuery.setQuery(queryString, true);
    auto suggestionSearch = searcher.search(suggestionQuery);
    auto suggestionResult = suggestionSearch.getResults(0, suggestionCount);

    for (auto it = suggestionResult.begin(); it != suggestionResult.end(); ++it) {
      // Fetch the title once; it is needed both raw and normalized.
      const std::string title = it.getTitle();
      suggestions.push_back(SuggestionItem(title, kiwix::normalize(title),
                                           it.getPath(), it.getSnippet()));
    }
    return suggestions;
  }

  // TODO: This case should be handled by libzim
  const std::vector<std::string> variants = getTitleVariants(queryString);
  int currCount = 0;
  for (auto it = variants.begin(); it != variants.end() && currCount < suggestionCount; ++it) {
    // Several variants are often identical (e.g. the query is already
    // capitalized). Searching the same text again would only repeat the
    // entries already collected, so skip it.
    bool alreadySearched = false;
    for (auto prev = variants.begin(); prev != it; ++prev) {
      if (*prev == *it) {
        alreadySearched = true;
        break;
      }
    }
    if (alreadySearched) {
      continue;
    }

    for (auto& entry: archive->findByTitle(*it)) {
      // A single variant can match many entries: enforce the limit here
      // too, not only between variants.
      if (currCount >= suggestionCount) {
        break;
      }
      const std::string title = entry.getTitle();
      suggestions.push_back(SuggestionItem(title, kiwix::normalize(title),
                                           entry.getPath()));
      currCount++;
    }
  }

  return suggestions;
}
/**
* Archive and Zim handlers end
**/
std::unique_ptr<Response> InternalServer::handle_meta(const RequestContext& request) std::unique_ptr<Response> InternalServer::handle_meta(const RequestContext& request)
{ {
std::string bookName; std::string bookName;
std::string bookId; std::string bookId;
std::string meta_name; std::string meta_name;
std::shared_ptr<Reader> reader; std::shared_ptr<zim::Archive> archive;
try { try {
bookName = request.get_argument("content"); bookName = request.get_argument("content");
bookId = mp_nameMapper->getIdForName(bookName); bookId = mp_nameMapper->getIdForName(bookName);
meta_name = request.get_argument("name"); meta_name = request.get_argument("name");
reader = mp_library->getReaderById(bookId); archive = mp_library->getArchiveById(bookId);
} catch (const std::out_of_range& e) { } catch (const std::out_of_range& e) {
return Response::build_404(*this, request, bookName, ""); return Response::build_404(*this, request, bookName, "");
} }
if (reader == nullptr) { if (archive == nullptr) {
return Response::build_404(*this, request, bookName, ""); return Response::build_404(*this, request, bookName, "");
} }
@ -346,23 +392,23 @@ std::unique_ptr<Response> InternalServer::handle_meta(const RequestContext& requ
std::string mimeType = "text"; std::string mimeType = "text";
if (meta_name == "title") { if (meta_name == "title") {
content = reader->getTitle(); content = getArchiveTitle(*archive);
} else if (meta_name == "description") { } else if (meta_name == "description") {
content = reader->getDescription(); content = getMetaDescription(*archive);
} else if (meta_name == "language") { } else if (meta_name == "language") {
content = reader->getLanguage(); content = getMetaLanguage(*archive);
} else if (meta_name == "name") { } else if (meta_name == "name") {
content = reader->getName(); content = getMetaName(*archive);
} else if (meta_name == "tags") { } else if (meta_name == "tags") {
content = reader->getTags(); content = getMetaTags(*archive);
} else if (meta_name == "date") { } else if (meta_name == "date") {
content = reader->getDate(); content = getMetaDate(*archive);
} else if (meta_name == "creator") { } else if (meta_name == "creator") {
content = reader->getCreator(); content = getMetaCreator(*archive);
} else if (meta_name == "publisher") { } else if (meta_name == "publisher") {
content = reader->getPublisher(); content = getMetaPublisher(*archive);
} else if (meta_name == "favicon") { } else if (meta_name == "favicon") {
reader->getFavicon(content, mimeType); getArchiveFavicon(*archive, content, mimeType);
} else { } else {
return Response::build_404(*this, request, bookName, ""); return Response::build_404(*this, request, bookName, "");
} }
@ -385,51 +431,54 @@ std::unique_ptr<Response> InternalServer::handle_suggest(const RequestContext& r
std::string bookName; std::string bookName;
std::string bookId; std::string bookId;
std::string term; std::string queryString;
std::shared_ptr<Reader> reader; std::shared_ptr<zim::Archive> archive;
try { try {
bookName = request.get_argument("content"); bookName = request.get_argument("content");
bookId = mp_nameMapper->getIdForName(bookName); bookId = mp_nameMapper->getIdForName(bookName);
term = request.get_argument("term"); queryString = request.get_argument("term");
reader = mp_library->getReaderById(bookId); archive = mp_library->getArchiveById(bookId);
} catch (const std::out_of_range&) { } catch (const std::out_of_range&) {
return Response::build_404(*this, request, bookName, ""); return Response::build_404(*this, request, bookName, "");
} }
if (archive == nullptr) {
return Response::build_404(*this, request, bookName, "");
}
if (m_verbose.load()) { if (m_verbose.load()) {
printf("Searching suggestions for: \"%s\"\n", term.c_str()); printf("Searching suggestions for: \"%s\"\n", queryString.c_str());
} }
MustacheData results{MustacheData::type::list}; MustacheData results{MustacheData::type::list};
bool first = true; bool first = true;
if (reader != nullptr) {
/* Get the suggestions */
SuggestionsList_t suggestions;
reader->searchSuggestionsSmart(term, maxSuggestionCount, suggestions);
for(auto& suggestion:suggestions) {
MustacheData result;
result.set("label", suggestion.getTitle());
if (suggestion.hasSnippet()) { /* Get the suggestions */
result.set("label", suggestion.getSnippet()); SuggestionsList_t suggestions = getSuggestions(archive.get(), queryString, maxSuggestionCount);
} for(auto& suggestion:suggestions) {
MustacheData result;
result.set("label", suggestion.getTitle());
result.set("value", suggestion.getTitle()); if (suggestion.hasSnippet()) {
result.set("kind", "path"); result.set("label", suggestion.getSnippet());
result.set("path", suggestion.getPath());
result.set("first", first);
first = false;
results.push_back(result);
suggestionCount++;
} }
result.set("value", suggestion.getTitle());
result.set("kind", "path");
result.set("path", suggestion.getPath());
result.set("first", first);
first = false;
results.push_back(result);
suggestionCount++;
} }
/* Propose the fulltext search if possible */ /* Propose the fulltext search if possible */
if (reader->hasFulltextIndex()) { if (archive->hasFulltextIndex()) {
MustacheData result; MustacheData result;
result.set("label", "containing '" + term + "'..."); result.set("label", "containing '" + queryString + "'...");
result.set("value", term + " "); result.set("value", queryString + " ");
result.set("kind", "pattern"); result.set("kind", "pattern");
result.set("first", first); result.set("first", first);
results.push_back(result); results.push_back(result);
@ -492,30 +541,34 @@ std::unique_ptr<Response> InternalServer::handle_search(const RequestContext& re
} catch(const std::out_of_range&) {} } catch(const std::out_of_range&) {}
catch(const std::invalid_argument&) {} catch(const std::invalid_argument&) {}
std::shared_ptr<Reader> reader(nullptr); std::shared_ptr<zim::Archive> archive;
try { try {
reader = mp_library->getReaderById(bookId); archive = mp_library->getArchiveById(bookId);
} catch (const std::out_of_range&) {} } catch (const std::out_of_range&) {}
/* Make the search */ /* Make the search */
if ( (!reader && !bookName.empty()) if ( (!archive && !bookName.empty())
|| (patternString.empty() && ! has_geo_query) ) { || (patternString.empty() && ! has_geo_query) ) {
auto data = get_default_data(); auto data = get_default_data();
data.set("pattern", encodeDiples(patternString)); data.set("pattern", encodeDiples(patternString));
auto response = ContentResponse::build(*this, RESOURCE::templates::no_search_result_html, data, "text/html; charset=utf-8"); auto response = ContentResponse::build(*this, RESOURCE::templates::no_search_result_html, data, "text/html; charset=utf-8");
response->set_taskbar(bookName, reader ? reader->getTitle() : ""); response->set_taskbar(bookName, archive ? getArchiveTitle(*archive) : "");
response->set_code(MHD_HTTP_NOT_FOUND); response->set_code(MHD_HTTP_NOT_FOUND);
return std::move(response); return std::move(response);
} }
Searcher searcher; std::shared_ptr<zim::Searcher> searcher;
if (reader) { if (archive) {
searcher.add_reader(reader.get()); searcher = std::make_shared<zim::Searcher>(*archive);
} else { } else {
for (auto& bookId: mp_library->filter(kiwix::Filter().local(true).valid(true))) { for (auto& bookId: mp_library->filter(kiwix::Filter().local(true).valid(true))) {
auto currentReader = mp_library->getReaderById(bookId); auto currentArchive = mp_library->getArchiveById(bookId);
if (currentReader) { if (currentArchive) {
searcher.add_reader(currentReader.get()); if (! searcher) {
searcher = std::make_shared<zim::Searcher>(*currentArchive);
} else {
searcher->add_archive(*currentArchive);
}
} }
} }
} }
@ -540,21 +593,37 @@ std::unique_ptr<Response> InternalServer::handle_search(const RequestContext& re
/* Get the results */ /* Get the results */
try { try {
zim::Query query;
if (patternString.empty()) { if (patternString.empty()) {
searcher.geo_search(latitude, longitude, distance, // Execute geo-search
start, end, m_verbose.load()); if (m_verbose.load()) {
cout << "Performing geo query `" << distance << "&(" << latitude << ";" << longitude << ")'" << endl;
}
query.setVerbose(m_verbose.load());
query.setQuery("", false);
query.setGeorange(latitude, longitude, distance);
} else { } else {
searcher.search(patternString, // Execute Ft search
start, end, m_verbose.load()); if (m_verbose.load()) {
cout << "Performing query `" << patternString << "'" << endl;
}
std::string queryString = removeAccents(patternString);
query.setQuery(queryString, false);
query.setVerbose(m_verbose.load());
} }
SearchRenderer renderer(&searcher, mp_nameMapper);
zim::Search search = searcher->search(query);
SearchRenderer renderer(search.getResults(start, end), mp_nameMapper, start,
search.getEstimatedMatches());
renderer.setSearchPattern(patternString); renderer.setSearchPattern(patternString);
renderer.setSearchContent(bookName); renderer.setSearchContent(bookName);
renderer.setProtocolPrefix(m_root + "/"); renderer.setProtocolPrefix(m_root + "/");
renderer.setSearchProtocolPrefix(m_root + "/search?"); renderer.setSearchProtocolPrefix(m_root + "/search?");
renderer.setPageLength(pageLength); renderer.setPageLength(pageLength);
auto response = ContentResponse::build(*this, renderer.getHtml(), "text/html; charset=utf-8"); auto response = ContentResponse::build(*this, renderer.getHtml(), "text/html; charset=utf-8");
response->set_taskbar(bookName, reader ? reader->getTitle() : ""); response->set_taskbar(bookName, archive ? getArchiveTitle(*archive) : "");
return std::move(response); return std::move(response);
} catch (const std::exception& e) { } catch (const std::exception& e) {
@ -571,23 +640,23 @@ std::unique_ptr<Response> InternalServer::handle_random(const RequestContext& re
std::string bookName; std::string bookName;
std::string bookId; std::string bookId;
std::shared_ptr<Reader> reader; std::shared_ptr<zim::Archive> archive;
try { try {
bookName = request.get_argument("content"); bookName = request.get_argument("content");
bookId = mp_nameMapper->getIdForName(bookName); bookId = mp_nameMapper->getIdForName(bookName);
reader = mp_library->getReaderById(bookId); archive = mp_library->getArchiveById(bookId);
} catch (const std::out_of_range&) { } catch (const std::out_of_range&) {
return Response::build_404(*this, request, bookName, ""); return Response::build_404(*this, request, bookName, "");
} }
if (reader == nullptr) { if (archive == nullptr) {
return Response::build_404(*this, request, bookName, ""); return Response::build_404(*this, request, bookName, "");
} }
try { try {
auto entry = reader->getRandomPage(); auto entry = archive->getRandomEntry();
return build_redirect(bookName, entry.getFinalEntry()); return build_redirect(bookName, getFinalItem(*archive, entry));
} catch(kiwix::NoEntry& e) { } catch(zim::EntryNotFound& e) {
return Response::build_404(*this, request, bookName, ""); return Response::build_404(*this, request, bookName, "");
} }
} }
@ -734,22 +803,10 @@ std::string searchSuggestionHTML(const std::string& searchURL, const std::string
} // unnamed namespace } // unnamed namespace
std::shared_ptr<Reader>
InternalServer::get_reader(const std::string& bookName) const
{
std::shared_ptr<Reader> reader;
try {
const std::string bookId = mp_nameMapper->getIdForName(bookName);
reader = mp_library->getReaderById(bookId);
} catch (const std::out_of_range& e) {
}
return reader;
}
std::unique_ptr<Response> std::unique_ptr<Response>
InternalServer::build_redirect(const std::string& bookName, const kiwix::Entry& entry) const InternalServer::build_redirect(const std::string& bookName, const zim::Item& item) const
{ {
auto redirectUrl = m_root + "/" + bookName + "/" + kiwix::urlEncode(entry.getPath()); auto redirectUrl = m_root + "/" + bookName + "/" + kiwix::urlEncode(item.getPath());
return Response::build_redirect(*this, redirectUrl); return Response::build_redirect(*this, redirectUrl);
} }
@ -765,8 +822,13 @@ std::unique_ptr<Response> InternalServer::handle_content(const RequestContext& r
if (bookName.empty()) if (bookName.empty())
return build_homepage(request); return build_homepage(request);
const std::shared_ptr<Reader> reader = get_reader(bookName); std::shared_ptr<zim::Archive> archive;
if (reader == nullptr) { try {
const std::string bookId = mp_nameMapper->getIdForName(bookName);
archive = mp_library->getArchiveById(bookId);
} catch (const std::out_of_range& e) {}
if (archive == nullptr) {
std::string searchURL = m_root+"/search?pattern="+pattern; // Make a full search on the entire library. std::string searchURL = m_root+"/search?pattern="+pattern; // Make a full search on the entire library.
const std::string details = searchSuggestionHTML(searchURL, kiwix::urlDecode(pattern)); const std::string details = searchSuggestionHTML(searchURL, kiwix::urlDecode(pattern));
@ -779,31 +841,31 @@ std::unique_ptr<Response> InternalServer::handle_content(const RequestContext& r
} }
try { try {
auto entry = reader->getEntryFromPath(urlStr); auto entry = getEntryFromPath(*archive, urlStr);
if (entry.isRedirect() || urlStr.empty()) { if (entry.isRedirect() || urlStr.empty()) {
// If urlStr is empty, we want to mainPage. // If urlStr is empty, we want to mainPage.
// We must do a redirection to the real page. // We must do a redirection to the real page.
return build_redirect(bookName, entry.getFinalEntry()); return build_redirect(bookName, getFinalItem(*archive, entry));
} }
auto response = ItemResponse::build(*this, request, entry.getZimEntry().getItem()); auto response = ItemResponse::build(*this, request, entry.getItem());
try { try {
dynamic_cast<ContentResponse&>(*response).set_taskbar(bookName, reader->getTitle()); dynamic_cast<ContentResponse&>(*response).set_taskbar(bookName, getArchiveTitle(*archive));
} catch (std::bad_cast& e) {} } catch (std::bad_cast& e) {}
if (m_verbose.load()) { if (m_verbose.load()) {
printf("Found %s\n", entry.getPath().c_str()); printf("Found %s\n", entry.getPath().c_str());
printf("mimeType: %s\n", entry.getMimetype().c_str()); printf("mimeType: %s\n", entry.getItem(true).getMimetype().c_str());
} }
return response; return response;
} catch(kiwix::NoEntry& e) { } catch(zim::EntryNotFound& e) {
if (m_verbose.load()) if (m_verbose.load())
printf("Failed to find %s\n", urlStr.c_str()); printf("Failed to find %s\n", urlStr.c_str());
std::string searchURL = m_root+"/search?content="+bookName+"&pattern="+pattern; // Make a search on this specific book only. std::string searchURL = m_root+"/search?content="+bookName+"&pattern="+pattern; // Make a search on this specific book only.
const std::string details = searchSuggestionHTML(searchURL, kiwix::urlDecode(pattern)); const std::string details = searchSuggestionHTML(searchURL, kiwix::urlDecode(pattern));
return Response::build_404(*this, request, bookName, reader->getTitle(), details); return Response::build_404(*this, request, bookName, getArchiveTitle(*archive), details);
} }
} }

View File

@ -69,7 +69,7 @@ class InternalServer {
private: // functions private: // functions
std::unique_ptr<Response> handle_request(const RequestContext& request); std::unique_ptr<Response> handle_request(const RequestContext& request);
std::unique_ptr<Response> build_redirect(const std::string& bookName, const kiwix::Entry& entry) const; std::unique_ptr<Response> build_redirect(const std::string& bookName, const zim::Item& item) const;
std::unique_ptr<Response> build_homepage(const RequestContext& request); std::unique_ptr<Response> build_homepage(const RequestContext& request);
std::unique_ptr<Response> handle_skin(const RequestContext& request); std::unique_ptr<Response> handle_skin(const RequestContext& request);
std::unique_ptr<Response> handle_catalog(const RequestContext& request); std::unique_ptr<Response> handle_catalog(const RequestContext& request);
@ -89,7 +89,6 @@ class InternalServer {
MustacheData get_default_data() const; MustacheData get_default_data() const;
std::shared_ptr<Reader> get_reader(const std::string& bookName) const;
bool etag_not_needed(const RequestContext& r) const; bool etag_not_needed(const RequestContext& r) const;
ETag get_matching_if_none_match_etag(const RequestContext& request) const; ETag get_matching_if_none_match_etag(const RequestContext& request) const;

119
src/tools/archiveTools.cpp Normal file
View File

@ -0,0 +1,119 @@
/*
* Copyright 2021 Maneesh P M <manu.pm55@gmail.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#include <tools/archiveTools.h>
#include <tools/pathTools.h>
#include <tools/otherTools.h>
#include <tools/stringTools.h>
#include <zim/error.h>
#include <zim/item.h>
namespace kiwix
{
/**
 * Look up a metadata value in a ZIM archive without letting a missing
 * entry escape as an exception.
 *
 * @param archive archive to query.
 * @param name    metadata key, forwarded to zim::Archive::getMetadata().
 * @return the value reported by the archive, or an empty string when the
 *         lookup throws zim::EntryNotFound. Other exceptions propagate.
 */
std::string getMetadata(const zim::Archive& archive, const std::string& name) {
  std::string result;
  try {
    result = archive.getMetadata(name);
  } catch (const zim::EntryNotFound&) {
    // Missing metadata is reported to the caller as an empty string.
    result.clear();
  }
  return result;
}
/**
 * Return a human readable title for an archive.
 *
 * The "Title" metadata is used when it is non-empty. Otherwise a title is
 * derived from the archive's file name: the last path element is taken,
 * underscores become spaces, and everything from the last ".zim" onwards
 * is dropped.
 *
 * @param archive archive to describe.
 * @return the metadata title, or the file-name based fallback.
 */
std::string getArchiveTitle(const zim::Archive& archive) {
  std::string title = getMetadata(archive, "Title");
  if (!title.empty()) {
    return title;
  }

  title = getLastPathElement(archive.getFilename());
  std::replace(title.begin(), title.end(), '_', ' ');

  // Cut at the LAST ".zim" so that a name which merely contains ".zim"
  // earlier on (e.g. "my.zim_mirror.zim") is not truncated at that point.
  // A name without ".zim" is returned unchanged.
  const std::string::size_type extensionPos = title.rfind(".zim");
  if (extensionPos != std::string::npos) {
    title.erase(extensionPos);
  }
  return title;
}
/**
 * Return the archive description.
 *
 * @param archive archive to query.
 * @return the "Description" metadata when it is non-empty, otherwise the
 *         "Subtitle" metadata (which may itself be empty).
 */
std::string getMetaDescription(const zim::Archive& archive) {
  std::string description = getMetadata(archive, "Description");
  if (!description.empty()) {
    return description;
  }

  /* Mediawiki Collection tends to use the "Subtitle" name */
  return getMetadata(archive, "Subtitle");
}
/**
 * Return the archive's "Tags" metadata.
 *
 * @param archive  archive to query.
 * @param original when true, the metadata string is returned exactly as
 *                 stored; when false, it is passed through convertTags()
 *                 and the resulting tags are re-joined with ';'.
 * @return the tags as a single string.
 */
std::string getMetaTags(const zim::Archive& archive, bool original) {
  std::string rawTags = getMetadata(archive, "Tags");
  if (!original) {
    auto convertedTags = convertTags(rawTags);
    return join(convertedTags, ";");
  }
  return rawTags;
}
/**
 * Fetch the archive's illustration item (used as favicon).
 *
 * @param archive  archive to query.
 * @param content  receives the item's data on success.
 * @param mimeType receives the item's mime type on success.
 * @return true when the illustration was read, false when the lookup
 *         throws zim::EntryNotFound. Other exceptions propagate.
 */
bool getArchiveFavicon(const zim::Archive& archive,
                       std::string& content, std::string& mimeType) {
  bool found = false;
  try {
    auto illustration = archive.getIllustrationItem();
    content = illustration.getData();
    mimeType = illustration.getMimetype();
    found = true;
  } catch (const zim::EntryNotFound&) {
    // No illustration in this archive: `found` stays false.
  }
  return found;
}
/**
 * Return the archive's "Language" metadata (empty when getMetadata()
 * finds no such entry).
 */
std::string getMetaLanguage(const zim::Archive& archive) {
  std::string language = getMetadata(archive, "Language");
  return language;
}
/**
 * Return the archive's "Name" metadata (empty when getMetadata() finds
 * no such entry).
 */
std::string getMetaName(const zim::Archive& archive) {
  std::string name = getMetadata(archive, "Name");
  return name;
}
/**
 * Return the archive's "Date" metadata (empty when getMetadata() finds
 * no such entry). The string is returned as stored; no parsing is done.
 */
std::string getMetaDate(const zim::Archive& archive) {
  std::string date = getMetadata(archive, "Date");
  return date;
}
/**
 * Return the archive's "Creator" metadata (empty when getMetadata()
 * finds no such entry).
 */
std::string getMetaCreator(const zim::Archive& archive) {
  std::string creator = getMetadata(archive, "Creator");
  return creator;
}
/**
 * Return the archive's "Publisher" metadata (empty when getMetadata()
 * finds no such entry).
 */
std::string getMetaPublisher(const zim::Archive& archive) {
  std::string publisher = getMetadata(archive, "Publisher");
  return publisher;
}
/**
 * Return the item an entry ultimately refers to.
 *
 * @param archive not used by the current implementation.
 * @param entry   entry to resolve.
 * @return the result of entry.getItem(true); per the libzim API the flag
 *         asks for redirections to be followed (NOTE(review): confirm
 *         against the libzim version in use).
 */
zim::Item getFinalItem(const zim::Archive& archive, const zim::Entry& entry)
{
  (void)archive;  // Part of the signature only; nothing is read from it.
  const bool followRedirects = true;
  return entry.getItem(followRedirects);
}
/**
 * Resolve a path to an entry, falling back to the main entry for an
 * empty path.
 *
 * @param archive archive to search.
 * @param path    entry path; "" and "/" both designate the main entry
 *                when no entry with that exact path exists.
 * @return the entry found by path, or the archive's main entry.
 * @throws zim::EntryNotFound when the lookup fails and `path` is neither
 *         empty nor "/". Whatever getMainEntry() throws is not caught here.
 */
zim::Entry getEntryFromPath(const zim::Archive& archive, const std::string& path)
{
  try {
    return archive.getEntryByPath(path);
  } catch (const zim::EntryNotFound&) {
    const bool asksForMainEntry = path.empty() || path == "/";
    if (!asksForMainEntry) {
      throw zim::EntryNotFound("Cannot find entry for non empty path");
    }
  }
  // Only reached when the lookup failed for "" or "/".
  return archive.getMainEntry();
}
} // kiwix

View File

@ -395,3 +395,11 @@ bool kiwix::startsWith(const std::string& base, const std::string& start)
&& std::equal(start.begin(), start.end(), base.begin()); && std::equal(start.begin(), start.end(), base.begin());
} }
/**
 * Build the list of spelling variants tried for a title.
 *
 * @param title title to derive variants from.
 * @return four strings, in this order: the title itself, then the results
 *         of kiwix::ucFirst(), kiwix::lcFirst() and kiwix::toTitle()
 *         applied to it. Duplicates are not removed.
 */
std::vector<std::string> kiwix::getTitleVariants(const std::string& title) {
  std::vector<std::string> result;
  result.reserve(4);
  result.emplace_back(title);
  result.emplace_back(kiwix::ucFirst(title));
  result.emplace_back(kiwix::lcFirst(title));
  result.emplace_back(kiwix::toTitle(title));
  return result;
}

View File

@ -9,6 +9,8 @@ tests = [
'book', 'book',
'manager', 'manager',
'opds_catalog', 'opds_catalog',
'reader',
'searcher'
] ]
if build_machine.system() != 'windows' if build_machine.system() != 'windows'

62
test/reader.cpp Normal file
View File

@ -0,0 +1,62 @@
#include "gtest/gtest.h"
#include "../include/reader.h"
#include "zim/archive.h"
namespace kiwix
{
/**
* This test file is written primarily to demonstrate how Reader is simply a
* wrapper over an archive. We will be dropping this wrapper soon.
**/
// Checks that a handful of Reader accessors agree with the same queries
// made directly on the zim::Archive the Reader exposes.
TEST(Reader, archiveWrapper) {
  Reader reader("./test/zimfile.zim");
  zim::Archive archive(*reader.getZimArchive());

  // Reader::getId() is compared with the streamed form of the archive UUID.
  std::ostringstream uuidText;
  uuidText << archive.getUuid();
  ASSERT_EQ(reader.getId(), uuidText.str());

  ASSERT_EQ(reader.getGlobalCount(), archive.getEntryCount());
  ASSERT_EQ(reader.getMainPage().getTitle(), archive.getMainEntry().getTitle());
  ASSERT_EQ(reader.hasFulltextIndex(), archive.hasFulltextIndex());

  ASSERT_NO_THROW(reader.getRandomPage());
}
// Picks one random entry from the archive and checks that Reader can find
// it again by path and by title, and that an unknown path is rejected.
TEST(Reader, getFunctions) {
  zim::Archive archive("./test/zimfile.zim");
  Reader reader("./test/zimfile.zim");

  // A single random entry is used as the reference for every check below.
  auto reference = archive.getRandomEntry();

  // Lookup by path.
  ASSERT_TRUE(reader.pathExists(reference.getPath()));
  auto foundByPath = reader.getEntryFromPath(reference.getPath());
  ASSERT_EQ(foundByPath.getTitle(), reference.getTitle());

  // Unknown path: reported as absent and rejected with NoEntry.
  ASSERT_FALSE(reader.pathExists("invalidEntryPath"));
  ASSERT_THROW(reader.getEntryFromPath("invalidEntryPath"), NoEntry);

  // Lookup by title.
  auto foundByTitle = reader.getEntryFromTitle(reference.getTitle());
  ASSERT_EQ(foundByTitle.getTitle(), reference.getTitle());
}
// Asks Reader for at most 4 smart suggestions for "The Genius" and checks
// the returned titles, in order, against the expected list.
TEST(Reader, suggestions) {
  Reader reader("./test/zimfile.zim");
  SuggestionsList_t suggestions;
  reader.searchSuggestionsSmart("The Genius", 4, suggestions);

  // Only the titles are compared.
  std::vector<std::string> suggestionResult;
  for (auto& suggestion : suggestions) {
    suggestionResult.push_back(suggestion.getTitle());
  }

  const std::vector<std::string> expectedResult = {
    "The Genius After Hours",
    "The Genius Hits the Road",
    "The Genius Sings the Blues",
    "The Genius of Ray Charles"
  };
  ASSERT_EQ(suggestionResult, expectedResult);
}
}

25
test/searcher.cpp Normal file
View File

@ -0,0 +1,25 @@
#include "gtest/gtest.h"
#include "../include/searcher.h"
#include "../include/reader.h"
namespace kiwix
{
// Registers one Reader with a Searcher, searches "wiki" over the range
// (0, 2), and checks the estimated result count and the titles of the
// first two results.
TEST(Searcher, search) {
  Reader reader("./test/example.zim");

  Searcher searcher;
  searcher.add_reader(&reader);
  ASSERT_EQ(searcher.get_reader(0)->getTitle(), reader.getTitle());

  searcher.search("wiki", 0, 2);
  searcher.restart_search();
  ASSERT_EQ(searcher.getEstimatedResultCount(), 2u);

  auto firstHit = searcher.getNextResult();
  ASSERT_EQ(firstHit->get_title(), "FreedomBox for Communities/Offline Wikipedia - Wikibooks, open books for an open world");

  auto secondHit = searcher.getNextResult();
  ASSERT_EQ(secondHit->get_title(), "Wikibooks");
}
}