Remove libzim's wrapper.

It is time to remove them. They are deprecated since 10.0.0
This commit is contained in:
Matthieu Gautier 2022-06-22 17:50:31 +02:00 committed by Emmanuel Engelhart
parent 12e0fb6934
commit 69931fb347
No known key found for this signature in database
GPG Key ID: 120B30D020B553D3
21 changed files with 14 additions and 1848 deletions

View File

@ -38,7 +38,6 @@ namespace kiwix
{ {
class OPDSDumper; class OPDSDumper;
class Reader;
/** /**
* A class to store information about a book (a zim file) * A class to store information about a book (a zim file)
@ -69,7 +68,6 @@ class Book
~Book(); ~Book();
bool update(const Book& other); bool update(const Book& other);
DEPRECATED void update(const Reader& reader);
void update(const zim::Archive& archive); void update(const zim::Archive& archive);
void updateFromXml(const pugi::xml_node& node, const std::string& baseDir); void updateFromXml(const pugi::xml_node& node, const std::string& baseDir);
void updateFromOpds(const pugi::xml_node& node, const std::string& urlHost); void updateFromOpds(const pugi::xml_node& node, const std::string& urlHost);

View File

@ -1,193 +0,0 @@
/*
* Copyright 2018-2020 Matthieu Gautier <mgautier@kymeria.fr>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#ifndef KIWIX_ENTRY_H
#define KIWIX_ENTRY_H
#include <stdio.h>
#include <zim/entry.h>
#include <zim/item.h>
#include <exception>
#include <string>
#include "common.h"
using namespace std;
namespace kiwix
{
class NoEntry : public std::exception {};
/**
* A entry represent an.. entry in a zim file.
*/
class Entry
{
public:
/**
* Construct an entry making reference to an zim article.
*
* @param article a zim::Article object
*/
DEPRECATED Entry(zim::Entry entry) : Entry(entry, true) {};
virtual ~Entry() = default;
/**
* Get the path of the entry.
*
* The path is the "key" of an entry.
*
* @return the path of the entry.
*/
std::string getPath() const { return entry.getPath(); }
/**
* Get the title of the entry.
*
* @return the title of the entry.
*/
std::string getTitle() const { return entry.getTitle(); }
/**
* Get the content of the entry.
*
* The string is a copy of the content.
* If you don't want to do a copy, use get_blob.
*
* @return the content of the entry.
*/
std::string getContent() const { return entry.getItem().getData(); }
/**
* Get the blob of the entry.
*
* A blob make reference to the content without copying it.
*
* @param offset The starting offset of the blob.
* @return the blob of the entry.
*/
zim::Blob getBlob(offset_type offset = 0) const { return entry.getItem().getData(offset); }
/**
* Get the blob of the entry.
*
* A blob make reference to the content without copying it.
*
* @param offset The starting offset of the blob.
* @param size The size of the blob.
* @return the blob of the entry.
*/
zim::Blob getBlob(offset_type offset, size_type size) const { return entry.getItem().getData(offset, size); }
/**
* Get the info for direct access to the content of the entry.
*
* Some entry (ie binary ones) have their content plain stored
* in the zim file. Knowing the offset where the content is stored
* an user can directly read the content in the zim file bypassing the
* libkiwix/libzim.
*
* @return A pair specifying where to read the content.
* The string is the real file to read (may be different that .zim
* file if zim is cut).
* The offset is the offset to read in the file.
* Return <"",0> if is not possible to read directly.
*/
zim::Item::DirectAccessInfo getDirectAccessInfo() const { return entry.getItem().getDirectAccessInformation(); }
/**
* Get the size of the entry.
*
* @return the size of the entry.
*/
size_type getSize() const;
/**
* Get the mime_type of the entry.
*
* @return the mime_type of the entry.
*/
std::string getMimetype() const;
/**
* Get if the entry is a redirect entry.
*
* @return True if the entry is a redirect.
*/
bool isRedirect() const;
/**
* Get if the entry is a link target entry.
*
* @return True if the entry is a link target.
*/
bool isLinkTarget() const;
/**
* Get if the entry is a deleted entry.
*
* @return True if the entry is a deleted entry.
*/
bool isDeleted() const;
/**
* Get the entry pointed by this entry.
*
* @return the entry pointed.
* @throw NoEntry if the entry is not a redirected entry.
*/
Entry getRedirectEntry() const;
/**
* Get the final entry pointed by this entry.
*
* Follow the redirection until a "not redirecting" entry is found.
* If the entry is not a redirected entry, return the entry itself.
*
* @return the final entry.
*/
Entry getFinalEntry() const;
/**
* Get the zim entry wrapped by this (kiwix) entry
*
* @return the zim entry
*/
const zim::Entry& getZimEntry() const { return entry; }
private:
zim::Entry entry;
private:
// Entry is deprecated, so we've marked the constructor as deprecated.
// But we still need to construct the entry (in our deprecated code)
// To avoid warning because we use deprecated function, we create a second
// constructor not deprecated. The `bool marker` is unused, it sole purpose
// is to change the signature to have two different constructor.
// This one is not deprecated and we must use it in our private code.
Entry(zim::Entry entry, bool marker);
friend class Reader;
};
}
#endif // KIWIX_ENTRY_H

View File

@ -223,7 +223,6 @@ class Library
Book getBookByIdThreadSafe(const std::string& id) const; Book getBookByIdThreadSafe(const std::string& id) const;
DEPRECATED std::shared_ptr<Reader> getReaderById(const std::string& id);
std::shared_ptr<zim::Archive> getArchiveById(const std::string& id); std::shared_ptr<zim::Archive> getArchiveById(const std::string& id);
std::shared_ptr<ZimSearcher> getSearcherById(const std::string& id) { std::shared_ptr<ZimSearcher> getSearcherById(const std::string& id) {
return getSearcherByIds(BookIdSet{id}); return getSearcherByIds(BookIdSet{id});

View File

@ -22,7 +22,6 @@
#include "book.h" #include "book.h"
#include "library.h" #include "library.h"
#include "reader.h"
#include <string> #include <string>
#include <vector> #include <vector>

View File

@ -7,9 +7,6 @@ headers = [
'libxml_dumper.h', 'libxml_dumper.h',
'opds_dumper.h', 'opds_dumper.h',
'downloader.h', 'downloader.h',
'reader.h',
'entry.h',
'searcher.h',
'search_renderer.h', 'search_renderer.h',
'server.h', 'server.h',
'kiwixserve.h', 'kiwixserve.h',

View File

@ -27,7 +27,6 @@
#include <pugixml.hpp> #include <pugixml.hpp>
#include "library.h" #include "library.h"
#include "reader.h"
using namespace std; using namespace std;

View File

@ -1,506 +0,0 @@
/*
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#ifndef KIWIX_READER_H
#define KIWIX_READER_H
#include <stdio.h>
#include <zim/zim.h>
#include <zim/archive.h>
#include <exception>
#include <map>
#include <sstream>
#include <string>
#include "common.h"
#include "entry.h"
using namespace std;
namespace kiwix
{
/**
* The SuggestionItem is a helper class that contains the info about a single
* suggestion item.
*/
class SuggestionItem
{
// Functions
public:
// Create a sugggestion item.
explicit SuggestionItem(const std::string& title, const std::string& normalizedTitle,
const std::string& path, const std::string& snippet = "") :
title(title),
normalizedTitle(normalizedTitle),
path(path),
snippet(snippet) {}
public:
const std::string& getTitle() const { return title;}
const std::string& getNormalizedTitle() const { return normalizedTitle;}
const std::string& getPath() const { return path;}
const std::string& getSnippet() const { return snippet;}
bool hasSnippet() const { return !snippet.empty();}
// Data
private:
std::string title;
std::string normalizedTitle;
std::string path;
std::string snippet;
};
/**
* The Reader class is the class who allow to get an entry content from a zim
* file.
*
* Reader is now deprecated. Directly use `zim::Archive`.
*/
using SuggestionsList_t = std::vector<SuggestionItem>;
class Reader
{
public:
/**
* Create a Reader to read a zim file specified by zimFilePath.
*
* @param zimFilePath The path to the zim file to read.
* The zim file can be splitted (.zimaa, .zimab, ...).
* In this case, the file path must still point to the
* unsplitted path as if the file were not splitted
* (.zim extesion).
*/
explicit DEPRECATED Reader(const string zimFilePath);
/**
* Create a Reader to read a zim file given by the Archive.
*
* @param archive The shared pointer to the Archive object.
*/
explicit DEPRECATED Reader(const std::shared_ptr<zim::Archive> archive)
: Reader(archive, true) {};
#ifndef _WIN32
explicit DEPRECATED Reader(int fd);
DEPRECATED Reader(int fd, zim::offset_type offset, zim::size_type size);
#endif
~Reader() = default;
/**
* Get the number of "displayable" entries in the zim file.
*
* @return If the zim file has a /M/Counter metadata, return the number of
* entries with the 'text/html' MIMEtype specified in the metadata.
* Else return the number of entries in the 'A' namespace.
*/
unsigned int getArticleCount() const;
/**
* Get the number of media in the zim file.
*
* @return If the zim file has a /M/Counter metadata, return the number of
* entries with the 'image/jpeg', 'image/gif' and 'image/png' in
* the metadata.
* Else return the number of entries in the 'I' namespace.
*/
unsigned int getMediaCount() const;
/**
* Get the number of all entries in the zim file.
*
* @return Return the number of all the entries, whatever their MIMEtype or
* their namespace.
*/
unsigned int getGlobalCount() const;
/**
* Get the path of the zim file.
*
* @return the path of the zim file as given in the constructor.
*/
string getZimFilePath() const;
/**
* Get the Id of the zim file.
*
* @return The uuid stored in the zim file.
*/
string getId() const;
/**
* Get a random page.
*
* @return A random Entry. The entry is picked from all entries in
* the 'A' namespace.
* The main entry is excluded from the potential results.
*/
Entry getRandomPage() const;
/**
* Get the entry of the main page.
*
* @return Entry of the main page as specified in the zim file.
*/
Entry getMainPage() const;
/**
* Get the content of a metadata.
*
* @param[in] name The name of the metadata.
* @param[out] value The value will be set to the content of the metadata.
* @return True if it was possible to get the content of the metadata.
*/
bool getMetadata(const string& name, string& value) const;
/**
* Get the name of the zim file.
*
* @return The name of the zim file as specified in the zim metadata.
*/
string getName() const;
/**
* Get the title of the zim file.
*
* @return The title of zim file as specified in the zim metadata.
* If no title has been set, return a title computed from the
* file path.
*/
string getTitle() const;
/**
* Get the creator of the zim file.
*
* @return The creator of the zim file as specified in the zim metadata.
*/
string getCreator() const;
/**
* Get the publisher of the zim file.
*
* @return The publisher of the zim file as specified in the zim metadata.
*/
string getPublisher() const;
/**
* Get the date of the zim file.
*
* @return The date of the zim file as specified in the zim metadata.
*/
string getDate() const;
/**
* Get the description of the zim file.
*
* @return The description of the zim file as specified in the zim metadata.
* If no description has been set, return the subtitle.
*/
string getDescription() const;
/**
* Get the long description of the zim file.
*
* @return The long description of the zim file as specifed in the zim metadata.
*/
string getLongDescription() const;
/**
* Get the language of the zim file.
*
* @return The language of the zim file as specified in the zim metadata.
*/
string getLanguage() const;
/**
* Get the license of the zim file.
*
* @return The license of the zim file as specified in the zim metadata.
*/
string getLicense() const;
/**
* Get the tags of the zim file.
*
* @param original If true, return the original tags as specified in the zim metadata.
* Else, try to convert it to the new 'normalized' format.
* @return The tags of the zim file.
*/
string getTags(bool original=false) const;
/**
* Get the value (as a string) of a specific tag.
*
* According to https://wiki.openzim.org/wiki/Tags
*
* @return The value of the specified tag.
* @throw std::out_of_range if the specified tag is not found.
*/
string getTagStr(const std::string& tagName) const;
/**
* Get the boolean value of a specific tag.
*
* According to https://wiki.openzim.org/wiki/Tags
*
* @return The boolean value of the specified tag.
* @throw std::out_of_range if the specified tag is not found.
* std::domain_error if the value of the tag cannot be convert to bool.
*/
bool getTagBool(const std::string& tagName) const;
/**
* Get the relations of the zim file.
*
* @return The relation of the zim file as specified in the zim metadata.
*/
string getRelation() const;
/**
* Get the flavour of the zim file.
*
* @return The flavour of the zim file as specified in the zim metadata.
*/
string getFlavour() const;
/**
* Get the source of the zim file.
*
* @return The source of the zim file as specified in the zim metadata.
*/
string getSource() const;
/**
* Get the scraper of the zim file.
*
* @return The scraper of the zim file as specified in the zim metadata.
*/
string getScraper() const;
/**
* Get the favicon of the zim file.
*
* @param[out] content The content of the favicon.
* @param[out] mimeType The mimeType of the favicon.
* @return True if a favicon has been found.
*/
bool getFavicon(string& content, string& mimeType) const;
/**
* Get an entry associated to an path.
*
* @param path The path of the entry.
* @return The entry.
* @throw NoEntry If no entry correspond to the path.
*/
Entry getEntryFromPath(const std::string& path) const;
/**
* Get an entry associated to an url encoded path.
*
* Equivalent to `getEntryFromPath(urlDecode(path));`
*
* @param path The url encoded path.
* @return The entry.
* @throw NoEntry If no entry correspond to the path.
*/
Entry getEntryFromEncodedPath(const std::string& path) const;
/**
* Get un entry associated to a title.
*
* @param title The title.
* @return The entry
* throw NoEntry If no entry correspond to the url.
*/
Entry getEntryFromTitle(const std::string& title) const;
/**
* Search for entries with title starting with prefix (case sensitive).
*
* Suggestions are stored in an internal vector and can be retrieved using
* `getNextSuggestion` method.
* This method is not thread safe and is deprecated. Use :
* bool searchSuggestions(const string& prefix,
* unsigned int suggestionsCount,
* SuggestionsList_t& results);
*
* @param prefix The prefix to search.
* @param suggestionsCount How many suggestions to search for.
* @param reset If true, remove previous suggestions in the internal vector.
* If false, add suggestions to the internal vector
* (until internal vector size is suggestionCount (or no more
* suggestion))
* @return True if some suggestions have been added to the internal vector.
*/
DEPRECATED bool searchSuggestions(const string& prefix,
unsigned int suggestionsCount,
const bool reset = true);
/**
* Search for entries with title starting with prefix (case sensitive).
*
* Suggestions are added to the `result` vector.
*
* @param prefix The prefix to search.
* @param suggestionsCount How many suggestions to search for.
* @param result The vector where to store the suggestions.
* @return True if some suggestions have been added to the vector.
*/
bool searchSuggestions(const string& prefix,
unsigned int suggestionsCount,
SuggestionsList_t& resuls);
/**
* Search for entries for the given prefix.
*
* If the zim file has a internal fulltext index, the suggestions will be
* searched using it.
* Else the suggestions will be search using `searchSuggestions` while trying
* to be smart about case sensitivity (using `getTitleVariants`).
*
* In any case, suggestions are stored in an internal vector and can be
* retrieved using `getNextSuggestion` method.
* The internal vector will be reset.
* This method is not thread safe and is deprecated. Use :
* bool searchSuggestionsSmart(const string& prefix,
* unsigned int suggestionsCount,
* SuggestionsList_t& results);
*
* @param prefix The prefix to search for.
* @param suggestionsCount How many suggestions to search for.
*/
DEPRECATED bool searchSuggestionsSmart(const string& prefix,
unsigned int suggestionsCount);
/**
* Search for entries for the given prefix.
*
* If the zim file has a internal fulltext index, the suggestions will be
* searched using it.
* Else the suggestions will be search using `searchSuggestions` while trying
* to be smart about case sensitivity (using `getTitleVariants`).
*
* In any case, suggestions are stored in an internal vector and can be
* retrieved using `getNextSuggestion` method.
* The internal vector will be reset.
*
* @param prefix The prefix to search for.
* @param suggestionsCount How many suggestions to search for.
* @param results The vector where to store the suggestions
* @return True if some suggestions have been added to the results.
*/
bool searchSuggestionsSmart(const string& prefix,
unsigned int suggestionsCount,
SuggestionsList_t& results);
/**
* Check if the path exists in the zim file.
*
* @param path the path to check.
* @return True if the path exists in the zim file.
*/
bool pathExists(const string& path) const;
/**
* Check if the zim file has a embedded fulltext index.
*
* @return True if the zim file has a embedded fulltext index
* and is not split (else the fulltext is not accessible).
*/
bool hasFulltextIndex() const;
/**
* Get potential case title variations for a title.
*
* @param title a title.
* @return the list of variantions.
*/
std::vector<std::string> getTitleVariants(const std::string& title) const;
/**
* Get the next suggestion title.
*
* @param[out] title the title of the suggestion.
* @return True if title has been set.
*/
DEPRECATED bool getNextSuggestion(string& title);
/**
* Get the next suggestion title and url.
*
* @param[out] title the title of the suggestion.
* @param[out] url the url of the suggestion.
* @return True if title and url have been set.
*/
DEPRECATED bool getNextSuggestion(string& title, string& url);
/**
* Get if we can check zim file integrity (has a checksum).
*
* @return True if zim file have a checksum.
*/
bool canCheckIntegrity() const;
/**
* Check is zim file is corrupted.
*
* @return True if zim file is corrupted.
*/
bool isCorrupted() const;
/**
* Return the total size of the zim file.
*
* If zim file is split, return the sum of all parts' size.
*
* @return Size of the size file is KiB.
*/
unsigned int getFileSize() const;
/**
* Get the zim file handler.
*
* @return The libzim file handler.
*/
zim::Archive* getZimArchive() const;
protected:
std::shared_ptr<zim::Archive> zimArchive;
std::string zimFilePath;
SuggestionsList_t suggestions;
SuggestionsList_t::iterator suggestionsOffset;
private:
std::map<const std::string, unsigned int> parseCounterMetadata() const;
// Reader is deprecated, so we've marked the constructor as deprecated.
// But we still need to construct the reader (in our deprecated code)
// To avoid warning because we use deprecated function, we create a
// constructor not deprecated. The `bool marker` is unused, it sole purpose
// is to change the signature to have a different constructor.
// This one is not deprecated and we must use it in our private code.
Reader(const std::shared_ptr<zim::Archive> archive, bool marker);
friend class Library;
};
}
#endif

View File

@ -27,7 +27,6 @@
namespace kiwix namespace kiwix
{ {
class Searcher;
class NameMapper; class NameMapper;
/** /**
* The SearcherRenderer class is used to render a search result to a html page. * The SearcherRenderer class is used to render a search result to a html page.
@ -35,17 +34,6 @@ class NameMapper;
class SearchRenderer class SearchRenderer
{ {
public: public:
/**
* Construct a SearchRenderer from a Searcher.
*
* This method is now deprecated. Construct the renderer from a
* `zim::SearchResultSet`
*
* @param searcher The `Searcher` to render.
* @param mapper The `NameMapper` to use to do the rendering.
*/
DEPRECATED SearchRenderer(Searcher* searcher, NameMapper* mapper);
/** /**
* Construct a SearchRenderer from a SearchResultSet. * Construct a SearchRenderer from a SearchResultSet.
* *

View File

@ -1,180 +0,0 @@
/*
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#ifndef KIWIX_SEARCHER_H
#define KIWIX_SEARCHER_H
#include <stdio.h>
#include <stdlib.h>
#include <unicode/putil.h>
#include <algorithm>
#include <cctype>
#include <locale>
#include <string>
#include <memory>
#include <vector>
#include <zim/search.h>
using namespace std;
namespace kiwix
{
class Reader;
class Result
{
public:
virtual ~Result(){};
virtual std::string get_url() = 0;
virtual std::string get_title() = 0;
virtual int get_score() = 0;
virtual std::string get_snippet() = 0;
virtual std::string get_content() = 0;
virtual int get_wordCount() = 0;
virtual int get_size() = 0;
virtual std::string get_zimId() = 0;
};
struct SearcherInternal;
struct SuggestionInternal;
/**
* The Searcher class is reponsible to do different kind of search using the
* fulltext index.
*
* The Searcher is now deprecated. Use libzim search feature.
*/
class Searcher
{
public:
/**
* The default constructor.
*/
DEPRECATED Searcher();
~Searcher();
/**
* Add a reader (containing embedded fulltext index) to the search.
*
* @param reader The Reader for the zim containing the fulltext index.
* @return true if the reader has been added.
* false if the reader cannot be added (no embedded fulltext index present)
*/
bool add_reader(std::shared_ptr<Reader> reader);
std::shared_ptr<Reader> get_reader(int index);
/**
* Start a search on the zim associated to the Searcher.
*
* Search results should be retrived using the getNextResult method.
*
* @param search The search query.
* @param resultStart the start offset of the search results (used for pagination).
* @param maxResultCount Maximum results to get from start (used for pagination).
* @param verbose print some info on stdout if true.
*/
void search(const std::string& search,
unsigned int resultStart,
unsigned int maxResultCount,
const bool verbose = false);
/**
* Start a geographique search.
* The search return result for entry in a disc of center latitude/longitude
* and radius distance.
*
* Search results should be retrived using the getNextResult method.
*
* @param latitude The latitude of the center point.
* @param longitude The longitude of the center point.
* @param distance The radius of the disc.
* @param resultStart the start offset of the search results (used for pagination).
* @param maxResultCount Maximum number of results to get from start (used for pagination).
* @param verbose print some info on stdout if true.
*/
void geo_search(float latitude, float longitude, float distance,
unsigned int resultStart,
unsigned int maxResultCount,
const bool verbose = false);
/**
* Start a suggestion search.
* The search made depend of the "version" of the embedded index.
* - If the index is newer enough and have a title namespace, the search is
* made in the titles only.
* - Else the search is made on the whole article content.
* In any case, the search is made "partial" (as adding '*' at the end of the query)
*
* @param search The search query.
* @param verbose print some info on stdout if true.
*/
void suggestions(std::string& search, const bool verbose = false);
/**
* Get the next result of a started search.
* This is the method to use to loop hover the search results.
*/
Result* getNextResult();
/**
* Restart the previous search.
* Next call to getNextResult will return the first result.
*/
void restart_search();
/**
* Get a estimation of the result count.
*/
unsigned int getEstimatedResultCount();
/**
* Get a SearchResultSet object for current search
*/
zim::SearchResultSet getSearchResultSet();
unsigned int getResultStart() { return resultStart; }
unsigned int getMaxResultCount() { return maxResultCount; }
protected:
std::string beautifyInteger(const unsigned int number);
void closeIndex();
void searchInIndex(string& search,
const unsigned int resultStart,
const unsigned int maxResultCount,
const bool verbose = false);
std::vector<std::shared_ptr<Reader>> readers;
std::unique_ptr<SearcherInternal> internal;
std::unique_ptr<SuggestionInternal> suggestionInternal;
std::string searchPattern;
unsigned int estimatedResultCount;
unsigned int resultStart;
unsigned int maxResultCount;
private:
void reset();
};
}
#endif

View File

@ -18,7 +18,6 @@
*/ */
#include "book.h" #include "book.h"
#include "reader.h"
#include "tools.h" #include "tools.h"
#include "tools/base64.h" #include "tools/base64.h"
@ -30,7 +29,7 @@
#include "tools/archiveTools.h" #include "tools/archiveTools.h"
#include <zim/archive.h> #include <zim/archive.h>
#include <zim/item.h>
#include <pugixml.hpp> #include <pugixml.hpp>
namespace kiwix namespace kiwix
@ -64,11 +63,6 @@ bool Book::update(const kiwix::Book& other)
return true; return true;
} }
void Book::update(const kiwix::Reader& reader)
{
update(*reader.getZimArchive());
}
void Book::update(const zim::Archive& archive) { void Book::update(const zim::Archive& archive) {
m_path = archive.getFilename(); m_path = archive.getFilename();
m_pathValid = true; m_pathValid = true;

View File

@ -1,73 +0,0 @@
/*
* Copyright 2018-2020 Matthieu Gautier <mgautier@kymeria.fr>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#include "reader.h"
#include <time.h>
namespace kiwix
{
Entry::Entry(zim::Entry entry, bool _marker)
: entry(entry)
{
}
size_type Entry::getSize() const
{
if (entry.isRedirect()) {
return 0;
} else {
return entry.getItem().getSize();
}
}
std::string Entry::getMimetype() const
{
return entry.getItem(true).getMimetype();
}
bool Entry::isRedirect() const
{
return entry.isRedirect();
}
Entry Entry::getRedirectEntry() const
{
if ( !entry.isRedirect() ) {
throw NoEntry();
}
return Entry(entry.getRedirectEntry(), true);
}
Entry Entry::getFinalEntry() const
{
int loopCounter = 42;
auto final_entry = entry;
while (final_entry.isRedirect() && loopCounter--) {
final_entry = final_entry.getRedirectEntry();
}
// Prevent infinite loops.
if (final_entry.isRedirect()) {
throw NoEntry();
}
return Entry(final_entry, true);
}
}

View File

@ -19,7 +19,6 @@
#include "library.h" #include "library.h"
#include "book.h" #include "book.h"
#include "reader.h"
#include "libxml_dumper.h" #include "libxml_dumper.h"
#include "tools.h" #include "tools.h"
@ -278,16 +277,6 @@ const Book& Library::getBookByPath(const std::string& path) const
throw std::out_of_range(ss.str()); throw std::out_of_range(ss.str());
} }
std::shared_ptr<Reader> Library::getReaderById(const std::string& id)
{
auto archive = getArchiveById(id);
if(archive) {
return std::shared_ptr<Reader>(new Reader(archive, true));
} else {
return nullptr;
}
}
std::shared_ptr<zim::Archive> Library::getArchiveById(const std::string& id) std::shared_ptr<zim::Archive> Library::getArchiveById(const std::string& id)
{ {
try { try {

View File

@ -6,10 +6,7 @@ kiwix_sources = [
'libxml_dumper.cpp', 'libxml_dumper.cpp',
'opds_dumper.cpp', 'opds_dumper.cpp',
'downloader.cpp', 'downloader.cpp',
'reader.cpp',
'entry.cpp',
'server.cpp', 'server.cpp',
'searcher.cpp',
'search_renderer.cpp', 'search_renderer.cpp',
'subprocess.cpp', 'subprocess.cpp',
'aria2.cpp', 'aria2.cpp',

View File

@ -1,472 +0,0 @@
/*
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#include "reader.h"
#include <time.h>
#include <zim/search.h>
#include <zim/suggestion.h>
#include <zim/item.h>
#include <zim/error.h>
#include "tools.h"
#include "tools/stringTools.h"
#include "tools/otherTools.h"
#include "tools/archiveTools.h"
namespace kiwix
{
/* Constructor */
Reader::Reader(const string zimFilePath)
: zimArchive(nullptr),
zimFilePath(zimFilePath)
{
string tmpZimFilePath = zimFilePath;
/* Remove potential trailing zimaa */
size_t found = tmpZimFilePath.rfind("zimaa");
if (found != string::npos && tmpZimFilePath.size() > 5
&& found == tmpZimFilePath.size() - 5) {
tmpZimFilePath.resize(tmpZimFilePath.size() - 2);
}
zimArchive.reset(new zim::Archive(tmpZimFilePath));
/* initialize random seed: */
srand(time(nullptr));
}
Reader::Reader(const std::shared_ptr<zim::Archive> archive, bool _marker)
: zimArchive(archive),
zimFilePath(archive->getFilename())
{}
#ifndef _WIN32
Reader::Reader(int fd)
: zimArchive(new zim::Archive(fd)),
zimFilePath("")
{
/* initialize random seed: */
srand(time(nullptr));
}
Reader::Reader(int fd, zim::offset_type offset, zim::size_type size)
: zimArchive(new zim::Archive(fd, offset, size)),
zimFilePath("")
{
/* initialize random seed: */
srand(time(nullptr));
}
#endif // #ifndef _WIN32
zim::Archive* Reader::getZimArchive() const
{
return zimArchive.get();
}
MimeCounterType Reader::parseCounterMetadata() const
{
return kiwix::parseArchiveCounter(*zimArchive);
}
/* Get the count of articles which can be indexed/displayed */
unsigned int Reader::getArticleCount() const
{
std::map<const std::string, unsigned int> counterMap
= this->parseCounterMetadata();
unsigned int counter = 0;
for(auto &pair:counterMap) {
if (startsWith(pair.first, "text/html")) {
counter += pair.second;
}
}
return counter;
}
/* Get the count of medias content in the ZIM file */
unsigned int Reader::getMediaCount() const
{
return kiwix::getArchiveMediaCount(*zimArchive);
}
/* Get the total of all items of a ZIM file, redirects included */
unsigned int Reader::getGlobalCount() const
{
return zimArchive->getEntryCount();
}
/* Return the UID of the ZIM file */
string Reader::getId() const
{
return kiwix::getArchiveId(*zimArchive);
}
Entry Reader::getRandomPage() const
{
try {
return Entry(zimArchive->getRandomEntry(), true);
} catch(...) {
throw NoEntry();
}
}
Entry Reader::getMainPage() const
{
return Entry(zimArchive->getMainEntry(), true);
}
bool Reader::getFavicon(string& content, string& mimeType) const
{
return kiwix::getArchiveFavicon(*zimArchive, 48, content, mimeType);
}
string Reader::getZimFilePath() const
{
return zimFilePath;
}
/* Return a metatag value */
bool Reader::getMetadata(const string& name, string& value) const
{
try {
value = zimArchive->getMetadata(name);
return true;
} catch(zim::EntryNotFound& e) {
return false;
}
}
#define METADATA(NAME) std::string v; getMetadata(NAME, v); return v;
string Reader::getName() const
{
return kiwix::getMetaName(*zimArchive);
}
string Reader::getTitle() const
{
return kiwix::getArchiveTitle(*zimArchive);
}
string Reader::getCreator() const
{
return kiwix::getMetaCreator(*zimArchive);
}
string Reader::getPublisher() const
{
return kiwix::getMetaPublisher(*zimArchive);
}
string Reader::getDate() const
{
return kiwix::getMetaDate(*zimArchive);
}
string Reader::getDescription() const
{
return kiwix::getMetaDescription(*zimArchive);
}
string Reader::getLongDescription() const
{
METADATA("LongDescription")
}
string Reader::getLanguage() const
{
return kiwix::getMetaLanguage(*zimArchive);
}
string Reader::getLicense() const
{
METADATA("License")
}
string Reader::getTags(bool original) const
{
return kiwix::getMetaTags(*zimArchive, original);
}
string Reader::getTagStr(const std::string& tagName) const
{
string tags_str;
getMetadata("Tags", tags_str);
return getTagValueFromTagList(convertTags(tags_str), tagName);
}
bool Reader::getTagBool(const std::string& tagName) const
{
return convertStrToBool(getTagStr(tagName));
}
string Reader::getRelation() const
{
METADATA("Relation")
}
string Reader::getFlavour() const
{
return kiwix::getMetaFlavour(*zimArchive);
}
string Reader::getSource() const
{
METADATA("Source")
}
string Reader::getScraper() const
{
METADATA("Scraper")
}
#undef METADATA
Entry Reader::getEntryFromPath(const std::string& path) const
{
try {
return Entry(kiwix::getEntryFromPath(*zimArchive, path), true);
} catch (zim::EntryNotFound& e) {
throw NoEntry();
}
}
Entry Reader::getEntryFromEncodedPath(const std::string& path) const
{
return getEntryFromPath(urlDecode(path, true));
}
Entry Reader::getEntryFromTitle(const std::string& title) const
{
try {
return Entry(zimArchive->getEntryByTitle(title), true);
} catch(zim::EntryNotFound& e) {
throw NoEntry();
}
}
bool Reader::pathExists(const string& path) const
{
return zimArchive->hasEntryByPath(path);
}
/* Does the ZIM file has a fulltext index */
bool Reader::hasFulltextIndex() const
{
return zimArchive->hasFulltextIndex();
}
/* Search titles by prefix */
bool Reader::searchSuggestions(const string& prefix,
unsigned int suggestionsCount,
const bool reset)
{
/* Reset the suggestions otherwise check if the suggestions number is less
* than the suggestionsCount */
if (reset) {
this->suggestions.clear();
this->suggestionsOffset = this->suggestions.begin();
} else {
if (this->suggestions.size() > suggestionsCount) {
return false;
}
}
auto ret = searchSuggestions(prefix, suggestionsCount, this->suggestions);
/* Set the cursor to the begining */
this->suggestionsOffset = this->suggestions.begin();
return ret;
}
bool Reader::searchSuggestions(const string& prefix,
unsigned int suggestionsCount,
SuggestionsList_t& results)
{
bool retVal = false;
/* Return if no prefix */
if (prefix.size() == 0) {
return false;
}
for (auto& entry: zimArchive->findByTitle(prefix)) {
if (results.size() >= suggestionsCount) {
break;
}
/* Extract the interesting part of article title & url */
std::string normalizedArticleTitle
= kiwix::normalize(entry.getTitle());
// Get the final path.
auto item = entry.getItem(true);
std::string articleFinalUrl = item.getPath();
/* Go through all already found suggestions and skip if this
article is already in the suggestions list (with an other
title) */
bool insert = true;
std::vector<SuggestionItem>::iterator suggestionItr;
for (suggestionItr = results.begin();
suggestionItr != results.end();
suggestionItr++) {
int result = normalizedArticleTitle.compare((*suggestionItr).getNormalizedTitle());
if (result == 0 && articleFinalUrl.compare((*suggestionItr).getPath()) == 0) {
insert = false;
break;
} else if (result < 0) {
break;
}
}
/* Insert if possible */
if (insert) {
SuggestionItem suggestion(entry.getTitle(), normalizedArticleTitle, articleFinalUrl);
results.insert(suggestionItr, suggestion);
}
/* Suggestions where found */
retVal = true;
}
return retVal;
}
std::vector<std::string> Reader::getTitleVariants(
const std::string& title) const
{
return kiwix::getTitleVariants(title);
}
bool Reader::searchSuggestionsSmart(const string& prefix,
unsigned int suggestionsCount)
{
this->suggestions.clear();
this->suggestionsOffset = this->suggestions.begin();
auto ret = searchSuggestionsSmart(prefix, suggestionsCount, this->suggestions);
this->suggestionsOffset = this->suggestions.begin();
return ret;
}
/* Try also a few variations of the prefix to have better results */
bool Reader::searchSuggestionsSmart(const string& prefix,
unsigned int suggestionsCount,
SuggestionsList_t& results)
{
std::vector<std::string> variants = this->getTitleVariants(prefix);
auto suggestionSearcher = zim::SuggestionSearcher(*zimArchive);
if (zimArchive->hasTitleIndex()) {
auto suggestionSearch = suggestionSearcher.suggest(prefix);
const auto suggestions = suggestionSearch.getResults(0, suggestionsCount);
for (auto current : suggestions) {
SuggestionItem suggestion(current.getTitle(), kiwix::normalize(current.getTitle()),
current.getPath(), current.getSnippet());
results.push_back(suggestion);
}
} else {
// Check some of the variants of the prefix
for (std::vector<std::string>::iterator variantsItr = variants.begin();
variantsItr != variants.end();
variantsItr++) {
auto suggestionSearch = suggestionSearcher.suggest(*variantsItr);
for (auto current : suggestionSearch.getResults(0, suggestionsCount)) {
if (results.size() >= suggestionsCount) {
break;
}
SuggestionItem suggestion(current.getTitle(), kiwix::normalize(current.getTitle()),
current.getPath(), current.getSnippet());
results.push_back(suggestion);
}
}
}
return results.size() > 0;
}
/* Get next suggestion */
bool Reader::getNextSuggestion(string& title)
{
if (this->suggestionsOffset != this->suggestions.end()) {
/* title */
title = (*(this->suggestionsOffset)).getTitle();
/* increment the cursor for the next call */
this->suggestionsOffset++;
return true;
}
return false;
}
bool Reader::getNextSuggestion(string& title, string& url)
{
if (this->suggestionsOffset != this->suggestions.end()) {
/* title */
title = (*(this->suggestionsOffset)).getTitle();
url = (*(this->suggestionsOffset)).getPath();
/* increment the cursor for the next call */
this->suggestionsOffset++;
return true;
}
return false;
}
/* Check if the file has as checksum */
bool Reader::canCheckIntegrity() const
{
return zimArchive->hasChecksum();
}
/* Return true if corrupted, false otherwise */
bool Reader::isCorrupted() const
{
try {
if (zimArchive->check() == true) {
return false;
}
} catch (exception& e) {
cerr << e.what() << endl;
return true;
}
return true;
}
/* Return the file size, works also for splitted files */
unsigned int Reader::getFileSize() const
{
return kiwix::getArchiveFileSize(*zimArchive);
}
}

View File

@ -21,8 +21,6 @@
#include <cmath> #include <cmath>
#include "search_renderer.h" #include "search_renderer.h"
#include "searcher.h"
#include "reader.h"
#include "library.h" #include "library.h"
#include "name_mapper.h" #include "name_mapper.h"
@ -38,16 +36,6 @@ namespace kiwix
{ {
/* Constructor */ /* Constructor */
SearchRenderer::SearchRenderer(Searcher* searcher, NameMapper* mapper)
: SearchRenderer(
/* srs */ searcher->getSearchResultSet(),
/* mapper */ mapper,
/* library */ nullptr,
/* start */ searcher->getResultStart(),
/* estimatedResultCount */ searcher->getEstimatedResultCount()
)
{}
SearchRenderer::SearchRenderer(zim::SearchResultSet srs, NameMapper* mapper, SearchRenderer::SearchRenderer(zim::SearchResultSet srs, NameMapper* mapper,
unsigned int start, unsigned int estimatedResultCount) unsigned int start, unsigned int estimatedResultCount)
: SearchRenderer(srs, mapper, nullptr, start, estimatedResultCount) : SearchRenderer(srs, mapper, nullptr, start, estimatedResultCount)

View File

@ -1,330 +0,0 @@
/*
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#include "searcher.h"
#include "reader.h"
#include <zim/search.h>
#include <zim/suggestion.h>
#include <mustache.hpp>
#include <cmath>
#include "tools/stringTools.h"
#include "kiwixlib-resources.h"
#define MAX_SEARCH_LEN 140
namespace kiwix
{
class _Result : public Result
{
public:
_Result(zim::SearchResultSet::iterator iterator);
_Result(SuggestionItem suggestionItem);
virtual ~_Result(){};
virtual std::string get_url();
virtual std::string get_title();
virtual int get_score();
virtual std::string get_snippet();
virtual std::string get_content();
virtual int get_wordCount();
virtual int get_size();
virtual std::string get_zimId();
private:
zim::SearchResultSet::iterator iterator;
SuggestionItem suggestionItem;
bool isSuggestion;
};
struct SearcherInternal : zim::SearchResultSet {
explicit SearcherInternal(const zim::SearchResultSet& srs)
: zim::SearchResultSet(srs)
, current_iterator(srs.begin())
{
}
zim::SearchResultSet::iterator current_iterator;
};
struct SuggestionInternal : zim::SuggestionResultSet {
explicit SuggestionInternal(const zim::SuggestionResultSet& srs)
: zim::SuggestionResultSet(srs),
currentIterator(srs.begin()) {}
zim::SuggestionResultSet::iterator currentIterator;
};
/* Constructor */
Searcher::Searcher()
: searchPattern(""),
estimatedResultCount(0),
resultStart(0),
maxResultCount(0)
{
loadICUExternalTables();
}
/* Destructor */
Searcher::~Searcher()
{
}
bool Searcher::add_reader(std::shared_ptr<Reader> reader)
{
if (!reader->hasFulltextIndex()) {
return false;
}
for ( auto existing_reader : readers ) {
if ( existing_reader->getZimArchive()->getUuid() == reader->getZimArchive()->getUuid() )
return false;
}
this->readers.push_back(reader);
return true;
}
std::shared_ptr<Reader> Searcher::get_reader(int readerIndex)
{
return readers.at(readerIndex);
}
/* Search strings in the database */
void Searcher::search(const std::string& search,
unsigned int resultStart,
unsigned int maxResultCount,
const bool verbose)
{
this->reset();
if (verbose == true) {
cout << "Performing query `" << search << "'" << endl;
}
this->searchPattern = search;
this->resultStart = resultStart;
this->maxResultCount = maxResultCount;
/* Try to find results */
if (maxResultCount != 0) {
/* Perform the search */
string unaccentedSearch = removeAccents(search);
std::vector<zim::Archive> archives;
for (auto current = this->readers.begin(); current != this->readers.end();
current++) {
if ( (*current)->hasFulltextIndex() ) {
archives.push_back(*(*current)->getZimArchive());
}
}
zim::Searcher searcher(archives);
searcher.setVerbose(verbose);
zim::Query query;
query.setQuery(unaccentedSearch);
zim::Search search = searcher.search(query);
internal.reset(new SearcherInternal(search.getResults(resultStart, maxResultCount)));
this->estimatedResultCount = search.getEstimatedMatches();
}
return;
}
void Searcher::geo_search(float latitude, float longitude, float distance,
unsigned int resultStart,
unsigned int maxResultCount,
const bool verbose)
{
this->reset();
if (verbose == true) {
cout << "Performing geo query `" << distance << "&(" << latitude << ";" << longitude << ")'" << endl;
}
/* Perform the search */
std::ostringstream oss;
oss << "Articles located less than " << distance << " meters of " << latitude << ";" << longitude;
this->searchPattern = oss.str();
this->resultStart = resultStart;
this->maxResultCount = maxResultCount;
/* Try to find results */
if (maxResultCount == 0) {
return;
}
std::vector<zim::Archive> archives;
for (auto current = this->readers.begin(); current != this->readers.end();
current++) {
archives.push_back(*(*current)->getZimArchive());
}
zim::Searcher searcher(archives);
searcher.setVerbose(verbose);
zim::Query query;
query.setQuery("");
query.setGeorange(latitude, longitude, distance);
zim::Search search = searcher.search(query);
internal.reset(new SearcherInternal(search.getResults(resultStart, maxResultCount)));
this->estimatedResultCount = search.getEstimatedMatches();
}
void Searcher::restart_search()
{
if (internal.get()) {
internal->current_iterator = internal->begin();
}
}
Result* Searcher::getNextResult()
{
if (internal.get() && internal->current_iterator != internal->end()) {
Result* result = new _Result(internal->current_iterator);
internal->current_iterator++;
return result;
} else if (suggestionInternal.get() &&
suggestionInternal->currentIterator != suggestionInternal->end()) {
SuggestionItem item(
suggestionInternal->currentIterator->getTitle(),
normalize(suggestionInternal->currentIterator->getTitle()),
suggestionInternal->currentIterator->getPath(),
suggestionInternal->currentIterator->getSnippet()
);
Result* result = new _Result(item);
suggestionInternal->currentIterator++;
return result;
}
return NULL;
}
/* Reset the results */
void Searcher::reset()
{
this->estimatedResultCount = 0;
this->searchPattern = "";
return;
}
void Searcher::suggestions(std::string& searchPattern, const bool verbose)
{
this->reset();
if (verbose == true) {
cout << "Performing suggestion query `" << searchPattern << "`" << endl;
}
this->searchPattern = searchPattern;
this->resultStart = 0;
this->maxResultCount = 10;
string unaccentedSearch = removeAccents(searchPattern);
// Multizim suggestion is not supported as of now! taking only one archive
zim::Archive archive = *(*this->readers.begin())->getZimArchive();
zim::SuggestionSearcher searcher(archive);
searcher.setVerbose(verbose);
zim::SuggestionSearch search = searcher.suggest(searchPattern);
suggestionInternal.reset(new SuggestionInternal(search.getResults(resultStart, maxResultCount)));
this->estimatedResultCount = search.getEstimatedMatches();
}
/* Return the result count estimation */
unsigned int Searcher::getEstimatedResultCount()
{
return this->estimatedResultCount;
}
zim::SearchResultSet Searcher::getSearchResultSet()
{
return *(this->internal);
}
_Result::_Result(zim::SearchResultSet::iterator iterator)
: iterator(iterator),
suggestionItem("", "", ""),
isSuggestion(false)
{}
_Result::_Result(SuggestionItem item)
: iterator(),
suggestionItem(item.getTitle(), item.getNormalizedTitle(), item.getPath(), item.getSnippet()),
isSuggestion(true)
{}
std::string _Result::get_url()
{
if (isSuggestion) {
return suggestionItem.getPath();
}
return iterator.getPath();
}
std::string _Result::get_title()
{
if (isSuggestion) {
return suggestionItem.getTitle();
}
return iterator.getTitle();
}
int _Result::get_score()
{
if (isSuggestion) {
return 0;
}
return iterator.getScore();
}
std::string _Result::get_snippet()
{
if (isSuggestion) {
return suggestionItem.getSnippet();
}
return iterator.getSnippet();
}
std::string _Result::get_content()
{
if (isSuggestion) return "";
return iterator->getItem(true).getData();
}
int _Result::get_size()
{
if (isSuggestion) {
return 0;
}
return iterator.getSize();
}
int _Result::get_wordCount()
{
if (isSuggestion) {
return 0;
}
return iterator.getWordCount();
}
std::string _Result::get_zimId()
{
if (isSuggestion) {
return "";
}
std::ostringstream s;
s << iterator.getZimId();
return s.str();
}
}

View File

@ -24,6 +24,7 @@
#include <string> #include <string>
#include <zim/item.h>
#include "server/internalServer.h" #include "server/internalServer.h"
namespace kiwix { namespace kiwix {

View File

@ -51,8 +51,6 @@ extern "C" {
#include "tools/networkTools.h" #include "tools/networkTools.h"
#include "library.h" #include "library.h"
#include "name_mapper.h" #include "name_mapper.h"
#include "entry.h"
#include "searcher.h"
#include "search_renderer.h" #include "search_renderer.h"
#include "opds_dumper.h" #include "opds_dumper.h"
#include "i18n.h" #include "i18n.h"
@ -61,6 +59,7 @@ extern "C" {
#include <zim/error.h> #include <zim/error.h>
#include <zim/entry.h> #include <zim/entry.h>
#include <zim/item.h> #include <zim/item.h>
#include <zim/suggestion.h>
#include <mustache.hpp> #include <mustache.hpp>
@ -618,40 +617,6 @@ std::unique_ptr<Response> InternalServer::build_homepage(const RequestContext& r
* Archive and Zim handlers begin * Archive and Zim handlers begin
**/ **/
SuggestionsList_t getSuggestions(SuggestionSearcherCache& cache, const zim::Archive* const archive,
const std::string& bookId, const std::string& queryString, int start, int suggestionCount)
{
SuggestionsList_t suggestions;
std::shared_ptr<zim::SuggestionSearcher> searcher;
searcher = cache.getOrPut(bookId, [=](){ return make_shared<zim::SuggestionSearcher>(*archive); });
if (archive->hasTitleIndex()) {
auto search = searcher->suggest(queryString);
auto srs = search.getResults(start, suggestionCount);
for (auto it : srs) {
SuggestionItem suggestion(it.getTitle(), kiwix::normalize(it.getTitle()),
it.getPath(), it.getSnippet());
suggestions.push_back(suggestion);
}
} else {
// TODO: This case should be handled by libzim
std::vector<std::string> variants = getTitleVariants(queryString);
int currCount = 0;
for (auto it = variants.begin(); it != variants.end() && currCount < suggestionCount; it++) {
auto search = searcher->suggest(queryString);
auto srs = search.getResults(0, suggestionCount);
for (auto it : srs) {
SuggestionItem suggestion(it.getTitle(), kiwix::normalize(it.getTitle()),
it.getPath());
suggestions.push_back(suggestion);
currCount++;
}
}
}
return suggestions;
}
std::unique_ptr<Response> InternalServer::handle_suggest(const RequestContext& request) std::unique_ptr<Response> InternalServer::handle_suggest(const RequestContext& request)
{ {
if (m_verbose.load()) { if (m_verbose.load()) {
@ -690,9 +655,13 @@ std::unique_ptr<Response> InternalServer::handle_suggest(const RequestContext& r
bool first = true; bool first = true;
/* Get the suggestions */ /* Get the suggestions */
SuggestionsList_t suggestions = getSuggestions(suggestionSearcherCache, archive.get(), auto searcher = suggestionSearcherCache.getOrPut(bookId,
bookId, queryString, start, count); [=](){ return make_shared<zim::SuggestionSearcher>(*archive); }
for(auto& suggestion:suggestions) { );
auto search = searcher->suggest(queryString);
auto srs = search.getResults(start, count);
for(auto& suggestion: srs) {
MustacheData result; MustacheData result;
result.set("label", suggestion.getTitle()); result.set("label", suggestion.getTitle());

View File

@ -91,7 +91,6 @@ typedef kainjow::mustache::data MustacheData;
typedef ConcurrentCache<SearchInfo, std::shared_ptr<zim::Search>> SearchCache; typedef ConcurrentCache<SearchInfo, std::shared_ptr<zim::Search>> SearchCache;
typedef ConcurrentCache<string, std::shared_ptr<zim::SuggestionSearcher>> SuggestionSearcherCache; typedef ConcurrentCache<string, std::shared_ptr<zim::SuggestionSearcher>> SuggestionSearcherCache;
class Entry;
class OPDSDumper; class OPDSDumper;
class InternalServer { class InternalServer {

View File

@ -26,10 +26,11 @@
#include <mustache.hpp> #include <mustache.hpp>
#include "byte_range.h" #include "byte_range.h"
#include "entry.h"
#include "etag.h" #include "etag.h"
#include "i18n.h" #include "i18n.h"
#include <zim/item.h>
extern "C" { extern "C" {
#include "microhttpd_wrapper.h" #include "microhttpd_wrapper.h"
} }

View File

@ -3,6 +3,8 @@
#include "../include/server.h" #include "../include/server.h"
#include "../include/name_mapper.h" #include "../include/name_mapper.h"
#include "../include/tools.h" #include "../include/tools.h"
#include <zim/entry.h>
#include <zim/item.h>
// Output generated via mustache templates sometimes contains end-of-line // Output generated via mustache templates sometimes contains end-of-line
// whitespace. This complicates representing the expected output of a unit-test // whitespace. This complicates representing the expected output of a unit-test