Remove libzim's wrapper.

It is time to remove them. They are deprecated since 10.0.0
This commit is contained in:
Matthieu Gautier
2022-06-22 17:50:31 +02:00
committed by Emmanuel Engelhart
parent 12e0fb6934
commit 69931fb347
21 changed files with 14 additions and 1848 deletions

View File

@ -38,7 +38,6 @@ namespace kiwix
{
class OPDSDumper;
class Reader;
/**
* A class to store information about a book (a zim file)
@ -69,7 +68,6 @@ class Book
~Book();
bool update(const Book& other);
DEPRECATED void update(const Reader& reader);
void update(const zim::Archive& archive);
void updateFromXml(const pugi::xml_node& node, const std::string& baseDir);
void updateFromOpds(const pugi::xml_node& node, const std::string& urlHost);

View File

@ -1,193 +0,0 @@
/*
* Copyright 2018-2020 Matthieu Gautier <mgautier@kymeria.fr>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#ifndef KIWIX_ENTRY_H
#define KIWIX_ENTRY_H
#include <stdio.h>
#include <zim/entry.h>
#include <zim/item.h>
#include <exception>
#include <string>
#include "common.h"
using namespace std;
namespace kiwix
{
class NoEntry : public std::exception {};
/**
* A entry represent an.. entry in a zim file.
*/
class Entry
{
public:
/**
* Construct an entry making reference to an zim article.
*
* @param article a zim::Article object
*/
DEPRECATED Entry(zim::Entry entry) : Entry(entry, true) {};
virtual ~Entry() = default;
/**
* Get the path of the entry.
*
* The path is the "key" of an entry.
*
* @return the path of the entry.
*/
std::string getPath() const { return entry.getPath(); }
/**
* Get the title of the entry.
*
* @return the title of the entry.
*/
std::string getTitle() const { return entry.getTitle(); }
/**
* Get the content of the entry.
*
* The string is a copy of the content.
* If you don't want to do a copy, use get_blob.
*
* @return the content of the entry.
*/
std::string getContent() const { return entry.getItem().getData(); }
/**
* Get the blob of the entry.
*
* A blob make reference to the content without copying it.
*
* @param offset The starting offset of the blob.
* @return the blob of the entry.
*/
zim::Blob getBlob(offset_type offset = 0) const { return entry.getItem().getData(offset); }
/**
* Get the blob of the entry.
*
* A blob make reference to the content without copying it.
*
* @param offset The starting offset of the blob.
* @param size The size of the blob.
* @return the blob of the entry.
*/
zim::Blob getBlob(offset_type offset, size_type size) const { return entry.getItem().getData(offset, size); }
/**
* Get the info for direct access to the content of the entry.
*
* Some entry (ie binary ones) have their content plain stored
* in the zim file. Knowing the offset where the content is stored
* an user can directly read the content in the zim file bypassing the
* libkiwix/libzim.
*
* @return A pair specifying where to read the content.
* The string is the real file to read (may be different that .zim
* file if zim is cut).
* The offset is the offset to read in the file.
* Return <"",0> if is not possible to read directly.
*/
zim::Item::DirectAccessInfo getDirectAccessInfo() const { return entry.getItem().getDirectAccessInformation(); }
/**
* Get the size of the entry.
*
* @return the size of the entry.
*/
size_type getSize() const;
/**
* Get the mime_type of the entry.
*
* @return the mime_type of the entry.
*/
std::string getMimetype() const;
/**
* Get if the entry is a redirect entry.
*
* @return True if the entry is a redirect.
*/
bool isRedirect() const;
/**
* Get if the entry is a link target entry.
*
* @return True if the entry is a link target.
*/
bool isLinkTarget() const;
/**
* Get if the entry is a deleted entry.
*
* @return True if the entry is a deleted entry.
*/
bool isDeleted() const;
/**
* Get the entry pointed by this entry.
*
* @return the entry pointed.
* @throw NoEntry if the entry is not a redirected entry.
*/
Entry getRedirectEntry() const;
/**
* Get the final entry pointed by this entry.
*
* Follow the redirection until a "not redirecting" entry is found.
* If the entry is not a redirected entry, return the entry itself.
*
* @return the final entry.
*/
Entry getFinalEntry() const;
/**
* Get the zim entry wrapped by this (kiwix) entry
*
* @return the zim entry
*/
const zim::Entry& getZimEntry() const { return entry; }
private:
zim::Entry entry;
private:
// Entry is deprecated, so we've marked the constructor as deprecated.
// But we still need to construct the entry (in our deprecated code)
// To avoid warning because we use deprecated function, we create a second
// constructor not deprecated. The `bool marker` is unused, it sole purpose
// is to change the signature to have two different constructor.
// This one is not deprecated and we must use it in our private code.
Entry(zim::Entry entry, bool marker);
friend class Reader;
};
}
#endif // KIWIX_ENTRY_H

View File

@ -223,7 +223,6 @@ class Library
Book getBookByIdThreadSafe(const std::string& id) const;
DEPRECATED std::shared_ptr<Reader> getReaderById(const std::string& id);
std::shared_ptr<zim::Archive> getArchiveById(const std::string& id);
std::shared_ptr<ZimSearcher> getSearcherById(const std::string& id) {
return getSearcherByIds(BookIdSet{id});

View File

@ -22,7 +22,6 @@
#include "book.h"
#include "library.h"
#include "reader.h"
#include <string>
#include <vector>

View File

@ -7,9 +7,6 @@ headers = [
'libxml_dumper.h',
'opds_dumper.h',
'downloader.h',
'reader.h',
'entry.h',
'searcher.h',
'search_renderer.h',
'server.h',
'kiwixserve.h',

View File

@ -27,7 +27,6 @@
#include <pugixml.hpp>
#include "library.h"
#include "reader.h"
using namespace std;

View File

@ -1,506 +0,0 @@
/*
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#ifndef KIWIX_READER_H
#define KIWIX_READER_H
#include <stdio.h>
#include <zim/zim.h>
#include <zim/archive.h>
#include <exception>
#include <map>
#include <sstream>
#include <string>
#include "common.h"
#include "entry.h"
using namespace std;
namespace kiwix
{
/**
* The SuggestionItem is a helper class that contains the info about a single
* suggestion item.
*/
class SuggestionItem
{
// Functions
public:
// Create a sugggestion item.
explicit SuggestionItem(const std::string& title, const std::string& normalizedTitle,
const std::string& path, const std::string& snippet = "") :
title(title),
normalizedTitle(normalizedTitle),
path(path),
snippet(snippet) {}
public:
const std::string& getTitle() const { return title;}
const std::string& getNormalizedTitle() const { return normalizedTitle;}
const std::string& getPath() const { return path;}
const std::string& getSnippet() const { return snippet;}
bool hasSnippet() const { return !snippet.empty();}
// Data
private:
std::string title;
std::string normalizedTitle;
std::string path;
std::string snippet;
};
/**
* The Reader class is the class who allow to get an entry content from a zim
* file.
*
* Reader is now deprecated. Directly use `zim::Archive`.
*/
using SuggestionsList_t = std::vector<SuggestionItem>;
class Reader
{
public:
/**
* Create a Reader to read a zim file specified by zimFilePath.
*
* @param zimFilePath The path to the zim file to read.
* The zim file can be splitted (.zimaa, .zimab, ...).
* In this case, the file path must still point to the
* unsplitted path as if the file were not splitted
* (.zim extesion).
*/
explicit DEPRECATED Reader(const string zimFilePath);
/**
* Create a Reader to read a zim file given by the Archive.
*
* @param archive The shared pointer to the Archive object.
*/
explicit DEPRECATED Reader(const std::shared_ptr<zim::Archive> archive)
: Reader(archive, true) {};
#ifndef _WIN32
explicit DEPRECATED Reader(int fd);
DEPRECATED Reader(int fd, zim::offset_type offset, zim::size_type size);
#endif
~Reader() = default;
/**
* Get the number of "displayable" entries in the zim file.
*
* @return If the zim file has a /M/Counter metadata, return the number of
* entries with the 'text/html' MIMEtype specified in the metadata.
* Else return the number of entries in the 'A' namespace.
*/
unsigned int getArticleCount() const;
/**
* Get the number of media in the zim file.
*
* @return If the zim file has a /M/Counter metadata, return the number of
* entries with the 'image/jpeg', 'image/gif' and 'image/png' in
* the metadata.
* Else return the number of entries in the 'I' namespace.
*/
unsigned int getMediaCount() const;
/**
* Get the number of all entries in the zim file.
*
* @return Return the number of all the entries, whatever their MIMEtype or
* their namespace.
*/
unsigned int getGlobalCount() const;
/**
* Get the path of the zim file.
*
* @return the path of the zim file as given in the constructor.
*/
string getZimFilePath() const;
/**
* Get the Id of the zim file.
*
* @return The uuid stored in the zim file.
*/
string getId() const;
/**
* Get a random page.
*
* @return A random Entry. The entry is picked from all entries in
* the 'A' namespace.
* The main entry is excluded from the potential results.
*/
Entry getRandomPage() const;
/**
* Get the entry of the main page.
*
* @return Entry of the main page as specified in the zim file.
*/
Entry getMainPage() const;
/**
* Get the content of a metadata.
*
* @param[in] name The name of the metadata.
* @param[out] value The value will be set to the content of the metadata.
* @return True if it was possible to get the content of the metadata.
*/
bool getMetadata(const string& name, string& value) const;
/**
* Get the name of the zim file.
*
* @return The name of the zim file as specified in the zim metadata.
*/
string getName() const;
/**
* Get the title of the zim file.
*
* @return The title of zim file as specified in the zim metadata.
* If no title has been set, return a title computed from the
* file path.
*/
string getTitle() const;
/**
* Get the creator of the zim file.
*
* @return The creator of the zim file as specified in the zim metadata.
*/
string getCreator() const;
/**
* Get the publisher of the zim file.
*
* @return The publisher of the zim file as specified in the zim metadata.
*/
string getPublisher() const;
/**
* Get the date of the zim file.
*
* @return The date of the zim file as specified in the zim metadata.
*/
string getDate() const;
/**
* Get the description of the zim file.
*
* @return The description of the zim file as specified in the zim metadata.
* If no description has been set, return the subtitle.
*/
string getDescription() const;
/**
* Get the long description of the zim file.
*
* @return The long description of the zim file as specifed in the zim metadata.
*/
string getLongDescription() const;
/**
* Get the language of the zim file.
*
* @return The language of the zim file as specified in the zim metadata.
*/
string getLanguage() const;
/**
* Get the license of the zim file.
*
* @return The license of the zim file as specified in the zim metadata.
*/
string getLicense() const;
/**
* Get the tags of the zim file.
*
* @param original If true, return the original tags as specified in the zim metadata.
* Else, try to convert it to the new 'normalized' format.
* @return The tags of the zim file.
*/
string getTags(bool original=false) const;
/**
* Get the value (as a string) of a specific tag.
*
* According to https://wiki.openzim.org/wiki/Tags
*
* @return The value of the specified tag.
* @throw std::out_of_range if the specified tag is not found.
*/
string getTagStr(const std::string& tagName) const;
/**
* Get the boolean value of a specific tag.
*
* According to https://wiki.openzim.org/wiki/Tags
*
* @return The boolean value of the specified tag.
* @throw std::out_of_range if the specified tag is not found.
* std::domain_error if the value of the tag cannot be convert to bool.
*/
bool getTagBool(const std::string& tagName) const;
/**
* Get the relations of the zim file.
*
* @return The relation of the zim file as specified in the zim metadata.
*/
string getRelation() const;
/**
* Get the flavour of the zim file.
*
* @return The flavour of the zim file as specified in the zim metadata.
*/
string getFlavour() const;
/**
* Get the source of the zim file.
*
* @return The source of the zim file as specified in the zim metadata.
*/
string getSource() const;
/**
* Get the scraper of the zim file.
*
* @return The scraper of the zim file as specified in the zim metadata.
*/
string getScraper() const;
/**
* Get the favicon of the zim file.
*
* @param[out] content The content of the favicon.
* @param[out] mimeType The mimeType of the favicon.
* @return True if a favicon has been found.
*/
bool getFavicon(string& content, string& mimeType) const;
/**
* Get an entry associated to an path.
*
* @param path The path of the entry.
* @return The entry.
* @throw NoEntry If no entry correspond to the path.
*/
Entry getEntryFromPath(const std::string& path) const;
/**
* Get an entry associated to an url encoded path.
*
* Equivalent to `getEntryFromPath(urlDecode(path));`
*
* @param path The url encoded path.
* @return The entry.
* @throw NoEntry If no entry correspond to the path.
*/
Entry getEntryFromEncodedPath(const std::string& path) const;
/**
* Get un entry associated to a title.
*
* @param title The title.
* @return The entry
* throw NoEntry If no entry correspond to the url.
*/
Entry getEntryFromTitle(const std::string& title) const;
/**
* Search for entries with title starting with prefix (case sensitive).
*
* Suggestions are stored in an internal vector and can be retrieved using
* `getNextSuggestion` method.
* This method is not thread safe and is deprecated. Use :
* bool searchSuggestions(const string& prefix,
* unsigned int suggestionsCount,
* SuggestionsList_t& results);
*
* @param prefix The prefix to search.
* @param suggestionsCount How many suggestions to search for.
* @param reset If true, remove previous suggestions in the internal vector.
* If false, add suggestions to the internal vector
* (until internal vector size is suggestionCount (or no more
* suggestion))
* @return True if some suggestions have been added to the internal vector.
*/
DEPRECATED bool searchSuggestions(const string& prefix,
unsigned int suggestionsCount,
const bool reset = true);
/**
* Search for entries with title starting with prefix (case sensitive).
*
* Suggestions are added to the `result` vector.
*
* @param prefix The prefix to search.
* @param suggestionsCount How many suggestions to search for.
* @param result The vector where to store the suggestions.
* @return True if some suggestions have been added to the vector.
*/
bool searchSuggestions(const string& prefix,
unsigned int suggestionsCount,
SuggestionsList_t& resuls);
/**
* Search for entries for the given prefix.
*
* If the zim file has a internal fulltext index, the suggestions will be
* searched using it.
* Else the suggestions will be search using `searchSuggestions` while trying
* to be smart about case sensitivity (using `getTitleVariants`).
*
* In any case, suggestions are stored in an internal vector and can be
* retrieved using `getNextSuggestion` method.
* The internal vector will be reset.
* This method is not thread safe and is deprecated. Use :
* bool searchSuggestionsSmart(const string& prefix,
* unsigned int suggestionsCount,
* SuggestionsList_t& results);
*
* @param prefix The prefix to search for.
* @param suggestionsCount How many suggestions to search for.
*/
DEPRECATED bool searchSuggestionsSmart(const string& prefix,
unsigned int suggestionsCount);
/**
* Search for entries for the given prefix.
*
* If the zim file has a internal fulltext index, the suggestions will be
* searched using it.
* Else the suggestions will be search using `searchSuggestions` while trying
* to be smart about case sensitivity (using `getTitleVariants`).
*
* In any case, suggestions are stored in an internal vector and can be
* retrieved using `getNextSuggestion` method.
* The internal vector will be reset.
*
* @param prefix The prefix to search for.
* @param suggestionsCount How many suggestions to search for.
* @param results The vector where to store the suggestions
* @return True if some suggestions have been added to the results.
*/
bool searchSuggestionsSmart(const string& prefix,
unsigned int suggestionsCount,
SuggestionsList_t& results);
/**
* Check if the path exists in the zim file.
*
* @param path the path to check.
* @return True if the path exists in the zim file.
*/
bool pathExists(const string& path) const;
/**
* Check if the zim file has a embedded fulltext index.
*
* @return True if the zim file has a embedded fulltext index
* and is not split (else the fulltext is not accessible).
*/
bool hasFulltextIndex() const;
/**
* Get potential case title variations for a title.
*
* @param title a title.
* @return the list of variantions.
*/
std::vector<std::string> getTitleVariants(const std::string& title) const;
/**
* Get the next suggestion title.
*
* @param[out] title the title of the suggestion.
* @return True if title has been set.
*/
DEPRECATED bool getNextSuggestion(string& title);
/**
* Get the next suggestion title and url.
*
* @param[out] title the title of the suggestion.
* @param[out] url the url of the suggestion.
* @return True if title and url have been set.
*/
DEPRECATED bool getNextSuggestion(string& title, string& url);
/**
* Get if we can check zim file integrity (has a checksum).
*
* @return True if zim file have a checksum.
*/
bool canCheckIntegrity() const;
/**
* Check is zim file is corrupted.
*
* @return True if zim file is corrupted.
*/
bool isCorrupted() const;
/**
* Return the total size of the zim file.
*
* If zim file is split, return the sum of all parts' size.
*
* @return Size of the size file is KiB.
*/
unsigned int getFileSize() const;
/**
* Get the zim file handler.
*
* @return The libzim file handler.
*/
zim::Archive* getZimArchive() const;
protected:
std::shared_ptr<zim::Archive> zimArchive;
std::string zimFilePath;
SuggestionsList_t suggestions;
SuggestionsList_t::iterator suggestionsOffset;
private:
std::map<const std::string, unsigned int> parseCounterMetadata() const;
// Reader is deprecated, so we've marked the constructor as deprecated.
// But we still need to construct the reader (in our deprecated code)
// To avoid warning because we use deprecated function, we create a
// constructor not deprecated. The `bool marker` is unused, it sole purpose
// is to change the signature to have a different constructor.
// This one is not deprecated and we must use it in our private code.
Reader(const std::shared_ptr<zim::Archive> archive, bool marker);
friend class Library;
};
}
#endif

View File

@ -27,7 +27,6 @@
namespace kiwix
{
class Searcher;
class NameMapper;
/**
* The SearcherRenderer class is used to render a search result to a html page.
@ -35,17 +34,6 @@ class NameMapper;
class SearchRenderer
{
public:
/**
* Construct a SearchRenderer from a Searcher.
*
* This method is now deprecated. Construct the renderer from a
* `zim::SearchResultSet`
*
* @param searcher The `Searcher` to render.
* @param mapper The `NameMapper` to use to do the rendering.
*/
DEPRECATED SearchRenderer(Searcher* searcher, NameMapper* mapper);
/**
* Construct a SearchRenderer from a SearchResultSet.
*

View File

@ -1,180 +0,0 @@
/*
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#ifndef KIWIX_SEARCHER_H
#define KIWIX_SEARCHER_H
#include <stdio.h>
#include <stdlib.h>
#include <unicode/putil.h>
#include <algorithm>
#include <cctype>
#include <locale>
#include <string>
#include <memory>
#include <vector>
#include <zim/search.h>
using namespace std;
namespace kiwix
{
class Reader;
class Result
{
public:
virtual ~Result(){};
virtual std::string get_url() = 0;
virtual std::string get_title() = 0;
virtual int get_score() = 0;
virtual std::string get_snippet() = 0;
virtual std::string get_content() = 0;
virtual int get_wordCount() = 0;
virtual int get_size() = 0;
virtual std::string get_zimId() = 0;
};
struct SearcherInternal;
struct SuggestionInternal;
/**
* The Searcher class is reponsible to do different kind of search using the
* fulltext index.
*
* The Searcher is now deprecated. Use libzim search feature.
*/
class Searcher
{
public:
/**
* The default constructor.
*/
DEPRECATED Searcher();
~Searcher();
/**
* Add a reader (containing embedded fulltext index) to the search.
*
* @param reader The Reader for the zim containing the fulltext index.
* @return true if the reader has been added.
* false if the reader cannot be added (no embedded fulltext index present)
*/
bool add_reader(std::shared_ptr<Reader> reader);
std::shared_ptr<Reader> get_reader(int index);
/**
* Start a search on the zim associated to the Searcher.
*
* Search results should be retrived using the getNextResult method.
*
* @param search The search query.
* @param resultStart the start offset of the search results (used for pagination).
* @param maxResultCount Maximum results to get from start (used for pagination).
* @param verbose print some info on stdout if true.
*/
void search(const std::string& search,
unsigned int resultStart,
unsigned int maxResultCount,
const bool verbose = false);
/**
* Start a geographique search.
* The search return result for entry in a disc of center latitude/longitude
* and radius distance.
*
* Search results should be retrived using the getNextResult method.
*
* @param latitude The latitude of the center point.
* @param longitude The longitude of the center point.
* @param distance The radius of the disc.
* @param resultStart the start offset of the search results (used for pagination).
* @param maxResultCount Maximum number of results to get from start (used for pagination).
* @param verbose print some info on stdout if true.
*/
void geo_search(float latitude, float longitude, float distance,
unsigned int resultStart,
unsigned int maxResultCount,
const bool verbose = false);
/**
* Start a suggestion search.
* The search made depend of the "version" of the embedded index.
* - If the index is newer enough and have a title namespace, the search is
* made in the titles only.
* - Else the search is made on the whole article content.
* In any case, the search is made "partial" (as adding '*' at the end of the query)
*
* @param search The search query.
* @param verbose print some info on stdout if true.
*/
void suggestions(std::string& search, const bool verbose = false);
/**
* Get the next result of a started search.
* This is the method to use to loop hover the search results.
*/
Result* getNextResult();
/**
* Restart the previous search.
* Next call to getNextResult will return the first result.
*/
void restart_search();
/**
* Get a estimation of the result count.
*/
unsigned int getEstimatedResultCount();
/**
* Get a SearchResultSet object for current search
*/
zim::SearchResultSet getSearchResultSet();
unsigned int getResultStart() { return resultStart; }
unsigned int getMaxResultCount() { return maxResultCount; }
protected:
std::string beautifyInteger(const unsigned int number);
void closeIndex();
void searchInIndex(string& search,
const unsigned int resultStart,
const unsigned int maxResultCount,
const bool verbose = false);
std::vector<std::shared_ptr<Reader>> readers;
std::unique_ptr<SearcherInternal> internal;
std::unique_ptr<SuggestionInternal> suggestionInternal;
std::string searchPattern;
unsigned int estimatedResultCount;
unsigned int resultStart;
unsigned int maxResultCount;
private:
void reset();
};
}
#endif