mirror of https://github.com/kiwix/libkiwix.git
Remove libzim's wrappers.
It is time to remove them. They have been deprecated since 10.0.0.
This commit is contained in:
parent
12e0fb6934
commit
69931fb347
|
@ -38,7 +38,6 @@ namespace kiwix
|
||||||
{
|
{
|
||||||
|
|
||||||
class OPDSDumper;
|
class OPDSDumper;
|
||||||
class Reader;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A class to store information about a book (a zim file)
|
* A class to store information about a book (a zim file)
|
||||||
|
@ -69,7 +68,6 @@ class Book
|
||||||
~Book();
|
~Book();
|
||||||
|
|
||||||
bool update(const Book& other);
|
bool update(const Book& other);
|
||||||
DEPRECATED void update(const Reader& reader);
|
|
||||||
void update(const zim::Archive& archive);
|
void update(const zim::Archive& archive);
|
||||||
void updateFromXml(const pugi::xml_node& node, const std::string& baseDir);
|
void updateFromXml(const pugi::xml_node& node, const std::string& baseDir);
|
||||||
void updateFromOpds(const pugi::xml_node& node, const std::string& urlHost);
|
void updateFromOpds(const pugi::xml_node& node, const std::string& urlHost);
|
||||||
|
|
193
include/entry.h
193
include/entry.h
|
@ -1,193 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright 2018-2020 Matthieu Gautier <mgautier@kymeria.fr>
|
|
||||||
*
|
|
||||||
* This program is free software; you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU General Public License as published by
|
|
||||||
* the Free Software Foundation; either version 3 of the License, or
|
|
||||||
* any later version.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
* GNU General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License
|
|
||||||
* along with this program; if not, write to the Free Software
|
|
||||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
|
||||||
* MA 02110-1301, USA.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef KIWIX_ENTRY_H
|
|
||||||
#define KIWIX_ENTRY_H
|
|
||||||
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <zim/entry.h>
|
|
||||||
#include <zim/item.h>
|
|
||||||
#include <exception>
|
|
||||||
#include <string>
|
|
||||||
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
using namespace std;
|
|
||||||
|
|
||||||
namespace kiwix
|
|
||||||
{
|
|
||||||
|
|
||||||
|
|
||||||
class NoEntry : public std::exception {};
|
|
||||||
|
|
||||||
/**
|
|
||||||
* An Entry represents an entry in a zim file.
|
|
||||||
*/
|
|
||||||
class Entry
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
/**
|
|
||||||
* Construct an entry making reference to a zim entry.
|
|
||||||
*
|
|
||||||
* @param entry a zim::Entry object
|
|
||||||
*/
|
|
||||||
DEPRECATED Entry(zim::Entry entry) : Entry(entry, true) {};
|
|
||||||
virtual ~Entry() = default;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get the path of the entry.
|
|
||||||
*
|
|
||||||
* The path is the "key" of an entry.
|
|
||||||
*
|
|
||||||
* @return the path of the entry.
|
|
||||||
*/
|
|
||||||
std::string getPath() const { return entry.getPath(); }
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get the title of the entry.
|
|
||||||
*
|
|
||||||
* @return the title of the entry.
|
|
||||||
*/
|
|
||||||
std::string getTitle() const { return entry.getTitle(); }
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get the content of the entry.
|
|
||||||
*
|
|
||||||
* The string is a copy of the content.
|
|
||||||
* If you don't want to make a copy, use getBlob.
|
|
||||||
*
|
|
||||||
* @return the content of the entry.
|
|
||||||
*/
|
|
||||||
std::string getContent() const { return entry.getItem().getData(); }
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get the blob of the entry.
|
|
||||||
*
|
|
||||||
* A blob references the content without copying it.
|
|
||||||
*
|
|
||||||
* @param offset The starting offset of the blob.
|
|
||||||
* @return the blob of the entry.
|
|
||||||
*/
|
|
||||||
zim::Blob getBlob(offset_type offset = 0) const { return entry.getItem().getData(offset); }
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get the blob of the entry.
|
|
||||||
*
|
|
||||||
* A blob references the content without copying it.
|
|
||||||
*
|
|
||||||
* @param offset The starting offset of the blob.
|
|
||||||
* @param size The size of the blob.
|
|
||||||
* @return the blob of the entry.
|
|
||||||
*/
|
|
||||||
zim::Blob getBlob(offset_type offset, size_type size) const { return entry.getItem().getData(offset, size); }
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get the info for direct access to the content of the entry.
|
|
||||||
*
|
|
||||||
* Some entries (i.e. binary ones) have their content stored plain
|
|
||||||
* in the zim file. Knowing the offset where the content is stored
|
|
||||||
* a user can directly read the content in the zim file bypassing the
|
|
||||||
* libkiwix/libzim.
|
|
||||||
*
|
|
||||||
* @return A pair specifying where to read the content.
|
|
||||||
* The string is the real file to read (may be different than the .zim
|
|
||||||
* file if zim is cut).
|
|
||||||
* The offset is the offset to read in the file.
|
|
||||||
* Return <"",0> if it is not possible to read directly.
|
|
||||||
*/
|
|
||||||
zim::Item::DirectAccessInfo getDirectAccessInfo() const { return entry.getItem().getDirectAccessInformation(); }
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get the size of the entry.
|
|
||||||
*
|
|
||||||
* @return the size of the entry.
|
|
||||||
*/
|
|
||||||
size_type getSize() const;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get the mime_type of the entry.
|
|
||||||
*
|
|
||||||
* @return the mime_type of the entry.
|
|
||||||
*/
|
|
||||||
std::string getMimetype() const;
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get if the entry is a redirect entry.
|
|
||||||
*
|
|
||||||
* @return True if the entry is a redirect.
|
|
||||||
*/
|
|
||||||
bool isRedirect() const;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get if the entry is a link target entry.
|
|
||||||
*
|
|
||||||
* @return True if the entry is a link target.
|
|
||||||
*/
|
|
||||||
bool isLinkTarget() const;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get if the entry is a deleted entry.
|
|
||||||
*
|
|
||||||
* @return True if the entry is a deleted entry.
|
|
||||||
*/
|
|
||||||
bool isDeleted() const;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get the entry pointed by this entry.
|
|
||||||
*
|
|
||||||
* @return the entry pointed.
|
|
||||||
* @throw NoEntry if the entry is not a redirected entry.
|
|
||||||
*/
|
|
||||||
Entry getRedirectEntry() const;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get the final entry pointed by this entry.
|
|
||||||
*
|
|
||||||
* Follow the redirection until a "not redirecting" entry is found.
|
|
||||||
* If the entry is not a redirected entry, return the entry itself.
|
|
||||||
*
|
|
||||||
* @return the final entry.
|
|
||||||
*/
|
|
||||||
Entry getFinalEntry() const;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get the zim entry wrapped by this (kiwix) entry
|
|
||||||
*
|
|
||||||
* @return the zim entry
|
|
||||||
*/
|
|
||||||
const zim::Entry& getZimEntry() const { return entry; }
|
|
||||||
|
|
||||||
private:
|
|
||||||
zim::Entry entry;
|
|
||||||
|
|
||||||
private:
|
|
||||||
// Entry is deprecated, so we've marked the constructor as deprecated.
|
|
||||||
// But we still need to construct the entry (in our deprecated code)
|
|
||||||
// To avoid warning because we use deprecated function, we create a second
|
|
||||||
// constructor not deprecated. The `bool marker` is unused; its sole purpose
|
|
||||||
// is to change the signature to have two different constructors.
|
|
||||||
// This one is not deprecated and we must use it in our private code.
|
|
||||||
Entry(zim::Entry entry, bool marker);
|
|
||||||
friend class Reader;
|
|
||||||
};
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif // KIWIX_ENTRY_H
|
|
|
@ -223,7 +223,6 @@ class Library
|
||||||
|
|
||||||
Book getBookByIdThreadSafe(const std::string& id) const;
|
Book getBookByIdThreadSafe(const std::string& id) const;
|
||||||
|
|
||||||
DEPRECATED std::shared_ptr<Reader> getReaderById(const std::string& id);
|
|
||||||
std::shared_ptr<zim::Archive> getArchiveById(const std::string& id);
|
std::shared_ptr<zim::Archive> getArchiveById(const std::string& id);
|
||||||
std::shared_ptr<ZimSearcher> getSearcherById(const std::string& id) {
|
std::shared_ptr<ZimSearcher> getSearcherById(const std::string& id) {
|
||||||
return getSearcherByIds(BookIdSet{id});
|
return getSearcherByIds(BookIdSet{id});
|
||||||
|
|
|
@ -22,7 +22,6 @@
|
||||||
|
|
||||||
#include "book.h"
|
#include "book.h"
|
||||||
#include "library.h"
|
#include "library.h"
|
||||||
#include "reader.h"
|
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
|
@ -7,9 +7,6 @@ headers = [
|
||||||
'libxml_dumper.h',
|
'libxml_dumper.h',
|
||||||
'opds_dumper.h',
|
'opds_dumper.h',
|
||||||
'downloader.h',
|
'downloader.h',
|
||||||
'reader.h',
|
|
||||||
'entry.h',
|
|
||||||
'searcher.h',
|
|
||||||
'search_renderer.h',
|
'search_renderer.h',
|
||||||
'server.h',
|
'server.h',
|
||||||
'kiwixserve.h',
|
'kiwixserve.h',
|
||||||
|
|
|
@ -27,7 +27,6 @@
|
||||||
#include <pugixml.hpp>
|
#include <pugixml.hpp>
|
||||||
|
|
||||||
#include "library.h"
|
#include "library.h"
|
||||||
#include "reader.h"
|
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
|
|
506
include/reader.h
506
include/reader.h
|
@ -1,506 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
|
|
||||||
*
|
|
||||||
* This program is free software; you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU General Public License as published by
|
|
||||||
* the Free Software Foundation; either version 3 of the License, or
|
|
||||||
* any later version.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
* GNU General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License
|
|
||||||
* along with this program; if not, write to the Free Software
|
|
||||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
|
||||||
* MA 02110-1301, USA.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef KIWIX_READER_H
|
|
||||||
#define KIWIX_READER_H
|
|
||||||
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <zim/zim.h>
|
|
||||||
#include <zim/archive.h>
|
|
||||||
#include <exception>
|
|
||||||
#include <map>
|
|
||||||
#include <sstream>
|
|
||||||
#include <string>
|
|
||||||
#include "common.h"
|
|
||||||
#include "entry.h"
|
|
||||||
|
|
||||||
using namespace std;
|
|
||||||
|
|
||||||
namespace kiwix
|
|
||||||
{
|
|
||||||
|
|
||||||
/**
|
|
||||||
* The SuggestionItem is a helper class that contains the info about a single
|
|
||||||
* suggestion item.
|
|
||||||
*/
|
|
||||||
class SuggestionItem
|
|
||||||
{
|
|
||||||
// Functions
|
|
||||||
public:
|
|
||||||
// Create a suggestion item.
|
|
||||||
explicit SuggestionItem(const std::string& title, const std::string& normalizedTitle,
|
|
||||||
const std::string& path, const std::string& snippet = "") :
|
|
||||||
title(title),
|
|
||||||
normalizedTitle(normalizedTitle),
|
|
||||||
path(path),
|
|
||||||
snippet(snippet) {}
|
|
||||||
|
|
||||||
public:
|
|
||||||
const std::string& getTitle() const { return title;}
|
|
||||||
const std::string& getNormalizedTitle() const { return normalizedTitle;}
|
|
||||||
const std::string& getPath() const { return path;}
|
|
||||||
const std::string& getSnippet() const { return snippet;}
|
|
||||||
|
|
||||||
bool hasSnippet() const { return !snippet.empty();}
|
|
||||||
|
|
||||||
// Data
|
|
||||||
private:
|
|
||||||
std::string title;
|
|
||||||
std::string normalizedTitle;
|
|
||||||
std::string path;
|
|
||||||
std::string snippet;
|
|
||||||
};
|
|
||||||
|
|
||||||
/**
|
|
||||||
* The Reader class allows getting the content of an entry from a zim
|
|
||||||
* file.
|
|
||||||
*
|
|
||||||
* Reader is now deprecated. Directly use `zim::Archive`.
|
|
||||||
*/
|
|
||||||
|
|
||||||
using SuggestionsList_t = std::vector<SuggestionItem>;
|
|
||||||
class Reader
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
/**
|
|
||||||
* Create a Reader to read a zim file specified by zimFilePath.
|
|
||||||
*
|
|
||||||
* @param zimFilePath The path to the zim file to read.
|
|
||||||
* The zim file can be split (.zimaa, .zimab, ...).
|
|
||||||
* In this case, the file path must still point to the
|
|
||||||
* unsplit path as if the file were not split
|
|
||||||
* (.zim extension).
|
|
||||||
*/
|
|
||||||
explicit DEPRECATED Reader(const string zimFilePath);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Create a Reader to read a zim file given by the Archive.
|
|
||||||
*
|
|
||||||
* @param archive The shared pointer to the Archive object.
|
|
||||||
*/
|
|
||||||
explicit DEPRECATED Reader(const std::shared_ptr<zim::Archive> archive)
|
|
||||||
: Reader(archive, true) {};
|
|
||||||
#ifndef _WIN32
|
|
||||||
explicit DEPRECATED Reader(int fd);
|
|
||||||
DEPRECATED Reader(int fd, zim::offset_type offset, zim::size_type size);
|
|
||||||
#endif
|
|
||||||
~Reader() = default;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get the number of "displayable" entries in the zim file.
|
|
||||||
*
|
|
||||||
* @return If the zim file has a /M/Counter metadata, return the number of
|
|
||||||
* entries with the 'text/html' MIMEtype specified in the metadata.
|
|
||||||
* Else return the number of entries in the 'A' namespace.
|
|
||||||
*/
|
|
||||||
unsigned int getArticleCount() const;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get the number of media in the zim file.
|
|
||||||
*
|
|
||||||
* @return If the zim file has a /M/Counter metadata, return the number of
|
|
||||||
* entries with the 'image/jpeg', 'image/gif' and 'image/png' in
|
|
||||||
* the metadata.
|
|
||||||
* Else return the number of entries in the 'I' namespace.
|
|
||||||
*/
|
|
||||||
unsigned int getMediaCount() const;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get the number of all entries in the zim file.
|
|
||||||
*
|
|
||||||
* @return Return the number of all the entries, whatever their MIMEtype or
|
|
||||||
* their namespace.
|
|
||||||
*/
|
|
||||||
unsigned int getGlobalCount() const;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get the path of the zim file.
|
|
||||||
*
|
|
||||||
* @return the path of the zim file as given in the constructor.
|
|
||||||
*/
|
|
||||||
string getZimFilePath() const;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get the Id of the zim file.
|
|
||||||
*
|
|
||||||
* @return The uuid stored in the zim file.
|
|
||||||
*/
|
|
||||||
string getId() const;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get a random page.
|
|
||||||
*
|
|
||||||
* @return A random Entry. The entry is picked from all entries in
|
|
||||||
* the 'A' namespace.
|
|
||||||
* The main entry is excluded from the potential results.
|
|
||||||
*/
|
|
||||||
Entry getRandomPage() const;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get the entry of the main page.
|
|
||||||
*
|
|
||||||
* @return Entry of the main page as specified in the zim file.
|
|
||||||
*/
|
|
||||||
Entry getMainPage() const;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get the content of a metadata.
|
|
||||||
*
|
|
||||||
* @param[in] name The name of the metadata.
|
|
||||||
* @param[out] value The value will be set to the content of the metadata.
|
|
||||||
* @return True if it was possible to get the content of the metadata.
|
|
||||||
*/
|
|
||||||
bool getMetadata(const string& name, string& value) const;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get the name of the zim file.
|
|
||||||
*
|
|
||||||
* @return The name of the zim file as specified in the zim metadata.
|
|
||||||
*/
|
|
||||||
string getName() const;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get the title of the zim file.
|
|
||||||
*
|
|
||||||
* @return The title of zim file as specified in the zim metadata.
|
|
||||||
* If no title has been set, return a title computed from the
|
|
||||||
* file path.
|
|
||||||
*/
|
|
||||||
string getTitle() const;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get the creator of the zim file.
|
|
||||||
*
|
|
||||||
* @return The creator of the zim file as specified in the zim metadata.
|
|
||||||
*/
|
|
||||||
string getCreator() const;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get the publisher of the zim file.
|
|
||||||
*
|
|
||||||
* @return The publisher of the zim file as specified in the zim metadata.
|
|
||||||
*/
|
|
||||||
string getPublisher() const;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get the date of the zim file.
|
|
||||||
*
|
|
||||||
* @return The date of the zim file as specified in the zim metadata.
|
|
||||||
*/
|
|
||||||
string getDate() const;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get the description of the zim file.
|
|
||||||
*
|
|
||||||
* @return The description of the zim file as specified in the zim metadata.
|
|
||||||
* If no description has been set, return the subtitle.
|
|
||||||
*/
|
|
||||||
string getDescription() const;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get the long description of the zim file.
|
|
||||||
*
|
|
||||||
* @return The long description of the zim file as specified in the zim metadata.
|
|
||||||
*/
|
|
||||||
string getLongDescription() const;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get the language of the zim file.
|
|
||||||
*
|
|
||||||
* @return The language of the zim file as specified in the zim metadata.
|
|
||||||
*/
|
|
||||||
string getLanguage() const;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get the license of the zim file.
|
|
||||||
*
|
|
||||||
* @return The license of the zim file as specified in the zim metadata.
|
|
||||||
*/
|
|
||||||
string getLicense() const;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get the tags of the zim file.
|
|
||||||
*
|
|
||||||
* @param original If true, return the original tags as specified in the zim metadata.
|
|
||||||
* Else, try to convert it to the new 'normalized' format.
|
|
||||||
* @return The tags of the zim file.
|
|
||||||
*/
|
|
||||||
string getTags(bool original=false) const;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get the value (as a string) of a specific tag.
|
|
||||||
*
|
|
||||||
* According to https://wiki.openzim.org/wiki/Tags
|
|
||||||
*
|
|
||||||
* @return The value of the specified tag.
|
|
||||||
* @throw std::out_of_range if the specified tag is not found.
|
|
||||||
*/
|
|
||||||
string getTagStr(const std::string& tagName) const;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get the boolean value of a specific tag.
|
|
||||||
*
|
|
||||||
* According to https://wiki.openzim.org/wiki/Tags
|
|
||||||
*
|
|
||||||
* @return The boolean value of the specified tag.
|
|
||||||
* @throw std::out_of_range if the specified tag is not found.
|
|
||||||
* std::domain_error if the value of the tag cannot be converted to bool.
|
|
||||||
*/
|
|
||||||
bool getTagBool(const std::string& tagName) const;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get the relations of the zim file.
|
|
||||||
*
|
|
||||||
* @return The relation of the zim file as specified in the zim metadata.
|
|
||||||
*/
|
|
||||||
string getRelation() const;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get the flavour of the zim file.
|
|
||||||
*
|
|
||||||
* @return The flavour of the zim file as specified in the zim metadata.
|
|
||||||
*/
|
|
||||||
string getFlavour() const;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get the source of the zim file.
|
|
||||||
*
|
|
||||||
* @return The source of the zim file as specified in the zim metadata.
|
|
||||||
*/
|
|
||||||
string getSource() const;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get the scraper of the zim file.
|
|
||||||
*
|
|
||||||
* @return The scraper of the zim file as specified in the zim metadata.
|
|
||||||
*/
|
|
||||||
string getScraper() const;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get the favicon of the zim file.
|
|
||||||
*
|
|
||||||
* @param[out] content The content of the favicon.
|
|
||||||
* @param[out] mimeType The mimeType of the favicon.
|
|
||||||
* @return True if a favicon has been found.
|
|
||||||
*/
|
|
||||||
bool getFavicon(string& content, string& mimeType) const;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get an entry associated to a path.
|
|
||||||
*
|
|
||||||
* @param path The path of the entry.
|
|
||||||
* @return The entry.
|
|
||||||
* @throw NoEntry If no entry corresponds to the path.
|
|
||||||
*/
|
|
||||||
Entry getEntryFromPath(const std::string& path) const;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get an entry associated to an url encoded path.
|
|
||||||
*
|
|
||||||
* Equivalent to `getEntryFromPath(urlDecode(path));`
|
|
||||||
*
|
|
||||||
* @param path The url encoded path.
|
|
||||||
* @return The entry.
|
|
||||||
* @throw NoEntry If no entry corresponds to the path.
|
|
||||||
*/
|
|
||||||
Entry getEntryFromEncodedPath(const std::string& path) const;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get an entry associated to a title.
|
|
||||||
*
|
|
||||||
* @param title The title.
|
|
||||||
* @return The entry
|
|
||||||
* @throw NoEntry If no entry corresponds to the title.
|
|
||||||
*/
|
|
||||||
Entry getEntryFromTitle(const std::string& title) const;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Search for entries with title starting with prefix (case sensitive).
|
|
||||||
*
|
|
||||||
* Suggestions are stored in an internal vector and can be retrieved using
|
|
||||||
* `getNextSuggestion` method.
|
|
||||||
* This method is not thread safe and is deprecated. Use :
|
|
||||||
* bool searchSuggestions(const string& prefix,
|
|
||||||
* unsigned int suggestionsCount,
|
|
||||||
* SuggestionsList_t& results);
|
|
||||||
*
|
|
||||||
* @param prefix The prefix to search.
|
|
||||||
* @param suggestionsCount How many suggestions to search for.
|
|
||||||
* @param reset If true, remove previous suggestions in the internal vector.
|
|
||||||
* If false, add suggestions to the internal vector
|
|
||||||
* (until internal vector size is suggestionsCount (or no more
|
|
||||||
* suggestion))
|
|
||||||
* @return True if some suggestions have been added to the internal vector.
|
|
||||||
*/
|
|
||||||
DEPRECATED bool searchSuggestions(const string& prefix,
|
|
||||||
unsigned int suggestionsCount,
|
|
||||||
const bool reset = true);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Search for entries with title starting with prefix (case sensitive).
|
|
||||||
*
|
|
||||||
* Suggestions are added to the `result` vector.
|
|
||||||
*
|
|
||||||
* @param prefix The prefix to search.
|
|
||||||
* @param suggestionsCount How many suggestions to search for.
|
|
||||||
* @param result The vector where to store the suggestions.
|
|
||||||
* @return True if some suggestions have been added to the vector.
|
|
||||||
*/
|
|
||||||
|
|
||||||
bool searchSuggestions(const string& prefix,
|
|
||||||
unsigned int suggestionsCount,
|
|
||||||
SuggestionsList_t& resuls);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Search for entries for the given prefix.
|
|
||||||
*
|
|
||||||
* If the zim file has an internal fulltext index, the suggestions will be
|
|
||||||
* searched using it.
|
|
||||||
* Else the suggestions will be searched using `searchSuggestions` while trying
|
|
||||||
* to be smart about case sensitivity (using `getTitleVariants`).
|
|
||||||
*
|
|
||||||
* In any case, suggestions are stored in an internal vector and can be
|
|
||||||
* retrieved using `getNextSuggestion` method.
|
|
||||||
* The internal vector will be reset.
|
|
||||||
* This method is not thread safe and is deprecated. Use :
|
|
||||||
* bool searchSuggestionsSmart(const string& prefix,
|
|
||||||
* unsigned int suggestionsCount,
|
|
||||||
* SuggestionsList_t& results);
|
|
||||||
*
|
|
||||||
* @param prefix The prefix to search for.
|
|
||||||
* @param suggestionsCount How many suggestions to search for.
|
|
||||||
*/
|
|
||||||
DEPRECATED bool searchSuggestionsSmart(const string& prefix,
|
|
||||||
unsigned int suggestionsCount);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Search for entries for the given prefix.
|
|
||||||
*
|
|
||||||
* If the zim file has an internal fulltext index, the suggestions will be
|
|
||||||
* searched using it.
|
|
||||||
* Else the suggestions will be searched using `searchSuggestions` while trying
|
|
||||||
* to be smart about case sensitivity (using `getTitleVariants`).
|
|
||||||
*
|
|
||||||
* In any case, suggestions are stored in an internal vector and can be
|
|
||||||
* retrieved using `getNextSuggestion` method.
|
|
||||||
* The internal vector will be reset.
|
|
||||||
*
|
|
||||||
* @param prefix The prefix to search for.
|
|
||||||
* @param suggestionsCount How many suggestions to search for.
|
|
||||||
* @param results The vector where to store the suggestions
|
|
||||||
* @return True if some suggestions have been added to the results.
|
|
||||||
*/
|
|
||||||
bool searchSuggestionsSmart(const string& prefix,
|
|
||||||
unsigned int suggestionsCount,
|
|
||||||
SuggestionsList_t& results);
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Check if the path exists in the zim file.
|
|
||||||
*
|
|
||||||
* @param path the path to check.
|
|
||||||
* @return True if the path exists in the zim file.
|
|
||||||
*/
|
|
||||||
bool pathExists(const string& path) const;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Check if the zim file has an embedded fulltext index.
|
|
||||||
*
|
|
||||||
* @return True if the zim file has an embedded fulltext index
|
|
||||||
* and is not split (else the fulltext is not accessible).
|
|
||||||
*/
|
|
||||||
bool hasFulltextIndex() const;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get potential case title variations for a title.
|
|
||||||
*
|
|
||||||
* @param title a title.
|
|
||||||
* @return the list of variations.
|
|
||||||
*/
|
|
||||||
std::vector<std::string> getTitleVariants(const std::string& title) const;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get the next suggestion title.
|
|
||||||
*
|
|
||||||
* @param[out] title the title of the suggestion.
|
|
||||||
* @return True if title has been set.
|
|
||||||
*/
|
|
||||||
DEPRECATED bool getNextSuggestion(string& title);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get the next suggestion title and url.
|
|
||||||
*
|
|
||||||
* @param[out] title the title of the suggestion.
|
|
||||||
* @param[out] url the url of the suggestion.
|
|
||||||
* @return True if title and url have been set.
|
|
||||||
*/
|
|
||||||
DEPRECATED bool getNextSuggestion(string& title, string& url);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get if we can check zim file integrity (has a checksum).
|
|
||||||
*
|
|
||||||
* @return True if the zim file has a checksum.
|
|
||||||
*/
|
|
||||||
bool canCheckIntegrity() const;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Check if the zim file is corrupted.
|
|
||||||
*
|
|
||||||
* @return True if zim file is corrupted.
|
|
||||||
*/
|
|
||||||
bool isCorrupted() const;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Return the total size of the zim file.
|
|
||||||
*
|
|
||||||
* If zim file is split, return the sum of all parts' size.
|
|
||||||
*
|
|
||||||
* @return Size of the zim file in KiB.
|
|
||||||
*/
|
|
||||||
unsigned int getFileSize() const;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get the zim file handler.
|
|
||||||
*
|
|
||||||
* @return The libzim file handler.
|
|
||||||
*/
|
|
||||||
zim::Archive* getZimArchive() const;
|
|
||||||
|
|
||||||
protected:
|
|
||||||
std::shared_ptr<zim::Archive> zimArchive;
|
|
||||||
std::string zimFilePath;
|
|
||||||
|
|
||||||
SuggestionsList_t suggestions;
|
|
||||||
SuggestionsList_t::iterator suggestionsOffset;
|
|
||||||
|
|
||||||
private:
|
|
||||||
std::map<const std::string, unsigned int> parseCounterMetadata() const;
|
|
||||||
|
|
||||||
// Reader is deprecated, so we've marked the constructor as deprecated.
|
|
||||||
// But we still need to construct the reader (in our deprecated code)
|
|
||||||
// To avoid warning because we use deprecated function, we create a
|
|
||||||
// constructor not deprecated. The `bool marker` is unused; its sole purpose
|
|
||||||
// is to change the signature to have a different constructor.
|
|
||||||
// This one is not deprecated and we must use it in our private code.
|
|
||||||
Reader(const std::shared_ptr<zim::Archive> archive, bool marker);
|
|
||||||
friend class Library;
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
|
@ -27,7 +27,6 @@
|
||||||
namespace kiwix
|
namespace kiwix
|
||||||
{
|
{
|
||||||
|
|
||||||
class Searcher;
|
|
||||||
class NameMapper;
|
class NameMapper;
|
||||||
/**
|
/**
|
||||||
* The SearchRenderer class is used to render a search result to an HTML page.
|
* The SearchRenderer class is used to render a search result to an HTML page.
|
||||||
|
@ -35,17 +34,6 @@ class NameMapper;
|
||||||
class SearchRenderer
|
class SearchRenderer
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
/**
|
|
||||||
* Construct a SearchRenderer from a Searcher.
|
|
||||||
*
|
|
||||||
* This method is now deprecated. Construct the renderer from a
|
|
||||||
* `zim::SearchResultSet`
|
|
||||||
*
|
|
||||||
* @param searcher The `Searcher` to render.
|
|
||||||
* @param mapper The `NameMapper` to use to do the rendering.
|
|
||||||
*/
|
|
||||||
DEPRECATED SearchRenderer(Searcher* searcher, NameMapper* mapper);
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Construct a SearchRenderer from a SearchResultSet.
|
* Construct a SearchRenderer from a SearchResultSet.
|
||||||
*
|
*
|
||||||
|
|
|
@ -1,180 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
|
|
||||||
*
|
|
||||||
* This program is free software; you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU General Public License as published by
|
|
||||||
* the Free Software Foundation; either version 3 of the License, or
|
|
||||||
* any later version.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
* GNU General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License
|
|
||||||
* along with this program; if not, write to the Free Software
|
|
||||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
|
||||||
* MA 02110-1301, USA.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef KIWIX_SEARCHER_H
|
|
||||||
#define KIWIX_SEARCHER_H
|
|
||||||
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <unicode/putil.h>
|
|
||||||
#include <algorithm>
|
|
||||||
#include <cctype>
|
|
||||||
#include <locale>
|
|
||||||
#include <string>
|
|
||||||
#include <memory>
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
#include <zim/search.h>
|
|
||||||
|
|
||||||
using namespace std;
|
|
||||||
|
|
||||||
namespace kiwix
|
|
||||||
{
|
|
||||||
class Reader;
|
|
||||||
class Result
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
virtual ~Result(){};
|
|
||||||
virtual std::string get_url() = 0;
|
|
||||||
virtual std::string get_title() = 0;
|
|
||||||
virtual int get_score() = 0;
|
|
||||||
virtual std::string get_snippet() = 0;
|
|
||||||
virtual std::string get_content() = 0;
|
|
||||||
virtual int get_wordCount() = 0;
|
|
||||||
virtual int get_size() = 0;
|
|
||||||
virtual std::string get_zimId() = 0;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct SearcherInternal;
|
|
||||||
struct SuggestionInternal;
|
|
||||||
/**
|
|
||||||
* The Searcher class is reponsible to do different kind of search using the
|
|
||||||
* fulltext index.
|
|
||||||
*
|
|
||||||
* The Searcher is now deprecated. Use libzim search feature.
|
|
||||||
*/
|
|
||||||
class Searcher
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
/**
|
|
||||||
* The default constructor.
|
|
||||||
*/
|
|
||||||
DEPRECATED Searcher();
|
|
||||||
|
|
||||||
~Searcher();
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Add a reader (containing embedded fulltext index) to the search.
|
|
||||||
*
|
|
||||||
* @param reader The Reader for the zim containing the fulltext index.
|
|
||||||
* @return true if the reader has been added.
|
|
||||||
* false if the reader cannot be added (no embedded fulltext index present)
|
|
||||||
*/
|
|
||||||
bool add_reader(std::shared_ptr<Reader> reader);
|
|
||||||
|
|
||||||
|
|
||||||
std::shared_ptr<Reader> get_reader(int index);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Start a search on the zim associated to the Searcher.
|
|
||||||
*
|
|
||||||
* Search results should be retrived using the getNextResult method.
|
|
||||||
*
|
|
||||||
* @param search The search query.
|
|
||||||
* @param resultStart the start offset of the search results (used for pagination).
|
|
||||||
* @param maxResultCount Maximum results to get from start (used for pagination).
|
|
||||||
* @param verbose print some info on stdout if true.
|
|
||||||
*/
|
|
||||||
void search(const std::string& search,
|
|
||||||
unsigned int resultStart,
|
|
||||||
unsigned int maxResultCount,
|
|
||||||
const bool verbose = false);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Start a geographique search.
|
|
||||||
* The search return result for entry in a disc of center latitude/longitude
|
|
||||||
* and radius distance.
|
|
||||||
*
|
|
||||||
* Search results should be retrived using the getNextResult method.
|
|
||||||
*
|
|
||||||
* @param latitude The latitude of the center point.
|
|
||||||
* @param longitude The longitude of the center point.
|
|
||||||
* @param distance The radius of the disc.
|
|
||||||
* @param resultStart the start offset of the search results (used for pagination).
|
|
||||||
* @param maxResultCount Maximum number of results to get from start (used for pagination).
|
|
||||||
* @param verbose print some info on stdout if true.
|
|
||||||
*/
|
|
||||||
void geo_search(float latitude, float longitude, float distance,
|
|
||||||
unsigned int resultStart,
|
|
||||||
unsigned int maxResultCount,
|
|
||||||
const bool verbose = false);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Start a suggestion search.
|
|
||||||
* The search made depend of the "version" of the embedded index.
|
|
||||||
* - If the index is newer enough and have a title namespace, the search is
|
|
||||||
* made in the titles only.
|
|
||||||
* - Else the search is made on the whole article content.
|
|
||||||
* In any case, the search is made "partial" (as adding '*' at the end of the query)
|
|
||||||
*
|
|
||||||
* @param search The search query.
|
|
||||||
* @param verbose print some info on stdout if true.
|
|
||||||
*/
|
|
||||||
void suggestions(std::string& search, const bool verbose = false);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get the next result of a started search.
|
|
||||||
* This is the method to use to loop hover the search results.
|
|
||||||
*/
|
|
||||||
Result* getNextResult();
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Restart the previous search.
|
|
||||||
* Next call to getNextResult will return the first result.
|
|
||||||
*/
|
|
||||||
void restart_search();
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get a estimation of the result count.
|
|
||||||
*/
|
|
||||||
unsigned int getEstimatedResultCount();
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get a SearchResultSet object for current search
|
|
||||||
*/
|
|
||||||
zim::SearchResultSet getSearchResultSet();
|
|
||||||
|
|
||||||
unsigned int getResultStart() { return resultStart; }
|
|
||||||
unsigned int getMaxResultCount() { return maxResultCount; }
|
|
||||||
|
|
||||||
protected:
|
|
||||||
std::string beautifyInteger(const unsigned int number);
|
|
||||||
void closeIndex();
|
|
||||||
void searchInIndex(string& search,
|
|
||||||
const unsigned int resultStart,
|
|
||||||
const unsigned int maxResultCount,
|
|
||||||
const bool verbose = false);
|
|
||||||
|
|
||||||
std::vector<std::shared_ptr<Reader>> readers;
|
|
||||||
std::unique_ptr<SearcherInternal> internal;
|
|
||||||
std::unique_ptr<SuggestionInternal> suggestionInternal;
|
|
||||||
std::string searchPattern;
|
|
||||||
unsigned int estimatedResultCount;
|
|
||||||
unsigned int resultStart;
|
|
||||||
unsigned int maxResultCount;
|
|
||||||
|
|
||||||
private:
|
|
||||||
void reset();
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
|
@ -18,7 +18,6 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "book.h"
|
#include "book.h"
|
||||||
#include "reader.h"
|
|
||||||
|
|
||||||
#include "tools.h"
|
#include "tools.h"
|
||||||
#include "tools/base64.h"
|
#include "tools/base64.h"
|
||||||
|
@ -30,7 +29,7 @@
|
||||||
#include "tools/archiveTools.h"
|
#include "tools/archiveTools.h"
|
||||||
|
|
||||||
#include <zim/archive.h>
|
#include <zim/archive.h>
|
||||||
|
#include <zim/item.h>
|
||||||
#include <pugixml.hpp>
|
#include <pugixml.hpp>
|
||||||
|
|
||||||
namespace kiwix
|
namespace kiwix
|
||||||
|
@ -64,11 +63,6 @@ bool Book::update(const kiwix::Book& other)
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Book::update(const kiwix::Reader& reader)
|
|
||||||
{
|
|
||||||
update(*reader.getZimArchive());
|
|
||||||
}
|
|
||||||
|
|
||||||
void Book::update(const zim::Archive& archive) {
|
void Book::update(const zim::Archive& archive) {
|
||||||
m_path = archive.getFilename();
|
m_path = archive.getFilename();
|
||||||
m_pathValid = true;
|
m_pathValid = true;
|
||||||
|
|
|
@ -1,73 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright 2018-2020 Matthieu Gautier <mgautier@kymeria.fr>
|
|
||||||
*
|
|
||||||
* This program is free software; you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU General Public License as published by
|
|
||||||
* the Free Software Foundation; either version 3 of the License, or
|
|
||||||
* any later version.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
* GNU General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License
|
|
||||||
* along with this program; if not, write to the Free Software
|
|
||||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
|
||||||
* MA 02110-1301, USA.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "reader.h"
|
|
||||||
#include <time.h>
|
|
||||||
|
|
||||||
namespace kiwix
|
|
||||||
{
|
|
||||||
|
|
||||||
Entry::Entry(zim::Entry entry, bool _marker)
|
|
||||||
: entry(entry)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
size_type Entry::getSize() const
|
|
||||||
{
|
|
||||||
if (entry.isRedirect()) {
|
|
||||||
return 0;
|
|
||||||
} else {
|
|
||||||
return entry.getItem().getSize();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string Entry::getMimetype() const
|
|
||||||
{
|
|
||||||
return entry.getItem(true).getMimetype();
|
|
||||||
}
|
|
||||||
|
|
||||||
bool Entry::isRedirect() const
|
|
||||||
{
|
|
||||||
return entry.isRedirect();
|
|
||||||
}
|
|
||||||
|
|
||||||
Entry Entry::getRedirectEntry() const
|
|
||||||
{
|
|
||||||
if ( !entry.isRedirect() ) {
|
|
||||||
throw NoEntry();
|
|
||||||
}
|
|
||||||
|
|
||||||
return Entry(entry.getRedirectEntry(), true);
|
|
||||||
}
|
|
||||||
|
|
||||||
Entry Entry::getFinalEntry() const
|
|
||||||
{
|
|
||||||
int loopCounter = 42;
|
|
||||||
auto final_entry = entry;
|
|
||||||
while (final_entry.isRedirect() && loopCounter--) {
|
|
||||||
final_entry = final_entry.getRedirectEntry();
|
|
||||||
}
|
|
||||||
// Prevent infinite loops.
|
|
||||||
if (final_entry.isRedirect()) {
|
|
||||||
throw NoEntry();
|
|
||||||
}
|
|
||||||
return Entry(final_entry, true);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
|
@ -19,7 +19,6 @@
|
||||||
|
|
||||||
#include "library.h"
|
#include "library.h"
|
||||||
#include "book.h"
|
#include "book.h"
|
||||||
#include "reader.h"
|
|
||||||
#include "libxml_dumper.h"
|
#include "libxml_dumper.h"
|
||||||
|
|
||||||
#include "tools.h"
|
#include "tools.h"
|
||||||
|
@ -278,16 +277,6 @@ const Book& Library::getBookByPath(const std::string& path) const
|
||||||
throw std::out_of_range(ss.str());
|
throw std::out_of_range(ss.str());
|
||||||
}
|
}
|
||||||
|
|
||||||
std::shared_ptr<Reader> Library::getReaderById(const std::string& id)
|
|
||||||
{
|
|
||||||
auto archive = getArchiveById(id);
|
|
||||||
if(archive) {
|
|
||||||
return std::shared_ptr<Reader>(new Reader(archive, true));
|
|
||||||
} else {
|
|
||||||
return nullptr;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
std::shared_ptr<zim::Archive> Library::getArchiveById(const std::string& id)
|
std::shared_ptr<zim::Archive> Library::getArchiveById(const std::string& id)
|
||||||
{
|
{
|
||||||
try {
|
try {
|
||||||
|
|
|
@ -6,10 +6,7 @@ kiwix_sources = [
|
||||||
'libxml_dumper.cpp',
|
'libxml_dumper.cpp',
|
||||||
'opds_dumper.cpp',
|
'opds_dumper.cpp',
|
||||||
'downloader.cpp',
|
'downloader.cpp',
|
||||||
'reader.cpp',
|
|
||||||
'entry.cpp',
|
|
||||||
'server.cpp',
|
'server.cpp',
|
||||||
'searcher.cpp',
|
|
||||||
'search_renderer.cpp',
|
'search_renderer.cpp',
|
||||||
'subprocess.cpp',
|
'subprocess.cpp',
|
||||||
'aria2.cpp',
|
'aria2.cpp',
|
||||||
|
|
472
src/reader.cpp
472
src/reader.cpp
|
@ -1,472 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
|
|
||||||
*
|
|
||||||
* This program is free software; you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU General Public License as published by
|
|
||||||
* the Free Software Foundation; either version 3 of the License, or
|
|
||||||
* any later version.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
* GNU General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License
|
|
||||||
* along with this program; if not, write to the Free Software
|
|
||||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
|
||||||
* MA 02110-1301, USA.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "reader.h"
|
|
||||||
#include <time.h>
|
|
||||||
|
|
||||||
#include <zim/search.h>
|
|
||||||
#include <zim/suggestion.h>
|
|
||||||
#include <zim/item.h>
|
|
||||||
#include <zim/error.h>
|
|
||||||
|
|
||||||
#include "tools.h"
|
|
||||||
#include "tools/stringTools.h"
|
|
||||||
#include "tools/otherTools.h"
|
|
||||||
#include "tools/archiveTools.h"
|
|
||||||
|
|
||||||
namespace kiwix
|
|
||||||
{
|
|
||||||
/* Constructor */
|
|
||||||
Reader::Reader(const string zimFilePath)
|
|
||||||
: zimArchive(nullptr),
|
|
||||||
zimFilePath(zimFilePath)
|
|
||||||
{
|
|
||||||
string tmpZimFilePath = zimFilePath;
|
|
||||||
|
|
||||||
/* Remove potential trailing zimaa */
|
|
||||||
size_t found = tmpZimFilePath.rfind("zimaa");
|
|
||||||
if (found != string::npos && tmpZimFilePath.size() > 5
|
|
||||||
&& found == tmpZimFilePath.size() - 5) {
|
|
||||||
tmpZimFilePath.resize(tmpZimFilePath.size() - 2);
|
|
||||||
}
|
|
||||||
|
|
||||||
zimArchive.reset(new zim::Archive(tmpZimFilePath));
|
|
||||||
|
|
||||||
/* initialize random seed: */
|
|
||||||
srand(time(nullptr));
|
|
||||||
}
|
|
||||||
|
|
||||||
Reader::Reader(const std::shared_ptr<zim::Archive> archive, bool _marker)
|
|
||||||
: zimArchive(archive),
|
|
||||||
zimFilePath(archive->getFilename())
|
|
||||||
{}
|
|
||||||
|
|
||||||
#ifndef _WIN32
|
|
||||||
Reader::Reader(int fd)
|
|
||||||
: zimArchive(new zim::Archive(fd)),
|
|
||||||
zimFilePath("")
|
|
||||||
{
|
|
||||||
/* initialize random seed: */
|
|
||||||
srand(time(nullptr));
|
|
||||||
}
|
|
||||||
|
|
||||||
Reader::Reader(int fd, zim::offset_type offset, zim::size_type size)
|
|
||||||
: zimArchive(new zim::Archive(fd, offset, size)),
|
|
||||||
zimFilePath("")
|
|
||||||
{
|
|
||||||
/* initialize random seed: */
|
|
||||||
srand(time(nullptr));
|
|
||||||
}
|
|
||||||
#endif // #ifndef _WIN32
|
|
||||||
|
|
||||||
zim::Archive* Reader::getZimArchive() const
|
|
||||||
{
|
|
||||||
return zimArchive.get();
|
|
||||||
}
|
|
||||||
|
|
||||||
MimeCounterType Reader::parseCounterMetadata() const
|
|
||||||
{
|
|
||||||
return kiwix::parseArchiveCounter(*zimArchive);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Get the count of articles which can be indexed/displayed */
|
|
||||||
unsigned int Reader::getArticleCount() const
|
|
||||||
{
|
|
||||||
std::map<const std::string, unsigned int> counterMap
|
|
||||||
= this->parseCounterMetadata();
|
|
||||||
unsigned int counter = 0;
|
|
||||||
|
|
||||||
for(auto &pair:counterMap) {
|
|
||||||
if (startsWith(pair.first, "text/html")) {
|
|
||||||
counter += pair.second;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return counter;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Get the count of medias content in the ZIM file */
|
|
||||||
unsigned int Reader::getMediaCount() const
|
|
||||||
{
|
|
||||||
return kiwix::getArchiveMediaCount(*zimArchive);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Get the total of all items of a ZIM file, redirects included */
|
|
||||||
unsigned int Reader::getGlobalCount() const
|
|
||||||
{
|
|
||||||
return zimArchive->getEntryCount();
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Return the UID of the ZIM file */
|
|
||||||
string Reader::getId() const
|
|
||||||
{
|
|
||||||
return kiwix::getArchiveId(*zimArchive);
|
|
||||||
}
|
|
||||||
|
|
||||||
Entry Reader::getRandomPage() const
|
|
||||||
{
|
|
||||||
try {
|
|
||||||
return Entry(zimArchive->getRandomEntry(), true);
|
|
||||||
} catch(...) {
|
|
||||||
throw NoEntry();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Entry Reader::getMainPage() const
|
|
||||||
{
|
|
||||||
return Entry(zimArchive->getMainEntry(), true);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool Reader::getFavicon(string& content, string& mimeType) const
|
|
||||||
{
|
|
||||||
return kiwix::getArchiveFavicon(*zimArchive, 48, content, mimeType);
|
|
||||||
}
|
|
||||||
|
|
||||||
string Reader::getZimFilePath() const
|
|
||||||
{
|
|
||||||
return zimFilePath;
|
|
||||||
}
|
|
||||||
/* Return a metatag value */
|
|
||||||
bool Reader::getMetadata(const string& name, string& value) const
|
|
||||||
{
|
|
||||||
try {
|
|
||||||
value = zimArchive->getMetadata(name);
|
|
||||||
return true;
|
|
||||||
} catch(zim::EntryNotFound& e) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#define METADATA(NAME) std::string v; getMetadata(NAME, v); return v;
|
|
||||||
|
|
||||||
string Reader::getName() const
|
|
||||||
{
|
|
||||||
return kiwix::getMetaName(*zimArchive);
|
|
||||||
}
|
|
||||||
|
|
||||||
string Reader::getTitle() const
|
|
||||||
{
|
|
||||||
return kiwix::getArchiveTitle(*zimArchive);
|
|
||||||
}
|
|
||||||
|
|
||||||
string Reader::getCreator() const
|
|
||||||
{
|
|
||||||
return kiwix::getMetaCreator(*zimArchive);
|
|
||||||
}
|
|
||||||
|
|
||||||
string Reader::getPublisher() const
|
|
||||||
{
|
|
||||||
return kiwix::getMetaPublisher(*zimArchive);
|
|
||||||
}
|
|
||||||
|
|
||||||
string Reader::getDate() const
|
|
||||||
{
|
|
||||||
return kiwix::getMetaDate(*zimArchive);
|
|
||||||
}
|
|
||||||
|
|
||||||
string Reader::getDescription() const
|
|
||||||
{
|
|
||||||
return kiwix::getMetaDescription(*zimArchive);
|
|
||||||
}
|
|
||||||
|
|
||||||
string Reader::getLongDescription() const
|
|
||||||
{
|
|
||||||
METADATA("LongDescription")
|
|
||||||
}
|
|
||||||
|
|
||||||
string Reader::getLanguage() const
|
|
||||||
{
|
|
||||||
return kiwix::getMetaLanguage(*zimArchive);
|
|
||||||
}
|
|
||||||
|
|
||||||
string Reader::getLicense() const
|
|
||||||
{
|
|
||||||
METADATA("License")
|
|
||||||
}
|
|
||||||
|
|
||||||
string Reader::getTags(bool original) const
|
|
||||||
{
|
|
||||||
return kiwix::getMetaTags(*zimArchive, original);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
string Reader::getTagStr(const std::string& tagName) const
|
|
||||||
{
|
|
||||||
string tags_str;
|
|
||||||
getMetadata("Tags", tags_str);
|
|
||||||
return getTagValueFromTagList(convertTags(tags_str), tagName);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool Reader::getTagBool(const std::string& tagName) const
|
|
||||||
{
|
|
||||||
return convertStrToBool(getTagStr(tagName));
|
|
||||||
}
|
|
||||||
|
|
||||||
string Reader::getRelation() const
|
|
||||||
{
|
|
||||||
METADATA("Relation")
|
|
||||||
}
|
|
||||||
|
|
||||||
string Reader::getFlavour() const
|
|
||||||
{
|
|
||||||
return kiwix::getMetaFlavour(*zimArchive);
|
|
||||||
}
|
|
||||||
|
|
||||||
string Reader::getSource() const
|
|
||||||
{
|
|
||||||
METADATA("Source")
|
|
||||||
}
|
|
||||||
|
|
||||||
string Reader::getScraper() const
|
|
||||||
{
|
|
||||||
METADATA("Scraper")
|
|
||||||
}
|
|
||||||
#undef METADATA
|
|
||||||
|
|
||||||
Entry Reader::getEntryFromPath(const std::string& path) const
|
|
||||||
{
|
|
||||||
try {
|
|
||||||
return Entry(kiwix::getEntryFromPath(*zimArchive, path), true);
|
|
||||||
} catch (zim::EntryNotFound& e) {
|
|
||||||
throw NoEntry();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Entry Reader::getEntryFromEncodedPath(const std::string& path) const
|
|
||||||
{
|
|
||||||
return getEntryFromPath(urlDecode(path, true));
|
|
||||||
}
|
|
||||||
|
|
||||||
Entry Reader::getEntryFromTitle(const std::string& title) const
|
|
||||||
{
|
|
||||||
try {
|
|
||||||
return Entry(zimArchive->getEntryByTitle(title), true);
|
|
||||||
} catch(zim::EntryNotFound& e) {
|
|
||||||
throw NoEntry();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bool Reader::pathExists(const string& path) const
|
|
||||||
{
|
|
||||||
return zimArchive->hasEntryByPath(path);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Does the ZIM file has a fulltext index */
|
|
||||||
bool Reader::hasFulltextIndex() const
|
|
||||||
{
|
|
||||||
return zimArchive->hasFulltextIndex();
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Search titles by prefix */
|
|
||||||
|
|
||||||
bool Reader::searchSuggestions(const string& prefix,
|
|
||||||
unsigned int suggestionsCount,
|
|
||||||
const bool reset)
|
|
||||||
{
|
|
||||||
/* Reset the suggestions otherwise check if the suggestions number is less
|
|
||||||
* than the suggestionsCount */
|
|
||||||
if (reset) {
|
|
||||||
this->suggestions.clear();
|
|
||||||
this->suggestionsOffset = this->suggestions.begin();
|
|
||||||
} else {
|
|
||||||
if (this->suggestions.size() > suggestionsCount) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
auto ret = searchSuggestions(prefix, suggestionsCount, this->suggestions);
|
|
||||||
|
|
||||||
/* Set the cursor to the begining */
|
|
||||||
this->suggestionsOffset = this->suggestions.begin();
|
|
||||||
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
bool Reader::searchSuggestions(const string& prefix,
|
|
||||||
unsigned int suggestionsCount,
|
|
||||||
SuggestionsList_t& results)
|
|
||||||
{
|
|
||||||
bool retVal = false;
|
|
||||||
|
|
||||||
/* Return if no prefix */
|
|
||||||
if (prefix.size() == 0) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (auto& entry: zimArchive->findByTitle(prefix)) {
|
|
||||||
if (results.size() >= suggestionsCount) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
/* Extract the interesting part of article title & url */
|
|
||||||
std::string normalizedArticleTitle
|
|
||||||
= kiwix::normalize(entry.getTitle());
|
|
||||||
|
|
||||||
// Get the final path.
|
|
||||||
auto item = entry.getItem(true);
|
|
||||||
std::string articleFinalUrl = item.getPath();
|
|
||||||
|
|
||||||
/* Go through all already found suggestions and skip if this
|
|
||||||
article is already in the suggestions list (with an other
|
|
||||||
title) */
|
|
||||||
bool insert = true;
|
|
||||||
std::vector<SuggestionItem>::iterator suggestionItr;
|
|
||||||
for (suggestionItr = results.begin();
|
|
||||||
suggestionItr != results.end();
|
|
||||||
suggestionItr++) {
|
|
||||||
int result = normalizedArticleTitle.compare((*suggestionItr).getNormalizedTitle());
|
|
||||||
if (result == 0 && articleFinalUrl.compare((*suggestionItr).getPath()) == 0) {
|
|
||||||
insert = false;
|
|
||||||
break;
|
|
||||||
} else if (result < 0) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Insert if possible */
|
|
||||||
if (insert) {
|
|
||||||
SuggestionItem suggestion(entry.getTitle(), normalizedArticleTitle, articleFinalUrl);
|
|
||||||
results.insert(suggestionItr, suggestion);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Suggestions where found */
|
|
||||||
retVal = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
return retVal;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::string> Reader::getTitleVariants(
|
|
||||||
const std::string& title) const
|
|
||||||
{
|
|
||||||
return kiwix::getTitleVariants(title);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
bool Reader::searchSuggestionsSmart(const string& prefix,
|
|
||||||
unsigned int suggestionsCount)
|
|
||||||
{
|
|
||||||
this->suggestions.clear();
|
|
||||||
this->suggestionsOffset = this->suggestions.begin();
|
|
||||||
|
|
||||||
auto ret = searchSuggestionsSmart(prefix, suggestionsCount, this->suggestions);
|
|
||||||
|
|
||||||
this->suggestionsOffset = this->suggestions.begin();
|
|
||||||
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Try also a few variations of the prefix to have better results */
|
|
||||||
bool Reader::searchSuggestionsSmart(const string& prefix,
|
|
||||||
unsigned int suggestionsCount,
|
|
||||||
SuggestionsList_t& results)
|
|
||||||
{
|
|
||||||
std::vector<std::string> variants = this->getTitleVariants(prefix);
|
|
||||||
|
|
||||||
auto suggestionSearcher = zim::SuggestionSearcher(*zimArchive);
|
|
||||||
|
|
||||||
if (zimArchive->hasTitleIndex()) {
|
|
||||||
auto suggestionSearch = suggestionSearcher.suggest(prefix);
|
|
||||||
const auto suggestions = suggestionSearch.getResults(0, suggestionsCount);
|
|
||||||
for (auto current : suggestions) {
|
|
||||||
SuggestionItem suggestion(current.getTitle(), kiwix::normalize(current.getTitle()),
|
|
||||||
current.getPath(), current.getSnippet());
|
|
||||||
results.push_back(suggestion);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// Check some of the variants of the prefix
|
|
||||||
for (std::vector<std::string>::iterator variantsItr = variants.begin();
|
|
||||||
variantsItr != variants.end();
|
|
||||||
variantsItr++) {
|
|
||||||
auto suggestionSearch = suggestionSearcher.suggest(*variantsItr);
|
|
||||||
for (auto current : suggestionSearch.getResults(0, suggestionsCount)) {
|
|
||||||
if (results.size() >= suggestionsCount) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
SuggestionItem suggestion(current.getTitle(), kiwix::normalize(current.getTitle()),
|
|
||||||
current.getPath(), current.getSnippet());
|
|
||||||
results.push_back(suggestion);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return results.size() > 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Get next suggestion */
|
|
||||||
bool Reader::getNextSuggestion(string& title)
|
|
||||||
{
|
|
||||||
if (this->suggestionsOffset != this->suggestions.end()) {
|
|
||||||
/* title */
|
|
||||||
title = (*(this->suggestionsOffset)).getTitle();
|
|
||||||
|
|
||||||
/* increment the cursor for the next call */
|
|
||||||
this->suggestionsOffset++;
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool Reader::getNextSuggestion(string& title, string& url)
|
|
||||||
{
|
|
||||||
if (this->suggestionsOffset != this->suggestions.end()) {
|
|
||||||
/* title */
|
|
||||||
title = (*(this->suggestionsOffset)).getTitle();
|
|
||||||
url = (*(this->suggestionsOffset)).getPath();
|
|
||||||
|
|
||||||
/* increment the cursor for the next call */
|
|
||||||
this->suggestionsOffset++;
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Check if the file has as checksum */
|
|
||||||
bool Reader::canCheckIntegrity() const
|
|
||||||
{
|
|
||||||
return zimArchive->hasChecksum();
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Return true if corrupted, false otherwise */
|
|
||||||
bool Reader::isCorrupted() const
|
|
||||||
{
|
|
||||||
try {
|
|
||||||
if (zimArchive->check() == true) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
} catch (exception& e) {
|
|
||||||
cerr << e.what() << endl;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Return the file size, works also for splitted files */
|
|
||||||
unsigned int Reader::getFileSize() const
|
|
||||||
{
|
|
||||||
return kiwix::getArchiveFileSize(*zimArchive);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
|
@ -21,8 +21,6 @@
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
|
|
||||||
#include "search_renderer.h"
|
#include "search_renderer.h"
|
||||||
#include "searcher.h"
|
|
||||||
#include "reader.h"
|
|
||||||
#include "library.h"
|
#include "library.h"
|
||||||
#include "name_mapper.h"
|
#include "name_mapper.h"
|
||||||
|
|
||||||
|
@ -38,16 +36,6 @@ namespace kiwix
|
||||||
{
|
{
|
||||||
|
|
||||||
/* Constructor */
|
/* Constructor */
|
||||||
SearchRenderer::SearchRenderer(Searcher* searcher, NameMapper* mapper)
|
|
||||||
: SearchRenderer(
|
|
||||||
/* srs */ searcher->getSearchResultSet(),
|
|
||||||
/* mapper */ mapper,
|
|
||||||
/* library */ nullptr,
|
|
||||||
/* start */ searcher->getResultStart(),
|
|
||||||
/* estimatedResultCount */ searcher->getEstimatedResultCount()
|
|
||||||
)
|
|
||||||
{}
|
|
||||||
|
|
||||||
SearchRenderer::SearchRenderer(zim::SearchResultSet srs, NameMapper* mapper,
|
SearchRenderer::SearchRenderer(zim::SearchResultSet srs, NameMapper* mapper,
|
||||||
unsigned int start, unsigned int estimatedResultCount)
|
unsigned int start, unsigned int estimatedResultCount)
|
||||||
: SearchRenderer(srs, mapper, nullptr, start, estimatedResultCount)
|
: SearchRenderer(srs, mapper, nullptr, start, estimatedResultCount)
|
||||||
|
|
330
src/searcher.cpp
330
src/searcher.cpp
|
@ -1,330 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
|
|
||||||
*
|
|
||||||
* This program is free software; you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU General Public License as published by
|
|
||||||
* the Free Software Foundation; either version 3 of the License, or
|
|
||||||
* any later version.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
* GNU General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License
|
|
||||||
* along with this program; if not, write to the Free Software
|
|
||||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
|
||||||
* MA 02110-1301, USA.
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#include "searcher.h"
|
|
||||||
#include "reader.h"
|
|
||||||
|
|
||||||
#include <zim/search.h>
|
|
||||||
#include <zim/suggestion.h>
|
|
||||||
|
|
||||||
#include <mustache.hpp>
|
|
||||||
#include <cmath>
|
|
||||||
#include "tools/stringTools.h"
|
|
||||||
#include "kiwixlib-resources.h"
|
|
||||||
|
|
||||||
#define MAX_SEARCH_LEN 140
|
|
||||||
|
|
||||||
namespace kiwix
|
|
||||||
{
|
|
||||||
class _Result : public Result
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
_Result(zim::SearchResultSet::iterator iterator);
|
|
||||||
_Result(SuggestionItem suggestionItem);
|
|
||||||
virtual ~_Result(){};
|
|
||||||
|
|
||||||
virtual std::string get_url();
|
|
||||||
virtual std::string get_title();
|
|
||||||
virtual int get_score();
|
|
||||||
virtual std::string get_snippet();
|
|
||||||
virtual std::string get_content();
|
|
||||||
virtual int get_wordCount();
|
|
||||||
virtual int get_size();
|
|
||||||
virtual std::string get_zimId();
|
|
||||||
|
|
||||||
private:
|
|
||||||
zim::SearchResultSet::iterator iterator;
|
|
||||||
SuggestionItem suggestionItem;
|
|
||||||
bool isSuggestion;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct SearcherInternal : zim::SearchResultSet {
|
|
||||||
explicit SearcherInternal(const zim::SearchResultSet& srs)
|
|
||||||
: zim::SearchResultSet(srs)
|
|
||||||
, current_iterator(srs.begin())
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
zim::SearchResultSet::iterator current_iterator;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct SuggestionInternal : zim::SuggestionResultSet {
|
|
||||||
explicit SuggestionInternal(const zim::SuggestionResultSet& srs)
|
|
||||||
: zim::SuggestionResultSet(srs),
|
|
||||||
currentIterator(srs.begin()) {}
|
|
||||||
|
|
||||||
zim::SuggestionResultSet::iterator currentIterator;
|
|
||||||
};
|
|
||||||
|
|
||||||
/* Constructor */
|
|
||||||
Searcher::Searcher()
|
|
||||||
: searchPattern(""),
|
|
||||||
estimatedResultCount(0),
|
|
||||||
resultStart(0),
|
|
||||||
maxResultCount(0)
|
|
||||||
{
|
|
||||||
loadICUExternalTables();
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Destructor */
|
|
||||||
Searcher::~Searcher()
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
bool Searcher::add_reader(std::shared_ptr<Reader> reader)
|
|
||||||
{
|
|
||||||
if (!reader->hasFulltextIndex()) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
for ( auto existing_reader : readers ) {
|
|
||||||
if ( existing_reader->getZimArchive()->getUuid() == reader->getZimArchive()->getUuid() )
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
this->readers.push_back(reader);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
std::shared_ptr<Reader> Searcher::get_reader(int readerIndex)
|
|
||||||
{
|
|
||||||
return readers.at(readerIndex);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Search strings in the database */
|
|
||||||
void Searcher::search(const std::string& search,
|
|
||||||
unsigned int resultStart,
|
|
||||||
unsigned int maxResultCount,
|
|
||||||
const bool verbose)
|
|
||||||
{
|
|
||||||
this->reset();
|
|
||||||
|
|
||||||
if (verbose == true) {
|
|
||||||
cout << "Performing query `" << search << "'" << endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
this->searchPattern = search;
|
|
||||||
this->resultStart = resultStart;
|
|
||||||
this->maxResultCount = maxResultCount;
|
|
||||||
/* Try to find results */
|
|
||||||
if (maxResultCount != 0) {
|
|
||||||
/* Perform the search */
|
|
||||||
string unaccentedSearch = removeAccents(search);
|
|
||||||
std::vector<zim::Archive> archives;
|
|
||||||
for (auto current = this->readers.begin(); current != this->readers.end();
|
|
||||||
current++) {
|
|
||||||
if ( (*current)->hasFulltextIndex() ) {
|
|
||||||
archives.push_back(*(*current)->getZimArchive());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
zim::Searcher searcher(archives);
|
|
||||||
searcher.setVerbose(verbose);
|
|
||||||
zim::Query query;
|
|
||||||
query.setQuery(unaccentedSearch);
|
|
||||||
zim::Search search = searcher.search(query);
|
|
||||||
internal.reset(new SearcherInternal(search.getResults(resultStart, maxResultCount)));
|
|
||||||
this->estimatedResultCount = search.getEstimatedMatches();
|
|
||||||
}
|
|
||||||
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void Searcher::geo_search(float latitude, float longitude, float distance,
|
|
||||||
unsigned int resultStart,
|
|
||||||
unsigned int maxResultCount,
|
|
||||||
const bool verbose)
|
|
||||||
{
|
|
||||||
this->reset();
|
|
||||||
|
|
||||||
if (verbose == true) {
|
|
||||||
cout << "Performing geo query `" << distance << "&(" << latitude << ";" << longitude << ")'" << endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Perform the search */
|
|
||||||
std::ostringstream oss;
|
|
||||||
oss << "Articles located less than " << distance << " meters of " << latitude << ";" << longitude;
|
|
||||||
this->searchPattern = oss.str();
|
|
||||||
this->resultStart = resultStart;
|
|
||||||
this->maxResultCount = maxResultCount;
|
|
||||||
|
|
||||||
/* Try to find results */
|
|
||||||
if (maxResultCount == 0) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<zim::Archive> archives;
|
|
||||||
for (auto current = this->readers.begin(); current != this->readers.end();
|
|
||||||
current++) {
|
|
||||||
archives.push_back(*(*current)->getZimArchive());
|
|
||||||
}
|
|
||||||
zim::Searcher searcher(archives);
|
|
||||||
searcher.setVerbose(verbose);
|
|
||||||
zim::Query query;
|
|
||||||
query.setQuery("");
|
|
||||||
query.setGeorange(latitude, longitude, distance);
|
|
||||||
zim::Search search = searcher.search(query);
|
|
||||||
internal.reset(new SearcherInternal(search.getResults(resultStart, maxResultCount)));
|
|
||||||
this->estimatedResultCount = search.getEstimatedMatches();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void Searcher::restart_search()
|
|
||||||
{
|
|
||||||
if (internal.get()) {
|
|
||||||
internal->current_iterator = internal->begin();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
 * Hand out the next pending result and advance the matching cursor.
 *
 * Search results are served first; once they are exhausted (or absent) the
 * suggestion results are served. The returned object is allocated with
 * `new`. NULL is returned when neither source has anything left.
 */
Result* Searcher::getNextResult()
{
  const bool searchPending =
      internal.get() && internal->current_iterator != internal->end();
  if (searchPending) {
    Result* nextResult = new _Result(internal->current_iterator);
    ++internal->current_iterator;
    return nextResult;
  }

  const bool suggestionPending =
      suggestionInternal.get() &&
      suggestionInternal->currentIterator != suggestionInternal->end();
  if (!suggestionPending) {
    return NULL;
  }

  /* Repackage the libzim suggestion, adding the normalized title. */
  const std::string title = suggestionInternal->currentIterator->getTitle();
  SuggestionItem item(
      title,
      normalize(title),
      suggestionInternal->currentIterator->getPath(),
      suggestionInternal->currentIterator->getSnippet());
  Result* nextResult = new _Result(item);
  ++suggestionInternal->currentIterator;
  return nextResult;
}
|
|
||||||
|
|
||||||
/* Reset the results */
|
|
||||||
void Searcher::reset()
|
|
||||||
{
|
|
||||||
this->estimatedResultCount = 0;
|
|
||||||
this->searchPattern = "";
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
void Searcher::suggestions(std::string& searchPattern, const bool verbose)
|
|
||||||
{
|
|
||||||
this->reset();
|
|
||||||
|
|
||||||
if (verbose == true) {
|
|
||||||
cout << "Performing suggestion query `" << searchPattern << "`" << endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
this->searchPattern = searchPattern;
|
|
||||||
this->resultStart = 0;
|
|
||||||
this->maxResultCount = 10;
|
|
||||||
string unaccentedSearch = removeAccents(searchPattern);
|
|
||||||
|
|
||||||
// Multizim suggestion is not supported as of now! taking only one archive
|
|
||||||
zim::Archive archive = *(*this->readers.begin())->getZimArchive();
|
|
||||||
zim::SuggestionSearcher searcher(archive);
|
|
||||||
searcher.setVerbose(verbose);
|
|
||||||
zim::SuggestionSearch search = searcher.suggest(searchPattern);
|
|
||||||
suggestionInternal.reset(new SuggestionInternal(search.getResults(resultStart, maxResultCount)));
|
|
||||||
this->estimatedResultCount = search.getEstimatedMatches();
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Return the result count estimation */
|
|
||||||
unsigned int Searcher::getEstimatedResultCount()
|
|
||||||
{
|
|
||||||
return this->estimatedResultCount;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
 * Return a copy of the stored search result set.
 * NOTE(review): `internal` is dereferenced without a check, so this looks
 * like it expects a search to have stored results first - confirm with
 * callers.
 */
zim::SearchResultSet Searcher::getSearchResultSet()
{
  return *internal;
}
|
|
||||||
|
|
||||||
/*
 * Build a result backed by a search iterator.
 * The suggestion item is filled with empty strings and the result is
 * flagged as not being a suggestion.
 */
_Result::_Result(zim::SearchResultSet::iterator iterator)
  : iterator(iterator)
  , suggestionItem("", "", "")
  , isSuggestion(false)
{
}
|
|
||||||
|
|
||||||
/*
 * Build a result backed by a suggestion.
 * The search iterator is default-constructed, the four fields of `item`
 * are copied, and the result is flagged as a suggestion.
 */
_Result::_Result(SuggestionItem item)
  : iterator()
  , suggestionItem(item.getTitle(),
                   item.getNormalizedTitle(),
                   item.getPath(),
                   item.getSnippet())
  , isSuggestion(true)
{
}
|
|
||||||
|
|
||||||
std::string _Result::get_url()
|
|
||||||
{
|
|
||||||
if (isSuggestion) {
|
|
||||||
return suggestionItem.getPath();
|
|
||||||
}
|
|
||||||
return iterator.getPath();
|
|
||||||
}
|
|
||||||
std::string _Result::get_title()
|
|
||||||
{
|
|
||||||
if (isSuggestion) {
|
|
||||||
return suggestionItem.getTitle();
|
|
||||||
}
|
|
||||||
return iterator.getTitle();
|
|
||||||
}
|
|
||||||
int _Result::get_score()
|
|
||||||
{
|
|
||||||
if (isSuggestion) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
return iterator.getScore();
|
|
||||||
}
|
|
||||||
std::string _Result::get_snippet()
|
|
||||||
{
|
|
||||||
if (isSuggestion) {
|
|
||||||
return suggestionItem.getSnippet();
|
|
||||||
}
|
|
||||||
return iterator.getSnippet();
|
|
||||||
}
|
|
||||||
std::string _Result::get_content()
|
|
||||||
{
|
|
||||||
if (isSuggestion) return "";
|
|
||||||
return iterator->getItem(true).getData();
|
|
||||||
}
|
|
||||||
int _Result::get_size()
|
|
||||||
{
|
|
||||||
if (isSuggestion) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
return iterator.getSize();
|
|
||||||
}
|
|
||||||
int _Result::get_wordCount()
|
|
||||||
{
|
|
||||||
if (isSuggestion) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
return iterator.getWordCount();
|
|
||||||
}
|
|
||||||
std::string _Result::get_zimId()
|
|
||||||
{
|
|
||||||
if (isSuggestion) {
|
|
||||||
return "";
|
|
||||||
}
|
|
||||||
std::ostringstream s;
|
|
||||||
s << iterator.getZimId();
|
|
||||||
return s.str();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
|
@ -24,6 +24,7 @@
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
|
#include <zim/item.h>
|
||||||
#include "server/internalServer.h"
|
#include "server/internalServer.h"
|
||||||
|
|
||||||
namespace kiwix {
|
namespace kiwix {
|
||||||
|
|
|
@ -51,8 +51,6 @@ extern "C" {
|
||||||
#include "tools/networkTools.h"
|
#include "tools/networkTools.h"
|
||||||
#include "library.h"
|
#include "library.h"
|
||||||
#include "name_mapper.h"
|
#include "name_mapper.h"
|
||||||
#include "entry.h"
|
|
||||||
#include "searcher.h"
|
|
||||||
#include "search_renderer.h"
|
#include "search_renderer.h"
|
||||||
#include "opds_dumper.h"
|
#include "opds_dumper.h"
|
||||||
#include "i18n.h"
|
#include "i18n.h"
|
||||||
|
@ -61,6 +59,7 @@ extern "C" {
|
||||||
#include <zim/error.h>
|
#include <zim/error.h>
|
||||||
#include <zim/entry.h>
|
#include <zim/entry.h>
|
||||||
#include <zim/item.h>
|
#include <zim/item.h>
|
||||||
|
#include <zim/suggestion.h>
|
||||||
|
|
||||||
#include <mustache.hpp>
|
#include <mustache.hpp>
|
||||||
|
|
||||||
|
@ -618,40 +617,6 @@ std::unique_ptr<Response> InternalServer::build_homepage(const RequestContext& r
|
||||||
* Archive and Zim handlers begin
|
* Archive and Zim handlers begin
|
||||||
**/
|
**/
|
||||||
|
|
||||||
/*
 * Collect title suggestions for `queryString` in one archive.
 *
 * cache            per-book cache of suggestion searchers; a searcher for
 *                  `archive` is created and stored under `bookId` when the
 *                  cache has none.
 * archive          archive to query (dereferenced unconditionally).
 * bookId           cache key for the searcher.
 * queryString      text to complete.
 * start            offset of the first suggestion; only honoured when the
 *                  archive has a title index.
 * suggestionCount  maximum number of suggestions to return.
 *
 * With a title index the query is run once and the snippet is kept.
 * Without one, each variant from getTitleVariants() is queried in turn
 * until `suggestionCount` suggestions are gathered; no snippet is set.
 */
SuggestionsList_t getSuggestions(SuggestionSearcherCache& cache, const zim::Archive* const archive,
                  const std::string& bookId, const std::string& queryString, int start, int suggestionCount)
{
  SuggestionsList_t suggestions;
  std::shared_ptr<zim::SuggestionSearcher> searcher;
  searcher = cache.getOrPut(bookId, [=](){ return std::make_shared<zim::SuggestionSearcher>(*archive); });

  if (archive->hasTitleIndex()) {
    auto search = searcher->suggest(queryString);
    auto srs = search.getResults(start, suggestionCount);

    for (const auto& item : srs) {
      SuggestionItem suggestion(item.getTitle(), kiwix::normalize(item.getTitle()),
                                item.getPath(), item.getSnippet());
      suggestions.push_back(suggestion);
    }
  } else {
    // TODO: This case should be handled by libzim
    const std::vector<std::string> variants = getTitleVariants(queryString);
    int currCount = 0;
    for (const auto& variant : variants) {
      if (currCount >= suggestionCount) {
        break;
      }
      /* Query the variant itself; asking for `queryString` on every turn
         would return the same results once per variant. */
      auto search = searcher->suggest(variant);
      /* Only ask for what is still missing so the total never exceeds
         suggestionCount. */
      auto srs = search.getResults(0, suggestionCount - currCount);
      for (const auto& item : srs) {
        SuggestionItem suggestion(item.getTitle(), kiwix::normalize(item.getTitle()),
                                  item.getPath());
        suggestions.push_back(suggestion);
        ++currCount;
      }
    }
  }
  return suggestions;
}
|
|
||||||
|
|
||||||
std::unique_ptr<Response> InternalServer::handle_suggest(const RequestContext& request)
|
std::unique_ptr<Response> InternalServer::handle_suggest(const RequestContext& request)
|
||||||
{
|
{
|
||||||
if (m_verbose.load()) {
|
if (m_verbose.load()) {
|
||||||
|
@ -690,9 +655,13 @@ std::unique_ptr<Response> InternalServer::handle_suggest(const RequestContext& r
|
||||||
bool first = true;
|
bool first = true;
|
||||||
|
|
||||||
/* Get the suggestions */
|
/* Get the suggestions */
|
||||||
SuggestionsList_t suggestions = getSuggestions(suggestionSearcherCache, archive.get(),
|
auto searcher = suggestionSearcherCache.getOrPut(bookId,
|
||||||
bookId, queryString, start, count);
|
[=](){ return make_shared<zim::SuggestionSearcher>(*archive); }
|
||||||
for(auto& suggestion:suggestions) {
|
);
|
||||||
|
auto search = searcher->suggest(queryString);
|
||||||
|
auto srs = search.getResults(start, count);
|
||||||
|
|
||||||
|
for(auto& suggestion: srs) {
|
||||||
MustacheData result;
|
MustacheData result;
|
||||||
result.set("label", suggestion.getTitle());
|
result.set("label", suggestion.getTitle());
|
||||||
|
|
||||||
|
|
|
@ -91,7 +91,6 @@ typedef kainjow::mustache::data MustacheData;
|
||||||
typedef ConcurrentCache<SearchInfo, std::shared_ptr<zim::Search>> SearchCache;
|
typedef ConcurrentCache<SearchInfo, std::shared_ptr<zim::Search>> SearchCache;
|
||||||
typedef ConcurrentCache<string, std::shared_ptr<zim::SuggestionSearcher>> SuggestionSearcherCache;
|
typedef ConcurrentCache<string, std::shared_ptr<zim::SuggestionSearcher>> SuggestionSearcherCache;
|
||||||
|
|
||||||
class Entry;
|
|
||||||
class OPDSDumper;
|
class OPDSDumper;
|
||||||
|
|
||||||
class InternalServer {
|
class InternalServer {
|
||||||
|
|
|
@ -26,10 +26,11 @@
|
||||||
|
|
||||||
#include <mustache.hpp>
|
#include <mustache.hpp>
|
||||||
#include "byte_range.h"
|
#include "byte_range.h"
|
||||||
#include "entry.h"
|
|
||||||
#include "etag.h"
|
#include "etag.h"
|
||||||
#include "i18n.h"
|
#include "i18n.h"
|
||||||
|
|
||||||
|
#include <zim/item.h>
|
||||||
|
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#include "microhttpd_wrapper.h"
|
#include "microhttpd_wrapper.h"
|
||||||
}
|
}
|
||||||
|
|
|
@ -3,6 +3,8 @@
|
||||||
#include "../include/server.h"
|
#include "../include/server.h"
|
||||||
#include "../include/name_mapper.h"
|
#include "../include/name_mapper.h"
|
||||||
#include "../include/tools.h"
|
#include "../include/tools.h"
|
||||||
|
#include <zim/entry.h>
|
||||||
|
#include <zim/item.h>
|
||||||
|
|
||||||
// Output generated via mustache templates sometimes contains end-of-line
|
// Output generated via mustache templates sometimes contains end-of-line
|
||||||
// whitespace. This complicates representing the expected output of a unit-test
|
// whitespace. This complicates representing the expected output of a unit-test
|
||||||
|
|
Loading…
Reference in New Issue