mirror of https://github.com/kiwix/libkiwix.git
commit
14653c6958
|
@ -38,6 +38,10 @@ namespace kiwix
|
|||
{
|
||||
enum supportedIndexType { UNKNOWN, XAPIAN };
|
||||
|
||||
|
||||
/**
|
||||
* A class to store information about a book (a zim file)
|
||||
*/
|
||||
class Book
|
||||
{
|
||||
public:
|
||||
|
@ -78,6 +82,9 @@ class Book
|
|||
string faviconMimeType;
|
||||
};
|
||||
|
||||
/**
|
||||
* A Library store several books.
|
||||
*/
|
||||
class Library
|
||||
{
|
||||
public:
|
||||
|
@ -85,7 +92,24 @@ class Library
|
|||
~Library();
|
||||
|
||||
string version;
|
||||
/**
|
||||
* Add a book to the library.
|
||||
*
|
||||
* If a book already exist in the library with the same id, update
|
||||
* the existing book instead of adding a new one.
|
||||
*
|
||||
* @param book The book to add.
|
||||
* @return True if the book has been added.
|
||||
* False if a book has been updated.
|
||||
*/
|
||||
bool addBook(const Book& book);
|
||||
|
||||
/**
|
||||
* Remove a book from the library.
|
||||
*
|
||||
* @param bookIndex the index of the book to remove.
|
||||
* @return True
|
||||
*/
|
||||
bool removeBookByIndex(const unsigned int bookIndex);
|
||||
vector<kiwix::Book> books;
|
||||
|
||||
|
|
|
@ -39,43 +39,216 @@ namespace kiwix
|
|||
enum supportedListMode { LASTOPEN, REMOTE, LOCAL };
|
||||
enum supportedListSortBy { TITLE, SIZE, DATE, CREATOR, PUBLISHER };
|
||||
|
||||
/**
|
||||
* A tool to manage a `Library`.
|
||||
*
|
||||
* A `Manager` handle a internal `Library`.
|
||||
* This `Library` can be retrived with `cloneLibrary` method.
|
||||
*/
|
||||
class Manager
|
||||
{
|
||||
public:
|
||||
Manager();
|
||||
~Manager();
|
||||
|
||||
/**
|
||||
* Read a `library.xml` and add book in the file to the library.
|
||||
*
|
||||
* @param path The path to the `library.xml`.
|
||||
* @param readOnly Set if the libray path could be overwritten latter with
|
||||
* updated content.
|
||||
* @return True if file has been properly parsed.
|
||||
*/
|
||||
bool readFile(const string path, const bool readOnly = true);
|
||||
|
||||
/**
|
||||
* Read a `library.xml` and add book in the file to the library.
|
||||
*
|
||||
* @param nativePath The path of the `library.xml`
|
||||
* @param UTF8Path The utf8 version (?) of the path. Also the path where the
|
||||
* library will be writen i readOnly is False.
|
||||
* @param readOnly Set if the libray path could be overwritten latter with
|
||||
* updated content.
|
||||
* @return True if file has been properly parsed.
|
||||
*/
|
||||
bool readFile(const string nativePath,
|
||||
const string UTF8Path,
|
||||
const bool readOnly = true);
|
||||
|
||||
/**
|
||||
* Load a library content store in the string.
|
||||
*
|
||||
* @param xml The content corresponding of the library xml
|
||||
* @param readOnly Set if the libray path could be overwritten latter with
|
||||
* updated content.
|
||||
* @param libraryPath The library path (used to resolve relative path)
|
||||
* @return True if the content has been properly parsed.
|
||||
*/
|
||||
bool readXml(const string xml,
|
||||
const bool readOnly = true,
|
||||
const string libraryPath = "");
|
||||
|
||||
/**
|
||||
* Write the library to a file.
|
||||
*
|
||||
* @param path the path of the file to write.
|
||||
* @return True.
|
||||
*/
|
||||
bool writeFile(const string path);
|
||||
|
||||
|
||||
string write_OPDS_feed(const string& id, const string& title);
|
||||
|
||||
/**
|
||||
* Remove a book from the library.
|
||||
*
|
||||
* @param bookIndex the index of the book to remove
|
||||
* @return True
|
||||
*/
|
||||
bool removeBookByIndex(const unsigned int bookIndex);
|
||||
|
||||
/**
|
||||
* Remove a book from the library.
|
||||
*
|
||||
* @param id the id of the book to remove.
|
||||
* @return True if the book were in the library.
|
||||
*/
|
||||
bool removeBookById(const string id);
|
||||
|
||||
/**
|
||||
* Set the current book.
|
||||
*
|
||||
* @param id The id to add to the stack of current books.
|
||||
* If id is empty, remove the current book from the stack.
|
||||
* @return True
|
||||
*/
|
||||
bool setCurrentBookId(const string id);
|
||||
string getCurrentBookId();
|
||||
|
||||
/**
|
||||
* Get the current book id.
|
||||
*
|
||||
* @return The id of the current book (or empty string if no current book).
|
||||
*/
|
||||
string getCurrentBookId() const;
|
||||
|
||||
/**
|
||||
* Set the path of the external fulltext index associated to a book.
|
||||
*
|
||||
* @param id The id of the book to set.
|
||||
* @param path The path of the external fullext index.
|
||||
* @param supportedIndexType The type of the fulltext index.
|
||||
* @return True if the book is in the library.
|
||||
*/
|
||||
bool setBookIndex(const string id,
|
||||
const string path,
|
||||
const supportedIndexType type);
|
||||
bool setBookIndex(const string id, const string path);
|
||||
const supportedIndexType type = XAPIAN);
|
||||
|
||||
/**
|
||||
* Set the path of the zim file associated to a book.
|
||||
*
|
||||
* @param id The id of the book to set.
|
||||
* @param path The path of the zim file.
|
||||
* @return True if the book is in the library.
|
||||
*/
|
||||
bool setBookPath(const string id, const string path);
|
||||
|
||||
/**
|
||||
* Add a book to the library.
|
||||
*
|
||||
* @param pathToOpen The path to the zim file to add.
|
||||
* @param pathToSave The path to store in the library in place of pathToOpen.
|
||||
* @param url The url of the book to store in the library.
|
||||
* @param checMetaData Tell if we check metadata before adding book to the
|
||||
* library.
|
||||
* @return The id of the book if the book has been added to the library.
|
||||
* Else, an empty string.
|
||||
*/
|
||||
string addBookFromPathAndGetId(const string pathToOpen,
|
||||
const string pathToSave = "",
|
||||
const string url = "",
|
||||
const bool checkMetaData = false);
|
||||
|
||||
/**
|
||||
* Add a book to the library.
|
||||
*
|
||||
* @param pathToOpen The path to the zim file to add.
|
||||
* @param pathToSave The path to store in the library in place of pathToOpen.
|
||||
* @param url The url of the book to store in the library.
|
||||
* @param checMetaData Tell if we check metadata before adding book to the
|
||||
* library.
|
||||
* @return True if the book has been added to the library.
|
||||
*/
|
||||
|
||||
bool addBookFromPath(const string pathToOpen,
|
||||
const string pathToSave = "",
|
||||
const string url = "",
|
||||
const bool checkMetaData = false);
|
||||
|
||||
/**
|
||||
* Clone and return the internal library.
|
||||
*
|
||||
* @return A clone of the library.
|
||||
*/
|
||||
Library cloneLibrary();
|
||||
|
||||
/**
|
||||
* Get the book corresponding to an id.
|
||||
*
|
||||
* @param[in] id The id of the book
|
||||
* @param[out] book The book corresponding to the id.
|
||||
* @return True if the book has been found.
|
||||
*/
|
||||
bool getBookById(const string id, Book& book);
|
||||
|
||||
/**
|
||||
* Get the current book.
|
||||
*
|
||||
* @param[out] The current book.
|
||||
* @return True if there is a current book.
|
||||
*/
|
||||
bool getCurrentBook(Book& book);
|
||||
|
||||
/**
|
||||
* Get the number of book in the library.
|
||||
*
|
||||
* @param localBooks If we must count local books (books with a path).
|
||||
* @param remoteBooks If we must count remote books (books with an url)
|
||||
* @return The number of books.
|
||||
*/
|
||||
unsigned int getBookCount(const bool localBooks, const bool remoteBooks);
|
||||
|
||||
/**
|
||||
* Update the "last open date" of a book
|
||||
*
|
||||
* @param id the id of the book.
|
||||
* @return True if the book is in the library.
|
||||
*/
|
||||
bool updateBookLastOpenDateById(const string id);
|
||||
|
||||
/**
|
||||
* Remove (set to empty) paths of all books in the library.
|
||||
*/
|
||||
void removeBookPaths();
|
||||
|
||||
/**
|
||||
* List books in the library.
|
||||
*
|
||||
* The books list will be available in public vector member `bookIdList`.
|
||||
*
|
||||
* @param mode The mode of listing :
|
||||
* - LASTOPEN sort by last opened book.
|
||||
* - LOCAL list only local file.
|
||||
* - REMOTE list only remote file.
|
||||
* @param sortBy Attribute to sort by the book list.
|
||||
* @param maxSize Do not list book bigger than maxSize MiB.
|
||||
* Set to 0 to cancel this filter.
|
||||
* @param language List only books in this language.
|
||||
* @param creator List only books of this creator.
|
||||
* @param publisher List only books of this publisher.
|
||||
* @param search List only books with search in the title, description or
|
||||
* language.
|
||||
* @return True
|
||||
*/
|
||||
bool listBooks(const supportedListMode mode,
|
||||
const supportedListSortBy sortBy,
|
||||
const unsigned int maxSize,
|
||||
|
@ -83,9 +256,32 @@ class Manager
|
|||
const string creator,
|
||||
const string publisher,
|
||||
const string search);
|
||||
/**
|
||||
* Get all langagues of the books in the library.
|
||||
*
|
||||
* @return A list of languages.
|
||||
*/
|
||||
vector<string> getBooksLanguages();
|
||||
|
||||
/**
|
||||
* Get all book creators of the books in the library.
|
||||
*
|
||||
* @return A list of book creators.
|
||||
*/
|
||||
vector<string> getBooksCreators();
|
||||
|
||||
/**
|
||||
* Get all book publishers of the books in the library.
|
||||
*
|
||||
* @return A list of book publishers.
|
||||
*/
|
||||
vector<string> getBooksPublishers();
|
||||
|
||||
/**
|
||||
* Get all book ids of the books in the library.
|
||||
*
|
||||
* @return A list of book ids.
|
||||
*/
|
||||
vector<string> getBooksIds();
|
||||
|
||||
string writableLibraryPath;
|
||||
|
|
312
include/reader.h
312
include/reader.h
|
@ -36,76 +36,383 @@ using namespace std;
|
|||
|
||||
namespace kiwix
|
||||
{
|
||||
|
||||
/**
|
||||
* The Reader class is the class who allow to get an article content from a zim
|
||||
* file.
|
||||
*/
|
||||
class Reader
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* Create a Reader to read a zim file specified by zimFilePath.
|
||||
*
|
||||
* @param zimFilePath The path to the zim file to read.
|
||||
* The zim file can be splitted (.zimaa, .zimab, ...).
|
||||
* In this case, the file path must still point to the
|
||||
* unsplitted path as if the file were not splitted
|
||||
* (.zim extesion).
|
||||
*/
|
||||
Reader(const string zimFilePath);
|
||||
~Reader();
|
||||
|
||||
void reset();
|
||||
/**
|
||||
* Get the number of "displayable" articles in the zim file.
|
||||
*
|
||||
* @return If the zim file has a /M/Counter metadata, return the number of
|
||||
* articles with the 'text/html' MIMEtype specified in the metadata.
|
||||
* Else return the number of articles in the 'A' namespace.
|
||||
*/
|
||||
unsigned int getArticleCount() const;
|
||||
|
||||
/**
|
||||
* Get the number of media in the zim file.
|
||||
*
|
||||
* @return If the zim file has a /M/Counter metadata, return the number of
|
||||
* articles with the 'image/jpeg', 'image/gif' and 'image/png' in
|
||||
* the metadata.
|
||||
* Else return the number of articles in the 'I' namespace.
|
||||
*/
|
||||
unsigned int getMediaCount() const;
|
||||
|
||||
/**
|
||||
* Get the number of all articles in the zim file.
|
||||
*
|
||||
* @return Return the number of all the articles, whatever their MIMEtype or
|
||||
* their namespace.
|
||||
*/
|
||||
unsigned int getGlobalCount() const;
|
||||
|
||||
/**
|
||||
* Get the path of the zim file.
|
||||
*
|
||||
* @return the path of the zim file as given in the constructor.
|
||||
*/
|
||||
string getZimFilePath() const;
|
||||
|
||||
/**
|
||||
* Get the Id of the zim file.
|
||||
*
|
||||
* @return The uuid stored in the zim file.
|
||||
*/
|
||||
string getId() const;
|
||||
|
||||
/**
|
||||
* Get the url of a random page.
|
||||
*
|
||||
* @return Url of a random page. The page is picked from all articles in
|
||||
* the 'A' namespace.
|
||||
* The main page is excluded from the potential results.
|
||||
*/
|
||||
string getRandomPageUrl() const;
|
||||
|
||||
/**
|
||||
* Get the url of the first page.
|
||||
*
|
||||
* @return Url of the first article in the 'A' namespace.
|
||||
*/
|
||||
string getFirstPageUrl() const;
|
||||
|
||||
/**
|
||||
* Get the url of the main page.
|
||||
*
|
||||
* @return Url of the main page as specified in the zim file.
|
||||
*/
|
||||
string getMainPageUrl() const;
|
||||
bool getMetatag(const string& url, string& content) const;
|
||||
|
||||
/**
|
||||
* Get the content of a metadata.
|
||||
*
|
||||
* @param[in] name The name of the metadata.
|
||||
* @param[out] value The value will be set to the content of the metadata.
|
||||
* @return True if it was possible to get the content of the metadata.
|
||||
*/
|
||||
bool getMetatag(const string& name, string& value) const;
|
||||
|
||||
/**
|
||||
* Get the title of the zim file.
|
||||
*
|
||||
* @return The title of zim file as specified in the zim metadata.
|
||||
* If no title has been set, return a title computed from the
|
||||
* file path.
|
||||
*/
|
||||
string getTitle() const;
|
||||
|
||||
/**
|
||||
* Get the description of the zim file.
|
||||
*
|
||||
* @return The description of the zim file as specified in the zim metadata.
|
||||
* If no description has been set, return the subtitle.
|
||||
*/
|
||||
string getDescription() const;
|
||||
|
||||
/**
|
||||
* Get the language of the zim file.
|
||||
*
|
||||
* @return The language of the zim file as specified in the zim metadata.
|
||||
*/
|
||||
string getLanguage() const;
|
||||
|
||||
/**
|
||||
* Get the name of the zim file.
|
||||
*
|
||||
* @return The name of the zim file as specified in the zim metadata.
|
||||
*/
|
||||
string getName() const;
|
||||
|
||||
/**
|
||||
* Get the tags of the zim file.
|
||||
*
|
||||
* @return The tags of the zim file as specified in the zim metadata.
|
||||
*/
|
||||
string getTags() const;
|
||||
|
||||
/**
|
||||
* Get the date of the zim file.
|
||||
*
|
||||
* @return The date of the zim file as specified in the zim metadata.
|
||||
*/
|
||||
string getDate() const;
|
||||
|
||||
/**
|
||||
* Get the creator of the zim file.
|
||||
*
|
||||
* @return The creator of the zim file as specified in the zim metadata.
|
||||
*/
|
||||
string getCreator() const;
|
||||
|
||||
/**
|
||||
* Get the publisher of the zim file.
|
||||
*
|
||||
* @return The publisher of the zim file as specified in the zim metadata.
|
||||
*/
|
||||
string getPublisher() const;
|
||||
|
||||
/**
|
||||
* Get the origId of the zim file.
|
||||
*
|
||||
* The origId is only used in the case of patch zim file and is the Id
|
||||
* of the original zim file.
|
||||
*
|
||||
* @return The origId of the zim file as specified in the zim metadata.
|
||||
*/
|
||||
string getOrigId() const;
|
||||
|
||||
/**
|
||||
* Get the favicon of the zim file.
|
||||
*
|
||||
* @param[out] content The content of the favicon.
|
||||
* @param[out] mimeType The mimeType of the favicon.
|
||||
* @return True if a favicon has been found.
|
||||
*/
|
||||
bool getFavicon(string& content, string& mimeType) const;
|
||||
|
||||
/**
|
||||
* Get the url of a page specified by a title.
|
||||
*
|
||||
* @param[in] title the title of the page.
|
||||
* @param[out] url the url of the page.
|
||||
* @return True if the page can be found.
|
||||
*/
|
||||
bool getPageUrlFromTitle(const string& title, string& url) const;
|
||||
|
||||
/**
|
||||
* Get the mimetype of a article specified by a url.
|
||||
*
|
||||
* @param[in] url the url of the article.
|
||||
* @param[out] mimetype the mimeType of the article.
|
||||
* @return True if the mimeType has been found.
|
||||
*/
|
||||
bool getMimeTypeByUrl(const string& url, string& mimeType) const;
|
||||
|
||||
/**
|
||||
* Get the content of an article specifed by a url.
|
||||
*
|
||||
* Alias to `getContentByEncodedUrl`
|
||||
*/
|
||||
bool getContentByUrl(const string& url,
|
||||
string& content,
|
||||
string& title,
|
||||
unsigned int& contentLength,
|
||||
string& contentType) const;
|
||||
|
||||
/**
|
||||
* Get the content of an article specified by a url encoded url.
|
||||
*
|
||||
* Equivalent to getContentByDecodedUrl(urlDecode(url), ...).
|
||||
*/
|
||||
bool getContentByEncodedUrl(const string& url,
|
||||
string& content,
|
||||
string& title,
|
||||
unsigned int& contentLength,
|
||||
string& contentType,
|
||||
string& baseUrl) const;
|
||||
|
||||
/**
|
||||
* Get the content of an article specified by an url encoded url.
|
||||
*
|
||||
* Equivalent to getContentByEncodedUrl but without baseUrl.
|
||||
*/
|
||||
bool getContentByEncodedUrl(const string& url,
|
||||
string& content,
|
||||
string& title,
|
||||
unsigned int& contentLength,
|
||||
string& contentType) const;
|
||||
|
||||
/**
|
||||
* Get the content of an article specified by a url.
|
||||
*
|
||||
* @param[in] url The url of the article.
|
||||
* @param[out] content The content of the article.
|
||||
* @param[out] title the title of the article.
|
||||
* @param[out] contentLength The size of the article (size of content).
|
||||
* @param[out] contentType The mimeType of the article.
|
||||
* @param[out] baseUrl Return the true url of the article.
|
||||
* If the specified article is a redirection, contains
|
||||
* the url of the targeted article.
|
||||
* @return True if the article has been found.
|
||||
*/
|
||||
bool getContentByDecodedUrl(const string& url,
|
||||
string& content,
|
||||
string& title,
|
||||
unsigned int& contentLength,
|
||||
string& contentType,
|
||||
string& baseUrl) const;
|
||||
/**
|
||||
* Get the content of an article specified by a url.
|
||||
*
|
||||
* Equivalent to getContentByDecodedUrl but withou the baseUrl.
|
||||
*/
|
||||
bool getContentByDecodedUrl(const string& url,
|
||||
string& content,
|
||||
string& title,
|
||||
unsigned int& contentLength,
|
||||
string& contentType) const;
|
||||
|
||||
/**
|
||||
* Search for articles with title starting with prefix (case sensitive).
|
||||
*
|
||||
* Suggestions are stored in an internal vector and can be retrieved using
|
||||
* `getNextSuggestion` method.
|
||||
*
|
||||
* @param prefix The prefix to search.
|
||||
* @param suggestionCount How many suggestions to search for.
|
||||
* @param reset If true, remove previous suggestions in the internal vector.
|
||||
* If false, add suggestions to the internal vector
|
||||
* (until internal vector size is suggestionCount (or no more
|
||||
* suggestion))
|
||||
* @return True if some suggestions where added to the internal vector.
|
||||
*/
|
||||
bool searchSuggestions(const string& prefix,
|
||||
unsigned int suggestionsCount,
|
||||
const bool reset = true);
|
||||
|
||||
/**
|
||||
* Search for articles for the given prefix.
|
||||
*
|
||||
* If the zim file has a internal fulltext index, the suggestions will be
|
||||
* searched using it.
|
||||
* Else the suggestions will be search using `searchSuggestions` while trying
|
||||
* to be smart about case sensitivity (using `getTitleVariants`).
|
||||
*
|
||||
* In any case, suggestions are stored in an internal vector and can be
|
||||
* retrieved using `getNextSuggestion` method.
|
||||
* The internal vector will be reset.
|
||||
*
|
||||
* @param prefix The prefix to search for.
|
||||
* @param suggestionCount How many suggestions to search for.
|
||||
*/
|
||||
bool searchSuggestionsSmart(const string& prefix,
|
||||
unsigned int suggestionsCount);
|
||||
|
||||
/**
|
||||
* Check if the url exists in the zim file.
|
||||
*
|
||||
* @param url the url to check.
|
||||
* @return True if the url exits in the zim file.
|
||||
*/
|
||||
bool urlExists(const string& url) const;
|
||||
|
||||
/**
|
||||
* Check if the zim file has a embedded fulltext index.
|
||||
*
|
||||
* @return True if the zim file has a embedded fulltext index
|
||||
* and is not split (else the fulltext is not accessible).
|
||||
*/
|
||||
bool hasFulltextIndex() const;
|
||||
|
||||
/**
|
||||
* Get potential case title variations for a title.
|
||||
*
|
||||
* @param title a title.
|
||||
* @return the list of variantions.
|
||||
*/
|
||||
std::vector<std::string> getTitleVariants(const std::string& title) const;
|
||||
|
||||
/**
|
||||
* Get the next suggestion title.
|
||||
*
|
||||
* @param[out] title the title of the suggestion.
|
||||
* @return True if title has been set.
|
||||
*/
|
||||
bool getNextSuggestion(string& title);
|
||||
|
||||
/**
|
||||
* Get the next suggestion title and url.
|
||||
*
|
||||
* @param[out] title the title of the suggestion.
|
||||
* @param[out] url the url of the suggestion.
|
||||
* @return True if title and url have been set.
|
||||
*/
|
||||
bool getNextSuggestion(string& title, string& url);
|
||||
|
||||
/**
|
||||
* Get if we can check zim file integrity (has a checksum).
|
||||
*
|
||||
* @return True if zim file have a checksum.
|
||||
*/
|
||||
bool canCheckIntegrity() const;
|
||||
|
||||
/**
|
||||
* Check is zim file is corrupted.
|
||||
*
|
||||
* @return True if zim file is corrupted.
|
||||
*/
|
||||
bool isCorrupted() const;
|
||||
|
||||
/**
|
||||
* Parse a full url into a namespace and url.
|
||||
*
|
||||
* @param[in] url The full url ("/N/url").
|
||||
* @param[out] ns The namespace (N).
|
||||
* @param[out] title The url (url).
|
||||
* @return True
|
||||
*/
|
||||
bool parseUrl(const string& url, char* ns, string& title) const;
|
||||
|
||||
/**
|
||||
* Return the total size of the zim file.
|
||||
*
|
||||
* If zim file is split, return the sum of all parts' size.
|
||||
*
|
||||
* @return Size of the size file is KiB.
|
||||
*/
|
||||
unsigned int getFileSize() const;
|
||||
|
||||
/**
|
||||
* Get the zim file handler.
|
||||
*
|
||||
* @return The libzim file handler.
|
||||
*/
|
||||
zim::File* getZimFileHandler() const;
|
||||
|
||||
/**
|
||||
* Get the zim article object associated to a url.
|
||||
*
|
||||
* @param[in] url The url of the article.
|
||||
* @param[out] article The libzim article object.
|
||||
* @return True if the url is good (article.good()).
|
||||
*/
|
||||
bool getArticleObjectByDecodedUrl(const string& url,
|
||||
zim::Article& article) const;
|
||||
|
||||
|
@ -113,7 +420,6 @@ class Reader
|
|||
zim::File* zimFileHandler;
|
||||
zim::size_type firstArticleOffset;
|
||||
zim::size_type lastArticleOffset;
|
||||
zim::size_type currentArticleOffset;
|
||||
zim::size_type nsACount;
|
||||
zim::size_type nsICount;
|
||||
std::string zimFilePath;
|
||||
|
|
|
@ -53,33 +53,149 @@ class Result
|
|||
};
|
||||
|
||||
struct SearcherInternal;
|
||||
/**
|
||||
* The Searcher class is reponsible to do different kind of search using the
|
||||
* fulltext index.
|
||||
*
|
||||
* Historically, there are two kind of fulltext index :
|
||||
* - The legacy one, is the external fulltext index. A directory stored outside
|
||||
* of the zim file.
|
||||
* - The new one, a embedded fulltext index in the zim file.
|
||||
*
|
||||
* Legacy external fulltext index has to be considered as obsolet format with
|
||||
* less functionnalities:
|
||||
* - No multi zim search ;
|
||||
* - No geo_search ;
|
||||
* - No suggestions search ;
|
||||
*
|
||||
* To reflect this, there is two Search creation "API":
|
||||
* - One for the external fulltext index, using the constructor taking a
|
||||
* xapianDirectoryPath) ;
|
||||
* - One for the embedded fulltext index, using a "empty" constructor and the
|
||||
* `add_reader` method".
|
||||
*
|
||||
* On top of that, the Searcher may (if compiled with ctpp2) be used to
|
||||
* generate a html page for the search result. This use a template that need a
|
||||
* humanReaderName. This feature is only used by kiwix-serve and this should be
|
||||
* move outside of Searcher (and with a better API). If you don't use the html
|
||||
* rendering (getHtml method), you better should simply ignore the different
|
||||
* humanReadeableName attributes (or give an empty string).
|
||||
*/
|
||||
class Searcher
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* The default constructor.
|
||||
*
|
||||
* @param humanReadableName The global zim's humanReadableName.
|
||||
* Used to generate pagination links.
|
||||
*/
|
||||
Searcher(const string& humanReadableName = "");
|
||||
|
||||
/**
|
||||
* The constructor for legacy external fulltext index.
|
||||
*
|
||||
* @param xapianDirectoryPath The path to the external index directory.
|
||||
* @param reader The reader associated to the external index.
|
||||
* It will be used retrive the article content or generate
|
||||
* the snippet.
|
||||
* @param humanReadableName The humanReadableName for the zim.
|
||||
*/
|
||||
Searcher(const string& xapianDirectoryPath,
|
||||
Reader* reader,
|
||||
const string& humanReadableName);
|
||||
~Searcher();
|
||||
|
||||
void add_reader(Reader* reader, const std::string& humanReaderName);
|
||||
/**
|
||||
* Add a reader (containing embedded fulltext index) to the search.
|
||||
*
|
||||
* @param reader The Reader for the zim containing the fulltext index.
|
||||
* @param humanReaderName The human readable name of the reader.
|
||||
* @return true if the reader has been added.
|
||||
* false if the reader cannot be added (no embedded fulltext index present)
|
||||
*/
|
||||
bool add_reader(Reader* reader, const std::string& humanReaderName);
|
||||
|
||||
/**
|
||||
* Start a search on the zim associated to the Searcher.
|
||||
*
|
||||
* Search results should be retrived using the getNextResult method.
|
||||
*
|
||||
* @param search The search query.
|
||||
* @param resultStart the start offset of the search results (used for pagination).
|
||||
* @param resultEnd the end offset of the search results (used for pagination).
|
||||
* @param verbose print some info on stdout if true.
|
||||
*/
|
||||
void search(std::string& search,
|
||||
unsigned int resultStart,
|
||||
unsigned int resultEnd,
|
||||
const bool verbose = false);
|
||||
|
||||
/**
|
||||
* Start a geographique search.
|
||||
* The search return result for entry in a disc of center latitude/longitude
|
||||
* and radius distance.
|
||||
*
|
||||
* Search results should be retrived using the getNextResult method.
|
||||
*
|
||||
* @param latitude The latitude of the center point.
|
||||
* @param longitude The longitude of the center point.
|
||||
* @param distance The radius of the disc.
|
||||
* @param resultStart the start offset of the search results (used for pagination).
|
||||
* @param resultEnd the end offset of the search results (used for pagination).
|
||||
* @param verbose print some info on stdout if true.
|
||||
*/
|
||||
void geo_search(float latitude, float longitude, float distance,
|
||||
unsigned int resultStart,
|
||||
unsigned int resultEnd,
|
||||
const bool verbose = false);
|
||||
|
||||
/**
|
||||
* Start a suggestion search.
|
||||
* The search made depend of the "version" of the embedded index.
|
||||
* - If the index is newer enough and have a title namespace, the search is
|
||||
* made in the titles only.
|
||||
* - Else the search is made on the whole article content.
|
||||
* In any case, the search is made "partial" (as adding '*' at the end of the query)
|
||||
*
|
||||
* @param search The search query.
|
||||
* @param verbose print some info on stdout if true.
|
||||
*/
|
||||
void suggestions(std::string& search, const bool verbose = false);
|
||||
|
||||
/**
|
||||
* Get the next result of a started search.
|
||||
* This is the method to use to loop hover the search results.
|
||||
*/
|
||||
Result* getNextResult();
|
||||
|
||||
/**
|
||||
* Restart the previous search.
|
||||
* Next call to getNextResult will return the first result.
|
||||
*/
|
||||
void restart_search();
|
||||
|
||||
/**
|
||||
* Get a estimation of the result count.
|
||||
*/
|
||||
unsigned int getEstimatedResultCount();
|
||||
|
||||
/**
|
||||
* Set protocol prefix.
|
||||
* Only used by getHtml.
|
||||
*/
|
||||
bool setProtocolPrefix(const std::string prefix);
|
||||
|
||||
/**
|
||||
* Set search protocol prefix.
|
||||
* Only used by getHtml.
|
||||
*/
|
||||
bool setSearchProtocolPrefix(const std::string prefix);
|
||||
void reset();
|
||||
|
||||
#ifdef ENABLE_CTPP2
|
||||
/**
|
||||
* Generate the html page with the resutls of the search.
|
||||
*/
|
||||
string getHtml();
|
||||
#endif
|
||||
|
||||
|
@ -102,7 +218,13 @@ class Searcher
|
|||
unsigned int resultStart;
|
||||
unsigned int resultEnd;
|
||||
std::string contentHumanReadableId;
|
||||
|
||||
private:
|
||||
void reset();
|
||||
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -243,7 +243,7 @@ bool Manager::setCurrentBookId(const string id)
|
|||
return true;
|
||||
}
|
||||
|
||||
string Manager::getCurrentBookId()
|
||||
string Manager::getCurrentBookId() const
|
||||
{
|
||||
return library.current.empty() ? "" : library.current.top();
|
||||
}
|
||||
|
@ -495,11 +495,6 @@ bool Manager::setBookIndex(const string id,
|
|||
return false;
|
||||
}
|
||||
|
||||
bool Manager::setBookIndex(const string id, const string path)
|
||||
{
|
||||
return this->setBookIndex(id, path, XAPIAN);
|
||||
}
|
||||
|
||||
bool Manager::setBookPath(const string id, const string path)
|
||||
{
|
||||
std::vector<kiwix::Book>::iterator itr;
|
||||
|
|
|
@ -80,7 +80,6 @@ Reader::Reader(const string zimFilePath) : zimFileHandler(NULL)
|
|||
this->firstArticleOffset
|
||||
= this->zimFileHandler->getNamespaceBeginOffset('A');
|
||||
this->lastArticleOffset = this->zimFileHandler->getNamespaceEndOffset('A');
|
||||
this->currentArticleOffset = this->firstArticleOffset;
|
||||
this->nsACount = this->zimFileHandler->getNamespaceCount('A');
|
||||
this->nsICount = this->zimFileHandler->getNamespaceCount('I');
|
||||
this->zimFilePath = zimFilePath;
|
||||
|
@ -102,11 +101,6 @@ zim::File* Reader::getZimFileHandler() const
|
|||
{
|
||||
return this->zimFileHandler;
|
||||
}
|
||||
/* Reset the cursor for GetNextArticle() */
|
||||
void Reader::reset()
|
||||
{
|
||||
this->currentArticleOffset = this->firstArticleOffset;
|
||||
}
|
||||
std::map<const std::string, unsigned int> Reader::parseCounterMetadata() const
|
||||
{
|
||||
std::map<const std::string, unsigned int> counters;
|
||||
|
|
|
@ -118,10 +118,14 @@ Searcher::~Searcher()
|
|||
delete internal;
|
||||
}
|
||||
|
||||
void Searcher::add_reader(Reader* reader, const std::string& humanReadableName)
|
||||
bool Searcher::add_reader(Reader* reader, const std::string& humanReadableName)
|
||||
{
|
||||
if (!reader->hasFulltextIndex()) {
|
||||
return false;
|
||||
}
|
||||
this->readers.push_back(reader);
|
||||
this->humanReaderNames.push_back(humanReadableName);
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Search strings in the database */
|
||||
|
@ -166,7 +170,9 @@ void Searcher::search(std::string& search,
|
|||
std::vector<const zim::File*> zims;
|
||||
for (auto current = this->readers.begin(); current != this->readers.end();
|
||||
current++) {
|
||||
zims.push_back((*current)->getZimFileHandler());
|
||||
if ( (*current)->hasFulltextIndex() ) {
|
||||
zims.push_back((*current)->getZimFileHandler());
|
||||
}
|
||||
}
|
||||
zim::Search* search = new zim::Search(zims);
|
||||
search->set_query(unaccentedSearch);
|
||||
|
|
Loading…
Reference in New Issue