mirror of https://github.com/kiwix/libkiwix.git
commit
1b49c632b3
|
@ -0,0 +1,24 @@
|
|||
|
||||
#ifndef _KIWIX_COMMON_H_
|
||||
#define _KIWIX_COMMON_H_
|
||||
|
||||
#include <zim/zim.h>
|
||||
|
||||
/*
 * DEPRECATED marks a declaration as deprecated using the spelling
 * understood by the current compiler.
 */
#ifdef __GNUC__
#define DEPRECATED __attribute__((deprecated))
#elif defined(_MSC_VER)
#define DEPRECATED __declspec(deprecated)
#else
/* Unknown compiler: emit a build-time notice and make DEPRECATED a no-op so
 * that annotated declarations still compile. */
#pragma message("WARNING: You need to implement DEPRECATED for this compiler")
#define DEPRECATED
#endif
|
||||
|
||||
|
||||
namespace kiwix {
|
||||
|
||||
typedef zim::size_type size_type;
|
||||
typedef zim::offset_type offset_type;
|
||||
|
||||
}
|
||||
|
||||
#endif //_KIWIX_COMMON_H_
|
|
@ -0,0 +1,191 @@
|
|||
/*
|
||||
* Copyright 2018 Matthieu Gautier <mgautier@kymeria.fr>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#ifndef KIWIX_ENTRY_H
|
||||
#define KIWIX_ENTRY_H
|
||||
|
||||
#include <stdio.h>
|
||||
#include <zim/article.h>
|
||||
#include <exception>
|
||||
#include <string>
|
||||
#include "common.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace kiwix
|
||||
{
|
||||
|
||||
|
||||
/**
 * Exception type used to signal that a requested entry cannot be provided.
 */
class NoEntry : public std::exception {};
|
||||
|
||||
/**
|
||||
* A entry represent an.. entry in a zim file.
|
||||
*/
|
||||
class Entry
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* Default constructor.
|
||||
*
|
||||
* Construct an invalid entry.
|
||||
*/
|
||||
Entry() = default;
|
||||
|
||||
/**
|
||||
* Construct an entry making reference to an zim article.
|
||||
*
|
||||
* @param article a zim::Article object
|
||||
*/
|
||||
Entry(zim::Article article);
|
||||
virtual ~Entry() = default;
|
||||
|
||||
/**
|
||||
* Get the path of the entry.
|
||||
*
|
||||
* The path is the "key" of an entry.
|
||||
*
|
||||
* @return the path of the entry.
|
||||
*/
|
||||
std::string getPath() const;
|
||||
|
||||
/**
|
||||
* Get the title of the entry.
|
||||
*
|
||||
* @return the title of the entry.
|
||||
*/
|
||||
std::string getTitle() const;
|
||||
|
||||
/**
|
||||
* Get the content of the entry.
|
||||
*
|
||||
* The string is a copy of the content.
|
||||
* If you don't want to do a copy, use get_blob.
|
||||
*
|
||||
* @return the content of the entry.
|
||||
*/
|
||||
std::string getContent() const;
|
||||
|
||||
/**
|
||||
* Get the blob of the entry.
|
||||
*
|
||||
* A blob make reference to the content without copying it.
|
||||
*
|
||||
* @param offset The starting offset of the blob.
|
||||
* @return the blob of the entry.
|
||||
*/
|
||||
zim::Blob getBlob(offset_type offset = 0) const;
|
||||
|
||||
/**
|
||||
* Get the blob of the entry.
|
||||
*
|
||||
* A blob make reference to the content without copying it.
|
||||
*
|
||||
* @param offset The starting offset of the blob.
|
||||
* @param size The size of the blob.
|
||||
* @return the blob of the entry.
|
||||
*/
|
||||
zim::Blob getBlob(offset_type offset, size_type size) const;
|
||||
|
||||
/**
|
||||
* Get the info for direct access to the content of the entry.
|
||||
*
|
||||
* Some entry (ie binary ones) have their content plain stored
|
||||
* in the zim file. Knowing the offset where the content is stored
|
||||
* an user can directly read the content in the zim file bypassing the
|
||||
* kiwix-lib/libzim.
|
||||
*
|
||||
* @return A pair specifying where to read the content.
|
||||
* The string is the real file to read (may be different that .zim
|
||||
* file if zim is cut).
|
||||
* The offset is the offset to read in the file.
|
||||
* Return <"",0> if is not possible to read directly.
|
||||
*/
|
||||
std::pair<std::string, offset_type> getDirectAccessInfo() const;
|
||||
|
||||
/**
|
||||
* Get the size of the entry.
|
||||
*
|
||||
* @return the size of the entry.
|
||||
*/
|
||||
size_type getSize() const;
|
||||
|
||||
/**
|
||||
* Get the mime_type of the entry.
|
||||
*
|
||||
* @return the mime_type of the entry.
|
||||
*/
|
||||
std::string getMimetype() const;
|
||||
|
||||
|
||||
/**
|
||||
* Get if the entry is a redirect entry.
|
||||
*
|
||||
* @return True if the entry is a redirect.
|
||||
*/
|
||||
bool isRedirect() const;
|
||||
|
||||
/**
|
||||
* Get if the entry is a link target entry.
|
||||
*
|
||||
* @return True if the entry is a link target.
|
||||
*/
|
||||
bool isLinkTarget() const;
|
||||
|
||||
/**
|
||||
* Get if the entry is a deleted entry.
|
||||
*
|
||||
* @return True if the entry is a deleted entry.
|
||||
*/
|
||||
bool isDeleted() const;
|
||||
|
||||
/**
|
||||
* Get the entry pointed by this entry.
|
||||
*
|
||||
* @return the entry pointed.
|
||||
* @throw NoEntry if the entry is not a redirected entry.
|
||||
*/
|
||||
Entry getRedirectEntry() const;
|
||||
|
||||
/**
|
||||
* Get the final entry pointed by this entry.
|
||||
*
|
||||
* Follow the redirection until a "not redirecting" entry is found.
|
||||
* If the entry is not a redirected entry, return the entry itself.
|
||||
*
|
||||
* @return the final entry.
|
||||
*/
|
||||
Entry getFinalEntry() const;
|
||||
|
||||
/**
|
||||
* Convert the entry to a boolean value.
|
||||
*
|
||||
* @return True if the entry is valid.
|
||||
*/
|
||||
explicit operator bool() const { return good(); }
|
||||
|
||||
private:
|
||||
zim::Article article;
|
||||
mutable zim::Article final_article;
|
||||
|
||||
bool good() const { return article.good(); }
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif // KIWIX_ENTRY_H
|
|
@ -1,9 +1,11 @@
|
|||
headers = [
|
||||
'common.h',
|
||||
'library.h',
|
||||
'manager.h',
|
||||
'opds_dumper.h',
|
||||
'downloader.h',
|
||||
'reader.h',
|
||||
'entry.h',
|
||||
'searcher.h'
|
||||
]
|
||||
|
||||
|
|
158
include/reader.h
158
include/reader.h
|
@ -29,6 +29,8 @@
|
|||
#include <map>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include "common.h"
|
||||
#include "entry.h"
|
||||
#include "common/pathTools.h"
|
||||
#include "common/stringTools.h"
|
||||
|
||||
|
@ -38,7 +40,7 @@ namespace kiwix
|
|||
{
|
||||
|
||||
/**
|
||||
* The Reader class is the class who allow to get an article content from a zim
|
||||
* The Reader class is the class who allow to get an entry content from a zim
|
||||
* file.
|
||||
*/
|
||||
class Reader
|
||||
|
@ -57,11 +59,11 @@ class Reader
|
|||
~Reader();
|
||||
|
||||
/**
|
||||
* Get the number of "displayable" articles in the zim file.
|
||||
* Get the number of "displayable" entries in the zim file.
|
||||
*
|
||||
* @return If the zim file has a /M/Counter metadata, return the number of
|
||||
* articles with the 'text/html' MIMEtype specified in the metadata.
|
||||
* Else return the number of articles in the 'A' namespace.
|
||||
* entries with the 'text/html' MIMEtype specified in the metadata.
|
||||
* Else return the number of entries in the 'A' namespace.
|
||||
*/
|
||||
unsigned int getArticleCount() const;
|
||||
|
||||
|
@ -69,16 +71,16 @@ class Reader
|
|||
* Get the number of media in the zim file.
|
||||
*
|
||||
* @return If the zim file has a /M/Counter metadata, return the number of
|
||||
* articles with the 'image/jpeg', 'image/gif' and 'image/png' in
|
||||
* entries with the 'image/jpeg', 'image/gif' and 'image/png' in
|
||||
* the metadata.
|
||||
* Else return the number of articles in the 'I' namespace.
|
||||
* Else return the number of entries in the 'I' namespace.
|
||||
*/
|
||||
unsigned int getMediaCount() const;
|
||||
|
||||
/**
|
||||
* Get the number of all articles in the zim file.
|
||||
* Get the number of all entries in the zim file.
|
||||
*
|
||||
* @return Return the number of all the articles, whatever their MIMEtype or
|
||||
* @return Return the number of all the entries, whatever their MIMEtype or
|
||||
* their namespace.
|
||||
*/
|
||||
unsigned int getGlobalCount() const;
|
||||
|
@ -100,25 +102,54 @@ class Reader
|
|||
/**
|
||||
* Get the url of a random page.
|
||||
*
|
||||
* @return Url of a random page. The page is picked from all articles in
|
||||
* Deprecated : Use `getRandomPage` instead.
|
||||
*
|
||||
* @return Url of a random page. The page is picked from all entries in
|
||||
* the 'A' namespace.
|
||||
* The main page is excluded from the potential results.
|
||||
*/
|
||||
string getRandomPageUrl() const;
|
||||
DEPRECATED string getRandomPageUrl() const;
|
||||
|
||||
/**
|
||||
* Get a random page.
|
||||
*
|
||||
* @return A random Entry. The entry is picked from all entries in
|
||||
* the 'A' namespace.
|
||||
* The main entry is excluded from the potential results.
|
||||
*/
|
||||
Entry getRandomPage() const;
|
||||
|
||||
/**
|
||||
* Get the url of the first page.
|
||||
*
|
||||
* @return Url of the first article in the 'A' namespace.
|
||||
* Deprecated : Use `getFirstPage` instead.
|
||||
*
|
||||
* @return Url of the first entry in the 'A' namespace.
|
||||
*/
|
||||
string getFirstPageUrl() const;
|
||||
DEPRECATED string getFirstPageUrl() const;
|
||||
|
||||
/**
|
||||
* Get the entry of the first page.
|
||||
*
|
||||
* @return The first entry in the 'A' namespace.
|
||||
*/
|
||||
Entry getFirstPage() const;
|
||||
|
||||
/**
|
||||
* Get the url of the main page.
|
||||
*
|
||||
* Deprecated : Use `getMainPage` instead.
|
||||
*
|
||||
* @return Url of the main page as specified in the zim file.
|
||||
*/
|
||||
string getMainPageUrl() const;
|
||||
DEPRECATED string getMainPageUrl() const;
|
||||
|
||||
/**
|
||||
* Get the entry of the main page.
|
||||
*
|
||||
* @return Entry of the main page as specified in the zim file.
|
||||
*/
|
||||
Entry getMainPage() const;
|
||||
|
||||
/**
|
||||
* Get the content of a metadata.
|
||||
|
@ -207,6 +238,35 @@ class Reader
|
|||
*/
|
||||
bool getFavicon(string& content, string& mimeType) const;
|
||||
|
||||
/**
|
||||
* Get an entry associated to a path.
|
||||
*
|
||||
* @param path The path of the entry.
|
||||
* @return The entry.
|
||||
* @throw NoEntry If no entry correspond to the path.
|
||||
*/
|
||||
Entry getEntryFromPath(const std::string& path) const;
|
||||
|
||||
/**
|
||||
* Get an entry associated to an url encoded path.
|
||||
*
|
||||
* Equivalent to `getEntryFromPath(urlDecode(path));`
|
||||
*
|
||||
* @param path The url encoded path.
|
||||
* @return The entry.
|
||||
* @throw NoEntry If no entry correspond to the path.
|
||||
*/
|
||||
Entry getEntryFromEncodedPath(const std::string& path) const;
|
||||
|
||||
/**
|
||||
* Get an entry associated to a title.
|
||||
*
|
||||
* @param title The title.
|
||||
* @return The entry
|
||||
* @throw NoEntry If no entry corresponds to the title.
|
||||
*/
|
||||
Entry getEntryFromTitle(const std::string& title) const;
|
||||
|
||||
/**
|
||||
* Get the url of a page specified by a title.
|
||||
*
|
||||
|
@ -214,34 +274,34 @@ class Reader
|
|||
* @param[out] url the url of the page.
|
||||
* @return True if the page can be found.
|
||||
*/
|
||||
bool getPageUrlFromTitle(const string& title, string& url) const;
|
||||
DEPRECATED bool getPageUrlFromTitle(const string& title, string& url) const;
|
||||
|
||||
/**
|
||||
* Get the mimetype of a article specified by a url.
|
||||
* Get the mimetype of a entry specified by a url.
|
||||
*
|
||||
* @param[in] url the url of the article.
|
||||
* @param[out] mimetype the mimeType of the article.
|
||||
* @param[in] url the url of the entry.
|
||||
* @param[out] mimeType the mimeType of the entry.
|
||||
* @return True if the mimeType has been found.
|
||||
*/
|
||||
bool getMimeTypeByUrl(const string& url, string& mimeType) const;
|
||||
DEPRECATED bool getMimeTypeByUrl(const string& url, string& mimeType) const;
|
||||
|
||||
/**
|
||||
* Get the content of an article specifed by a url.
|
||||
* Get the content of an entry specifed by a url.
|
||||
*
|
||||
* Alias to `getContentByEncodedUrl`
|
||||
*/
|
||||
bool getContentByUrl(const string& url,
|
||||
DEPRECATED bool getContentByUrl(const string& url,
|
||||
string& content,
|
||||
string& title,
|
||||
unsigned int& contentLength,
|
||||
string& contentType) const;
|
||||
|
||||
/**
|
||||
* Get the content of an article specified by a url encoded url.
|
||||
* Get the content of an entry specified by a url encoded url.
|
||||
*
|
||||
* Equivalent to getContentByDecodedUrl(urlDecode(url), ...).
|
||||
*/
|
||||
bool getContentByEncodedUrl(const string& url,
|
||||
DEPRECATED bool getContentByEncodedUrl(const string& url,
|
||||
string& content,
|
||||
string& title,
|
||||
unsigned int& contentLength,
|
||||
|
@ -249,54 +309,54 @@ class Reader
|
|||
string& baseUrl) const;
|
||||
|
||||
/**
|
||||
* Get the content of an article specified by an url encoded url.
|
||||
* Get the content of an entry specified by an url encoded url.
|
||||
*
|
||||
* Equivalent to getContentByEncodedUrl but without baseUrl.
|
||||
*/
|
||||
bool getContentByEncodedUrl(const string& url,
|
||||
DEPRECATED bool getContentByEncodedUrl(const string& url,
|
||||
string& content,
|
||||
string& title,
|
||||
unsigned int& contentLength,
|
||||
string& contentType) const;
|
||||
|
||||
/**
|
||||
* Get the content of an article specified by a url.
|
||||
* Get the content of an entry specified by a url.
|
||||
*
|
||||
* @param[in] url The url of the article.
|
||||
* @param[out] content The content of the article.
|
||||
* @param[out] title the title of the article.
|
||||
* @param[out] contentLength The size of the article (size of content).
|
||||
* @param[out] contentType The mimeType of the article.
|
||||
* @param[out] baseUrl Return the true url of the article.
|
||||
* If the specified article is a redirection, contains
|
||||
* the url of the targeted article.
|
||||
* @return True if the article has been found.
|
||||
* @param[in] url The url of the entry.
|
||||
* @param[out] content The content of the entry.
|
||||
* @param[out] title the title of the entry.
|
||||
* @param[out] contentLength The size of the entry (size of content).
|
||||
* @param[out] contentType The mimeType of the entry.
|
||||
* @param[out] baseUrl Return the true url of the entry.
|
||||
* If the specified entry is a redirection, contains
|
||||
* the url of the targeted entry.
|
||||
* @return True if the entry has been found.
|
||||
*/
|
||||
bool getContentByDecodedUrl(const string& url,
|
||||
DEPRECATED bool getContentByDecodedUrl(const string& url,
|
||||
string& content,
|
||||
string& title,
|
||||
unsigned int& contentLength,
|
||||
string& contentType,
|
||||
string& baseUrl) const;
|
||||
/**
|
||||
* Get the content of an article specified by a url.
|
||||
* Get the content of an entry specified by a url.
|
||||
*
|
||||
* Equivalent to getContentByDecodedUrl but without the baseUrl.
|
||||
*/
|
||||
bool getContentByDecodedUrl(const string& url,
|
||||
DEPRECATED bool getContentByDecodedUrl(const string& url,
|
||||
string& content,
|
||||
string& title,
|
||||
unsigned int& contentLength,
|
||||
string& contentType) const;
|
||||
|
||||
/**
|
||||
* Search for articles with title starting with prefix (case sensitive).
|
||||
* Search for entries with title starting with prefix (case sensitive).
|
||||
*
|
||||
* Suggestions are stored in an internal vector and can be retrieved using
|
||||
* `getNextSuggestion` method.
|
||||
*
|
||||
* @param prefix The prefix to search.
|
||||
* @param suggestionCount How many suggestions to search for.
|
||||
* @param suggestionsCount How many suggestions to search for.
|
||||
* @param reset If true, remove previous suggestions in the internal vector.
|
||||
* If false, add suggestions to the internal vector
|
||||
* (until internal vector size is suggestionCount (or no more
|
||||
|
@ -308,7 +368,7 @@ class Reader
|
|||
const bool reset = true);
|
||||
|
||||
/**
|
||||
* Search for articles for the given prefix.
|
||||
* Search for entries for the given prefix.
|
||||
*
|
||||
* If the zim file has a internal fulltext index, the suggestions will be
|
||||
* searched using it.
|
||||
|
@ -320,7 +380,7 @@ class Reader
|
|||
* The internal vector will be reset.
|
||||
*
|
||||
* @param prefix The prefix to search for.
|
||||
* @param suggestionCount How many suggestions to search for.
|
||||
* @param suggestionsCount How many suggestions to search for.
|
||||
*/
|
||||
bool searchSuggestionsSmart(const string& prefix,
|
||||
unsigned int suggestionsCount);
|
||||
|
@ -328,10 +388,20 @@ class Reader
|
|||
/**
|
||||
* Check if the url exists in the zim file.
|
||||
*
|
||||
* Deprecated : Use `pathExists` instead.
|
||||
*
|
||||
* @param url the url to check.
|
||||
* @return True if the url exists in the zim file.
|
||||
*/
|
||||
bool urlExists(const string& url) const;
|
||||
DEPRECATED bool urlExists(const string& url) const;
|
||||
|
||||
/**
|
||||
* Check if the path exists in the zim file.
|
||||
*
|
||||
* @param path the path to check.
|
||||
* @return True if the path exists in the zim file.
|
||||
*/
|
||||
bool pathExists(const string& path) const;
|
||||
|
||||
/**
|
||||
* Check if the zim file has a embedded fulltext index.
|
||||
|
@ -388,7 +458,7 @@ class Reader
|
|||
* @param[out] title The url (url).
|
||||
* @return True
|
||||
*/
|
||||
bool parseUrl(const string& url, char* ns, string& title) const;
|
||||
DEPRECATED bool parseUrl(const string& url, char* ns, string& title) const;
|
||||
|
||||
/**
|
||||
* Return the total size of the zim file.
|
||||
|
@ -413,7 +483,7 @@ class Reader
|
|||
* @param[out] article The libzim article object.
|
||||
* @return True if the url is good (article.good()).
|
||||
*/
|
||||
bool getArticleObjectByDecodedUrl(const string& url,
|
||||
DEPRECATED bool getArticleObjectByDecodedUrl(const string& url,
|
||||
zim::Article& article) const;
|
||||
|
||||
protected:
|
||||
|
|
|
@ -60,7 +60,7 @@ Java_org_kiwix_kiwixlib_JNIKiwixReader_getMainPage(JNIEnv* env, jobject obj)
|
|||
jstring url;
|
||||
|
||||
try {
|
||||
std::string cUrl = READER->getMainPageUrl();
|
||||
std::string cUrl = READER->getMainPage().getPath();
|
||||
url = c2jni(cUrl, env);
|
||||
} catch (...) {
|
||||
std::cerr << "Unable to get ZIM main page" << std::endl;
|
||||
|
@ -196,8 +196,8 @@ JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwixReader_getMimeType(
|
|||
|
||||
std::string cUrl = jni2c(url, env);
|
||||
try {
|
||||
std::string cMimeType;
|
||||
READER->getMimeTypeByUrl(cUrl, cMimeType);
|
||||
auto entry = READER->getEntryFromEncodedPath(cUrl);
|
||||
auto cMimeType = entry.getMimetype();
|
||||
mimeType = c2jni(cMimeType, env);
|
||||
} catch (...) {
|
||||
std::cerr << "Unable to get mime-type for url " << cUrl << std::endl;
|
||||
|
@ -216,20 +216,20 @@ JNIEXPORT jbyteArray JNICALL Java_org_kiwix_kiwixlib_JNIKiwixReader_getContent(
|
|||
|
||||
/* Retrieve the content */
|
||||
std::string cUrl = jni2c(url, env);
|
||||
std::string cData;
|
||||
std::string cTitle;
|
||||
std::string cMimeType;
|
||||
unsigned int cSize = 0;
|
||||
|
||||
try {
|
||||
if (READER->getContentByUrl(cUrl, cData, cTitle, cSize, cMimeType)) {
|
||||
data = env->NewByteArray(cSize);
|
||||
env->SetByteArrayRegion(
|
||||
data, 0, cSize, reinterpret_cast<const jbyte*>(cData.c_str()));
|
||||
setStringObjValue(cMimeType, mimeTypeObj, env);
|
||||
setStringObjValue(cTitle, titleObj, env);
|
||||
setIntObjValue(cSize, sizeObj, env);
|
||||
}
|
||||
auto entry = READER->getEntryFromEncodedPath(cUrl);
|
||||
entry = entry.getFinalEntry();
|
||||
cSize = entry.getSize();
|
||||
setIntObjValue(cSize, sizeObj, env);
|
||||
|
||||
data = env->NewByteArray(cSize);
|
||||
env->SetByteArrayRegion(
|
||||
data, 0, cSize, reinterpret_cast<const jbyte*>(entry.getBlob().data()));
|
||||
|
||||
setStringObjValue(entry.getMimetype(), mimeTypeObj, env);
|
||||
setStringObjValue(entry.getTitle(), titleObj, env);
|
||||
} catch (...) {
|
||||
std::cerr << "Unable to get content for url " << cUrl << std::endl;
|
||||
}
|
||||
|
@ -249,22 +249,13 @@ JNIEXPORT jbyteArray JNICALL Java_org_kiwix_kiwixlib_JNIKiwixReader_getContentPa
|
|||
unsigned int cOffset = jni2c(offset);
|
||||
unsigned int cLen = jni2c(len);
|
||||
try {
|
||||
zim::Article article;
|
||||
READER->getArticleObjectByDecodedUrl(kiwix::urlDecode(cUrl), article);
|
||||
if (! article.good()) {
|
||||
return data;
|
||||
}
|
||||
int loopCounter = 0;
|
||||
while (article.isRedirect() && ++loopCounter < 42) {
|
||||
article = article.getRedirectArticle();
|
||||
}
|
||||
if (loopCounter == 42) {
|
||||
return data;
|
||||
}
|
||||
auto entry = READER->getEntryFromEncodedPath(cUrl);
|
||||
entry = entry.getFinalEntry();
|
||||
|
||||
if (cLen == 0) {
|
||||
setIntObjValue(article.getArticleSize(), sizeObj, env);
|
||||
} else if (cOffset+cLen > article.getArticleSize()) {
|
||||
auto blob = article.getData(cOffset, cLen);
|
||||
setIntObjValue(entry.getSize(), sizeObj, env);
|
||||
} else if (cOffset+cLen < entry.getSize()) {
|
||||
auto blob = entry.getBlob(cOffset, cLen);
|
||||
data = env->NewByteArray(cLen);
|
||||
env->SetByteArrayRegion(
|
||||
data, 0, cLen, reinterpret_cast<const jbyte*>(blob.data()));
|
||||
|
@ -288,20 +279,9 @@ Java_org_kiwix_kiwixlib_JNIKiwixReader_getDirectAccessInformation(
|
|||
|
||||
std::string cUrl = jni2c(url, env);
|
||||
try {
|
||||
zim::Article article;
|
||||
READER->getArticleObjectByDecodedUrl(kiwix::urlDecode(cUrl), article);
|
||||
if (! article.good()) {
|
||||
return pair;
|
||||
}
|
||||
int loopCounter = 0;
|
||||
while (article.isRedirect() && ++loopCounter < 42) {
|
||||
article = article.getRedirectArticle();
|
||||
}
|
||||
if (loopCounter == 42) {
|
||||
return pair;
|
||||
}
|
||||
|
||||
auto part_info = article.getDirectAccessInformation();
|
||||
auto entry = READER->getEntryFromEncodedPath(cUrl);
|
||||
entry = entry.getFinalEntry();
|
||||
auto part_info = entry.getDirectAccessInfo();
|
||||
setPairObjValue(part_info.first, part_info.second, pair, env);
|
||||
} catch (...) {
|
||||
std::cerr << "Unable to locate direct access information for url " << cUrl
|
||||
|
@ -359,20 +339,18 @@ Java_org_kiwix_kiwixlib_JNIKiwixReader_getPageUrlFromTitle(JNIEnv* env,
|
|||
jstring title,
|
||||
jobject urlObj)
|
||||
{
|
||||
jboolean retVal = JNI_FALSE;
|
||||
std::string cTitle = jni2c(title, env);
|
||||
std::string cUrl;
|
||||
|
||||
try {
|
||||
if (READER->getPageUrlFromTitle(cTitle, cUrl)) {
|
||||
setStringObjValue(cUrl, urlObj, env);
|
||||
retVal = JNI_TRUE;
|
||||
}
|
||||
auto entry = READER->getEntryFromTitle(cTitle);
|
||||
entry = entry.getFinalEntry();
|
||||
setStringObjValue(entry.getPath(), urlObj, env);
|
||||
return JNI_TRUE;
|
||||
} catch (...) {
|
||||
std::cerr << "Unable to get URL for title " << cTitle << std::endl;
|
||||
}
|
||||
|
||||
return retVal;
|
||||
return JNI_FALSE;
|
||||
}
|
||||
|
||||
JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwixReader_getTitle(
|
||||
|
@ -410,7 +388,7 @@ JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwixReader_getRandomPage(
|
|||
std::string cUrl;
|
||||
|
||||
try {
|
||||
std::string cUrl = READER->getRandomPageUrl();
|
||||
std::string cUrl = READER->getRandomPage().getPath();
|
||||
setStringObjValue(cUrl, urlObj, env);
|
||||
retVal = JNI_TRUE;
|
||||
} catch (...) {
|
||||
|
|
|
@ -0,0 +1,138 @@
|
|||
/*
|
||||
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include "reader.h"
|
||||
#include <time.h>
|
||||
|
||||
#include <zim/search.h>
|
||||
|
||||
namespace kiwix
|
||||
{
|
||||
|
||||
/* Wrap the given zim article; the entry stores its own copy of it. */
Entry::Entry(zim::Article article) : article(article) {}
|
||||
|
||||
/* Early-exit helper for Entry methods: when the entry is not good(),
 * make the enclosing function return WHAT. */
#define RETURN_IF_INVALID(WHAT) if(!good()) { return (WHAT); }
|
||||
|
||||
std::string Entry::getPath() const
|
||||
{
|
||||
RETURN_IF_INVALID("");
|
||||
return article.getLongUrl();
|
||||
}
|
||||
|
||||
std::string Entry::getTitle() const
|
||||
{
|
||||
RETURN_IF_INVALID("");
|
||||
return article.getTitle();
|
||||
}
|
||||
|
||||
std::string Entry::getContent() const
|
||||
{
|
||||
RETURN_IF_INVALID("");
|
||||
return article.getData();
|
||||
}
|
||||
|
||||
/* Blob on the article data starting at `offset`; a default-constructed
 * blob when the entry is invalid. */
zim::Blob Entry::getBlob(offset_type offset) const
{
  return good() ? article.getData(offset) : zim::Blob();
}
|
||||
|
||||
/* Blob on `size` bytes of the article data starting at `offset`; a
 * default-constructed blob when the entry is invalid. */
zim::Blob Entry::getBlob(offset_type offset, size_type size) const
{
  return good() ? article.getData(offset, size) : zim::Blob();
}
|
||||
|
||||
std::pair<std::string, offset_type> Entry::getDirectAccessInfo() const
|
||||
{
|
||||
RETURN_IF_INVALID(std::make_pair("", 0));
|
||||
return article.getDirectAccessInformation();
|
||||
}
|
||||
|
||||
/* Size of the wrapped article, or 0 when the entry is invalid. */
size_type Entry::getSize() const
{
  if (!good()) {
    return 0;
  }
  return article.getArticleSize();
}
|
||||
|
||||
std::string Entry::getMimetype() const
|
||||
{
|
||||
RETURN_IF_INVALID("");
|
||||
try {
|
||||
return article.getMimeType();
|
||||
} catch (exception& e) {
|
||||
return "application/octet-stream";
|
||||
}
|
||||
}
|
||||
|
||||
bool Entry::isRedirect() const
|
||||
{
|
||||
RETURN_IF_INVALID(false);
|
||||
return article.isRedirect();
|
||||
}
|
||||
|
||||
bool Entry::isLinkTarget() const
|
||||
{
|
||||
RETURN_IF_INVALID(false);
|
||||
return article.isLinktarget();
|
||||
}
|
||||
|
||||
bool Entry::isDeleted() const
|
||||
{
|
||||
RETURN_IF_INVALID(false);
|
||||
return article.isDeleted();
|
||||
}
|
||||
|
||||
/*
 * Entry targeted by this redirect entry.
 *
 * An invalid entry yields an invalid entry. NoEntry is thrown when the
 * entry is not a redirect or when its target is not a good article.
 */
Entry Entry::getRedirectEntry() const
{
  if (!good()) {
    return Entry();
  }

  if (article.isRedirect()) {
    auto target = article.getRedirectArticle();
    if (target.good()) {
      return target;
    }
  }

  throw NoEntry();
}
|
||||
|
||||
/*
 * Entry reached after following every redirection starting from this one.
 *
 * An invalid entry yields an invalid entry. A successful result is kept in
 * `final_article` and reused by later calls.
 *
 * Throws NoEntry when a redirection target is not a good article, or when
 * the chain is still redirecting after 42 hops (redirect loop guard).
 */
Entry Entry::getFinalEntry() const
{
  if (!good()) {
    return Entry();
  }
  if (final_article.good()) {
    return final_article;
  }

  // Resolve into a local so that the cached article is only written once
  // the whole chain has been followed successfully.
  zim::Article current = article;
  int hopsLeft = 42;
  while (current.isRedirect()) {
    if (hopsLeft-- == 0) {
      // Still redirecting after the allowed number of hops: do not hand a
      // redirect back as if it were the final entry.
      throw NoEntry();
    }
    current = current.getRedirectArticle();
    if (!current.good()) {
      throw NoEntry();
    }
  }

  final_article = current;
  return final_article;
}
|
||||
|
||||
}
|
|
@ -4,6 +4,7 @@ kiwix_sources = [
|
|||
'opds_dumper.cpp',
|
||||
'downloader.cpp',
|
||||
'reader.cpp',
|
||||
'entry.cpp',
|
||||
'searcher.cpp',
|
||||
'common/base64.cpp',
|
||||
'common/pathTools.cpp',
|
||||
|
|
358
src/reader.cpp
358
src/reader.cpp
|
@ -190,79 +190,88 @@ string Reader::getId() const
|
|||
/* Return a page url from a title */
|
||||
bool Reader::getPageUrlFromTitle(const string& title, string& url) const
|
||||
{
|
||||
/* Extract the content from the zim file */
|
||||
zim::Article article = this->zimFileHandler->getArticleByTitle('A', title);
|
||||
|
||||
if (!article.good()) {
|
||||
try {
|
||||
auto entry = getEntryFromTitle(title);
|
||||
entry = entry.getFinalEntry();
|
||||
url = entry.getPath();
|
||||
return true;
|
||||
} catch (NoEntry& e) {
|
||||
return false;
|
||||
}
|
||||
|
||||
unsigned int loopCounter = 0;
|
||||
while (article.isRedirect() && loopCounter++ < 42) {
|
||||
article = article.getRedirectArticle();
|
||||
}
|
||||
|
||||
url = article.getLongUrl();
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Return an URL from a title */
|
||||
string Reader::getRandomPageUrl() const
|
||||
{
|
||||
return getRandomPage().getPath();
|
||||
}
|
||||
|
||||
/*
 * Return a random entry picked among the `nsACount` articles starting at
 * `firstArticleOffset`, excluding the main page.
 *
 * Throws NoEntry when no zim file is opened, or when no suitable article
 * has been drawn after a bounded number of attempts.
 */
Entry Reader::getRandomPage() const
{
  if (!this->zimFileHandler) {
    throw NoEntry();
  }

  const std::string mainPagePath = this->getMainPage().getPath();

  // Bounded number of draws: if every draw is rejected we give up instead
  // of looping forever.
  for (int attempt = 0; attempt < 42; ++attempt) {
    const double ratio
        = static_cast<double>(rand()) / (static_cast<double>(RAND_MAX) + 1);
    const auto idx = this->firstArticleOffset
                     + static_cast<zim::size_type>(ratio * this->nsACount);
    zim::Article article = zimFileHandler->getArticle(idx);

    // A draw is accepted only if the article is usable AND is not the main
    // page; anything else triggers a new draw.
    if (article.good() && article.getLongUrl() != mainPagePath) {
      return article;
    }
  }

  throw NoEntry();
}
|
||||
|
||||
/* Return the welcome page URL */
|
||||
string Reader::getMainPageUrl() const
|
||||
{
|
||||
string url = "";
|
||||
return getMainPage().getPath();
|
||||
}
|
||||
|
||||
if (this->zimFileHandler->getFileheader().hasMainPage()) {
|
||||
zim::Article article = zimFileHandler->getArticle(
|
||||
this->zimFileHandler->getFileheader().getMainPage());
|
||||
url = article.getLongUrl();
|
||||
|
||||
if (url.empty()) {
|
||||
url = getFirstPageUrl();
|
||||
}
|
||||
} else {
|
||||
url = getFirstPageUrl();
|
||||
/*
 * Return the main page entry.
 *
 * The article designated by the file header is used when the header has a
 * main page and that article is good; otherwise the first page is returned.
 *
 * Throws NoEntry when no zim file is opened.
 */
Entry Reader::getMainPage() const
{
  if (!this->zimFileHandler) {
    throw NoEntry();
  }

  zim::Article article;
  if (this->zimFileHandler->getFileheader().hasMainPage()) {
    article = zimFileHandler->getArticle(
        this->zimFileHandler->getFileheader().getMainPage());
  }

  if (!article.good()) {
    return getFirstPage();
  }

  return article;
}
|
||||
|
||||
/*
 * Fetch the favicon of the zim file.
 *
 * Candidate paths are tried in order; the first one for which an entry is
 * found provides `content` and `mimeType`.
 *
 * Returns true if a favicon has been found, false otherwise.
 */
bool Reader::getFavicon(string& content, string& mimeType) const
{
  static const char* const paths[] = {"-/favicon.png", "I/favicon.png", "I/favicon", "-/favicon"};

  for (const auto& path : paths) {
    try {
      // Resolve redirections so that the content and mime type come from
      // the entry actually holding the data.
      const auto entry = getEntryFromPath(path).getFinalEntry();
      content = entry.getContent();
      mimeType = entry.getMimetype();
      return true;
    } catch (const NoEntry&) {
      // No usable entry at this path: try the next candidate.
    }
  }

  return false;
}
|
||||
|
||||
string Reader::getZimFilePath() const
|
||||
|
@ -272,11 +281,13 @@ string Reader::getZimFilePath() const
|
|||
/* Return a metatag value */
|
||||
bool Reader::getMetatag(const string& name, string& value) const
|
||||
{
|
||||
unsigned int contentLength = 0;
|
||||
string contentType = "";
|
||||
string title;
|
||||
|
||||
return this->getContentByUrl("/M/" + name, value, title, contentLength, contentType);
|
||||
try {
|
||||
auto entry = getEntryFromPath("M/"+name);
|
||||
value = entry.getContent();
|
||||
return true;
|
||||
} catch(NoEntry& e) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
string Reader::getTitle() const
|
||||
|
@ -375,12 +386,26 @@ string Reader::getOrigId() const
|
|||
/* Return the first page URL */
|
||||
string Reader::getFirstPageUrl() const
|
||||
{
|
||||
zim::size_type firstPageOffset = zimFileHandler->getNamespaceBeginOffset('A');
|
||||
zim::Article article = zimFileHandler->getArticle(firstPageOffset);
|
||||
return article.getLongUrl();
|
||||
return getFirstPage().getPath();
|
||||
}
|
||||
|
||||
bool Reader::parseUrl(const string& url, char* ns, string& title) const
|
||||
Entry Reader::getFirstPage() const
{
  /* Without a ZIM file handler there is nothing to look up. */
  if (!this->zimFileHandler) {
    throw NoEntry();
  }

  /* Fetch the article located at the beginning of namespace 'A'. */
  const auto namespaceStart = zimFileHandler->getNamespaceBeginOffset('A');
  auto firstArticle = zimFileHandler->getArticle(namespaceStart);

  /* Only hand the article back when it reports itself as good(). */
  if (firstArticle.good()) {
    return firstArticle;
  }
  throw NoEntry();
}
|
||||
|
||||
bool _parseUrl(const string& url, char* ns, string& title)
|
||||
{
|
||||
/* Offset to visit the url */
|
||||
unsigned int urlLength = url.size();
|
||||
|
@ -414,6 +439,52 @@ bool Reader::parseUrl(const string& url, char* ns, string& title) const
|
|||
return true;
|
||||
}
|
||||
|
||||
bool Reader::parseUrl(const string& url, char* ns, string& title) const
|
||||
{
|
||||
return _parseUrl(url, ns, title);
|
||||
}
|
||||
|
||||
Entry Reader::getEntryFromPath(const std::string& path) const
{
  /* Without a ZIM file handler there is nothing to look up. */
  if (!this->zimFileHandler) {
    throw NoEntry();
  }

  /* Split the path into a namespace character and the url inside it. */
  char nameSpace = 0;
  std::string entryUrl;
  _parseUrl(path, &nameSpace, entryUrl);

  /* Neither a namespace nor a url was extracted: return the main page. */
  const bool nothingParsed = (nameSpace == 0) && entryUrl.empty();
  if (nothingParsed) {
    return getMainPage();
  }

  /* Regular lookup; an article that is not good() is reported as NoEntry. */
  auto found = zimFileHandler->getArticle(nameSpace, entryUrl);
  if (found.good()) {
    return found;
  }
  throw NoEntry();
}
|
||||
|
||||
Entry Reader::getEntryFromEncodedPath(const std::string& path) const
{
  /* Decode the url-encoded path first, then reuse the plain path lookup. */
  const std::string decodedPath = urlDecode(path);
  return getEntryFromPath(decodedPath);
}
|
||||
|
||||
Entry Reader::getEntryFromTitle(const std::string& title) const
{
  /* Without a ZIM file handler there is nothing to look up. */
  if (!this->zimFileHandler) {
    throw NoEntry();
  }

  /* Title lookup is done in namespace 'A' only. */
  auto found = this->zimFileHandler->getArticleByTitle('A', title);
  if (found.good()) {
    return found;
  }
  throw NoEntry();
}
|
||||
|
||||
/* Return article by url */
|
||||
bool Reader::getArticleObjectByDecodedUrl(const string& url,
|
||||
zim::Article& article) const
|
||||
|
@ -425,11 +496,11 @@ bool Reader::getArticleObjectByDecodedUrl(const string& url,
|
|||
/* Parse the url */
|
||||
char ns = 0;
|
||||
string urlStr;
|
||||
this->parseUrl(url, &ns, urlStr);
|
||||
_parseUrl(url, &ns, urlStr);
|
||||
|
||||
/* Main page */
|
||||
if (urlStr.empty() && ns == 0) {
|
||||
this->parseUrl(this->getMainPageUrl(), &ns, urlStr);
|
||||
_parseUrl(this->getMainPage().getPath(), &ns, urlStr);
|
||||
}
|
||||
|
||||
/* Extract the content from the zim file */
|
||||
|
@ -440,26 +511,53 @@ bool Reader::getArticleObjectByDecodedUrl(const string& url,
|
|||
/* Return the mimeType without the content */
|
||||
bool Reader::getMimeTypeByUrl(const string& url, string& mimeType) const
|
||||
{
|
||||
if (this->zimFileHandler == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
zim::Article article;
|
||||
if (this->getArticleObjectByDecodedUrl(url, article)) {
|
||||
try {
|
||||
mimeType = article.getMimeType();
|
||||
} catch (exception& e) {
|
||||
cerr << "Unable to get the mimetype for " << url << ":" << e.what()
|
||||
<< endl;
|
||||
mimeType = "application/octet-stream";
|
||||
}
|
||||
try {
|
||||
auto entry = getEntryFromPath(url);
|
||||
mimeType = entry.getMimetype();
|
||||
return true;
|
||||
} else {
|
||||
} catch (NoEntry& e) {
|
||||
mimeType = "";
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool get_content_by_decoded_url(const Reader& reader,
|
||||
const string& url,
|
||||
string& content,
|
||||
string& title,
|
||||
unsigned int& contentLength,
|
||||
string& contentType,
|
||||
string& baseUrl)
|
||||
{
|
||||
content = "";
|
||||
contentType = "";
|
||||
contentLength = 0;
|
||||
|
||||
try {
|
||||
auto entry = reader.getEntryFromPath(url);
|
||||
entry = entry.getFinalEntry();
|
||||
baseUrl = entry.getPath();
|
||||
contentType = entry.getMimetype();
|
||||
content = entry.getContent();
|
||||
contentLength = entry.getSize();
|
||||
title = entry.getTitle();
|
||||
|
||||
/* Try to set a stub HTML header/footer if necesssary */
|
||||
if (contentType.find("text/html") != string::npos
|
||||
&& content.find("<body") == std::string::npos
|
||||
&& content.find("<BODY") == std::string::npos) {
|
||||
content = "<html><head><title>" + title +
|
||||
"</title><meta http-equiv=\"Content-Type\" content=\"text/html; "
|
||||
"charset=utf-8\" /></head><body>" +
|
||||
content + "</body></html>";
|
||||
}
|
||||
return true;
|
||||
} catch (NoEntry& e) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Get a content from a zim file */
|
||||
bool Reader::getContentByUrl(const string& url,
|
||||
string& content,
|
||||
|
@ -467,7 +565,14 @@ bool Reader::getContentByUrl(const string& url,
|
|||
unsigned int& contentLength,
|
||||
string& contentType) const
|
||||
{
|
||||
return this->getContentByEncodedUrl(url, content, title, contentLength, contentType);
|
||||
std::string stubRedirectUrl;
|
||||
return get_content_by_decoded_url(*this,
|
||||
kiwix::urlDecode(url),
|
||||
content,
|
||||
title,
|
||||
contentLength,
|
||||
contentType,
|
||||
stubRedirectUrl);
|
||||
}
|
||||
|
||||
bool Reader::getContentByEncodedUrl(const string& url,
|
||||
|
@ -477,8 +582,13 @@ bool Reader::getContentByEncodedUrl(const string& url,
|
|||
string& contentType,
|
||||
string& baseUrl) const
|
||||
{
|
||||
return this->getContentByDecodedUrl(
|
||||
kiwix::urlDecode(url), content, title, contentLength, contentType, baseUrl);
|
||||
return get_content_by_decoded_url(*this,
|
||||
kiwix::urlDecode(url),
|
||||
content,
|
||||
title,
|
||||
contentLength,
|
||||
contentType,
|
||||
baseUrl);
|
||||
}
|
||||
|
||||
bool Reader::getContentByEncodedUrl(const string& url,
|
||||
|
@ -488,12 +598,13 @@ bool Reader::getContentByEncodedUrl(const string& url,
|
|||
string& contentType) const
|
||||
{
|
||||
std::string stubRedirectUrl;
|
||||
return this->getContentByEncodedUrl(kiwix::urlDecode(url),
|
||||
content,
|
||||
title,
|
||||
contentLength,
|
||||
contentType,
|
||||
stubRedirectUrl);
|
||||
return get_content_by_decoded_url(*this,
|
||||
kiwix::urlDecode(url),
|
||||
content,
|
||||
title,
|
||||
contentLength,
|
||||
contentType,
|
||||
stubRedirectUrl);
|
||||
}
|
||||
|
||||
bool Reader::getContentByDecodedUrl(const string& url,
|
||||
|
@ -503,12 +614,13 @@ bool Reader::getContentByDecodedUrl(const string& url,
|
|||
string& contentType) const
|
||||
{
|
||||
std::string stubRedirectUrl;
|
||||
return this->getContentByDecodedUrl(kiwix::urlDecode(url),
|
||||
content,
|
||||
title,
|
||||
contentLength,
|
||||
contentType,
|
||||
stubRedirectUrl);
|
||||
return get_content_by_decoded_url(*this,
|
||||
url,
|
||||
content,
|
||||
title,
|
||||
contentLength,
|
||||
contentType,
|
||||
stubRedirectUrl);
|
||||
}
|
||||
|
||||
bool Reader::getContentByDecodedUrl(const string& url,
|
||||
|
@ -518,64 +630,31 @@ bool Reader::getContentByDecodedUrl(const string& url,
|
|||
string& contentType,
|
||||
string& baseUrl) const
|
||||
{
|
||||
content = "";
|
||||
contentType = "";
|
||||
contentLength = 0;
|
||||
|
||||
zim::Article article;
|
||||
if (!this->getArticleObjectByDecodedUrl(url, article)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/* If redirect */
|
||||
unsigned int loopCounter = 0;
|
||||
while (article.isRedirect() && loopCounter++ < 42) {
|
||||
article = article.getRedirectArticle();
|
||||
}
|
||||
|
||||
if (loopCounter < 42) {
|
||||
/* Compute base url (might be different from the url if redirects */
|
||||
baseUrl
|
||||
= "/" + std::string(1, article.getNamespace()) + "/" + article.getUrl();
|
||||
|
||||
/* Get the content mime-type */
|
||||
try {
|
||||
contentType
|
||||
= string(article.getMimeType().data(), article.getMimeType().size());
|
||||
} catch (exception& e) {
|
||||
cerr << "Unable to get the mimetype for " << baseUrl << ":" << e.what()
|
||||
<< endl;
|
||||
contentType = "application/octet-stream";
|
||||
}
|
||||
|
||||
/* Get the data */
|
||||
content = string(article.getData().data(), article.getArticleSize());
|
||||
title = article.getTitle();
|
||||
}
|
||||
|
||||
/* Try to set a stub HTML header/footer if necessary */
|
||||
if (contentType.find("text/html") != string::npos
|
||||
&& content.find("<body") == std::string::npos
|
||||
&& content.find("<BODY") == std::string::npos) {
|
||||
content = "<html><head><title>" + article.getTitle() +
|
||||
"</title><meta http-equiv=\"Content-Type\" content=\"text/html; "
|
||||
"charset=utf-8\" /></head><body>" +
|
||||
content + "</body></html>";
|
||||
}
|
||||
|
||||
/* Get the data length */
|
||||
contentLength = article.getArticleSize();
|
||||
|
||||
return true;
|
||||
return get_content_by_decoded_url(*this,
|
||||
url,
|
||||
content,
|
||||
title,
|
||||
contentLength,
|
||||
contentType,
|
||||
baseUrl);
|
||||
}
|
||||
|
||||
/* Check if an article exists */
|
||||
bool Reader::urlExists(const string& url) const
|
||||
{
|
||||
return pathExists(url);
|
||||
}
|
||||
|
||||
bool Reader::pathExists(const string& path) const
|
||||
{
|
||||
if (!zimFileHandler)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
char ns = 0;
|
||||
string titleStr;
|
||||
this->parseUrl(url, &ns, titleStr);
|
||||
titleStr = "/" + titleStr;
|
||||
_parseUrl(path, &ns, titleStr);
|
||||
zim::File::const_iterator findItr = zimFileHandler->find(ns, titleStr);
|
||||
return findItr != zimFileHandler->end() && findItr->getUrl() == titleStr;
|
||||
}
|
||||
|
@ -583,8 +662,13 @@ bool Reader::urlExists(const string& url) const
|
|||
/* Does the ZIM file have a fulltext index */
|
||||
bool Reader::hasFulltextIndex() const
|
||||
{
|
||||
return ( this->urlExists("/Z/fulltextIndex/xapian")
|
||||
&& !zimFileHandler->is_multiPart() );
|
||||
if (!zimFileHandler || zimFileHandler->is_multiPart() )
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
return ( pathExists("Z//fulltextIndex/xapian")
|
||||
|| pathExists("X/fulltext/xapian"));
|
||||
}
|
||||
|
||||
/* Search titles by prefix */
|
||||
|
|
|
@ -193,13 +193,8 @@ std::string XapianResult::get_content()
|
|||
if (!searcher->reader) {
|
||||
return "";
|
||||
}
|
||||
std::string content;
|
||||
std::string title;
|
||||
unsigned int contentLength;
|
||||
std::string contentType;
|
||||
searcher->reader->getContentByUrl(
|
||||
get_url(), content, title, contentLength, contentType);
|
||||
return content;
|
||||
auto entry = searcher->reader->getEntryFromEncodedPath(get_url());
|
||||
return entry.getContent();
|
||||
}
|
||||
|
||||
int XapianResult::get_size()
|
||||
|
|
Loading…
Reference in New Issue