mirror of https://github.com/kiwix/libkiwix.git
Adapt kiwix-lib to the new libzim api.
This commit is contained in:
parent
d87079ec13
commit
1a5a2e7a8e
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright 2018 Matthieu Gautier <mgautier@kymeria.fr>
|
||||
* Copyright 2018-2020 Matthieu Gautier <mgautier@kymeria.fr>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
@ -21,7 +21,8 @@
|
|||
#define KIWIX_ENTRY_H
|
||||
|
||||
#include <stdio.h>
|
||||
#include <zim/article.h>
|
||||
#include <zim/entry.h>
|
||||
#include <zim/item.h>
|
||||
#include <exception>
|
||||
#include <string>
|
||||
|
||||
|
@ -41,19 +42,12 @@ class NoEntry : public std::exception {};
|
|||
class Entry
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* Default constructor.
|
||||
*
|
||||
* Construct an invalid entry.
|
||||
*/
|
||||
Entry() = default;
|
||||
|
||||
/**
|
||||
* Construct an entry making reference to a zim article.
|
||||
*
|
||||
* @param article a zim::Article object
|
||||
*/
|
||||
Entry(zim::Article article);
|
||||
Entry(zim::Entry entry);
|
||||
virtual ~Entry() = default;
|
||||
|
||||
/**
|
||||
|
@ -63,14 +57,14 @@ class Entry
|
|||
*
|
||||
* @return the path of the entry.
|
||||
*/
|
||||
std::string getPath() const;
|
||||
std::string getPath() const { return entry.getPath(); }
|
||||
|
||||
/**
|
||||
* Get the title of the entry.
|
||||
*
|
||||
* @return the title of the entry.
|
||||
*/
|
||||
std::string getTitle() const;
|
||||
std::string getTitle() const { return entry.getTitle(); }
|
||||
|
||||
/**
|
||||
* Get the content of the entry.
|
||||
|
@ -80,7 +74,7 @@ class Entry
|
|||
*
|
||||
* @return the content of the entry.
|
||||
*/
|
||||
std::string getContent() const;
|
||||
std::string getContent() const { return entry.getItem().getData(); }
|
||||
|
||||
/**
|
||||
* Get the blob of the entry.
|
||||
|
@ -90,7 +84,7 @@ class Entry
|
|||
* @param offset The starting offset of the blob.
|
||||
* @return the blob of the entry.
|
||||
*/
|
||||
zim::Blob getBlob(offset_type offset = 0) const;
|
||||
zim::Blob getBlob(offset_type offset = 0) const { return entry.getItem().getData(offset); }
|
||||
|
||||
/**
|
||||
* Get the blob of the entry.
|
||||
|
@ -101,7 +95,7 @@ class Entry
|
|||
* @param size The size of the blob.
|
||||
* @return the blob of the entry.
|
||||
*/
|
||||
zim::Blob getBlob(offset_type offset, size_type size) const;
|
||||
zim::Blob getBlob(offset_type offset, size_type size) const { return entry.getItem().getData(offset, size); }
|
||||
|
||||
/**
|
||||
* Get the info for direct access to the content of the entry.
|
||||
|
@ -117,7 +111,7 @@ class Entry
|
|||
* The offset is the offset to read in the file.
|
||||
* Return <"",0> if it is not possible to read directly.
|
||||
*/
|
||||
std::pair<std::string, offset_type> getDirectAccessInfo() const;
|
||||
std::pair<std::string, offset_type> getDirectAccessInfo() const { return entry.getItem().getDirectAccessInformation(); }
|
||||
|
||||
/**
|
||||
* Get the size of the entry.
|
||||
|
@ -174,17 +168,14 @@ class Entry
|
|||
Entry getFinalEntry() const;
|
||||
|
||||
/**
|
||||
* Convert the entry to a boolean value.
|
||||
* Get the zim entry wrapped by this (kiwix) entry
|
||||
*
|
||||
* @return True if the entry is valid.
|
||||
* @return the zim entry
|
||||
*/
|
||||
explicit operator bool() const { return good(); }
|
||||
const zim::Entry& getZimEntry() const { return entry; }
|
||||
|
||||
private:
|
||||
zim::Article article;
|
||||
mutable zim::Article final_article;
|
||||
|
||||
bool good() const { return article.good(); }
|
||||
zim::Entry entry;
|
||||
};
|
||||
|
||||
}
|
||||
|
|
|
@ -21,10 +21,8 @@
|
|||
#define KIWIX_READER_H
|
||||
|
||||
#include <stdio.h>
|
||||
#include <zim/article.h>
|
||||
#include <zim/file.h>
|
||||
#include <zim/fileiterator.h>
|
||||
#include <zim/zim.h>
|
||||
#include <zim/archive.h>
|
||||
#include <exception>
|
||||
#include <map>
|
||||
#include <sstream>
|
||||
|
@ -58,7 +56,7 @@ class Reader
|
|||
* (.zim extension).
|
||||
*/
|
||||
Reader(const string zimFilePath);
|
||||
~Reader();
|
||||
~Reader() = default;
|
||||
|
||||
/**
|
||||
* Get the number of "displayable" entries in the zim file.
|
||||
|
@ -110,13 +108,6 @@ class Reader
|
|||
*/
|
||||
Entry getRandomPage() const;
|
||||
|
||||
/**
|
||||
* Get the entry of the first page.
|
||||
*
|
||||
* @return The first entry in the 'A' namespace.
|
||||
*/
|
||||
Entry getFirstPage() const;
|
||||
|
||||
/**
|
||||
* Get the entry of the main page.
|
||||
*
|
||||
|
@ -455,14 +446,10 @@ class Reader
|
|||
*
|
||||
* @return The libzim file handler.
|
||||
*/
|
||||
zim::File* getZimFileHandler() const;
|
||||
zim::Archive* getZimArchive() const;
|
||||
|
||||
protected:
|
||||
zim::File* zimFileHandler;
|
||||
zim::size_type firstArticleOffset;
|
||||
zim::size_type lastArticleOffset;
|
||||
zim::size_type nsACount;
|
||||
zim::size_type nsICount;
|
||||
std::unique_ptr<zim::Archive> zimArchive;
|
||||
std::string zimFilePath;
|
||||
|
||||
SuggestionsList_t suggestions;
|
||||
|
|
|
@ -43,7 +43,7 @@ namespace kiwix
|
|||
const std::string& tagName);
|
||||
bool convertStrToBool(const std::string& value);
|
||||
|
||||
using MimeCounterType = std::map<const std::string, zim::article_index_type>;
|
||||
using MimeCounterType = std::map<const std::string, zim::entry_index_type>;
|
||||
MimeCounterType parseMimetypeCounter(const std::string& counterData);
|
||||
}
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
|
||||
* Copyright 2018-2020 Matthieu Gautier <mgautier@kymeria.fr>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
@ -25,116 +25,51 @@
|
|||
namespace kiwix
|
||||
{
|
||||
|
||||
Entry::Entry(zim::Article article)
|
||||
: article(article)
|
||||
Entry::Entry(zim::Entry entry)
|
||||
: entry(entry)
|
||||
{
|
||||
}
|
||||
|
||||
#define RETURN_IF_INVALID(WHAT) if(!good()) { return (WHAT); }
|
||||
|
||||
std::string Entry::getPath() const
|
||||
{
|
||||
RETURN_IF_INVALID("");
|
||||
return article.getLongUrl();
|
||||
}
|
||||
|
||||
std::string Entry::getTitle() const
|
||||
{
|
||||
RETURN_IF_INVALID("");
|
||||
return article.getTitle();
|
||||
}
|
||||
|
||||
std::string Entry::getContent() const
|
||||
{
|
||||
RETURN_IF_INVALID("");
|
||||
return article.getData();
|
||||
}
|
||||
|
||||
zim::Blob Entry::getBlob(offset_type offset) const
|
||||
{
|
||||
RETURN_IF_INVALID(zim::Blob());
|
||||
return article.getData(offset);
|
||||
}
|
||||
|
||||
zim::Blob Entry::getBlob(offset_type offset, size_type size) const
|
||||
{
|
||||
RETURN_IF_INVALID(zim::Blob());
|
||||
return article.getData(offset, size);
|
||||
}
|
||||
|
||||
std::pair<std::string, offset_type> Entry::getDirectAccessInfo() const
|
||||
{
|
||||
RETURN_IF_INVALID(std::make_pair("", 0));
|
||||
return article.getDirectAccessInformation();
|
||||
}
|
||||
|
||||
size_type Entry::getSize() const
|
||||
{
|
||||
RETURN_IF_INVALID(0);
|
||||
return article.getArticleSize();
|
||||
if (entry.isRedirect()) {
|
||||
return 0;
|
||||
} else {
|
||||
return entry.getItem().getSize();
|
||||
}
|
||||
}
|
||||
|
||||
std::string Entry::getMimetype() const
|
||||
{
|
||||
RETURN_IF_INVALID("");
|
||||
try {
|
||||
return article.getMimeType();
|
||||
} catch (exception& e) {
|
||||
return "application/octet-stream";
|
||||
}
|
||||
return entry.getItem(true).getMimetype();
|
||||
}
|
||||
|
||||
bool Entry::isRedirect() const
|
||||
{
|
||||
RETURN_IF_INVALID(false);
|
||||
return article.isRedirect();
|
||||
}
|
||||
|
||||
bool Entry::isLinkTarget() const
|
||||
{
|
||||
RETURN_IF_INVALID(false);
|
||||
return article.isLinktarget();
|
||||
}
|
||||
|
||||
bool Entry::isDeleted() const
|
||||
{
|
||||
RETURN_IF_INVALID(false);
|
||||
return article.isDeleted();
|
||||
return entry.isRedirect();
|
||||
}
|
||||
|
||||
Entry Entry::getRedirectEntry() const
|
||||
{
|
||||
RETURN_IF_INVALID(Entry());
|
||||
if ( !article.isRedirect() ) {
|
||||
if ( !entry.isRedirect() ) {
|
||||
throw NoEntry();
|
||||
}
|
||||
|
||||
auto targeted_article = article.getRedirectArticle();
|
||||
if ( !targeted_article.good()) {
|
||||
throw NoEntry();
|
||||
}
|
||||
return targeted_article;
|
||||
return entry.getRedirectEntry();
|
||||
}
|
||||
|
||||
Entry Entry::getFinalEntry() const
|
||||
{
|
||||
RETURN_IF_INVALID(Entry());
|
||||
if (final_article.good()) {
|
||||
return final_article;
|
||||
}
|
||||
int loopCounter = 42;
|
||||
final_article = article;
|
||||
while (final_article.isRedirect() && loopCounter--) {
|
||||
final_article = final_article.getRedirectArticle();
|
||||
if ( !final_article.good()) {
|
||||
throw NoEntry();
|
||||
}
|
||||
auto final_entry = entry;
|
||||
while (final_entry.isRedirect() && loopCounter--) {
|
||||
final_entry = final_entry.getRedirectEntry();
|
||||
}
|
||||
// Prevent infinite loops.
|
||||
if (final_article.isRedirect()) {
|
||||
if (final_entry.isRedirect()) {
|
||||
throw NoEntry();
|
||||
}
|
||||
return final_article;
|
||||
return final_entry;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
292
src/reader.cpp
292
src/reader.cpp
|
@ -21,6 +21,8 @@
|
|||
#include <time.h>
|
||||
|
||||
#include <zim/search.h>
|
||||
#include <zim/item.h>
|
||||
#include <zim/error.h>
|
||||
|
||||
#include "tools/otherTools.h"
|
||||
|
||||
|
@ -65,7 +67,9 @@ std::string hexUUID(std::string in)
|
|||
namespace kiwix
|
||||
{
|
||||
/* Constructor */
|
||||
Reader::Reader(const string zimFilePath) : zimFileHandler(NULL)
|
||||
Reader::Reader(const string zimFilePath)
|
||||
: zimArchive(nullptr),
|
||||
zimFilePath(zimFilePath)
|
||||
{
|
||||
string tmpZimFilePath = zimFilePath;
|
||||
|
||||
|
@ -76,43 +80,21 @@ Reader::Reader(const string zimFilePath) : zimFileHandler(NULL)
|
|||
tmpZimFilePath.resize(tmpZimFilePath.size() - 2);
|
||||
}
|
||||
|
||||
this->zimFileHandler = new zim::File(tmpZimFilePath);
|
||||
|
||||
if (this->zimFileHandler != NULL) {
|
||||
this->firstArticleOffset
|
||||
= this->zimFileHandler->getNamespaceBeginOffset('A');
|
||||
this->lastArticleOffset = this->zimFileHandler->getNamespaceEndOffset('A');
|
||||
this->nsACount = this->zimFileHandler->getNamespaceCount('A');
|
||||
this->nsICount = this->zimFileHandler->getNamespaceCount('I');
|
||||
this->zimFilePath = zimFilePath;
|
||||
}
|
||||
zimArchive.reset(new zim::Archive(tmpZimFilePath));
|
||||
|
||||
/* initialize random seed: */
|
||||
srand(time(NULL));
|
||||
srand(time(nullptr));
|
||||
}
|
||||
|
||||
/* Destructor */
|
||||
Reader::~Reader()
|
||||
zim::Archive* Reader::getZimArchive() const
|
||||
{
|
||||
if (this->zimFileHandler != NULL) {
|
||||
delete this->zimFileHandler;
|
||||
}
|
||||
}
|
||||
|
||||
zim::File* Reader::getZimFileHandler() const
|
||||
{
|
||||
return this->zimFileHandler;
|
||||
return zimArchive.get();
|
||||
}
|
||||
|
||||
MimeCounterType Reader::parseCounterMetadata() const
|
||||
{
|
||||
zim::Article article = this->zimFileHandler->getArticle('M', "Counter");
|
||||
|
||||
if (article.good()) {
|
||||
return parseMimetypeCounter(article.getData());
|
||||
}
|
||||
|
||||
return MimeCounterType();
|
||||
auto counterContent = zimArchive->getMetadata("Counter");
|
||||
return parseMimetypeCounter(counterContent);
|
||||
}
|
||||
|
||||
/* Get the count of articles which can be indexed/displayed */
|
||||
|
@ -122,13 +104,9 @@ unsigned int Reader::getArticleCount() const
|
|||
= this->parseCounterMetadata();
|
||||
unsigned int counter = 0;
|
||||
|
||||
if (counterMap.empty()) {
|
||||
counter = this->nsACount;
|
||||
} else {
|
||||
for(auto &pair:counterMap) {
|
||||
if (startsWith(pair.first, "text/html")) {
|
||||
counter += pair.second;
|
||||
}
|
||||
for(auto &pair:counterMap) {
|
||||
if (startsWith(pair.first, "text/html")) {
|
||||
counter += pair.second;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -142,23 +120,19 @@ unsigned int Reader::getMediaCount() const
|
|||
= this->parseCounterMetadata();
|
||||
unsigned int counter = 0;
|
||||
|
||||
if (counterMap.empty()) {
|
||||
counter = this->nsICount;
|
||||
} else {
|
||||
auto it = counterMap.find("image/jpeg");
|
||||
if (it != counterMap.end()) {
|
||||
counter += it->second;
|
||||
}
|
||||
auto it = counterMap.find("image/jpeg");
|
||||
if (it != counterMap.end()) {
|
||||
counter += it->second;
|
||||
}
|
||||
|
||||
it = counterMap.find("image/gif");
|
||||
if (it != counterMap.end()) {
|
||||
counter += it->second;
|
||||
}
|
||||
it = counterMap.find("image/gif");
|
||||
if (it != counterMap.end()) {
|
||||
counter += it->second;
|
||||
}
|
||||
|
||||
it = counterMap.find("image/png");
|
||||
if (it != counterMap.end()) {
|
||||
counter += it->second;
|
||||
}
|
||||
it = counterMap.find("image/png");
|
||||
if (it != counterMap.end()) {
|
||||
counter += it->second;
|
||||
}
|
||||
return counter;
|
||||
}
|
||||
|
@ -166,65 +140,41 @@ unsigned int Reader::getMediaCount() const
|
|||
/* Get the total of all items of a ZIM file, redirects included */
|
||||
unsigned int Reader::getGlobalCount() const
|
||||
{
|
||||
return this->zimFileHandler->getCountArticles();
|
||||
return zimArchive->getEntryCount();
|
||||
}
|
||||
|
||||
/* Return the UID of the ZIM file */
|
||||
string Reader::getId() const
|
||||
{
|
||||
std::ostringstream s;
|
||||
s << this->zimFileHandler->getFileheader().getUuid();
|
||||
s << zimArchive->getUuid();
|
||||
return s.str();
|
||||
}
|
||||
|
||||
Entry Reader::getRandomPage() const
|
||||
{
|
||||
if (!this->zimFileHandler) {
|
||||
throw NoEntry();
|
||||
}
|
||||
|
||||
zim::Article article;
|
||||
std::string mainPagePath = this->getMainPage().getPath();
|
||||
auto mainPagePath = zimArchive->getMainEntry().getPath();
|
||||
int watchdog = 42;
|
||||
|
||||
do {
|
||||
auto idx = this->firstArticleOffset
|
||||
+ (zim::size_type)((double)rand() / ((double)RAND_MAX + 1)
|
||||
* this->nsACount);
|
||||
article = zimFileHandler->getArticle(idx);
|
||||
if (!watchdog--) {
|
||||
throw NoEntry();
|
||||
while (--watchdog){
|
||||
auto idx = (zim::size_type)((double)rand() / ((double)RAND_MAX + 1)
|
||||
* zimArchive->getEntryCount());
|
||||
auto entry = zimArchive->getEntryByPath(idx);
|
||||
|
||||
if (entry.getPath()==mainPagePath) {
|
||||
continue;
|
||||
}
|
||||
} while (!article.good() && article.getLongUrl() == mainPagePath);
|
||||
|
||||
return article;
|
||||
}
|
||||
|
||||
/* Return the welcome page URL */
|
||||
string Reader::getMainPageUrl() const
|
||||
{
|
||||
return getMainPage().getPath();
|
||||
auto item = entry.getItem(true);
|
||||
if (item.getMimetype() == "text/html") {
|
||||
return entry;
|
||||
}
|
||||
}
|
||||
throw NoEntry();
|
||||
}
|
||||
|
||||
Entry Reader::getMainPage() const
|
||||
{
|
||||
if (!this->zimFileHandler) {
|
||||
throw NoEntry();
|
||||
}
|
||||
|
||||
zim::Article article;
|
||||
if (this->zimFileHandler->getFileheader().hasMainPage())
|
||||
{
|
||||
article = zimFileHandler->getArticle(
|
||||
this->zimFileHandler->getFileheader().getMainPage());
|
||||
}
|
||||
|
||||
if (!article.good())
|
||||
{
|
||||
return getFirstPage();
|
||||
}
|
||||
|
||||
return article;
|
||||
return zimArchive->getMainEntry();
|
||||
}
|
||||
|
||||
bool Reader::getFavicon(string& content, string& mimeType) const
|
||||
|
@ -233,12 +183,12 @@ bool Reader::getFavicon(string& content, string& mimeType) const
|
|||
|
||||
for (auto &path: paths) {
|
||||
try {
|
||||
auto entry = getEntryFromPath(path);
|
||||
entry = entry.getFinalEntry();
|
||||
content = entry.getContent();
|
||||
mimeType = entry.getMimetype();
|
||||
auto entry = zimArchive->getEntryByPath(path);
|
||||
auto item = entry.getItem(true);
|
||||
content = item.getData();
|
||||
mimeType = item.getMimetype();
|
||||
return true;
|
||||
} catch(NoEntry& e) {};
|
||||
} catch(zim::EntryNotFound& e) {};
|
||||
}
|
||||
|
||||
return false;
|
||||
|
@ -246,16 +196,15 @@ bool Reader::getFavicon(string& content, string& mimeType) const
|
|||
|
||||
string Reader::getZimFilePath() const
|
||||
{
|
||||
return this->zimFilePath;
|
||||
return zimFilePath;
|
||||
}
|
||||
/* Return a metatag value */
|
||||
bool Reader::getMetadata(const string& name, string& value) const
|
||||
{
|
||||
try {
|
||||
auto entry = getEntryFromPath("M/"+name);
|
||||
value = entry.getContent();
|
||||
value = zimArchive->getMetadata(name);
|
||||
return true;
|
||||
} catch(NoEntry& e) {
|
||||
} catch(zim::EntryNotFound& e) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
@ -269,10 +218,9 @@ string Reader::getName() const
|
|||
|
||||
string Reader::getTitle() const
|
||||
{
|
||||
string value;
|
||||
this->getMetadata("Title", value);
|
||||
string value = zimArchive->getMetadata("Title");
|
||||
if (value.empty()) {
|
||||
value = getLastPathElement(zimFileHandler->getFilename());
|
||||
value = getLastPathElement(zimFilePath);
|
||||
std::replace(value.begin(), value.end(), '_', ' ');
|
||||
size_t pos = value.find(".zim");
|
||||
value = value.substr(0, pos);
|
||||
|
@ -393,42 +341,21 @@ string Reader::getOrigId() const
|
|||
return origID;
|
||||
}
|
||||
|
||||
Entry Reader::getFirstPage() const
|
||||
{
|
||||
if (!this->zimFileHandler) {
|
||||
throw NoEntry();
|
||||
}
|
||||
|
||||
auto firstPageOffset = zimFileHandler->getNamespaceBeginOffset('A');
|
||||
auto article = zimFileHandler->getArticle(firstPageOffset);
|
||||
|
||||
if (! article.good()) {
|
||||
throw NoEntry();
|
||||
}
|
||||
|
||||
return article;
|
||||
}
|
||||
|
||||
Entry Reader::getEntryFromPath(const std::string& path) const
|
||||
{
|
||||
char ns = 0;
|
||||
std::string short_url;
|
||||
|
||||
if (!this->zimFileHandler) {
|
||||
if (!this->zimArchive) {
|
||||
throw NoEntry();
|
||||
}
|
||||
_parseUrl(path, &ns, short_url);
|
||||
|
||||
if (short_url.empty() && ns == 0) {
|
||||
if (path.empty() || path == "/") {
|
||||
return getMainPage();
|
||||
}
|
||||
|
||||
auto article = zimFileHandler->getArticle(ns, short_url);
|
||||
if (!article.good()) {
|
||||
try {
|
||||
return zimArchive->getEntryByPath(path);
|
||||
} catch (zim::EntryNotFound& e) {
|
||||
throw NoEntry();
|
||||
}
|
||||
|
||||
return article;
|
||||
}
|
||||
|
||||
Entry Reader::getEntryFromEncodedPath(const std::string& path) const
|
||||
|
@ -438,56 +365,47 @@ Entry Reader::getEntryFromEncodedPath(const std::string& path) const
|
|||
|
||||
Entry Reader::getEntryFromTitle(const std::string& title) const
|
||||
{
|
||||
if (!this->zimFileHandler) {
|
||||
if (!this->zimArchive) {
|
||||
throw NoEntry();
|
||||
}
|
||||
|
||||
auto article = this->zimFileHandler->getArticleByTitle('A', title);
|
||||
if (!article.good()) {
|
||||
throw NoEntry();
|
||||
}
|
||||
|
||||
return article;
|
||||
}
|
||||
|
||||
/* Return the mimeType without the content */
|
||||
bool Reader::getMimeTypeByUrl(const string& url, string& mimeType) const
|
||||
{
|
||||
try {
|
||||
auto entry = getEntryFromPath(url);
|
||||
mimeType = entry.getMimetype();
|
||||
return true;
|
||||
} catch (NoEntry& e) {
|
||||
mimeType = "";
|
||||
return false;
|
||||
return zimArchive->getEntryByTitle(title);
|
||||
} catch(zim::EntryNotFound& e) {
|
||||
throw NoEntry();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
bool Reader::pathExists(const string& path) const
|
||||
{
|
||||
if (!zimFileHandler)
|
||||
if (!zimArchive)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
char ns = 0;
|
||||
string titleStr;
|
||||
_parseUrl(path, &ns, titleStr);
|
||||
zim::File::const_iterator findItr = zimFileHandler->find(ns, titleStr);
|
||||
return findItr != zimFileHandler->end() && findItr->getUrl() == titleStr;
|
||||
return zimArchive->hasEntryByPath(path);
|
||||
}
|
||||
|
||||
/* Does the ZIM file have a fulltext index */
|
||||
bool Reader::hasFulltextIndex() const
|
||||
{
|
||||
if (!zimFileHandler || zimFileHandler->is_multiPart() )
|
||||
if (!zimArchive)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
return ( pathExists("Z//fulltextIndex/xapian")
|
||||
|| pathExists("X/fulltext/xapian"));
|
||||
for(auto path: {"Z//fulltextIndex/xapian", "X/fulltext/xapian"}) {
|
||||
try {
|
||||
auto entry = zimArchive->getEntryByPath(path);
|
||||
auto item = entry.getItem(true);
|
||||
auto accessInfo = item.getDirectAccessInformation();
|
||||
if (accessInfo.second) {
|
||||
return true;
|
||||
}
|
||||
} catch(...) {}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Search titles by prefix */
|
||||
|
@ -527,23 +445,17 @@ bool Reader::searchSuggestions(const string& prefix,
|
|||
return false;
|
||||
}
|
||||
|
||||
for (auto articleItr = zimFileHandler->findByTitle('A', prefix);
|
||||
articleItr != zimFileHandler->end()
|
||||
&& articleItr->getTitle().compare(0, prefix.size(), prefix) == 0
|
||||
&& results.size() < suggestionsCount;
|
||||
++articleItr) {
|
||||
for (auto& entry: zimArchive->findByTitle(prefix)) {
|
||||
if (results.size() >= suggestionsCount) {
|
||||
break;
|
||||
}
|
||||
/* Extract the interesting part of article title & url */
|
||||
std::string normalizedArticleTitle
|
||||
= kiwix::normalize(articleItr->getTitle());
|
||||
std::string articleFinalUrl = "/A/" + articleItr->getUrl();
|
||||
if (articleItr->isRedirect()) {
|
||||
zim::Article article = *articleItr;
|
||||
unsigned int loopCounter = 0;
|
||||
while (article.isRedirect() && loopCounter++ < 42) {
|
||||
article = article.getRedirectArticle();
|
||||
}
|
||||
articleFinalUrl = "/A/" + article.getUrl();
|
||||
}
|
||||
= kiwix::normalize(entry.getTitle());
|
||||
|
||||
// Get the final path.
|
||||
auto item = entry.getItem(true);
|
||||
std::string articleFinalUrl = item.getPath();
|
||||
|
||||
/* Go through all already found suggestions and skip if this
|
||||
article is already in the suggestions list (with another
|
||||
|
@ -565,7 +477,7 @@ bool Reader::searchSuggestions(const string& prefix,
|
|||
/* Insert if possible */
|
||||
if (insert) {
|
||||
std::vector<std::string> suggestion;
|
||||
suggestion.push_back(articleItr->getTitle());
|
||||
suggestion.push_back(entry.getTitle());
|
||||
suggestion.push_back(articleFinalUrl);
|
||||
suggestion.push_back(normalizedArticleTitle);
|
||||
results.insert(suggestionItr, suggestion);
|
||||
|
@ -612,19 +524,18 @@ bool Reader::searchSuggestionsSmart(const string& prefix,
|
|||
bool retVal = false;
|
||||
|
||||
/* Try to search in the title using fulltext search database */
|
||||
const auto suggestionSearch
|
||||
= this->getZimFileHandler()->suggestions(prefix, 0, suggestionsCount);
|
||||
auto suggestionSearch = zim::Search(*zimArchive);
|
||||
suggestionSearch.set_query(prefix);
|
||||
suggestionSearch.set_range(0, suggestionsCount);
|
||||
suggestionSearch.set_suggestion_mode(true);
|
||||
|
||||
if (suggestionSearch->get_matches_estimated()) {
|
||||
for (auto current = suggestionSearch->begin();
|
||||
current != suggestionSearch->end();
|
||||
if (suggestionSearch.get_matches_estimated()) {
|
||||
for (auto current = suggestionSearch.begin();
|
||||
current != suggestionSearch.end();
|
||||
current++) {
|
||||
if (!current->good()) {
|
||||
continue;
|
||||
}
|
||||
std::vector<std::string> suggestion;
|
||||
suggestion.push_back(current->getTitle());
|
||||
suggestion.push_back("/A/" + current->getUrl());
|
||||
suggestion.push_back(current->getPath());
|
||||
suggestion.push_back(kiwix::normalize(current->getTitle()));
|
||||
results.push_back(suggestion);
|
||||
}
|
||||
|
@ -676,14 +587,14 @@ bool Reader::getNextSuggestion(string& title, string& url)
|
|||
/* Check if the file has a checksum */
|
||||
bool Reader::canCheckIntegrity() const
|
||||
{
|
||||
return this->zimFileHandler->getChecksum() != "";
|
||||
return zimArchive->hasChecksum();
|
||||
}
|
||||
|
||||
/* Return true if corrupted, false otherwise */
|
||||
bool Reader::isCorrupted() const
|
||||
{
|
||||
try {
|
||||
if (this->zimFileHandler->verify() == true) {
|
||||
if (zimArchive->check() == true) {
|
||||
return false;
|
||||
}
|
||||
} catch (exception& e) {
|
||||
|
@ -697,13 +608,10 @@ bool Reader::isCorrupted() const
|
|||
/* Return the file size; also works for split files */
|
||||
unsigned int Reader::getFileSize() const
|
||||
{
|
||||
zim::File* file = this->getZimFileHandler();
|
||||
zim::size_type size = 0;
|
||||
|
||||
if (file != NULL) {
|
||||
size = file->getFilesize();
|
||||
if (!zimArchive) {
|
||||
return 0;
|
||||
}
|
||||
return zimArchive->getFilesize() / 1024;
|
||||
}
|
||||
|
||||
return (size / 1024);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -115,14 +115,14 @@ void Searcher::search(const std::string& search,
|
|||
if (resultStart != resultEnd) {
|
||||
/* Perform the search */
|
||||
string unaccentedSearch = removeAccents(search);
|
||||
std::vector<const zim::File*> zims;
|
||||
std::vector<zim::Archive> archives;
|
||||
for (auto current = this->readers.begin(); current != this->readers.end();
|
||||
current++) {
|
||||
if ( (*current)->hasFulltextIndex() ) {
|
||||
zims.push_back((*current)->getZimFileHandler());
|
||||
archives.push_back(*(*current)->getZimArchive());
|
||||
}
|
||||
}
|
||||
zim::Search* search = new zim::Search(zims);
|
||||
zim::Search* search = new zim::Search(archives);
|
||||
search->set_verbose(verbose);
|
||||
search->set_query(unaccentedSearch);
|
||||
search->set_range(resultStart, resultEnd);
|
||||
|
@ -158,12 +158,12 @@ void Searcher::geo_search(float latitude, float longitude, float distance,
|
|||
return;
|
||||
}
|
||||
|
||||
std::vector<const zim::File*> zims;
|
||||
std::vector<zim::Archive> archives;
|
||||
for (auto current = this->readers.begin(); current != this->readers.end();
|
||||
current++) {
|
||||
zims.push_back((*current)->getZimFileHandler());
|
||||
archives.push_back(*(*current)->getZimArchive());
|
||||
}
|
||||
zim::Search* search = new zim::Search(zims);
|
||||
zim::Search* search = new zim::Search(archives);
|
||||
search->set_verbose(verbose);
|
||||
search->set_query("");
|
||||
search->set_georange(latitude, longitude, distance);
|
||||
|
@ -213,12 +213,12 @@ void Searcher::suggestions(std::string& searchPattern, const bool verbose)
|
|||
this->resultEnd = 10;
|
||||
string unaccentedSearch = removeAccents(searchPattern);
|
||||
|
||||
std::vector<const zim::File*> zims;
|
||||
std::vector<zim::Archive> archives;
|
||||
for (auto current = this->readers.begin(); current != this->readers.end();
|
||||
current++) {
|
||||
zims.push_back((*current)->getZimFileHandler());
|
||||
archives.push_back(*(*current)->getZimArchive());
|
||||
}
|
||||
zim::Search* search = new zim::Search(zims);
|
||||
zim::Search* search = new zim::Search(archives);
|
||||
search->set_verbose(verbose);
|
||||
search->set_query(unaccentedSearch);
|
||||
search->set_range(resultStart, resultEnd);
|
||||
|
@ -257,10 +257,7 @@ std::string _Result::get_snippet()
|
|||
}
|
||||
std::string _Result::get_content()
|
||||
{
|
||||
if (iterator->good()) {
|
||||
return iterator->getData();
|
||||
}
|
||||
return "";
|
||||
return iterator->getItem(true).getData();
|
||||
}
|
||||
int _Result::get_size()
|
||||
{
|
||||
|
|
|
@ -766,33 +766,30 @@ std::unique_ptr<Response> InternalServer::handle_content(const RequestContext& r
|
|||
urlStr = urlStr.substr(1);
|
||||
}
|
||||
|
||||
kiwix::Entry entry;
|
||||
|
||||
try {
|
||||
entry = reader->getEntryFromPath(urlStr);
|
||||
auto entry = reader->getEntryFromPath(urlStr);
|
||||
if (entry.isRedirect() || urlStr.empty()) {
|
||||
// If urlStr is empty, we want the main page.
|
||||
// We must do a redirection to the real page.
|
||||
return build_redirect(bookName, entry.getFinalEntry());
|
||||
}
|
||||
auto response = ItemResponse::build(*this, request, entry.getZimEntry().getItem());
|
||||
try {
|
||||
dynamic_cast<ContentResponse&>(*response).set_taskbar(bookName, reader->getTitle());
|
||||
} catch (std::bad_cast& e) {}
|
||||
|
||||
if (m_verbose.load()) {
|
||||
printf("Found %s\n", entry.getPath().c_str());
|
||||
printf("mimeType: %s\n", entry.getMimetype().c_str());
|
||||
}
|
||||
|
||||
return response;
|
||||
} catch(kiwix::NoEntry& e) {
|
||||
if (m_verbose.load())
|
||||
printf("Failed to find %s\n", urlStr.c_str());
|
||||
|
||||
return Response::build_404(*this, request, bookName);
|
||||
}
|
||||
|
||||
auto response = EntryResponse::build(*this, request, entry);
|
||||
try {
|
||||
dynamic_cast<ContentResponse&>(*response).set_taskbar(bookName, reader->getTitle());
|
||||
} catch (std::bad_cast& e) {}
|
||||
|
||||
if (m_verbose.load()) {
|
||||
printf("Found %s\n", entry.getPath().c_str());
|
||||
printf("mimeType: %s\n", entry.getMimetype().c_str());
|
||||
}
|
||||
|
||||
return response;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -104,7 +104,7 @@ class InternalServer {
|
|||
|
||||
friend std::unique_ptr<Response> Response::build(const InternalServer& server);
|
||||
friend std::unique_ptr<ContentResponse> ContentResponse::build(const InternalServer& server, const std::string& content, const std::string& mimetype);
|
||||
friend std::unique_ptr<Response> EntryResponse::build(const InternalServer& server, const RequestContext& request, const Entry& entry);
|
||||
friend std::unique_ptr<Response> ItemResponse::build(const InternalServer& server, const RequestContext& request, const zim::Item& item);
|
||||
friend std::unique_ptr<Response> Response::build_500(const InternalServer& server, const std::string& msg);
|
||||
|
||||
};
|
||||
|
|
|
@ -33,10 +33,10 @@ std::string render_template(const std::string& template_str, kainjow::mustache::
|
|||
return ss.str();
|
||||
}
|
||||
|
||||
std::string get_mime_type(const kiwix::Entry& entry)
|
||||
std::string get_mime_type(const zim::Item& item)
|
||||
{
|
||||
try {
|
||||
return entry.getMimetype();
|
||||
return item.getMimetype();
|
||||
} catch (exception& e) {
|
||||
return "application/octet-stream";
|
||||
}
|
||||
|
@ -131,17 +131,17 @@ static MHD_Result print_key_value (void *cls, enum MHD_ValueKind kind,
|
|||
|
||||
|
||||
struct RunningResponse {
|
||||
kiwix::Entry entry;
|
||||
zim::Item item;
|
||||
int range_start;
|
||||
|
||||
RunningResponse(kiwix::Entry entry,
|
||||
RunningResponse(zim::Item item,
|
||||
int range_start) :
|
||||
entry(entry),
|
||||
item(item),
|
||||
range_start(range_start)
|
||||
{}
|
||||
};
|
||||
|
||||
static ssize_t callback_reader_from_entry(void* cls,
|
||||
static ssize_t callback_reader_from_item(void* cls,
|
||||
uint64_t pos,
|
||||
char* buf,
|
||||
size_t max)
|
||||
|
@ -150,13 +150,13 @@ static ssize_t callback_reader_from_entry(void* cls,
|
|||
|
||||
size_t max_size_to_set = min<size_t>(
|
||||
max,
|
||||
response->entry.getSize() - pos - response->range_start);
|
||||
response->item.getSize() - pos - response->range_start);
|
||||
|
||||
if (max_size_to_set <= 0) {
|
||||
return MHD_CONTENT_READER_END_WITH_ERROR;
|
||||
}
|
||||
|
||||
zim::Blob blob = response->entry.getBlob(response->range_start+pos, max_size_to_set);
|
||||
zim::Blob blob = response->item.getData(response->range_start+pos, max_size_to_set);
|
||||
memcpy(buf, blob.data(), max_size_to_set);
|
||||
return max_size_to_set;
|
||||
}
|
||||
|
@ -178,8 +178,6 @@ void print_response_info(int retCode, MHD_Response* response)
|
|||
}
|
||||
|
||||
|
||||
|
||||
|
||||
void ContentResponse::introduce_taskbar()
|
||||
{
|
||||
kainjow::mustache::data data;
|
||||
|
@ -342,9 +340,9 @@ std::unique_ptr<ContentResponse> ContentResponse::build(const InternalServer& se
|
|||
return ContentResponse::build(server, content, mimetype);
|
||||
}
|
||||
|
||||
EntryResponse::EntryResponse(bool verbose, const Entry& entry, const std::string& mimetype, const ByteRange& byterange) :
|
||||
ItemResponse::ItemResponse(bool verbose, const zim::Item& item, const std::string& mimetype, const ByteRange& byterange) :
|
||||
Response(verbose),
|
||||
m_entry(entry),
|
||||
m_item(item),
|
||||
m_mimeType(mimetype)
|
||||
{
|
||||
m_byteRange = byterange;
|
||||
|
@ -352,48 +350,46 @@ EntryResponse::EntryResponse(bool verbose, const Entry& entry, const std::string
|
|||
add_header(MHD_HTTP_HEADER_CONTENT_TYPE, m_mimeType);
|
||||
}
|
||||
|
||||
std::unique_ptr<Response> EntryResponse::build(const InternalServer& server, const RequestContext& request, const Entry& entry)
|
||||
std::unique_ptr<Response> ItemResponse::build(const InternalServer& server, const RequestContext& request, const zim::Item& item)
|
||||
{
|
||||
const std::string mimetype = get_mime_type(entry);
|
||||
auto byteRange = request.get_range().resolve(entry.getSize());
|
||||
const std::string mimetype = get_mime_type(item);
|
||||
auto byteRange = request.get_range().resolve(item.getSize());
|
||||
const bool noRange = byteRange.kind() == ByteRange::RESOLVED_FULL_CONTENT;
|
||||
if (noRange && is_compressible_mime_type(mimetype)) {
|
||||
// Return a contentResponse
|
||||
zim::Blob raw_content = entry.getBlob();
|
||||
const std::string content = string(raw_content.data(), raw_content.size());
|
||||
auto response = ContentResponse::build(server, content, mimetype);
|
||||
auto response = ContentResponse::build(server, item.getData(), mimetype);
|
||||
response->set_cacheable();
|
||||
response->m_byteRange = byteRange;
|
||||
return std::move(response);
|
||||
}
|
||||
|
||||
if (byteRange.kind() == ByteRange::RESOLVED_UNSATISFIABLE) {
|
||||
auto response = Response::build_416(server, entry.getSize());
|
||||
auto response = Response::build_416(server, item.getSize());
|
||||
response->set_cacheable();
|
||||
return response;
|
||||
}
|
||||
|
||||
return std::unique_ptr<Response>(new EntryResponse(
|
||||
return std::unique_ptr<Response>(new ItemResponse(
|
||||
server.m_verbose.load(),
|
||||
entry,
|
||||
item,
|
||||
mimetype,
|
||||
byteRange));
|
||||
}
|
||||
|
||||
MHD_Response*
|
||||
EntryResponse::create_mhd_response(const RequestContext& request)
|
||||
ItemResponse::create_mhd_response(const RequestContext& request)
|
||||
{
|
||||
const auto content_length = m_byteRange.length();
|
||||
MHD_Response* response = MHD_create_response_from_callback(content_length,
|
||||
16384,
|
||||
callback_reader_from_entry,
|
||||
new RunningResponse(m_entry, m_byteRange.first()),
|
||||
callback_reader_from_item,
|
||||
new RunningResponse(m_item, m_byteRange.first()),
|
||||
callback_free_response);
|
||||
MHD_add_response_header(response, MHD_HTTP_HEADER_ACCEPT_RANGES, "bytes");
|
||||
if ( m_byteRange.kind() == ByteRange::RESOLVED_PARTIAL_CONTENT ) {
|
||||
std::ostringstream oss;
|
||||
oss << "bytes " << m_byteRange.first() << "-" << m_byteRange.last()
|
||||
<< "/" << m_entry.getSize();
|
||||
<< "/" << m_item.getSize();
|
||||
|
||||
MHD_add_response_header(response,
|
||||
MHD_HTTP_HEADER_CONTENT_RANGE, oss.str().c_str());
|
||||
|
|
|
@ -72,7 +72,7 @@ class Response {
|
|||
ETag m_etag;
|
||||
std::map<std::string, std::string> m_customHeaders;
|
||||
|
||||
friend class EntryResponse; // temporary to allow the builder to change m_mode
|
||||
friend class ItemResponse;
|
||||
};
|
||||
|
||||
|
||||
|
@ -104,15 +104,15 @@ class ContentResponse : public Response {
|
|||
std::string m_bookTitle;
|
||||
};
|
||||
|
||||
class EntryResponse : public Response {
|
||||
class ItemResponse : public Response {
|
||||
public:
|
||||
EntryResponse(bool verbose, const Entry& entry, const std::string& mimetype, const ByteRange& byterange);
|
||||
static std::unique_ptr<Response> build(const InternalServer& server, const RequestContext& request, const Entry& entry);
|
||||
ItemResponse(bool verbose, const zim::Item& item, const std::string& mimetype, const ByteRange& byterange);
|
||||
static std::unique_ptr<Response> build(const InternalServer& server, const RequestContext& request, const zim::Item& item);
|
||||
|
||||
private:
|
||||
MHD_Response* create_mhd_response(const RequestContext& request);
|
||||
|
||||
Entry m_entry;
|
||||
zim::Item m_item;
|
||||
std::string m_mimeType;
|
||||
};
|
||||
|
||||
|
|
|
@ -20,7 +20,6 @@
|
|||
|
||||
|
||||
#include <jni.h>
|
||||
#include <zim/file.h>
|
||||
#include <exception>
|
||||
#include "org_kiwix_kiwixlib_JNIKiwixReader.h"
|
||||
|
||||
|
|
|
@ -19,7 +19,6 @@
|
|||
*/
|
||||
|
||||
|
||||
#include <zim/file.h>
|
||||
#include "org_kiwix_kiwixlib_JNIKiwixSearcher.h"
|
||||
#include "org_kiwix_kiwixlib_JNIKiwixSearcher_Result.h"
|
||||
|
||||
|
|
|
@ -20,7 +20,6 @@
|
|||
|
||||
|
||||
#include <jni.h>
|
||||
#include <zim/file.h>
|
||||
#include "org_kiwix_kiwixlib_JNIKiwixServer.h"
|
||||
|
||||
#include "tools/base64.h"
|
||||
|
|
|
@ -19,7 +19,6 @@
|
|||
|
||||
|
||||
#include <jni.h>
|
||||
#include <zim/file.h>
|
||||
#include "org_kiwix_kiwixlib_Manager.h"
|
||||
|
||||
#include "manager.h"
|
||||
|
|
|
@ -24,7 +24,7 @@
|
|||
#include <zim/zim.h>
|
||||
|
||||
namespace kiwix {
|
||||
using CounterType = std::map<const std::string, zim::article_index_type>;
|
||||
using CounterType = std::map<const std::string, zim::entry_index_type>;
|
||||
CounterType parseMimetypeCounter(const std::string& counterData);
|
||||
};
|
||||
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
tests = [
|
||||
'parseUrl',
|
||||
'library',
|
||||
'regex',
|
||||
'tagParsing',
|
||||
|
|
|
@ -1,67 +0,0 @@
|
|||
/*
|
||||
* Copyright (C) 2013 Tommi Maekitalo
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation; either version 2 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
|
||||
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
|
||||
* NON-INFRINGEMENT. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include <string>
|
||||
|
||||
namespace kiwix {
|
||||
bool _parseUrl(const std::string& url, char* ns, std::string& title);
|
||||
};
|
||||
|
||||
using namespace kiwix;
|
||||
|
||||
namespace
|
||||
{
|
||||
TEST(ParseUrlTest, invalid)
|
||||
{
|
||||
char ns;
|
||||
std::string title;
|
||||
|
||||
ASSERT_FALSE(_parseUrl("", &ns, title));
|
||||
ASSERT_FALSE(_parseUrl("A", &ns, title));
|
||||
ASSERT_FALSE(_parseUrl("/", &ns, title));
|
||||
ASSERT_FALSE(_parseUrl("//", &ns, title));
|
||||
ASSERT_FALSE(_parseUrl("/A", &ns, title));
|
||||
ASSERT_FALSE(_parseUrl("/A/", &ns, title));
|
||||
ASSERT_FALSE(_parseUrl("/AB", &ns, title));
|
||||
ASSERT_FALSE(_parseUrl("//A/title", &ns, title));
|
||||
}
|
||||
|
||||
TEST(ParseUrlTest, valid)
|
||||
{
|
||||
char ns;
|
||||
std::string title;
|
||||
|
||||
ASSERT_TRUE(_parseUrl("A/title", &ns, title));
|
||||
ASSERT_EQ(ns, 'A');
|
||||
ASSERT_EQ(title, "title");
|
||||
|
||||
ASSERT_TRUE(_parseUrl("/A/title", &ns, title));
|
||||
ASSERT_EQ(ns, 'A');
|
||||
ASSERT_EQ(title, "title");
|
||||
|
||||
ASSERT_TRUE(_parseUrl("A//title", &ns, title));
|
||||
ASSERT_EQ(ns, 'A');
|
||||
ASSERT_EQ(title, "/title");
|
||||
|
||||
ASSERT_TRUE(_parseUrl("/A//title", &ns, title));
|
||||
ASSERT_EQ(ns, 'A');
|
||||
ASSERT_EQ(title, "/title");
|
||||
}
|
||||
};
|
Loading…
Reference in New Issue