mirror of https://github.com/kiwix/libkiwix.git
commit
4485cc8d0f
|
@ -5,10 +5,6 @@ headers = [
|
|||
'searcher.h'
|
||||
]
|
||||
|
||||
if xapian_dep.found()
|
||||
headers += ['xapianSearcher.h']
|
||||
endif
|
||||
|
||||
install_headers(headers, subdir:'kiwix')
|
||||
|
||||
install_headers(
|
||||
|
|
|
@ -43,45 +43,45 @@ namespace kiwix {
|
|||
~Reader();
|
||||
|
||||
void reset();
|
||||
unsigned int getArticleCount();
|
||||
unsigned int getMediaCount();
|
||||
unsigned int getGlobalCount();
|
||||
string getZimFilePath();
|
||||
string getId();
|
||||
string getRandomPageUrl();
|
||||
string getFirstPageUrl();
|
||||
string getMainPageUrl();
|
||||
bool getMetatag(const string &url, string &content);
|
||||
string getTitle();
|
||||
string getDescription();
|
||||
string getLanguage();
|
||||
string getName();
|
||||
string getTags();
|
||||
string getDate();
|
||||
string getCreator();
|
||||
string getPublisher();
|
||||
string getOrigId();
|
||||
bool getFavicon(string &content, string &mimeType);
|
||||
bool getPageUrlFromTitle(const string &title, string &url);
|
||||
bool getMimeTypeByUrl(const string &url, string &mimeType);
|
||||
bool getContentByUrl(const string &url, string &content, unsigned int &contentLength, string &contentType);
|
||||
bool getContentByEncodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType, string &baseUrl);
|
||||
bool getContentByEncodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType);
|
||||
bool getContentByDecodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType, string &baseUrl);
|
||||
bool getContentByDecodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType);
|
||||
unsigned int getArticleCount() const;
|
||||
unsigned int getMediaCount() const;
|
||||
unsigned int getGlobalCount() const;
|
||||
string getZimFilePath() const;
|
||||
string getId() const;
|
||||
string getRandomPageUrl() const;
|
||||
string getFirstPageUrl() const;
|
||||
string getMainPageUrl() const;
|
||||
bool getMetatag(const string &url, string &content) const;
|
||||
string getTitle() const;
|
||||
string getDescription() const;
|
||||
string getLanguage() const;
|
||||
string getName() const;
|
||||
string getTags() const;
|
||||
string getDate() const;
|
||||
string getCreator() const;
|
||||
string getPublisher() const;
|
||||
string getOrigId() const;
|
||||
bool getFavicon(string &content, string &mimeType) const;
|
||||
bool getPageUrlFromTitle(const string &title, string &url) const;
|
||||
bool getMimeTypeByUrl(const string &url, string &mimeType) const;
|
||||
bool getContentByUrl(const string &url, string &content, unsigned int &contentLength, string &contentType) const;
|
||||
bool getContentByEncodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType, string &baseUrl) const;
|
||||
bool getContentByEncodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType) const;
|
||||
bool getContentByDecodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType, string &baseUrl) const;
|
||||
bool getContentByDecodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType) const;
|
||||
bool searchSuggestions(const string &prefix, unsigned int suggestionsCount, const bool reset = true);
|
||||
bool searchSuggestionsSmart(const string &prefix, unsigned int suggestionsCount);
|
||||
bool urlExists(const string &url);
|
||||
bool hasFulltextIndex();
|
||||
std::vector<std::string> getTitleVariants(const std::string &title);
|
||||
bool urlExists(const string &url) const;
|
||||
bool hasFulltextIndex() const;
|
||||
std::vector<std::string> getTitleVariants(const std::string &title) const;
|
||||
bool getNextSuggestion(string &title);
|
||||
bool getNextSuggestion(string &title, string &url);
|
||||
bool canCheckIntegrity();
|
||||
bool isCorrupted();
|
||||
bool parseUrl(const string &url, char *ns, string &title);
|
||||
unsigned int getFileSize();
|
||||
zim::File* getZimFileHandler();
|
||||
bool getArticleObjectByDecodedUrl(const string &url, zim::Article &article);
|
||||
bool canCheckIntegrity() const;
|
||||
bool isCorrupted() const;
|
||||
bool parseUrl(const string &url, char *ns, string &title) const;
|
||||
unsigned int getFileSize() const;
|
||||
zim::File* getZimFileHandler() const;
|
||||
bool getArticleObjectByDecodedUrl(const string &url, zim::Article &article) const;
|
||||
|
||||
protected:
|
||||
zim::File* zimFileHandler;
|
||||
|
@ -96,7 +96,7 @@ namespace kiwix {
|
|||
std::vector< std::vector<std::string> >::iterator suggestionsOffset;
|
||||
|
||||
private:
|
||||
std::map<std::string, unsigned int> parseCounterMetadata();
|
||||
std::map<const std::string, unsigned int> parseCounterMetadata() const;
|
||||
};
|
||||
|
||||
}
|
||||
|
|
|
@ -35,30 +35,31 @@
|
|||
|
||||
using namespace std;
|
||||
|
||||
class Result
|
||||
{
|
||||
public:
|
||||
virtual ~Result() {};
|
||||
virtual std::string get_url() = 0;
|
||||
virtual std::string get_title() = 0;
|
||||
virtual int get_score() = 0;
|
||||
virtual std::string get_snippet() = 0;
|
||||
virtual int get_wordCount() = 0;
|
||||
virtual int get_size() = 0;
|
||||
};
|
||||
|
||||
namespace kiwix {
|
||||
class Reader;
|
||||
class Result {
|
||||
public:
|
||||
virtual ~Result() {};
|
||||
virtual std::string get_url() = 0;
|
||||
virtual std::string get_title() = 0;
|
||||
virtual int get_score() = 0;
|
||||
virtual std::string get_snippet() = 0;
|
||||
virtual int get_wordCount() = 0;
|
||||
virtual int get_size() = 0;
|
||||
};
|
||||
|
||||
|
||||
struct SearcherInternal;
|
||||
class Searcher {
|
||||
|
||||
public:
|
||||
Searcher();
|
||||
virtual ~Searcher();
|
||||
Searcher(Reader* reader);
|
||||
~Searcher();
|
||||
|
||||
void search(std::string &search, unsigned int resultStart,
|
||||
unsigned int resultEnd, const bool verbose=false);
|
||||
virtual Result* getNextResult() = 0;
|
||||
virtual void restart_search() = 0;
|
||||
Result* getNextResult();
|
||||
void restart_search();
|
||||
unsigned int getEstimatedResultCount();
|
||||
bool setProtocolPrefix(const std::string prefix);
|
||||
bool setSearchProtocolPrefix(const std::string prefix);
|
||||
|
@ -71,10 +72,12 @@ namespace kiwix {
|
|||
|
||||
protected:
|
||||
std::string beautifyInteger(const unsigned int number);
|
||||
virtual void closeIndex() = 0;
|
||||
virtual void searchInIndex(string &search, const unsigned int resultStart,
|
||||
const unsigned int resultEnd, const bool verbose=false) = 0;
|
||||
void closeIndex() ;
|
||||
void searchInIndex(string &search, const unsigned int resultStart,
|
||||
const unsigned int resultEnd, const bool verbose=false);
|
||||
|
||||
Reader* reader;
|
||||
SearcherInternal* internal;
|
||||
std::string searchPattern;
|
||||
std::string protocolPrefix;
|
||||
std::string searchProtocolPrefix;
|
||||
|
|
10
meson.build
10
meson.build
|
@ -1,6 +1,7 @@
|
|||
project('kiwixlib', 'cpp',
|
||||
version : '0.1.0',
|
||||
license : 'GPL')
|
||||
license : 'GPL',
|
||||
default_options : ['c_std=c11', 'cpp_std=c++11'])
|
||||
|
||||
compiler = meson.get_compiler('cpp')
|
||||
find_library_in_compiler = meson.version().version_compare('>=0.31.0')
|
||||
|
@ -61,9 +62,7 @@ else
|
|||
endif
|
||||
endif
|
||||
|
||||
xapian_dep = dependency('xapian-core', required:false)
|
||||
|
||||
all_deps = [thread_dep, libicu_dep, libzim_dep, xapian_dep, pugixml_dep]
|
||||
all_deps = [thread_dep, libicu_dep, libzim_dep, pugixml_dep]
|
||||
if has_ctpp2_dep
|
||||
all_deps += [ctpp2_dep]
|
||||
endif
|
||||
|
@ -80,9 +79,6 @@ subdir('static')
|
|||
subdir('src')
|
||||
|
||||
pkg_requires = ['libzim', 'icu-i18n', 'pugixml']
|
||||
if xapian_dep.found()
|
||||
pkg_requires += ['xapian-core']
|
||||
endif
|
||||
|
||||
extra_libs = []
|
||||
extra_cflags = ''
|
||||
|
|
|
@ -9,7 +9,7 @@
|
|||
|
||||
#include "unicode/putil.h"
|
||||
#include "reader.h"
|
||||
#include "xapianSearcher.h"
|
||||
#include "searcher.h"
|
||||
#include "common/base64.h"
|
||||
|
||||
#include <android/log.h>
|
||||
|
@ -23,7 +23,7 @@
|
|||
|
||||
/* global variables */
|
||||
kiwix::Reader *reader = NULL;
|
||||
kiwix::XapianSearcher *searcher = NULL;
|
||||
kiwix::Searcher *searcher = NULL;
|
||||
|
||||
static pthread_mutex_t readerLock = PTHREAD_MUTEX_INITIALIZER;
|
||||
static pthread_mutex_t searcherLock = PTHREAD_MUTEX_INITIALIZER;
|
||||
|
@ -445,7 +445,7 @@ JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_loadFulltextIndex(JN
|
|||
searcher = NULL;
|
||||
try {
|
||||
if (searcher != NULL) delete searcher;
|
||||
searcher = new kiwix::XapianSearcher(cPath, NULL);
|
||||
searcher = new kiwix::Searcher(reader);
|
||||
} catch (...) {
|
||||
searcher = NULL;
|
||||
retVal = JNI_FALSE;
|
||||
|
@ -460,7 +460,7 @@ JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_indexedQuery
|
|||
(JNIEnv *env, jclass obj, jstring query, jint count) {
|
||||
std::string cQuery = jni2c(query, env);
|
||||
unsigned int cCount = jni2c(count);
|
||||
Result *p_result;
|
||||
kiwix::Result *p_result;
|
||||
std::string result;
|
||||
|
||||
pthread_mutex_lock(&searcherLock);
|
||||
|
|
|
@ -85,7 +85,7 @@ std::map<std::string, std::string> kiwix::getNetworkInterfaces() {
|
|||
|
||||
/* some systems have ifr_addr.sa_len and adjust the length that
|
||||
* way, but not mine. weird */
|
||||
#ifndef linux
|
||||
#ifndef __linux__
|
||||
len=IFNAMSIZ + ifreq->ifr_addr.sa_len;
|
||||
#else
|
||||
len=sizeof *ifreq;
|
||||
|
|
|
@ -8,16 +8,10 @@ kiwix_sources = [
|
|||
'common/regexTools.cpp',
|
||||
'common/stringTools.cpp',
|
||||
'common/networkTools.cpp',
|
||||
'common/otherTools.cpp',
|
||||
'xapian/htmlparse.cc',
|
||||
'xapian/myhtmlparse.cc'
|
||||
'common/otherTools.cpp'
|
||||
]
|
||||
kiwix_sources += lib_resources
|
||||
|
||||
if xapian_dep.found()
|
||||
kiwix_sources += ['xapianSearcher.cpp']
|
||||
endif
|
||||
|
||||
if get_option('android')
|
||||
subdir('android')
|
||||
endif
|
||||
|
|
274
src/reader.cpp
274
src/reader.cpp
|
@ -87,7 +87,7 @@ namespace kiwix {
|
|||
}
|
||||
}
|
||||
|
||||
zim::File* Reader::getZimFileHandler() {
|
||||
zim::File* Reader::getZimFileHandler() const {
|
||||
return this->zimFileHandler;
|
||||
}
|
||||
|
||||
|
@ -96,14 +96,14 @@ namespace kiwix {
|
|||
this->currentArticleOffset = this->firstArticleOffset;
|
||||
}
|
||||
|
||||
std::map<std::string, unsigned int> Reader::parseCounterMetadata() {
|
||||
std::map<std::string, unsigned int> counters;
|
||||
string content, mimeType, item, counterString;
|
||||
unsigned int contentLength, counter;
|
||||
string counterUrl = "/M/Counter";
|
||||
std::map<const std::string, unsigned int> Reader::parseCounterMetadata() const {
|
||||
std::map<const std::string, unsigned int> counters;
|
||||
string mimeType, item, counterString;
|
||||
unsigned int counter;
|
||||
|
||||
this->getContentByUrl(counterUrl, content, contentLength, mimeType);
|
||||
stringstream ssContent(content);
|
||||
zim::Article article = this->zimFileHandler->getArticle('M',"Counter");
|
||||
|
||||
stringstream ssContent(article.getData());
|
||||
|
||||
while(getline(ssContent, item, ';')) {
|
||||
stringstream ssItem(item);
|
||||
|
@ -119,8 +119,8 @@ namespace kiwix {
|
|||
}
|
||||
|
||||
/* Get the count of articles which can be indexed/displayed */
|
||||
unsigned int Reader::getArticleCount() {
|
||||
std::map<std::string, unsigned int> counterMap = this->parseCounterMetadata();
|
||||
unsigned int Reader::getArticleCount() const {
|
||||
std::map<const std::string, unsigned int> counterMap = this->parseCounterMetadata();
|
||||
unsigned int counter = 0;
|
||||
|
||||
if (counterMap.empty()) {
|
||||
|
@ -135,8 +135,8 @@ namespace kiwix {
|
|||
}
|
||||
|
||||
/* Get the count of medias content in the ZIM file */
|
||||
unsigned int Reader::getMediaCount() {
|
||||
std::map<std::string, unsigned int> counterMap = this->parseCounterMetadata();
|
||||
unsigned int Reader::getMediaCount() const {
|
||||
std::map<const std::string, unsigned int> counterMap = this->parseCounterMetadata();
|
||||
unsigned int counter = 0;
|
||||
|
||||
if (counterMap.empty())
|
||||
|
@ -161,43 +161,38 @@ namespace kiwix {
|
|||
}
|
||||
|
||||
/* Get the total of all items of a ZIM file, redirects included */
|
||||
unsigned int Reader::getGlobalCount() {
|
||||
unsigned int Reader::getGlobalCount() const {
|
||||
return this->zimFileHandler->getCountArticles();
|
||||
}
|
||||
|
||||
/* Return the UID of the ZIM file */
|
||||
string Reader::getId() {
|
||||
string Reader::getId() const {
|
||||
std::ostringstream s;
|
||||
s << this->zimFileHandler->getFileheader().getUuid();
|
||||
return s.str();
|
||||
}
|
||||
|
||||
/* Return a page url from a title */
|
||||
bool Reader::getPageUrlFromTitle(const string &title, string &url) {
|
||||
bool Reader::getPageUrlFromTitle(const string &title, string &url) const {
|
||||
/* Extract the content from the zim file */
|
||||
std::pair<bool, zim::File::const_iterator> resultPair = zimFileHandler->findxByTitle('A', title);
|
||||
zim::Article article = this->zimFileHandler->getArticleByTitle('A', title);
|
||||
|
||||
/* Test if the article was found */
|
||||
if (resultPair.first == true) {
|
||||
|
||||
/* Get the article */
|
||||
zim::Article article = *resultPair.second;
|
||||
|
||||
/* If redirect */
|
||||
unsigned int loopCounter = 0;
|
||||
while (article.isRedirect() && loopCounter++<42) {
|
||||
article = article.getRedirectArticle();
|
||||
}
|
||||
|
||||
url = article.getLongUrl();
|
||||
return true;
|
||||
if ( ! article.good() )
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
return false;
|
||||
unsigned int loopCounter = 0;
|
||||
while (article.isRedirect() && loopCounter++<42) {
|
||||
article = article.getRedirectArticle();
|
||||
}
|
||||
|
||||
url = article.getLongUrl();
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Return an URL from a title*/
|
||||
string Reader::getRandomPageUrl() {
|
||||
string Reader::getRandomPageUrl() const {
|
||||
zim::Article article;
|
||||
zim::size_type idx;
|
||||
std::string mainPageUrl = this->getMainPageUrl();
|
||||
|
@ -208,11 +203,11 @@ namespace kiwix {
|
|||
article = zimFileHandler->getArticle(idx);
|
||||
} while (article.getLongUrl() == mainPageUrl);
|
||||
|
||||
return article.getLongUrl().c_str();
|
||||
return article.getLongUrl();
|
||||
}
|
||||
|
||||
/* Return the welcome page URL */
|
||||
string Reader::getMainPageUrl() {
|
||||
string Reader::getMainPageUrl() const {
|
||||
string url = "";
|
||||
|
||||
if (this->zimFileHandler->getFileheader().hasMainPage()) {
|
||||
|
@ -229,7 +224,7 @@ namespace kiwix {
|
|||
return url;
|
||||
}
|
||||
|
||||
bool Reader::getFavicon(string &content, string &mimeType) {
|
||||
bool Reader::getFavicon(string &content, string &mimeType) const {
|
||||
unsigned int contentLength = 0;
|
||||
|
||||
this->getContentByUrl( "/-/favicon.png", content,
|
||||
|
@ -254,12 +249,12 @@ namespace kiwix {
|
|||
return content.empty() ? false : true;
|
||||
}
|
||||
|
||||
string Reader::getZimFilePath() {
|
||||
string Reader::getZimFilePath() const {
|
||||
return this->zimFilePath;
|
||||
}
|
||||
|
||||
/* Return a metatag value */
|
||||
bool Reader::getMetatag(const string &name, string &value) {
|
||||
bool Reader::getMetatag(const string &name, string &value) const {
|
||||
unsigned int contentLength = 0;
|
||||
string contentType = "";
|
||||
|
||||
|
@ -267,7 +262,7 @@ namespace kiwix {
|
|||
contentLength, contentType);
|
||||
}
|
||||
|
||||
string Reader::getTitle() {
|
||||
string Reader::getTitle() const {
|
||||
string value;
|
||||
this->getMetatag("Title", value);
|
||||
if (value.empty()) {
|
||||
|
@ -279,19 +274,19 @@ namespace kiwix {
|
|||
return value;
|
||||
}
|
||||
|
||||
string Reader::getName() {
|
||||
string Reader::getName() const {
|
||||
string value;
|
||||
this->getMetatag("Name", value);
|
||||
return value;
|
||||
}
|
||||
|
||||
string Reader::getTags() {
|
||||
string Reader::getTags() const {
|
||||
string value;
|
||||
this->getMetatag("Tags", value);
|
||||
return value;
|
||||
}
|
||||
|
||||
string Reader::getDescription() {
|
||||
string Reader::getDescription() const{
|
||||
string value;
|
||||
this->getMetatag("Description", value);
|
||||
|
||||
|
@ -303,31 +298,31 @@ namespace kiwix {
|
|||
return value;
|
||||
}
|
||||
|
||||
string Reader::getLanguage() {
|
||||
string Reader::getLanguage() const {
|
||||
string value;
|
||||
this->getMetatag("Language", value);
|
||||
return value;
|
||||
}
|
||||
|
||||
string Reader::getDate() {
|
||||
string Reader::getDate() const {
|
||||
string value;
|
||||
this->getMetatag("Date", value);
|
||||
return value;
|
||||
}
|
||||
|
||||
string Reader::getCreator() {
|
||||
string Reader::getCreator() const {
|
||||
string value;
|
||||
this->getMetatag("Creator", value);
|
||||
return value;
|
||||
}
|
||||
|
||||
string Reader::getPublisher() {
|
||||
string Reader::getPublisher() const {
|
||||
string value;
|
||||
this->getMetatag("Publisher", value);
|
||||
return value;
|
||||
}
|
||||
|
||||
string Reader::getOrigId() {
|
||||
string Reader::getOrigId() const {
|
||||
string value;
|
||||
this->getMetatag("startfileuid", value);
|
||||
if(value.empty())
|
||||
|
@ -355,17 +350,13 @@ namespace kiwix {
|
|||
}
|
||||
|
||||
/* Return the first page URL */
|
||||
string Reader::getFirstPageUrl() {
|
||||
string url;
|
||||
|
||||
string Reader::getFirstPageUrl() const {
|
||||
zim::size_type firstPageOffset = zimFileHandler->getNamespaceBeginOffset('A');
|
||||
zim::Article article = zimFileHandler->getArticle(firstPageOffset);
|
||||
url = article.getLongUrl();
|
||||
|
||||
return url;
|
||||
return article.getLongUrl();
|
||||
}
|
||||
|
||||
bool Reader::parseUrl(const string &url, char *ns, string &title) {
|
||||
bool Reader::parseUrl(const string &url, char *ns, string &title) const {
|
||||
/* Offset to visit the url */
|
||||
unsigned int urlLength = url.size();
|
||||
unsigned int offset = 0;
|
||||
|
@ -395,130 +386,113 @@ namespace kiwix {
|
|||
}
|
||||
|
||||
/* Return article by url */
|
||||
bool Reader::getArticleObjectByDecodedUrl(const string &url, zim::Article &article) {
|
||||
bool retVal = false;
|
||||
|
||||
if (this->zimFileHandler != NULL) {
|
||||
|
||||
/* Parse the url */
|
||||
char ns = 0;
|
||||
string titleStr;
|
||||
this->parseUrl(url, &ns, titleStr);
|
||||
|
||||
/* Main page */
|
||||
if (titleStr.empty() && ns == 0) {
|
||||
this->parseUrl(this->getMainPageUrl(), &ns, titleStr);
|
||||
}
|
||||
|
||||
/* Extract the content from the zim file */
|
||||
std::pair<bool, zim::File::const_iterator> resultPair = zimFileHandler->findx(ns, titleStr);
|
||||
|
||||
/* Test if the article was found */
|
||||
if (resultPair.first == true) {
|
||||
article = zimFileHandler->getArticle(resultPair.second.getIndex());
|
||||
retVal = true;
|
||||
}
|
||||
|
||||
bool Reader::getArticleObjectByDecodedUrl(const string &url, zim::Article &article) const {
|
||||
if (this->zimFileHandler == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return retVal;
|
||||
|
||||
/* Parse the url */
|
||||
char ns = 0;
|
||||
string urlStr;
|
||||
this->parseUrl(url, &ns, urlStr);
|
||||
|
||||
/* Main page */
|
||||
if (urlStr.empty() && ns == 0) {
|
||||
this->parseUrl(this->getMainPageUrl(), &ns, urlStr);
|
||||
}
|
||||
|
||||
/* Extract the content from the zim file */
|
||||
article = zimFileHandler->getArticle(ns, urlStr);
|
||||
return article.good();
|
||||
}
|
||||
|
||||
/* Return the mimeType without the content */
|
||||
bool Reader::getMimeTypeByUrl(const string &url, string &mimeType) {
|
||||
bool retVal = false;
|
||||
|
||||
if (this->zimFileHandler != NULL) {
|
||||
|
||||
zim::Article article;
|
||||
if (this->getArticleObjectByDecodedUrl(url, article)) {
|
||||
try {
|
||||
mimeType = string(article.getMimeType().data(), article.getMimeType().size());
|
||||
} catch (exception &e) {
|
||||
cerr << "Unable to get the mimetype for "<< url << ":" << e.what() << endl;
|
||||
mimeType = "application/octet-stream";
|
||||
}
|
||||
retVal = true;
|
||||
} else {
|
||||
mimeType = "";
|
||||
}
|
||||
|
||||
bool Reader::getMimeTypeByUrl(const string &url, string &mimeType) const {
|
||||
if (this->zimFileHandler == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return retVal;
|
||||
zim::Article article;
|
||||
if (this->getArticleObjectByDecodedUrl(url, article)) {
|
||||
try {
|
||||
mimeType = article.getMimeType();
|
||||
} catch (exception &e) {
|
||||
cerr << "Unable to get the mimetype for " << url << ":" << e.what() << endl;
|
||||
mimeType = "application/octet-stream";
|
||||
}
|
||||
return true;
|
||||
} else {
|
||||
mimeType = "";
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/* Get a content from a zim file */
|
||||
bool Reader::getContentByUrl(const string &url, string &content, unsigned int &contentLength, string &contentType) {
|
||||
bool Reader::getContentByUrl(const string &url, string &content, unsigned int &contentLength, string &contentType) const {
|
||||
return this->getContentByEncodedUrl(url, content, contentLength, contentType);
|
||||
}
|
||||
|
||||
bool Reader::getContentByEncodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType, string &baseUrl) {
|
||||
bool Reader::getContentByEncodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType, string &baseUrl) const {
|
||||
return this->getContentByDecodedUrl(kiwix::urlDecode(url), content, contentLength, contentType, baseUrl);
|
||||
}
|
||||
|
||||
bool Reader::getContentByEncodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType) {
|
||||
bool Reader::getContentByEncodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType) const {
|
||||
std::string stubRedirectUrl;
|
||||
return this->getContentByEncodedUrl(kiwix::urlDecode(url), content, contentLength, contentType, stubRedirectUrl);
|
||||
}
|
||||
|
||||
bool Reader::getContentByDecodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType) {
|
||||
bool Reader::getContentByDecodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType) const {
|
||||
std::string stubRedirectUrl;
|
||||
return this->getContentByDecodedUrl(kiwix::urlDecode(url), content, contentLength, contentType, stubRedirectUrl);
|
||||
}
|
||||
|
||||
bool Reader::getContentByDecodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType, string &baseUrl) {
|
||||
bool retVal = false;
|
||||
bool Reader::getContentByDecodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType, string &baseUrl) const {
|
||||
content="";
|
||||
contentType="";
|
||||
contentLength = 0;
|
||||
if (this->zimFileHandler != NULL) {
|
||||
|
||||
zim::Article article;
|
||||
if (this->getArticleObjectByDecodedUrl(url, article)) {
|
||||
|
||||
/* If redirect */
|
||||
unsigned int loopCounter = 0;
|
||||
while (article.isRedirect() && loopCounter++<42) {
|
||||
article = article.getRedirectArticle();
|
||||
}
|
||||
|
||||
if (loopCounter < 42) {
|
||||
/* Compute base url (might be different from the url if redirects */
|
||||
baseUrl = "/" + std::string(1, article.getNamespace()) + "/" + article.getUrl();
|
||||
|
||||
/* Get the content mime-type */
|
||||
try {
|
||||
contentType = string(article.getMimeType().data(), article.getMimeType().size());
|
||||
} catch (exception &e) {
|
||||
cerr << "Unable to get the mimetype for "<< baseUrl<< ":" << e.what() << endl;
|
||||
contentType = "application/octet-stream";
|
||||
}
|
||||
|
||||
/* Get the data */
|
||||
content = string(article.getData().data(), article.getArticleSize());
|
||||
}
|
||||
|
||||
/* Try to set a stub HTML header/footer if necesssary */
|
||||
if (contentType.find("text/html") != string::npos &&
|
||||
content.find("<body") == std::string::npos &&
|
||||
content.find("<BODY") == std::string::npos) {
|
||||
content = "<html><head><title>" + article.getTitle() + "</title><meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" /></head><body>" + content + "</body></html>";
|
||||
}
|
||||
|
||||
/* Get the data length */
|
||||
contentLength = article.getArticleSize();
|
||||
|
||||
/* Set return value */
|
||||
retVal = true;
|
||||
}
|
||||
zim::Article article;
|
||||
if ( ! this->getArticleObjectByDecodedUrl(url, article)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return retVal;
|
||||
/* If redirect */
|
||||
unsigned int loopCounter = 0;
|
||||
while (article.isRedirect() && loopCounter++<42) {
|
||||
article = article.getRedirectArticle();
|
||||
}
|
||||
|
||||
if (loopCounter < 42) {
|
||||
/* Compute base url (might be different from the url if redirects */
|
||||
baseUrl = "/" + std::string(1, article.getNamespace()) + "/" + article.getUrl();
|
||||
|
||||
/* Get the content mime-type */
|
||||
try {
|
||||
contentType = string(article.getMimeType().data(), article.getMimeType().size());
|
||||
} catch (exception &e) {
|
||||
cerr << "Unable to get the mimetype for "<< baseUrl<< ":" << e.what() << endl;
|
||||
contentType = "application/octet-stream";
|
||||
}
|
||||
|
||||
/* Get the data */
|
||||
content = string(article.getData().data(), article.getArticleSize());
|
||||
}
|
||||
|
||||
/* Try to set a stub HTML header/footer if necesssary */
|
||||
if (contentType.find("text/html") != string::npos &&
|
||||
content.find("<body") == std::string::npos &&
|
||||
content.find("<BODY") == std::string::npos) {
|
||||
content = "<html><head><title>" + article.getTitle() + "</title><meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" /></head><body>" + content + "</body></html>";
|
||||
}
|
||||
|
||||
/* Get the data length */
|
||||
contentLength = article.getArticleSize();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Check if an article exists */
|
||||
bool Reader::urlExists(const string &url) {
|
||||
bool Reader::urlExists(const string &url) const {
|
||||
char ns = 0;
|
||||
string titleStr;
|
||||
this->parseUrl(url, &ns, titleStr);
|
||||
|
@ -528,7 +502,7 @@ namespace kiwix {
|
|||
}
|
||||
|
||||
/* Does the ZIM file has a fulltext index */
|
||||
bool Reader::hasFulltextIndex() {
|
||||
bool Reader::hasFulltextIndex() const {
|
||||
return this->urlExists("/Z/fulltextIndex/xapian");
|
||||
}
|
||||
|
||||
|
@ -604,7 +578,7 @@ namespace kiwix {
|
|||
return retVal;
|
||||
}
|
||||
|
||||
std::vector<std::string> Reader::getTitleVariants(const std::string &title) {
|
||||
std::vector<std::string> Reader::getTitleVariants(const std::string &title) const {
|
||||
std::vector<std::string> variants;
|
||||
variants.push_back(title);
|
||||
variants.push_back(kiwix::ucFirst(title));
|
||||
|
@ -660,12 +634,12 @@ namespace kiwix {
|
|||
}
|
||||
|
||||
/* Check if the file has as checksum */
|
||||
bool Reader::canCheckIntegrity() {
|
||||
bool Reader::canCheckIntegrity() const {
|
||||
return this->zimFileHandler->getChecksum() != "";
|
||||
}
|
||||
|
||||
/* Return true if corrupted, false otherwise */
|
||||
bool Reader::isCorrupted() {
|
||||
bool Reader::isCorrupted() const {
|
||||
try {
|
||||
if (this->zimFileHandler->verify() == true)
|
||||
return false;
|
||||
|
@ -678,7 +652,7 @@ namespace kiwix {
|
|||
}
|
||||
|
||||
/* Return the file size, works also for splitted files */
|
||||
unsigned int Reader::getFileSize() {
|
||||
unsigned int Reader::getFileSize() const {
|
||||
zim::File *file = this->getZimFileHandler();
|
||||
zim::offset_type size = 0;
|
||||
|
||||
|
|
|
@ -18,8 +18,11 @@
|
|||
*/
|
||||
|
||||
#include "searcher.h"
|
||||
#include "reader.h"
|
||||
#include "kiwixlib-resources.h"
|
||||
|
||||
#include <zim/search.h>
|
||||
|
||||
#ifdef ENABLE_CTPP2
|
||||
#include <ctpp2/CDT.hpp>
|
||||
#include <ctpp2/CTPP2FileLogger.hpp>
|
||||
|
@ -32,8 +35,39 @@ using namespace CTPP;
|
|||
|
||||
namespace kiwix {
|
||||
|
||||
class _Result : public Result {
|
||||
public:
|
||||
_Result(Searcher* searcher, zim::Search::iterator& iterator);
|
||||
virtual ~_Result() {};
|
||||
|
||||
virtual std::string get_url();
|
||||
virtual std::string get_title();
|
||||
virtual int get_score();
|
||||
virtual std::string get_snippet();
|
||||
virtual int get_wordCount();
|
||||
virtual int get_size();
|
||||
|
||||
private:
|
||||
Searcher* searcher;
|
||||
zim::Search::iterator iterator;
|
||||
};
|
||||
|
||||
struct SearcherInternal {
|
||||
const zim::Search *_search;
|
||||
zim::Search::iterator current_iterator;
|
||||
|
||||
SearcherInternal() : _search(NULL) {}
|
||||
~SearcherInternal() {
|
||||
if ( _search != NULL )
|
||||
delete _search;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
/* Constructor */
|
||||
Searcher::Searcher() :
|
||||
Searcher::Searcher(Reader* reader) :
|
||||
reader(reader),
|
||||
internal(new SearcherInternal()),
|
||||
searchPattern(""),
|
||||
protocolPrefix("zim://"),
|
||||
searchProtocolPrefix("search://?"),
|
||||
|
@ -47,7 +81,9 @@ namespace kiwix {
|
|||
}
|
||||
|
||||
/* Destructor */
|
||||
Searcher::~Searcher() {}
|
||||
Searcher::~Searcher() {
|
||||
delete internal;
|
||||
}
|
||||
|
||||
/* Search strings in the database */
|
||||
void Searcher::search(std::string &search, unsigned int resultStart,
|
||||
|
@ -80,12 +116,28 @@ namespace kiwix {
|
|||
this->resultStart = resultStart;
|
||||
this->resultEnd = resultEnd;
|
||||
string unaccentedSearch = removeAccents(search);
|
||||
searchInIndex(unaccentedSearch, resultStart, resultEnd, verbose);
|
||||
internal->_search = this->reader->getZimFileHandler()->search(unaccentedSearch, resultStart, resultEnd);
|
||||
internal->current_iterator = internal->_search->begin();
|
||||
this->estimatedResultCount = internal->_search->get_matches_estimated();
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
void Searcher::restart_search() {
|
||||
internal->current_iterator = internal->_search->begin();
|
||||
}
|
||||
|
||||
Result* Searcher::getNextResult() {
|
||||
if (internal->current_iterator != internal->_search->end()) {
|
||||
Result* result = new _Result(this, internal->current_iterator);
|
||||
internal->current_iterator++;
|
||||
return result;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
/* Reset the results */
|
||||
void Searcher::reset() {
|
||||
this->estimatedResultCount = 0;
|
||||
|
@ -112,6 +164,36 @@ namespace kiwix {
|
|||
this->contentHumanReadableId = contentHumanReadableId;
|
||||
}
|
||||
|
||||
_Result::_Result(Searcher* searcher, zim::Search::iterator& iterator):
|
||||
searcher(searcher),
|
||||
iterator(iterator)
|
||||
{
|
||||
}
|
||||
|
||||
std::string _Result::get_url() {
|
||||
return iterator.get_url();
|
||||
}
|
||||
|
||||
std::string _Result::get_title() {
|
||||
return iterator.get_title();
|
||||
}
|
||||
|
||||
int _Result::get_score() {
|
||||
return iterator.get_score();
|
||||
}
|
||||
|
||||
std::string _Result::get_snippet() {
|
||||
return iterator.get_snippet();
|
||||
}
|
||||
|
||||
int _Result::get_size() {
|
||||
return iterator.get_size();
|
||||
}
|
||||
|
||||
int _Result::get_wordCount() {
|
||||
return iterator.get_wordCount();
|
||||
}
|
||||
|
||||
#ifdef ENABLE_CTPP2
|
||||
|
||||
string Searcher::getHtml() {
|
||||
|
|
Loading…
Reference in New Issue