Format all the code using clang-format.

Add a script `format_code.sh` to easily format the code.
This commit is contained in:
Matthieu Gautier 2017-07-05 15:21:57 +02:00
parent a205ff00c8
commit f76e9d2dbf
28 changed files with 2647 additions and 2190 deletions

12
.clang-format Normal file
View File

@ -0,0 +1,12 @@
# clang-format configuration for this project's sources.
# format_code.sh applies it through `clang-format -style=file`.

# Start from the Google preset; every option below overrides it.
BasedOnStyle: Google
# Do not pack several call arguments / declaration parameters per line:
# when a list does not fit on one line, each item gets its own line.
BinPackArguments: false
BinPackParameters: false
# When an expression must wrap, break before the operator so the
# continuation line starts with it.
BreakBeforeBinaryOperators: All
# Opening brace on its own line for namespaces, classes and function
# definitions; control statements keep the brace on the same line.
BreakBeforeBraces: Linux
# Do not infer `*` / `&` placement from the file being formatted; use the
# alignment configured by the style instead.
DerivePointerAlignment: false
SpacesInContainerLiterals: false
# Format as C++11 (e.g. nested template arguments close with `>>`).
Standard: Cpp11
# Only functions defined inside a class body may stay on a single line.
AllowShortFunctionsOnASingleLine: Inline
# Never collapse an `if` or a loop together with its body onto one line.
AllowShortIfStatementsOnASingleLine: false
AllowShortLoopsOnASingleLine: false

36
format_code.sh Executable file
View File

@ -0,0 +1,36 @@
#!/usr/bin/env bash
# Reformat the project's sources in place with clang-format.
#
# The style is read from the .clang-format file (-style=file).
# The paths below are relative, so run this script from the repository root.
#
# `env` is used to locate bash because /usr/bin/bash does not exist on every
# system (bash is frequently installed as /bin/bash or elsewhere on PATH).

# Explicit list of files kept under clang-format control.
files=(
  "include/library.h"
  "include/common/stringTools.h"
  "include/common/pathTools.h"
  "include/common/otherTools.h"
  "include/common/regexTools.h"
  "include/common/networkTools.h"
  "include/manager.h"
  "include/reader.h"
  "include/kiwix.h"
  "include/xapianSearcher.h"
  "include/searcher.h"
  "src/library.cpp"
  "src/android/kiwix.cpp"
  "src/android/org/kiwix/kiwixlib/JNIKiwixBool.java"
  "src/android/org/kiwix/kiwixlib/JNIKiwix.java"
  "src/android/org/kiwix/kiwixlib/JNIKiwixString.java"
  "src/android/org/kiwix/kiwixlib/JNIKiwixInt.java"
  "src/searcher.cpp"
  "src/common/pathTools.cpp"
  "src/common/regexTools.cpp"
  "src/common/otherTools.cpp"
  "src/common/networkTools.cpp"
  "src/common/stringTools.cpp"
  "src/xapianSearcher.cpp"
  "src/manager.cpp"
  "src/reader.cpp"
)

for file in "${files[@]}"
do
  # Print the path first so progress (and the culprit of any error) is visible.
  # Expansions are quoted so a path is never word-split or glob-expanded.
  echo "$file"
  clang-format -i -style=file "$file"
done

View File

@ -24,25 +24,26 @@
#include <winsock2.h> #include <winsock2.h>
#include <ws2tcpip.h> #include <ws2tcpip.h>
#else #else
#include <net/if.h>
#include <netdb.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <unistd.h>
#include <sys/ioctl.h> #include <sys/ioctl.h>
#include <sys/socket.h> #include <sys/socket.h>
#include <sys/types.h> #include <sys/types.h>
#include <net/if.h> #include <unistd.h>
#include <netdb.h>
#endif #endif
#include <iostream> #include <iostream>
#include <vector>
#include <string>
#include <map> #include <map>
#include <string>
#include <vector>
namespace kiwix { namespace kiwix
std::map<std::string, std::string> getNetworkInterfaces(); {
std::string getBestPublicIp(); std::map<std::string, std::string> getNetworkInterfaces();
std::string getBestPublicIp();
} }
#endif #endif

View File

@ -26,8 +26,9 @@
#include <unistd.h> #include <unistd.h>
#endif #endif
namespace kiwix { namespace kiwix
void sleep(unsigned int milliseconds); {
void sleep(unsigned int milliseconds);
} }
#endif #endif

View File

@ -20,18 +20,18 @@
#ifndef KIWIX_PATHTOOLS_H #ifndef KIWIX_PATHTOOLS_H
#define KIWIX_PATHTOOLS_H #define KIWIX_PATHTOOLS_H
#include <fcntl.h>
#include <limits.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <fstream>
#include <ios>
#include <iostream>
#include <sstream>
#include <string> #include <string>
#include <vector> #include <vector>
#include <sstream>
#include <iostream>
#include <fstream>
#include <string.h>
#include <stdio.h>
#include <sys/types.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <ios>
#include <limits.h>
#ifdef _WIN32 #ifdef _WIN32
#include <direct.h> #include <direct.h>
@ -41,20 +41,21 @@
using namespace std; using namespace std;
bool isRelativePath(const string &path); bool isRelativePath(const string& path);
string computeAbsolutePath(const string path, const string relativePath); string computeAbsolutePath(const string path, const string relativePath);
string computeRelativePath(const string path, const string absolutePath); string computeRelativePath(const string path, const string absolutePath);
string removeLastPathElement(const string path, const bool removePreSeparator = false, string removeLastPathElement(const string path,
const bool removePreSeparator = false,
const bool removePostSeparator = false); const bool removePostSeparator = false);
string appendToDirectory(const string &directoryPath, const string &filename); string appendToDirectory(const string& directoryPath, const string& filename);
unsigned int getFileSize(const string &path); unsigned int getFileSize(const string& path);
string getFileSizeAsString(const string &path); string getFileSizeAsString(const string& path);
bool fileExists(const string &path); bool fileExists(const string& path);
bool makeDirectory(const string &path); bool makeDirectory(const string& path);
bool copyFile(const string &sourcePath, const string &destPath); bool copyFile(const string& sourcePath, const string& destPath);
string getLastPathElement(const string &path); string getLastPathElement(const string& path);
string getExecutablePath(); string getExecutablePath();
string getCurrentDirectory(); string getCurrentDirectory();
bool writeTextFile(const string &path, const string &content); bool writeTextFile(const string& path, const string& content);
#endif #endif

View File

@ -22,11 +22,15 @@
#include <unicode/regex.h> #include <unicode/regex.h>
#include <unicode/ucnv.h> #include <unicode/ucnv.h>
#include <string>
#include <map> #include <map>
#include <string>
bool matchRegex(const std::string &content, const std::string &regex); bool matchRegex(const std::string& content, const std::string& regex);
std::string replaceRegex(const std::string &content, const std::string &replacement, const std::string &regex); std::string replaceRegex(const std::string& content,
std::string appendToFirstOccurence(const std::string &content, const std::string regex, const std::string &replacement); const std::string& replacement,
const std::string& regex);
std::string appendToFirstOccurence(const std::string& content,
const std::string regex,
const std::string& replacement);
#endif #endif

View File

@ -22,44 +22,46 @@
#include <unicode/unistr.h> #include <unicode/unistr.h>
#include <iostream>
#include <vector>
#include <string>
#include <fstream> #include <fstream>
#include <iostream>
#include <sstream> #include <sstream>
#include <string>
#include <vector>
#include "pathTools.h" #include "pathTools.h"
namespace kiwix { namespace kiwix
{
#ifndef __ANDROID__ #ifndef __ANDROID__
std::string beautifyInteger(const unsigned int number); std::string beautifyInteger(const unsigned int number);
std::string beautifyFileSize(const unsigned int number); std::string beautifyFileSize(const unsigned int number);
std::string urlEncode(const std::string &c); std::string urlEncode(const std::string& c);
void printStringInHexadecimal(const char *s); void printStringInHexadecimal(const char* s);
void printStringInHexadecimal(UnicodeString s); void printStringInHexadecimal(UnicodeString s);
void stringReplacement(std::string& str, const std::string& oldStr, const std::string& newStr); void stringReplacement(std::string& str,
std::string encodeDiples(const std::string& str); const std::string& oldStr,
const std::string& newStr);
std::string encodeDiples(const std::string& str);
#endif #endif
std::string removeAccents(const std::string &text); std::string removeAccents(const std::string& text);
void loadICUExternalTables(); void loadICUExternalTables();
std::string urlDecode(const std::string &c); std::string urlDecode(const std::string& c);
std::vector<std::string> split(const std::string&, const std::string&); std::vector<std::string> split(const std::string&, const std::string&);
std::vector<std::string> split(const char*, const char*); std::vector<std::string> split(const char*, const char*);
std::vector<std::string> split(const std::string&, const char*); std::vector<std::string> split(const std::string&, const char*);
std::vector<std::string> split(const char*, const std::string&); std::vector<std::string> split(const char*, const std::string&);
std::string ucAll(const std::string &word); std::string ucAll(const std::string& word);
std::string lcAll(const std::string &word); std::string lcAll(const std::string& word);
std::string ucFirst(const std::string &word); std::string ucFirst(const std::string& word);
std::string lcFirst(const std::string &word); std::string lcFirst(const std::string& word);
std::string toTitle(const std::string &word); std::string toTitle(const std::string& word);
std::string normalize(const std::string &word); std::string normalize(const std::string& word);
} }
#endif #endif

View File

@ -22,5 +22,4 @@
#include "library.h" #include "library.h"
#endif #endif

View File

@ -22,35 +22,35 @@
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string>
#include <string.h> #include <string.h>
#include <vector>
#include <stack> #include <stack>
#include <string>
#include <vector>
#include "common/stringTools.h"
#include "common/regexTools.h" #include "common/regexTools.h"
#include "common/stringTools.h"
#define KIWIX_LIBRARY_VERSION "20110515" #define KIWIX_LIBRARY_VERSION "20110515"
using namespace std; using namespace std;
namespace kiwix { namespace kiwix
{
enum supportedIndexType { UNKNOWN, XAPIAN }; enum supportedIndexType { UNKNOWN, XAPIAN };
class Book {
class Book
{
public: public:
Book(); Book();
~Book(); ~Book();
static bool sortByLastOpen(const Book &a, const Book &b); static bool sortByLastOpen(const Book& a, const Book& b);
static bool sortByTitle(const Book &a, const Book &b); static bool sortByTitle(const Book& a, const Book& b);
static bool sortBySize(const Book &a, const Book &b); static bool sortBySize(const Book& a, const Book& b);
static bool sortByDate(const Book &a, const Book &b); static bool sortByDate(const Book& a, const Book& b);
static bool sortByCreator(const Book &a, const Book &b); static bool sortByCreator(const Book& a, const Book& b);
static bool sortByPublisher(const Book &a, const Book &b); static bool sortByPublisher(const Book& a, const Book& b);
static bool sortByLanguage(const Book &a, const Book &b); static bool sortByLanguage(const Book& a, const Book& b);
string getHumanReadableIdFromPath(); string getHumanReadableIdFromPath();
string id; string id;
@ -76,18 +76,18 @@ namespace kiwix {
string size; string size;
string favicon; string favicon;
string faviconMimeType; string faviconMimeType;
}; };
class Library {
class Library
{
public: public:
Library(); Library();
~Library(); ~Library();
string version; string version;
bool addBook(const Book &book); bool addBook(const Book& book);
bool removeBookByIndex(const unsigned int bookIndex); bool removeBookByIndex(const unsigned int bookIndex);
vector <kiwix::Book> books; vector<kiwix::Book> books;
/* /*
* 'current' is the variable storing the current content/book id * 'current' is the variable storing the current content/book id
@ -100,8 +100,7 @@ namespace kiwix {
* failed. That is the reason why we need a stack here * failed. That is the reason why we need a stack here
*/ */
stack<string> current; stack<string> current;
}; };
} }
#endif #endif

View File

@ -20,54 +20,69 @@
#ifndef KIWIX_MANAGER_H #ifndef KIWIX_MANAGER_H
#define KIWIX_MANAGER_H #define KIWIX_MANAGER_H
#include <string>
#include <sstream>
#include <time.h> #include <time.h>
#include <sstream>
#include <string>
#include <pugixml.hpp> #include <pugixml.hpp>
#include "common/base64.h" #include "common/base64.h"
#include "common/regexTools.h"
#include "common/pathTools.h" #include "common/pathTools.h"
#include "common/regexTools.h"
#include "library.h" #include "library.h"
#include "reader.h" #include "reader.h"
using namespace std; using namespace std;
namespace kiwix { namespace kiwix
{
enum supportedListMode { LASTOPEN, REMOTE, LOCAL }; enum supportedListMode { LASTOPEN, REMOTE, LOCAL };
enum supportedListSortBy { TITLE, SIZE, DATE, CREATOR, PUBLISHER }; enum supportedListSortBy { TITLE, SIZE, DATE, CREATOR, PUBLISHER };
class Manager {
class Manager
{
public: public:
Manager(); Manager();
~Manager(); ~Manager();
bool readFile(const string path, const bool readOnly = true); bool readFile(const string path, const bool readOnly = true);
bool readFile(const string nativePath, const string UTF8Path, const bool readOnly = true); bool readFile(const string nativePath,
bool readXml(const string xml, const bool readOnly = true, const string libraryPath = ""); const string UTF8Path,
const bool readOnly = true);
bool readXml(const string xml,
const bool readOnly = true,
const string libraryPath = "");
bool writeFile(const string path); bool writeFile(const string path);
bool removeBookByIndex(const unsigned int bookIndex); bool removeBookByIndex(const unsigned int bookIndex);
bool removeBookById(const string id); bool removeBookById(const string id);
bool setCurrentBookId(const string id); bool setCurrentBookId(const string id);
string getCurrentBookId(); string getCurrentBookId();
bool setBookIndex(const string id, const string path, const supportedIndexType type); bool setBookIndex(const string id,
const string path,
const supportedIndexType type);
bool setBookIndex(const string id, const string path); bool setBookIndex(const string id, const string path);
bool setBookPath(const string id, const string path); bool setBookPath(const string id, const string path);
string addBookFromPathAndGetId(const string pathToOpen, const string pathToSave = "", const string url = "", string addBookFromPathAndGetId(const string pathToOpen,
const string pathToSave = "",
const string url = "",
const bool checkMetaData = false); const bool checkMetaData = false);
bool addBookFromPath(const string pathToOpen, const string pathToSave = "", const string url = "", bool addBookFromPath(const string pathToOpen,
const string pathToSave = "",
const string url = "",
const bool checkMetaData = false); const bool checkMetaData = false);
Library cloneLibrary(); Library cloneLibrary();
bool getBookById(const string id, Book &book); bool getBookById(const string id, Book& book);
bool getCurrentBook(Book &book); bool getCurrentBook(Book& book);
unsigned int getBookCount(const bool localBooks, const bool remoteBooks); unsigned int getBookCount(const bool localBooks, const bool remoteBooks);
bool updateBookLastOpenDateById(const string id); bool updateBookLastOpenDateById(const string id);
void removeBookPaths(); void removeBookPaths();
bool listBooks(const supportedListMode mode, const supportedListSortBy sortBy, const unsigned int maxSize, bool listBooks(const supportedListMode mode,
const string language, const string creator, const string publisher, const string search); const supportedListSortBy sortBy,
const unsigned int maxSize,
const string language,
const string creator,
const string publisher,
const string search);
vector<string> getBooksLanguages(); vector<string> getBooksLanguages();
vector<string> getBooksCreators(); vector<string> getBooksCreators();
vector<string> getBooksPublishers(); vector<string> getBooksPublishers();
@ -80,13 +95,14 @@ namespace kiwix {
protected: protected:
kiwix::Library library; kiwix::Library library;
bool readBookFromPath(const string path, Book *book = NULL); bool readBookFromPath(const string path, Book* book = NULL);
bool parseXmlDom(const pugi::xml_document &doc, const bool readOnly, const string libraryPath); bool parseXmlDom(const pugi::xml_document& doc,
const bool readOnly,
const string libraryPath);
private: private:
void checkAndCleanBookPaths(Book &book, const string &libraryPath); void checkAndCleanBookPaths(Book& book, const string& libraryPath);
}; };
} }
#endif #endif

View File

@ -20,24 +20,24 @@
#ifndef KIWIX_READER_H #ifndef KIWIX_READER_H
#define KIWIX_READER_H #define KIWIX_READER_H
#include <zim/zim.h>
#include <zim/file.h>
#include <zim/article.h>
#include <zim/fileiterator.h>
#include <stdio.h> #include <stdio.h>
#include <string> #include <zim/article.h>
#include <zim/file.h>
#include <zim/fileiterator.h>
#include <zim/zim.h>
#include <exception> #include <exception>
#include <sstream>
#include <map> #include <map>
#include <sstream>
#include <string>
#include "common/pathTools.h" #include "common/pathTools.h"
#include "common/stringTools.h" #include "common/stringTools.h"
using namespace std; using namespace std;
namespace kiwix { namespace kiwix
{
class Reader { class Reader
{
public: public:
Reader(const string zimFilePath); Reader(const string zimFilePath);
~Reader(); ~Reader();
@ -51,7 +51,7 @@ namespace kiwix {
string getRandomPageUrl() const; string getRandomPageUrl() const;
string getFirstPageUrl() const; string getFirstPageUrl() const;
string getMainPageUrl() const; string getMainPageUrl() const;
bool getMetatag(const string &url, string &content) const; bool getMetatag(const string& url, string& content) const;
string getTitle() const; string getTitle() const;
string getDescription() const; string getDescription() const;
string getLanguage() const; string getLanguage() const;
@ -61,27 +61,48 @@ namespace kiwix {
string getCreator() const; string getCreator() const;
string getPublisher() const; string getPublisher() const;
string getOrigId() const; string getOrigId() const;
bool getFavicon(string &content, string &mimeType) const; bool getFavicon(string& content, string& mimeType) const;
bool getPageUrlFromTitle(const string &title, string &url) const; bool getPageUrlFromTitle(const string& title, string& url) const;
bool getMimeTypeByUrl(const string &url, string &mimeType) const; bool getMimeTypeByUrl(const string& url, string& mimeType) const;
bool getContentByUrl(const string &url, string &content, unsigned int &contentLength, string &contentType) const; bool getContentByUrl(const string& url,
bool getContentByEncodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType, string &baseUrl) const; string& content,
bool getContentByEncodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType) const; unsigned int& contentLength,
bool getContentByDecodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType, string &baseUrl) const; string& contentType) const;
bool getContentByDecodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType) const; bool getContentByEncodedUrl(const string& url,
bool searchSuggestions(const string &prefix, unsigned int suggestionsCount, const bool reset = true); string& content,
bool searchSuggestionsSmart(const string &prefix, unsigned int suggestionsCount); unsigned int& contentLength,
bool urlExists(const string &url) const; string& contentType,
string& baseUrl) const;
bool getContentByEncodedUrl(const string& url,
string& content,
unsigned int& contentLength,
string& contentType) const;
bool getContentByDecodedUrl(const string& url,
string& content,
unsigned int& contentLength,
string& contentType,
string& baseUrl) const;
bool getContentByDecodedUrl(const string& url,
string& content,
unsigned int& contentLength,
string& contentType) const;
bool searchSuggestions(const string& prefix,
unsigned int suggestionsCount,
const bool reset = true);
bool searchSuggestionsSmart(const string& prefix,
unsigned int suggestionsCount);
bool urlExists(const string& url) const;
bool hasFulltextIndex() const; bool hasFulltextIndex() const;
std::vector<std::string> getTitleVariants(const std::string &title) const; std::vector<std::string> getTitleVariants(const std::string& title) const;
bool getNextSuggestion(string &title); bool getNextSuggestion(string& title);
bool getNextSuggestion(string &title, string &url); bool getNextSuggestion(string& title, string& url);
bool canCheckIntegrity() const; bool canCheckIntegrity() const;
bool isCorrupted() const; bool isCorrupted() const;
bool parseUrl(const string &url, char *ns, string &title) const; bool parseUrl(const string& url, char* ns, string& title) const;
unsigned int getFileSize() const; unsigned int getFileSize() const;
zim::File* getZimFileHandler() const; zim::File* getZimFileHandler() const;
bool getArticleObjectByDecodedUrl(const string &url, zim::Article &article) const; bool getArticleObjectByDecodedUrl(const string& url,
zim::Article& article) const;
protected: protected:
zim::File* zimFileHandler; zim::File* zimFileHandler;
@ -92,13 +113,12 @@ namespace kiwix {
zim::size_type nsICount; zim::size_type nsICount;
std::string zimFilePath; std::string zimFilePath;
std::vector< std::vector<std::string> > suggestions; std::vector<std::vector<std::string>> suggestions;
std::vector< std::vector<std::string> >::iterator suggestionsOffset; std::vector<std::vector<std::string>>::iterator suggestionsOffset;
private: private:
std::map<const std::string, unsigned int> parseCounterMetadata() const; std::map<const std::string, unsigned int> parseCounterMetadata() const;
}; };
} }
#endif #endif

View File

@ -22,49 +22,52 @@
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string> #include <unicode/putil.h>
#include <algorithm> #include <algorithm>
#include <vector>
#include <locale>
#include <cctype> #include <cctype>
#include <locale>
#include <string>
#include <vector>
#include <vector> #include <vector>
#include "common/pathTools.h" #include "common/pathTools.h"
#include "common/stringTools.h" #include "common/stringTools.h"
#include <unicode/putil.h>
#include "kiwix_config.h" #include "kiwix_config.h"
using namespace std; using namespace std;
namespace kiwix { namespace kiwix
class Reader; {
class Result { class Reader;
class Result
{
public: public:
virtual ~Result() {}; virtual ~Result(){};
virtual std::string get_url() = 0; virtual std::string get_url() = 0;
virtual std::string get_title() = 0; virtual std::string get_title() = 0;
virtual int get_score() = 0; virtual int get_score() = 0;
virtual std::string get_snippet() = 0; virtual std::string get_snippet() = 0;
virtual int get_wordCount() = 0; virtual int get_wordCount() = 0;
virtual int get_size() = 0; virtual int get_size() = 0;
}; };
struct SearcherInternal;
class Searcher {
struct SearcherInternal;
class Searcher
{
public: public:
Searcher(const string &xapianDirectoryPath, Reader* reader); Searcher(const string& xapianDirectoryPath, Reader* reader);
~Searcher(); ~Searcher();
void search(std::string &search, unsigned int resultStart, void search(std::string& search,
unsigned int resultEnd, const bool verbose=false); unsigned int resultStart,
unsigned int resultEnd,
const bool verbose = false);
Result* getNextResult(); Result* getNextResult();
void restart_search(); void restart_search();
unsigned int getEstimatedResultCount(); unsigned int getEstimatedResultCount();
bool setProtocolPrefix(const std::string prefix); bool setProtocolPrefix(const std::string prefix);
bool setSearchProtocolPrefix(const std::string prefix); bool setSearchProtocolPrefix(const std::string prefix);
void reset(); void reset();
void setContentHumanReadableId(const string &contentHumanReadableId); void setContentHumanReadableId(const string& contentHumanReadableId);
#ifdef ENABLE_CTPP2 #ifdef ENABLE_CTPP2
string getHtml(); string getHtml();
@ -72,9 +75,11 @@ namespace kiwix {
protected: protected:
std::string beautifyInteger(const unsigned int number); std::string beautifyInteger(const unsigned int number);
void closeIndex() ; void closeIndex();
void searchInIndex(string &search, const unsigned int resultStart, void searchInIndex(string& search,
const unsigned int resultEnd, const bool verbose=false); const unsigned int resultStart,
const unsigned int resultEnd,
const bool verbose = false);
Reader* reader; Reader* reader;
SearcherInternal* internal; SearcherInternal* internal;
@ -87,8 +92,7 @@ namespace kiwix {
unsigned int resultStart; unsigned int resultStart;
unsigned int resultEnd; unsigned int resultEnd;
std::string contentHumanReadableId; std::string contentHumanReadableId;
}; };
} }
#endif #endif

View File

@ -21,22 +21,23 @@
#define KIWIX_XAPIAN_SEARCHER_H #define KIWIX_XAPIAN_SEARCHER_H
#include <xapian.h> #include <xapian.h>
#include "searcher.h"
#include "reader.h" #include "reader.h"
#include "searcher.h"
#include <map> #include <map>
#include <string> #include <string>
using namespace std; using namespace std;
namespace kiwix { namespace kiwix
{
class XapianSearcher;
class XapianSearcher; class XapianResult : public Result
{
class XapianResult : public Result {
public: public:
XapianResult(XapianSearcher* searcher, Xapian::MSetIterator& iterator); XapianResult(XapianSearcher* searcher, Xapian::MSetIterator& iterator);
virtual ~XapianResult() {}; virtual ~XapianResult(){};
virtual std::string get_url(); virtual std::string get_url();
virtual std::string get_title(); virtual std::string get_title();
@ -49,21 +50,27 @@ namespace kiwix {
XapianSearcher* searcher; XapianSearcher* searcher;
Xapian::MSetIterator iterator; Xapian::MSetIterator iterator;
Xapian::Document document; Xapian::Document document;
}; };
class NoXapianIndexInZim: public exception { class NoXapianIndexInZim : public exception
virtual const char* what() const throw() { {
virtual const char* what() const throw()
{
return "There is no fulltext index in the zim file"; return "There is no fulltext index in the zim file";
} }
}; };
class XapianSearcher { class XapianSearcher
{
friend class XapianResult; friend class XapianResult;
public: public:
XapianSearcher(const string &xapianDirectoryPath, Reader* reader); XapianSearcher(const string& xapianDirectoryPath, Reader* reader);
virtual ~XapianSearcher() {}; virtual ~XapianSearcher(){};
void searchInIndex(string &search, const unsigned int resultStart, const unsigned int resultEnd, void searchInIndex(string& search,
const bool verbose=false); const unsigned int resultStart,
const unsigned int resultEnd,
const bool verbose = false);
virtual Result* getNextResult(); virtual Result* getNextResult();
void restart_search(); void restart_search();
@ -71,7 +78,7 @@ namespace kiwix {
protected: protected:
void closeIndex(); void closeIndex();
void openIndex(const string &xapianDirectoryPath); void openIndex(const string& xapianDirectoryPath);
void setup_queryParser(); void setup_queryParser();
Reader* reader; Reader* reader;
@ -83,8 +90,7 @@ namespace kiwix {
Xapian::SimpleStopper stopper; Xapian::SimpleStopper stopper;
Xapian::MSetIterator current_result; Xapian::MSetIterator current_result;
std::map<std::string, int> valuesmap; std::map<std::string, int> valuesmap;
}; };
} }
#endif #endif

View File

@ -7,78 +7,85 @@
#include <iostream> #include <iostream>
#include <string> #include <string>
#include "unicode/putil.h" #include "common/base64.h"
#include "reader.h" #include "reader.h"
#include "searcher.h" #include "searcher.h"
#include "common/base64.h" #include "unicode/putil.h"
#include <android/log.h> #include <android/log.h>
#define LOGI(...) __android_log_print(ANDROID_LOG_INFO, "kiwix", __VA_ARGS__) #define LOGI(...) __android_log_print(ANDROID_LOG_INFO, "kiwix", __VA_ARGS__)
#include <xapian.h> #include <xapian.h>
#include <zim/zim.h>
#include <zim/file.h>
#include <zim/article.h> #include <zim/article.h>
#include <zim/error.h> #include <zim/error.h>
#include <zim/file.h>
#include <zim/zim.h>
/* global variables */ /* global variables */
kiwix::Reader *reader = NULL; kiwix::Reader* reader = NULL;
kiwix::Searcher *searcher = NULL; kiwix::Searcher* searcher = NULL;
static pthread_mutex_t readerLock = PTHREAD_MUTEX_INITIALIZER; static pthread_mutex_t readerLock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t searcherLock = PTHREAD_MUTEX_INITIALIZER; static pthread_mutex_t searcherLock = PTHREAD_MUTEX_INITIALIZER;
/* c2jni type conversion functions */ /* c2jni type conversion functions */
jboolean c2jni(const bool &val) { jboolean c2jni(const bool& val)
{
return val ? JNI_TRUE : JNI_FALSE; return val ? JNI_TRUE : JNI_FALSE;
} }
jstring c2jni(const std::string& val, JNIEnv* env)
jstring c2jni(const std::string &val, JNIEnv *env) { {
return env->NewStringUTF(val.c_str()); return env->NewStringUTF(val.c_str());
} }
jint c2jni(const int val) { jint c2jni(const int val)
{
return (jint)val; return (jint)val;
} }
jint c2jni(const unsigned val)
jint c2jni(const unsigned val) { {
return (unsigned)val; return (unsigned)val;
} }
/* jni2c type conversion functions */ /* jni2c type conversion functions */
bool jni2c(const jboolean &val) { bool jni2c(const jboolean& val)
{
return val == JNI_TRUE; return val == JNI_TRUE;
} }
std::string jni2c(const jstring& val, JNIEnv* env)
std::string jni2c(const jstring &val, JNIEnv *env) { {
return std::string(env->GetStringUTFChars(val, 0)); return std::string(env->GetStringUTFChars(val, 0));
} }
int jni2c(const jint val) { int jni2c(const jint val)
{
return (int)val; return (int)val;
} }
/* Method to deal with variable passed by reference */ /* Method to deal with variable passed by reference */
void setStringObjValue(const std::string &value, const jobject obj, JNIEnv *env) { void setStringObjValue(const std::string& value, const jobject obj, JNIEnv* env)
{
jclass objClass = env->GetObjectClass(obj); jclass objClass = env->GetObjectClass(obj);
jfieldID objFid = env->GetFieldID(objClass, "value", "Ljava/lang/String;"); jfieldID objFid = env->GetFieldID(objClass, "value", "Ljava/lang/String;");
env->SetObjectField(obj, objFid, c2jni(value, env)); env->SetObjectField(obj, objFid, c2jni(value, env));
} }
void setIntObjValue(const int value, const jobject obj, JNIEnv *env) { void setIntObjValue(const int value, const jobject obj, JNIEnv* env)
{
jclass objClass = env->GetObjectClass(obj); jclass objClass = env->GetObjectClass(obj);
jfieldID objFid = env->GetFieldID(objClass, "value", "I"); jfieldID objFid = env->GetFieldID(objClass, "value", "I");
env->SetIntField(obj, objFid, value); env->SetIntField(obj, objFid, value);
} }
void setBoolObjValue(const bool value, const jobject obj, JNIEnv *env) { void setBoolObjValue(const bool value, const jobject obj, JNIEnv* env)
{
jclass objClass = env->GetObjectClass(obj); jclass objClass = env->GetObjectClass(obj);
jfieldID objFid = env->GetFieldID(objClass, "value", "Z"); jfieldID objFid = env->GetFieldID(objClass, "value", "Z");
env->SetIntField(obj, objFid, c2jni(value)); env->SetIntField(obj, objFid, c2jni(value));
} }
/* Kiwix library functions */ /* Kiwix library functions */
JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getMainPage(JNIEnv *env, jobject obj) { JNIEXPORT jstring JNICALL
Java_org_kiwix_kiwixlib_JNIKiwix_getMainPage(JNIEnv* env, jobject obj)
{
jstring url; jstring url;
pthread_mutex_lock(&readerLock); pthread_mutex_lock(&readerLock);
@ -95,7 +102,9 @@ JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getMainPage(JNIEnv *e
return url; return url;
} }
JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getId(JNIEnv *env, jobject obj) { JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getId(JNIEnv* env,
jobject obj)
{
jstring id; jstring id;
pthread_mutex_lock(&readerLock); pthread_mutex_lock(&readerLock);
@ -112,7 +121,9 @@ JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getId(JNIEnv *env, jo
return id; return id;
} }
JNIEXPORT jint JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getFileSize(JNIEnv *env, jobject obj) { JNIEXPORT jint JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getFileSize(JNIEnv* env,
jobject obj)
{
jint size; jint size;
pthread_mutex_lock(&readerLock); pthread_mutex_lock(&readerLock);
@ -129,7 +140,9 @@ JNIEXPORT jint JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getFileSize(JNIEnv *env,
return size; return size;
} }
JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getCreator(JNIEnv *env, jobject obj) { JNIEXPORT jstring JNICALL
Java_org_kiwix_kiwixlib_JNIKiwix_getCreator(JNIEnv* env, jobject obj)
{
jstring creator; jstring creator;
pthread_mutex_lock(&readerLock); pthread_mutex_lock(&readerLock);
@ -146,7 +159,9 @@ JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getCreator(JNIEnv *en
return creator; return creator;
} }
JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getPublisher(JNIEnv *env, jobject obj) { JNIEXPORT jstring JNICALL
Java_org_kiwix_kiwixlib_JNIKiwix_getPublisher(JNIEnv* env, jobject obj)
{
jstring publisher; jstring publisher;
pthread_mutex_lock(&readerLock); pthread_mutex_lock(&readerLock);
@ -163,7 +178,9 @@ JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getPublisher(JNIEnv *
return publisher; return publisher;
} }
JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getName(JNIEnv *env, jobject obj) { JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getName(JNIEnv* env,
jobject obj)
{
jstring name; jstring name;
pthread_mutex_lock(&readerLock); pthread_mutex_lock(&readerLock);
@ -180,8 +197,9 @@ JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getName(JNIEnv *env,
return name; return name;
} }
JNIEXPORT jstring JNICALL
JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getFavicon(JNIEnv *env, jobject obj) { Java_org_kiwix_kiwixlib_JNIKiwix_getFavicon(JNIEnv* env, jobject obj)
{
jstring favicon; jstring favicon;
pthread_mutex_lock(&readerLock); pthread_mutex_lock(&readerLock);
@ -190,7 +208,11 @@ JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getFavicon(JNIEnv *en
std::string cContent; std::string cContent;
std::string cMime; std::string cMime;
reader->getFavicon(cContent, cMime); reader->getFavicon(cContent, cMime);
favicon = c2jni(base64_encode(reinterpret_cast<const unsigned char*>(cContent.c_str()), cContent.length()), env); favicon
= c2jni(base64_encode(
reinterpret_cast<const unsigned char*>(cContent.c_str()),
cContent.length()),
env);
} catch (...) { } catch (...) {
std::cerr << "Unable to get ZIM favicon" << std::endl; std::cerr << "Unable to get ZIM favicon" << std::endl;
} }
@ -200,7 +222,9 @@ JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getFavicon(JNIEnv *en
return favicon; return favicon;
} }
JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getDate(JNIEnv *env, jobject obj) { JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getDate(JNIEnv* env,
jobject obj)
{
jstring date; jstring date;
pthread_mutex_lock(&readerLock); pthread_mutex_lock(&readerLock);
@ -217,7 +241,9 @@ JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getDate(JNIEnv *env,
return date; return date;
} }
JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getLanguage(JNIEnv *env, jobject obj) { JNIEXPORT jstring JNICALL
Java_org_kiwix_kiwixlib_JNIKiwix_getLanguage(JNIEnv* env, jobject obj)
{
jstring language; jstring language;
pthread_mutex_lock(&readerLock); pthread_mutex_lock(&readerLock);
@ -234,7 +260,9 @@ JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getLanguage(JNIEnv *e
return language; return language;
} }
JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getMimeType(JNIEnv *env, jobject obj, jstring url) { JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getMimeType(
JNIEnv* env, jobject obj, jstring url)
{
jstring mimeType; jstring mimeType;
pthread_mutex_lock(&readerLock); pthread_mutex_lock(&readerLock);
@ -253,13 +281,17 @@ JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getMimeType(JNIEnv *e
return mimeType; return mimeType;
} }
JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_loadZIM(JNIEnv *env, jobject obj, jstring path) { JNIEXPORT jboolean JNICALL
Java_org_kiwix_kiwixlib_JNIKiwix_loadZIM(JNIEnv* env, jobject obj, jstring path)
{
jboolean retVal = JNI_TRUE; jboolean retVal = JNI_TRUE;
std::string cPath = jni2c(path, env); std::string cPath = jni2c(path, env);
pthread_mutex_lock(&readerLock); pthread_mutex_lock(&readerLock);
try { try {
if (reader != NULL) delete reader; if (reader != NULL) {
delete reader;
}
reader = new kiwix::Reader(cPath); reader = new kiwix::Reader(cPath);
} catch (...) { } catch (...) {
std::cerr << "Unable to load ZIM " << cPath << std::endl; std::cerr << "Unable to load ZIM " << cPath << std::endl;
@ -271,8 +303,9 @@ JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_loadZIM(JNIEnv *env,
return retVal; return retVal;
} }
JNIEXPORT jbyteArray JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getContent(JNIEnv *env, jobject obj, jstring url, jobject mimeTypeObj, jobject sizeObj) { JNIEXPORT jbyteArray JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getContent(
JNIEnv* env, jobject obj, jstring url, jobject mimeTypeObj, jobject sizeObj)
{
/* Default values */ /* Default values */
setStringObjValue("", mimeTypeObj, env); setStringObjValue("", mimeTypeObj, env);
setIntObjValue(0, sizeObj, env); setIntObjValue(0, sizeObj, env);
@ -289,7 +322,8 @@ JNIEXPORT jbyteArray JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getContent(JNIEnv
try { try {
if (reader->getContentByUrl(cUrl, cData, cSize, cMimeType)) { if (reader->getContentByUrl(cUrl, cData, cSize, cMimeType)) {
data = env->NewByteArray(cSize); data = env->NewByteArray(cSize);
env->SetByteArrayRegion(data, 0, cSize, reinterpret_cast<const jbyte*>(cData.c_str())); env->SetByteArrayRegion(
data, 0, cSize, reinterpret_cast<const jbyte*>(cData.c_str()));
setStringObjValue(cMimeType, mimeTypeObj, env); setStringObjValue(cMimeType, mimeTypeObj, env);
setIntObjValue(cSize, sizeObj, env); setIntObjValue(cSize, sizeObj, env);
} }
@ -302,8 +336,9 @@ JNIEXPORT jbyteArray JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getContent(JNIEnv
return data; return data;
} }
JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_searchSuggestions JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_searchSuggestions(
(JNIEnv *env, jobject obj, jstring prefix, jint count) { JNIEnv* env, jobject obj, jstring prefix, jint count)
{
jboolean retVal = JNI_FALSE; jboolean retVal = JNI_FALSE;
std::string cPrefix = jni2c(prefix, env); std::string cPrefix = jni2c(prefix, env);
unsigned int cCount = jni2c(count); unsigned int cCount = jni2c(count);
@ -316,15 +351,17 @@ JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_searchSuggestions
} }
} }
} catch (...) { } catch (...) {
std::cerr << "Unable to search suggestions for pattern " << cPrefix << std::endl; std::cerr << "Unable to search suggestions for pattern " << cPrefix
<< std::endl;
} }
pthread_mutex_unlock(&readerLock); pthread_mutex_unlock(&readerLock);
return retVal; return retVal;
} }
JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getNextSuggestion JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getNextSuggestion(
(JNIEnv *env, jobject obj, jobject titleObj) { JNIEnv* env, jobject obj, jobject titleObj)
{
jboolean retVal = JNI_FALSE; jboolean retVal = JNI_FALSE;
std::string cTitle; std::string cTitle;
@ -344,8 +381,9 @@ JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getNextSuggestion
return retVal; return retVal;
} }
JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getPageUrlFromTitle JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getPageUrlFromTitle(
(JNIEnv *env, jobject obj, jstring title, jobject urlObj) { JNIEnv* env, jobject obj, jstring title, jobject urlObj)
{
jboolean retVal = JNI_FALSE; jboolean retVal = JNI_FALSE;
std::string cTitle = jni2c(title, env); std::string cTitle = jni2c(title, env);
std::string cUrl; std::string cUrl;
@ -366,8 +404,9 @@ JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getPageUrlFromTitle
return retVal; return retVal;
} }
JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getTitle JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getTitle(
(JNIEnv *env , jobject obj, jobject titleObj) { JNIEnv* env, jobject obj, jobject titleObj)
{
jboolean retVal = JNI_FALSE; jboolean retVal = JNI_FALSE;
std::string cTitle; std::string cTitle;
@ -384,10 +423,11 @@ JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getTitle
pthread_mutex_unlock(&readerLock); pthread_mutex_unlock(&readerLock);
return retVal; return retVal;
} }
JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getDescription(JNIEnv *env, jobject obj) { JNIEXPORT jstring JNICALL
Java_org_kiwix_kiwixlib_JNIKiwix_getDescription(JNIEnv* env, jobject obj)
{
jstring description; jstring description;
pthread_mutex_lock(&readerLock); pthread_mutex_lock(&readerLock);
@ -404,8 +444,9 @@ JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getDescription(JNIEnv
return description; return description;
} }
JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getRandomPage JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getRandomPage(
(JNIEnv *env, jobject obj, jobject urlObj) { JNIEnv* env, jobject obj, jobject urlObj)
{
jboolean retVal = JNI_FALSE; jboolean retVal = JNI_FALSE;
std::string cUrl; std::string cUrl;
@ -424,8 +465,9 @@ JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getRandomPage
return retVal; return retVal;
} }
JNIEXPORT void JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_setDataDirectory JNIEXPORT void JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_setDataDirectory(
(JNIEnv *env, jobject obj, jstring dirStr) { JNIEnv* env, jobject obj, jstring dirStr)
{
std::string cPath = jni2c(dirStr, env); std::string cPath = jni2c(dirStr, env);
pthread_mutex_lock(&readerLock); pthread_mutex_lock(&readerLock);
@ -437,14 +479,18 @@ JNIEXPORT void JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_setDataDirectory
pthread_mutex_unlock(&readerLock); pthread_mutex_unlock(&readerLock);
} }
JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_loadFulltextIndex(JNIEnv *env, jobject obj, jstring path) { JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_loadFulltextIndex(
JNIEnv* env, jobject obj, jstring path)
{
jboolean retVal = JNI_TRUE; jboolean retVal = JNI_TRUE;
std::string cPath = jni2c(path, env); std::string cPath = jni2c(path, env);
pthread_mutex_lock(&searcherLock); pthread_mutex_lock(&searcherLock);
searcher = NULL; searcher = NULL;
try { try {
if (searcher != NULL) delete searcher; if (searcher != NULL) {
delete searcher;
}
searcher = new kiwix::Searcher(cPath, reader); searcher = new kiwix::Searcher(cPath, reader);
} catch (...) { } catch (...) {
searcher = NULL; searcher = NULL;
@ -456,20 +502,21 @@ JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_loadFulltextIndex(JN
return retVal; return retVal;
} }
JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_indexedQuery JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_indexedQuery(
(JNIEnv *env, jclass obj, jstring query, jint count) { JNIEnv* env, jclass obj, jstring query, jint count)
{
std::string cQuery = jni2c(query, env); std::string cQuery = jni2c(query, env);
unsigned int cCount = jni2c(count); unsigned int cCount = jni2c(count);
kiwix::Result *p_result; kiwix::Result* p_result;
std::string result; std::string result;
pthread_mutex_lock(&searcherLock); pthread_mutex_lock(&searcherLock);
try { try {
if (searcher != NULL) { if (searcher != NULL) {
searcher->search(cQuery, 0, count); searcher->search(cQuery, 0, count);
while ( (p_result = searcher->getNextResult()) && while ((p_result = searcher->getNextResult())
!(p_result->get_title().empty()) && && !(p_result->get_title().empty())
!(p_result->get_url().empty())) { && !(p_result->get_url().empty())) {
result += p_result->get_title() + "\n"; result += p_result->get_title() + "\n";
delete p_result; delete p_result;
} }
@ -481,5 +528,3 @@ JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_indexedQuery
return env->NewStringUTF(result.c_str()); return env->NewStringUTF(result.c_str());
} }

View File

@ -23,12 +23,9 @@ import org.kiwix.kiwixlib.JNIKiwixString;
import org.kiwix.kiwixlib.JNIKiwixBool; import org.kiwix.kiwixlib.JNIKiwixBool;
import org.kiwix.kiwixlib.JNIKiwixInt; import org.kiwix.kiwixlib.JNIKiwixInt;
public class JNIKiwix { public class JNIKiwix
{
static { static { System.loadLibrary("kiwix"); }
System.loadLibrary("kiwix");
}
public native String getMainPage(); public native String getMainPage();
public native String getId(); public native String getId();

View File

@ -19,7 +19,7 @@
package org.kiwix.kiwixlib; package org.kiwix.kiwixlib;
public class JNIKiwixBool { public class JNIKiwixBool
{
public boolean value; public boolean value;
} }

View File

@ -19,8 +19,7 @@
package org.kiwix.kiwixlib; package org.kiwix.kiwixlib;
public class JNIKiwixInt { public class JNIKiwixInt
{
public int value; public int value;
} }

View File

@ -19,7 +19,7 @@
package org.kiwix.kiwixlib; package org.kiwix.kiwixlib;
public class JNIKiwixString { public class JNIKiwixString
{
public String value; public String value;
} }

View File

@ -19,44 +19,54 @@
#include <common/networkTools.h> #include <common/networkTools.h>
std::map<std::string, std::string> kiwix::getNetworkInterfaces() { std::map<std::string, std::string> kiwix::getNetworkInterfaces()
{
std::map<std::string, std::string> interfaces; std::map<std::string, std::string> interfaces;
#ifdef _WIN32 #ifdef _WIN32
SOCKET sd = WSASocket(AF_INET, SOCK_DGRAM, 0, 0, 0, 0); SOCKET sd = WSASocket(AF_INET, SOCK_DGRAM, 0, 0, 0, 0);
if (sd == SOCKET_ERROR) { if (sd == SOCKET_ERROR) {
std::cerr << "Failed to get a socket. Error " << WSAGetLastError() << std::cerr << "Failed to get a socket. Error " << WSAGetLastError()
std::endl; << std::endl;
return interfaces; return interfaces;
} }
INTERFACE_INFO InterfaceList[20]; INTERFACE_INFO InterfaceList[20];
unsigned long nBytesReturned; unsigned long nBytesReturned;
if (WSAIoctl(sd, SIO_GET_INTERFACE_LIST, 0, 0, &InterfaceList, if (WSAIoctl(sd,
sizeof(InterfaceList), &nBytesReturned, 0, 0) == SOCKET_ERROR) { SIO_GET_INTERFACE_LIST,
std::cerr << "Failed calling WSAIoctl: error " << WSAGetLastError() << 0,
std::endl; 0,
&InterfaceList,
sizeof(InterfaceList),
&nBytesReturned,
0,
0)
== SOCKET_ERROR) {
std::cerr << "Failed calling WSAIoctl: error " << WSAGetLastError()
<< std::endl;
return interfaces; return interfaces;
} }
int nNumInterfaces = nBytesReturned / sizeof(INTERFACE_INFO); int nNumInterfaces = nBytesReturned / sizeof(INTERFACE_INFO);
for (int i = 0; i < nNumInterfaces; ++i) { for (int i = 0; i < nNumInterfaces; ++i) {
sockaddr_in *pAddress; sockaddr_in* pAddress;
pAddress = (sockaddr_in *) & (InterfaceList[i].iiAddress); pAddress = (sockaddr_in*)&(InterfaceList[i].iiAddress);
/* Add to the map */ /* Add to the map */
std::string interfaceName = std::string(inet_ntoa(pAddress->sin_addr)); std::string interfaceName = std::string(inet_ntoa(pAddress->sin_addr));
std::string interfaceIp = std::string(inet_ntoa(pAddress->sin_addr)); std::string interfaceIp = std::string(inet_ntoa(pAddress->sin_addr));
interfaces.insert(std::pair<std::string, std::string>(interfaceName, interfaceIp)); interfaces.insert(
std::pair<std::string, std::string>(interfaceName, interfaceIp));
} }
#else #else
/* Get Network interfaces information */ /* Get Network interfaces information */
char buf[16384]; char buf[16384];
struct ifconf ifconf; struct ifconf ifconf;
int fd = socket(PF_INET, SOCK_DGRAM, 0); /* Only IPV4 */ int fd = socket(PF_INET, SOCK_DGRAM, 0); /* Only IPV4 */
ifconf.ifc_len=sizeof buf; ifconf.ifc_len = sizeof buf;
ifconf.ifc_buf=buf; ifconf.ifc_buf = buf;
if(ioctl(fd, SIOCGIFCONF, &ifconf)!=0) { if (ioctl(fd, SIOCGIFCONF, &ifconf) != 0) {
perror("ioctl(SIOCGIFCONF)"); perror("ioctl(SIOCGIFCONF)");
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
@ -64,74 +74,87 @@ std::map<std::string, std::string> kiwix::getNetworkInterfaces() {
/* Go through each interface */ /* Go through each interface */
int i; int i;
size_t len; size_t len;
struct ifreq *ifreq; struct ifreq* ifreq;
ifreq = ifconf.ifc_req; ifreq = ifconf.ifc_req;
for (i = 0; i < ifconf.ifc_len; ) { for (i = 0; i < ifconf.ifc_len;) {
if (ifreq->ifr_addr.sa_family == AF_INET) { if (ifreq->ifr_addr.sa_family == AF_INET) {
/* Get the network interface ip */ /* Get the network interface ip */
char host[128] = { 0 }; char host[128] = {0};
const int error = getnameinfo(&(ifreq->ifr_addr), sizeof ifreq->ifr_addr, const int error = getnameinfo(&(ifreq->ifr_addr),
host, sizeof host, sizeof ifreq->ifr_addr,
0, 0, NI_NUMERICHOST); host,
sizeof host,
0,
0,
NI_NUMERICHOST);
if (!error) { if (!error) {
std::string interfaceName = std::string(ifreq->ifr_name); std::string interfaceName = std::string(ifreq->ifr_name);
std::string interfaceIp = std::string(host); std::string interfaceIp = std::string(host);
/* Add to the map */ /* Add to the map */
interfaces.insert(std::pair<std::string, std::string>(interfaceName, interfaceIp)); interfaces.insert(
std::pair<std::string, std::string>(interfaceName, interfaceIp));
} else { } else {
perror("getnameinfo()"); perror("getnameinfo()");
} }
} }
/* some systems have ifr_addr.sa_len and adjust the length that /* some systems have ifr_addr.sa_len and adjust the length that
* way, but not mine. weird */ * way, but not mine. weird */
#ifndef __linux__ #ifndef __linux__
len=IFNAMSIZ + ifreq->ifr_addr.sa_len; len = IFNAMSIZ + ifreq->ifr_addr.sa_len;
#else #else
len=sizeof *ifreq; len = sizeof *ifreq;
#endif #endif
ifreq=(struct ifreq*)((char*)ifreq+len); ifreq = (struct ifreq*)((char*)ifreq + len);
i+=len; i += len;
} }
#endif #endif
return interfaces; return interfaces;
} }
std::string kiwix::getBestPublicIp() { std::string kiwix::getBestPublicIp()
{
std::map<std::string, std::string> interfaces = kiwix::getNetworkInterfaces(); std::map<std::string, std::string> interfaces = kiwix::getNetworkInterfaces();
#ifndef _WIN32 #ifndef _WIN32
const char* const prioritizedNames[] = const char* const prioritizedNames[]
{ "eth0", "eth1", "wlan0", "wlan1", "en0", "en1" }; = {"eth0", "eth1", "wlan0", "wlan1", "en0", "en1"};
const int count = (sizeof prioritizedNames) / (sizeof prioritizedNames[0]); const int count = (sizeof prioritizedNames) / (sizeof prioritizedNames[0]);
for (int i = 0; i < count; ++i) { for (int i = 0; i < count; ++i) {
std::map<std::string, std::string>::const_iterator it = std::map<std::string, std::string>::const_iterator it
interfaces.find(prioritizedNames[i]); = interfaces.find(prioritizedNames[i]);
if (it != interfaces.end()) if (it != interfaces.end()) {
return it->second; return it->second;
} }
}
#endif #endif
for (std::map<std::string, std::string>::iterator iter = interfaces.begin(); for (std::map<std::string, std::string>::iterator iter = interfaces.begin();
iter != interfaces.end(); ++iter) { iter != interfaces.end();
++iter) {
std::string interfaceIp = iter->second; std::string interfaceIp = iter->second;
if (interfaceIp.length() >= 7 && interfaceIp.substr(0, 7) == "192.168") if (interfaceIp.length() >= 7 && interfaceIp.substr(0, 7) == "192.168") {
return interfaceIp; return interfaceIp;
} }
}
for (std::map<std::string, std::string>::iterator iter = interfaces.begin(); for (std::map<std::string, std::string>::iterator iter = interfaces.begin();
iter != interfaces.end(); ++iter) { iter != interfaces.end();
++iter) {
std::string interfaceIp = iter->second; std::string interfaceIp = iter->second;
if (interfaceIp.length() >= 7 && interfaceIp.substr(0, 7) == "172.16.") if (interfaceIp.length() >= 7 && interfaceIp.substr(0, 7) == "172.16.") {
return interfaceIp; return interfaceIp;
} }
}
for (std::map<std::string, std::string>::iterator iter = interfaces.begin(); for (std::map<std::string, std::string>::iterator iter = interfaces.begin();
iter != interfaces.end(); ++iter) { iter != interfaces.end();
++iter) {
std::string interfaceIp = iter->second; std::string interfaceIp = iter->second;
if (interfaceIp.length() >= 3 && interfaceIp.substr(0, 3) == "10.") if (interfaceIp.length() >= 3 && interfaceIp.substr(0, 3) == "10.") {
return interfaceIp; return interfaceIp;
} }
}
return "127.0.0.1"; return "127.0.0.1";
} }

View File

@ -19,7 +19,8 @@
#include <common/otherTools.h> #include <common/otherTools.h>
void kiwix::sleep(unsigned int milliseconds) { void kiwix::sleep(unsigned int milliseconds)
{
#ifdef _WIN32 #ifdef _WIN32
Sleep(milliseconds); Sleep(milliseconds);
#else #else

View File

@ -20,12 +20,12 @@
#include <common/pathTools.h> #include <common/pathTools.h>
#ifdef __APPLE__ #ifdef __APPLE__
#include <mach-o/dyld.h>
#include <limits.h> #include <limits.h>
#include <mach-o/dyld.h>
#elif _WIN32 #elif _WIN32
#include <direct.h>
#include <windows.h> #include <windows.h>
#include "shlwapi.h" #include "shlwapi.h"
#include <direct.h>
#define getcwd _getcwd // stupid MSFT "deprecation" warning #define getcwd _getcwd // stupid MSFT "deprecation" warning
#endif #endif
@ -47,7 +47,8 @@
#define PATH_MAX 1024 #define PATH_MAX 1024
#endif #endif
bool isRelativePath(const string &path) { bool isRelativePath(const string& path)
{
#ifdef _WIN32 #ifdef _WIN32
return path.empty() || path.substr(1, 2) == ":\\" ? false : true; return path.empty() || path.substr(1, 2) == ":\\" ? false : true;
#else #else
@ -55,14 +56,16 @@ bool isRelativePath(const string &path) {
#endif #endif
} }
string computeRelativePath(const string path, const string absolutePath) { string computeRelativePath(const string path, const string absolutePath)
{
std::vector<std::string> pathParts = kiwix::split(path, SEPARATOR); std::vector<std::string> pathParts = kiwix::split(path, SEPARATOR);
std::vector<std::string> absolutePathParts = kiwix::split(absolutePath, SEPARATOR); std::vector<std::string> absolutePathParts
= kiwix::split(absolutePath, SEPARATOR);
unsigned int commonCount = 0; unsigned int commonCount = 0;
while (commonCount < pathParts.size() && while (commonCount < pathParts.size()
commonCount < absolutePathParts.size() && && commonCount < absolutePathParts.size()
pathParts[commonCount] == absolutePathParts[commonCount]) { && pathParts[commonCount] == absolutePathParts[commonCount]) {
if (!pathParts[commonCount].empty()) { if (!pathParts[commonCount].empty()) {
commonCount++; commonCount++;
} }
@ -77,10 +80,10 @@ string computeRelativePath(const string path, const string absolutePath) {
} }
#endif #endif
for (unsigned int i = commonCount ; i < pathParts.size() ; i++) { for (unsigned int i = commonCount; i < pathParts.size(); i++) {
relativePath += "../"; relativePath += "../";
} }
for (unsigned int i = commonCount ; i < absolutePathParts.size() ; i++) { for (unsigned int i = commonCount; i < absolutePathParts.size(); i++) {
relativePath += absolutePathParts[i]; relativePath += absolutePathParts[i];
relativePath += i + 1 < absolutePathParts.size() ? "/" : ""; relativePath += i + 1 < absolutePathParts.size() ? "/" : "";
} }
@ -89,11 +92,12 @@ string computeRelativePath(const string path, const string absolutePath) {
} }
/* Warning: the relative path must be with slashes */ /* Warning: the relative path must be with slashes */
string computeAbsolutePath(const string path, const string relativePath) { string computeAbsolutePath(const string path, const string relativePath)
{
string absolutePath; string absolutePath;
if (path.empty()) { if (path.empty()) {
char *path=NULL; char* path = NULL;
size_t size = 0; size_t size = 0;
#ifdef _WIN32 #ifdef _WIN32
@ -104,15 +108,17 @@ string computeAbsolutePath(const string path, const string relativePath) {
absolutePath = string(path) + SEPARATOR; absolutePath = string(path) + SEPARATOR;
} else { } else {
absolutePath = path.substr(path.length() - 1, 1) == SEPARATOR ? path : path + SEPARATOR; absolutePath = path.substr(path.length() - 1, 1) == SEPARATOR
? path
: path + SEPARATOR;
} }
#if _WIN32 #if _WIN32
char *cRelativePath = _strdup(relativePath.c_str()); char* cRelativePath = _strdup(relativePath.c_str());
#else #else
char *cRelativePath = strdup(relativePath.c_str()); char* cRelativePath = strdup(relativePath.c_str());
#endif #endif
char *token = strtok(cRelativePath, "/"); char* token = strtok(cRelativePath, "/");
while (token != NULL) { while (token != NULL) {
if (string(token) == "..") { if (string(token) == "..") {
@ -121,8 +127,9 @@ string computeAbsolutePath(const string path, const string relativePath) {
} else if (strcmp(token, ".") && strcmp(token, "")) { } else if (strcmp(token, ".") && strcmp(token, "")) {
absolutePath += string(token); absolutePath += string(token);
token = strtok(NULL, "/"); token = strtok(NULL, "/");
if (token != NULL) if (token != NULL) {
absolutePath += SEPARATOR; absolutePath += SEPARATOR;
}
} else { } else {
token = strtok(NULL, "/"); token = strtok(NULL, "/");
} }
@ -131,31 +138,38 @@ string computeAbsolutePath(const string path, const string relativePath) {
return absolutePath; return absolutePath;
} }
string removeLastPathElement(const string path, const bool removePreSeparator, const bool removePostSeparator) { string removeLastPathElement(const string path,
const bool removePreSeparator,
const bool removePostSeparator)
{
string newPath = path; string newPath = path;
size_t offset = newPath.find_last_of(SEPARATOR); size_t offset = newPath.find_last_of(SEPARATOR);
if (removePreSeparator && if (removePreSeparator &&
#ifndef _WIN32 #ifndef _WIN32
offset != newPath.find_first_of(SEPARATOR) && offset != newPath.find_first_of(SEPARATOR) &&
#endif #endif
offset == newPath.length()-1) { offset == newPath.length() - 1) {
newPath = newPath.substr(0, offset); newPath = newPath.substr(0, offset);
offset = newPath.find_last_of(SEPARATOR); offset = newPath.find_last_of(SEPARATOR);
} }
newPath = removePostSeparator ? newPath.substr(0, offset) : newPath.substr(0, offset+1); newPath = removePostSeparator ? newPath.substr(0, offset)
: newPath.substr(0, offset + 1);
return newPath; return newPath;
} }
string appendToDirectory(const string &directoryPath, const string &filename) { string appendToDirectory(const string& directoryPath, const string& filename)
{
string newPath = directoryPath + SEPARATOR + filename; string newPath = directoryPath + SEPARATOR + filename;
return newPath; return newPath;
} }
string getLastPathElement(const string &path) { string getLastPathElement(const string& path)
{
return path.substr(path.find_last_of(SEPARATOR) + 1); return path.substr(path.find_last_of(SEPARATOR) + 1);
} }
unsigned int getFileSize(const string &path) { unsigned int getFileSize(const string& path)
{
#ifdef _WIN32 #ifdef _WIN32
struct _stat filestatus; struct _stat filestatus;
_stat(path.c_str(), &filestatus); _stat(path.c_str(), &filestatus);
@ -167,12 +181,15 @@ unsigned int getFileSize(const string &path) {
return filestatus.st_size / 1024; return filestatus.st_size / 1024;
} }
string getFileSizeAsString(const string &path) { string getFileSizeAsString(const string& path)
ostringstream convert; convert << getFileSize(path); {
ostringstream convert;
convert << getFileSize(path);
return convert.str(); return convert.str();
} }
bool fileExists(const string &path) { bool fileExists(const string& path)
{
#ifdef _WIN32 #ifdef _WIN32
return PathFileExists(path.c_str()); return PathFileExists(path.c_str());
#else #else
@ -187,7 +204,8 @@ bool fileExists(const string &path) {
#endif #endif
} }
bool makeDirectory(const string &path) { bool makeDirectory(const string& path)
{
#ifdef _WIN32 #ifdef _WIN32
int status = _mkdir(path.c_str()); int status = _mkdir(path.c_str());
#else #else
@ -197,7 +215,8 @@ bool makeDirectory(const string &path) {
} }
/* Try to create a link and if does not work then make a copy */ /* Try to create a link and if does not work then make a copy */
bool copyFile(const string &sourcePath, const string &destPath) { bool copyFile(const string& sourcePath, const string& destPath)
{
try { try {
#ifndef _WIN32 #ifndef _WIN32
if (link(sourcePath.c_str(), destPath.c_str()) != 0) { if (link(sourcePath.c_str(), destPath.c_str()) != 0) {
@ -208,7 +227,7 @@ bool copyFile(const string &sourcePath, const string &destPath) {
#ifndef _WIN32 #ifndef _WIN32
} }
#endif #endif
} catch (exception &e) { } catch (exception& e) {
cerr << e.what() << endl; cerr << e.what() << endl;
return false; return false;
} }
@ -216,11 +235,12 @@ bool copyFile(const string &sourcePath, const string &destPath) {
return true; return true;
} }
string getExecutablePath() { string getExecutablePath()
{
char binRootPath[PATH_MAX]; char binRootPath[PATH_MAX];
#ifdef _WIN32 #ifdef _WIN32
GetModuleFileName( NULL, binRootPath, PATH_MAX); GetModuleFileName(NULL, binRootPath, PATH_MAX);
return std::string(binRootPath); return std::string(binRootPath);
#elif __APPLE__ #elif __APPLE__
uint32_t max = (uint32_t)PATH_MAX; uint32_t max = (uint32_t)PATH_MAX;
@ -236,7 +256,8 @@ string getExecutablePath() {
return ""; return "";
} }
bool writeTextFile(const string &path, const string &content) { bool writeTextFile(const string& path, const string& content)
{
std::ofstream file; std::ofstream file;
file.open(path.c_str()); file.open(path.c_str());
file << content; file << content;
@ -244,8 +265,9 @@ bool writeTextFile(const string &path, const string &content) {
return true; return true;
} }
string getCurrentDirectory() { string getCurrentDirectory()
char* a_cwd = getcwd(NULL,0); {
char* a_cwd = getcwd(NULL, 0);
string s_cwd(a_cwd); string s_cwd(a_cwd);
free(a_cwd); free(a_cwd);
return s_cwd; return s_cwd;

View File

@ -21,8 +21,9 @@
std::map<std::string, RegexMatcher*> regexCache; std::map<std::string, RegexMatcher*> regexCache;
RegexMatcher *buildRegex(const std::string &regex) { RegexMatcher* buildRegex(const std::string& regex)
RegexMatcher *matcher; {
RegexMatcher* matcher;
std::map<std::string, RegexMatcher*>::iterator itr = regexCache.find(regex); std::map<std::string, RegexMatcher*>::iterator itr = regexCache.find(regex);
/* Regex is in cache */ /* Regex is in cache */
@ -42,22 +43,26 @@ RegexMatcher *buildRegex(const std::string &regex) {
} }
/* todo */ /* todo */
void freeRegexCache() { void freeRegexCache()
{
} }
bool matchRegex(const std::string& content, const std::string& regex)
bool matchRegex(const std::string &content, const std::string &regex) { {
ucnv_setDefaultName("UTF-8"); ucnv_setDefaultName("UTF-8");
UnicodeString ucontent = UnicodeString(content.c_str()); UnicodeString ucontent = UnicodeString(content.c_str());
RegexMatcher *matcher = buildRegex(regex); RegexMatcher* matcher = buildRegex(regex);
matcher->reset(ucontent); matcher->reset(ucontent);
return matcher->find(); return matcher->find();
} }
std::string replaceRegex(const std::string &content, const std::string &replacement, const std::string &regex) { std::string replaceRegex(const std::string& content,
const std::string& replacement,
const std::string& regex)
{
ucnv_setDefaultName("UTF-8"); ucnv_setDefaultName("UTF-8");
UnicodeString ucontent = UnicodeString(content.c_str()); UnicodeString ucontent = UnicodeString(content.c_str());
UnicodeString ureplacement = UnicodeString(replacement.c_str()); UnicodeString ureplacement = UnicodeString(replacement.c_str());
RegexMatcher *matcher = buildRegex(regex); RegexMatcher* matcher = buildRegex(regex);
matcher->reset(ucontent); matcher->reset(ucontent);
UErrorCode status = U_ZERO_ERROR; UErrorCode status = U_ZERO_ERROR;
UnicodeString uresult = matcher->replaceAll(ureplacement, status); UnicodeString uresult = matcher->replaceAll(ureplacement, status);
@ -66,11 +71,14 @@ std::string replaceRegex(const std::string &content, const std::string &replacem
return tmp; return tmp;
} }
std::string appendToFirstOccurence(const std::string &content, const std::string regex, const std::string &replacement) { std::string appendToFirstOccurence(const std::string& content,
const std::string regex,
const std::string& replacement)
{
ucnv_setDefaultName("UTF-8"); ucnv_setDefaultName("UTF-8");
UnicodeString ucontent = UnicodeString(content.c_str()); UnicodeString ucontent = UnicodeString(content.c_str());
UnicodeString ureplacement = UnicodeString(replacement.c_str()); UnicodeString ureplacement = UnicodeString(replacement.c_str());
RegexMatcher *matcher = buildRegex(regex); RegexMatcher* matcher = buildRegex(regex);
matcher->reset(ucontent); matcher->reset(ucontent);
if (matcher->find()) { if (matcher->find()) {
@ -83,4 +91,3 @@ std::string appendToFirstOccurence(const std::string &content, const std::strin
return content; return content;
} }

View File

@ -19,32 +19,36 @@
#include <common/stringTools.h> #include <common/stringTools.h>
#include <unicode/translit.h>
#include <unicode/normlzr.h> #include <unicode/normlzr.h>
#include <unicode/ustring.h>
#include <unicode/rep.h> #include <unicode/rep.h>
#include <unicode/uniset.h> #include <unicode/translit.h>
#include <unicode/ucnv.h> #include <unicode/ucnv.h>
#include <unicode/uniset.h>
#include <unicode/ustring.h>
/* Tell ICU where to find its data tables ("icudt49l.dat").
 * The path is built from the directory of the running executable. The whole
 * body is compiled only when __APPLE__ is defined; otherwise this is a no-op.
 */
void kiwix::loadICUExternalTables()
{
#ifdef __APPLE__
  const std::string icuDataPath = computeAbsolutePath(
      removeLastPathElement(getExecutablePath()), "icudt49l.dat");
  try {
    u_setDataDirectory(icuDataPath.c_str());
  } catch (exception& e) {
    /* Report the problem on stderr and carry on */
    std::cerr << e.what() << std::endl;
  }
#endif
}
std::string kiwix::removeAccents(const std::string &text) { std::string kiwix::removeAccents(const std::string& text)
{
loadICUExternalTables(); loadICUExternalTables();
ucnv_setDefaultName("UTF-8"); ucnv_setDefaultName("UTF-8");
UErrorCode status = U_ZERO_ERROR; UErrorCode status = U_ZERO_ERROR;
Transliterator *removeAccentsTrans = Transliterator::createInstance("Lower; NFD; [:M:] remove; NFC", UTRANS_FORWARD, status); Transliterator* removeAccentsTrans = Transliterator::createInstance(
"Lower; NFD; [:M:] remove; NFC", UTRANS_FORWARD, status);
UnicodeString ustring = UnicodeString(text.c_str()); UnicodeString ustring = UnicodeString(text.c_str());
removeAccentsTrans->transliterate(ustring); removeAccentsTrans->transliterate(ustring);
delete removeAccentsTrans; delete removeAccentsTrans;
@ -56,7 +60,8 @@ std::string kiwix::removeAccents(const std::string &text) {
#ifndef __ANDROID__ #ifndef __ANDROID__
/* Prepare integer for display */ /* Prepare integer for display */
std::string kiwix::beautifyInteger(const unsigned int number) { std::string kiwix::beautifyInteger(const unsigned int number)
{
std::stringstream numberStream; std::stringstream numberStream;
numberStream << number; numberStream << number;
std::string numberString = numberStream.str(); std::string numberString = numberStream.str();
@ -70,49 +75,58 @@ std::string kiwix::beautifyInteger(const unsigned int number) {
return numberString; return numberString;
} }
/* Format a size for display as "<n> GB" or "<n> MB".
 * Values above 1024*1024 are divided by 1024*1024 and labelled GB; anything
 * else is divided by 1024 and labelled MB, with a floor of "1 MB".
 * NOTE(review): the divisors suggest `number` is expressed in kilobytes -
 * confirm against the callers. */
std::string kiwix::beautifyFileSize(const unsigned int number)
{
  const unsigned int gigaDivisor = 1024 * 1024;
  if (number > gigaDivisor) {
    return kiwix::beautifyInteger(number / gigaDivisor) + " GB";
  }

  const unsigned int megas = number / 1024;
  return kiwix::beautifyInteger(megas == 0 ? 1 : megas) + " MB";
}
/* Debug helper: dump a UnicodeString on stdout, one code unit at a time.
 * Each unit is narrowed to a char; when bit 0x80 is set it is printed as a
 * number, otherwise as the character itself. std::showbase and std::hex are
 * set on std::cout and are not reset afterwards. */
void kiwix::printStringInHexadecimal(UnicodeString s)
{
  std::cout << std::showbase << std::hex;

  int index = 0;
  while (index < s.length()) {
    const char unit = (char)((s.getTerminatedBuffer())[index]);
    if ((unit & 0x80) != 0) {
      std::cout << (unit & 0xffff) << " ";
    } else {
      std::cout << unit << " ";
    }
    ++index;
  }

  std::cout << std::endl;
}
/* Debug helper: dump a NUL-terminated C string on stdout.
 * Bytes with bit 0x80 set are printed as numbers, the others as characters;
 * every byte is followed by a space. std::showbase and std::hex are set on
 * std::cout and are not reset afterwards. */
void kiwix::printStringInHexadecimal(const char* s)
{
  std::cout << std::showbase << std::hex;

  const char* cursor = s;
  while (*cursor) {
    if ((*cursor & 0x80) != 0) {
      std::cout << (*cursor & 0xffff);
    } else {
      std::cout << *cursor;
    }
    std::cout << ' ';
    ++cursor;
  }

  std::cout << std::endl;
}
/* Replace, in place, every occurrence of `oldStr` in `str` with `newStr`.
 *
 * The scan resumes right after each inserted `newStr`, so text introduced by
 * a replacement is never matched again (replacing "a" with "aa" terminates).
 *
 * An empty `oldStr` leaves `str` untouched: std::string::find() reports an
 * empty needle as found at every position, which would otherwise make the
 * loop below run forever. */
void kiwix::stringReplacement(std::string& str,
                              const std::string& oldStr,
                              const std::string& newStr)
{
  if (oldStr.empty()) {
    return;
  }

  size_t pos = 0;
  while ((pos = str.find(oldStr, pos)) != std::string::npos) {
    str.replace(pos, oldStr.length(), newStr);
    /* Skip what was just written */
    pos += newStr.length();
  }
}
/* Encode string to avoid XSS attacks */ /* Encode string to avoid XSS attacks */
std::string kiwix::encodeDiples(const std::string& str) { std::string kiwix::encodeDiples(const std::string& str)
{
std::string result = str; std::string result = str;
kiwix::stringReplacement(result, "<", "&lt;"); kiwix::stringReplacement(result, "<", "&lt;");
kiwix::stringReplacement(result, ">", "&gt;"); kiwix::stringReplacement(result, ">", "&gt;");
@ -120,39 +134,47 @@ std::string kiwix::encodeDiples(const std::string& str) {
} }
// Urlencode // Urlencode
//based on javascript encodeURIComponent() // based on javascript encodeURIComponent()
/* Return the two lowercase hexadecimal digits of the byte `dec`
 * (high nibble first), e.g. 'A' gives "41". */
std::string char2hex(char dec)
{
  static const char hexDigits[] = "0123456789abcdef";

  /* Masking first keeps both indexes within 0..15 even when char is signed */
  const int highNibble = (dec & 0xF0) >> 4;
  const int lowNibble = dec & 0x0F;

  std::string hex;
  hex.push_back(hexDigits[highNibble]);
  hex.push_back(hexDigits[lowNibble]);
  return hex;
}
std::string kiwix::urlEncode(const std::string &c) { std::string kiwix::urlEncode(const std::string& c)
std::string escaped=""; {
std::string escaped = "";
int max = c.length(); int max = c.length();
for(int i=0; i<max; i++) for (int i = 0; i < max; i++) {
{ if ((48 <= c[i] && c[i] <= 57) || // 0-9
if ( (48 <= c[i] && c[i] <= 57) ||//0-9 (65 <= c[i] && c[i] <= 90)
(65 <= c[i] && c[i] <= 90) ||//abc...xyz || // abc...xyz
(97 <= c[i] && c[i] <= 122) || //ABC...XYZ (97 <= c[i] && c[i] <= 122)
(c[i]=='~' || c[i]=='!' || c[i]=='*' || c[i]=='(' || c[i]==')' || c[i]=='\'') || // ABC...XYZ
) (c[i] == '~' || c[i] == '!' || c[i] == '*' || c[i] == '(' || c[i] == ')'
{ || c[i] == '\'')) {
escaped.append( &c[i], 1); escaped.append(&c[i], 1);
} } else {
else
{
escaped.append("%"); escaped.append("%");
escaped.append( char2hex(c[i]) );//converts char 255 to string "ff" escaped.append(char2hex(c[i])); // converts char 255 to string "ff"
} }
} }
return escaped; return escaped;
@ -160,18 +182,20 @@ std::string kiwix::urlEncode(const std::string &c) {
#endif #endif
/* Read `a` as a hexadecimal number through a string stream and return the
 * value narrowed to a char (e.g. "41" gives 'A'). */
static char charFromHex(std::string a)
{
  int value = 0;
  std::istringstream hexStream(a);
  hexStream >> std::hex >> value;
  return static_cast<char>(value);
}
std::string kiwix::urlDecode(const std::string &originalUrl) { std::string kiwix::urlDecode(const std::string& originalUrl)
{
std::string url = originalUrl; std::string url = originalUrl;
std::string::size_type pos = 0; std::string::size_type pos = 0;
while ((pos = url.find('%', pos)) != std::string::npos && while ((pos = url.find('%', pos)) != std::string::npos
pos + 2 < url.length()) { && pos + 2 < url.length()) {
url.replace(pos, 3, 1, charFromHex(url.substr(pos + 1, 2))); url.replace(pos, 3, 1, charFromHex(url.substr(pos + 1, 2)));
++pos; ++pos;
} }
@ -179,15 +203,14 @@ std::string kiwix::urlDecode(const std::string &originalUrl) {
} }
/* Split string in a token array */ /* Split string in a token array */
std::vector<std::string> kiwix::split(const std::string & str, std::vector<std::string> kiwix::split(const std::string& str,
const std::string & delims=" *-") const std::string& delims = " *-")
{ {
std::string::size_type lastPos = str.find_first_not_of(delims, 0); std::string::size_type lastPos = str.find_first_not_of(delims, 0);
std::string::size_type pos = str.find_first_of(delims, lastPos); std::string::size_type pos = str.find_first_of(delims, lastPos);
std::vector<std::string> tokens; std::vector<std::string> tokens;
while (std::string::npos != pos || std::string::npos != lastPos) while (std::string::npos != pos || std::string::npos != lastPos) {
{
tokens.push_back(str.substr(lastPos, pos - lastPos)); tokens.push_back(str.substr(lastPos, pos - lastPos));
lastPos = str.find_first_not_of(delims, pos); lastPos = str.find_first_not_of(delims, pos);
pos = str.find_first_of(delims, lastPos); pos = str.find_first_of(delims, lastPos);
@ -196,22 +219,27 @@ std::vector<std::string> kiwix::split(const std::string & str,
return tokens; return tokens;
} }
/* Convenience overload: both the text (`lhs`) and the delimiter set (`rhs`)
 * are C strings. They are copied into std::string objects and handed to the
 * std::string overload. */
std::vector<std::string> kiwix::split(const char* lhs, const char* rhs)
{
  const std::string text(lhs);
  const std::string delimiters(rhs);
  return split(text, delimiters);
}
/* Convenience overload: C string text, std::string delimiters.
 * Delegates to the (const char*, const char*) overload, so the delimiters are
 * passed on as a C string. */
std::vector<std::string> kiwix::split(const char* lhs, const std::string& rhs)
{
  const char* delimiters = rhs.c_str();
  return split(lhs, delimiters);
}
/* Convenience overload: std::string text, C string delimiters.
 * Delegates to the (const char*, const char*) overload, so the text is
 * passed on as a C string. */
std::vector<std::string> kiwix::split(const std::string& lhs, const char* rhs)
{
  const char* text = lhs.c_str();
  return split(text, rhs);
}
std::string kiwix::ucFirst (const std::string &word) { std::string kiwix::ucFirst(const std::string& word)
if (word.empty()) {
if (word.empty()) {
return ""; return "";
}
std::string result; std::string result;
@ -223,9 +251,11 @@ std::string kiwix::ucFirst (const std::string &word) {
return result; return result;
} }
std::string kiwix::ucAll (const std::string &word) { std::string kiwix::ucAll(const std::string& word)
if (word.empty()) {
if (word.empty()) {
return ""; return "";
}
std::string result; std::string result;
@ -235,9 +265,11 @@ std::string kiwix::ucAll (const std::string &word) {
return result; return result;
} }
std::string kiwix::lcFirst (const std::string &word) { std::string kiwix::lcFirst(const std::string& word)
if (word.empty()) {
if (word.empty()) {
return ""; return "";
}
std::string result; std::string result;
@ -249,9 +281,11 @@ std::string kiwix::lcFirst (const std::string &word) {
return result; return result;
} }
std::string kiwix::lcAll (const std::string &word) { std::string kiwix::lcAll(const std::string& word)
if (word.empty()) {
if (word.empty()) {
return ""; return "";
}
std::string result; std::string result;
@ -261,9 +295,11 @@ std::string kiwix::lcAll (const std::string &word) {
return result; return result;
} }
std::string kiwix::toTitle (const std::string &word) { std::string kiwix::toTitle(const std::string& word)
if (word.empty()) {
if (word.empty()) {
return ""; return "";
}
std::string result; std::string result;
@ -274,6 +310,7 @@ std::string kiwix::toTitle (const std::string &word) {
return result; return result;
} }
/* Normalise a word; the result is exactly what kiwix::lcAll() returns. */
std::string kiwix::normalize(const std::string& word)
{
  const std::string normalized = kiwix::lcAll(word);
  return normalized;
}

View File

@ -19,47 +19,54 @@
#include "library.h" #include "library.h"
namespace kiwix { namespace kiwix
{
/* Constructor: readOnly starts out false; no other member is set here */
Book::Book() : readOnly(false)
{
}
/* Destructor: nothing to do explicitly */
Book::~Book()
{
}
/* Sort functions */

/* True when a's `last` value, read as an integer with atoi(), is greater
 * than b's. */
bool Book::sortByLastOpen(const kiwix::Book& a, const kiwix::Book& b)
{
  const int lastA = atoi(a.last.c_str());
  const int lastB = atoi(b.last.c_str());
  return lastB < lastA;
}
/* True when a's title compares before b's title under strcmp(). */
bool Book::sortByTitle(const kiwix::Book& a, const kiwix::Book& b)
{
  const int order = strcmp(a.title.c_str(), b.title.c_str());
  return order < 0;
}
/* True when a's date string compares after b's date string under strcmp(). */
bool Book::sortByDate(const kiwix::Book& a, const kiwix::Book& b)
{
  const int order = strcmp(a.date.c_str(), b.date.c_str());
  return order > 0;
}
/* True when a's `size` value, read as an integer with atoi(), is smaller
 * than b's. */
bool Book::sortBySize(const kiwix::Book& a, const kiwix::Book& b)
{
  const int sizeA = atoi(a.size.c_str());
  const int sizeB = atoi(b.size.c_str());
  return sizeA < sizeB;
}
/* True when a's publisher compares before b's publisher under strcmp(). */
bool Book::sortByPublisher(const kiwix::Book& a, const kiwix::Book& b)
{
  const int order = strcmp(a.publisher.c_str(), b.publisher.c_str());
  return order < 0;
}
/* True when a's creator compares before b's creator under strcmp(). */
bool Book::sortByCreator(const kiwix::Book& a, const kiwix::Book& b)
{
  const int order = strcmp(a.creator.c_str(), b.creator.c_str());
  return order < 0;
}
/* True when a's language compares before b's language under strcmp(). */
bool Book::sortByLanguage(const kiwix::Book& a, const kiwix::Book& b)
{
  const int order = strcmp(a.language.c_str(), b.language.c_str());
  return order < 0;
}
std::string Book::getHumanReadableIdFromPath() { std::string Book::getHumanReadableIdFromPath()
{
std::string id = pathAbsolute; std::string id = pathAbsolute;
if (!id.empty()) { if (!id.empty()) {
kiwix::removeAccents(id); kiwix::removeAccents(id);
@ -75,40 +82,44 @@ namespace kiwix {
id = replaceRegex(id, "plus", "\\+"); id = replaceRegex(id, "plus", "\\+");
} }
return id; return id;
} }
/* Constructor */
Library::Library():
version(KIWIX_LIBRARY_VERSION) {
}
/* Destructor */
Library::~Library() {
}
bool Library::addBook(const Book &book) {
/* Constructor */
Library::Library() : version(KIWIX_LIBRARY_VERSION)
{
}
/* Destructor */
Library::~Library()
{
}
bool Library::addBook(const Book& book)
{
/* Try to find it */ /* Try to find it */
std::vector<kiwix::Book>::iterator itr; std::vector<kiwix::Book>::iterator itr;
for ( itr = this->books.begin(); itr != this->books.end(); ++itr ) { for (itr = this->books.begin(); itr != this->books.end(); ++itr) {
if (itr->id == book.id) { if (itr->id == book.id) {
if (!itr->readOnly) { if (!itr->readOnly) {
itr->readOnly = book.readOnly; itr->readOnly = book.readOnly;
if (itr->path.empty()) if (itr->path.empty()) {
itr->path = book.path; itr->path = book.path;
}
if (itr->pathAbsolute.empty()) if (itr->pathAbsolute.empty()) {
itr->pathAbsolute = book.pathAbsolute; itr->pathAbsolute = book.pathAbsolute;
}
if (itr->url.empty()) if (itr->url.empty()) {
itr->url = book.url; itr->url = book.url;
}
if (itr->tags.empty()) if (itr->tags.empty()) {
itr->tags = book.tags; itr->tags = book.tags;
}
if (itr->name.empty()) if (itr->name.empty()) {
itr->name = book.name; itr->name = book.name;
}
if (itr->indexPath.empty()) { if (itr->indexPath.empty()) {
itr->indexPath = book.indexPath; itr->indexPath = book.indexPath;
@ -133,11 +144,11 @@ namespace kiwix {
/* otherwise */ /* otherwise */
this->books.push_back(book); this->books.push_back(book);
return true; return true;
} }
/* Remove the book stored at position `bookIndex` in `books`.
 *
 * Returns true when a book was removed. Returns false, leaving the library
 * untouched, when `bookIndex` is not a valid position: erasing at or past
 * end() is undefined behaviour for std::vector. */
bool Library::removeBookByIndex(const unsigned int bookIndex)
{
  if (bookIndex >= books.size()) {
    return false;
  }

  books.erase(books.begin() + bookIndex);
  return true;
}
} }

View File

@ -19,18 +19,20 @@
#include "manager.h" #include "manager.h"
namespace kiwix { namespace kiwix
{
/* Constructor: the writable library path starts out empty */
Manager::Manager() : writableLibraryPath("")
{
}
/* Destructor: nothing to do explicitly */
Manager::~Manager()
{
}
bool Manager::parseXmlDom(const pugi::xml_document& doc,
bool Manager::parseXmlDom(const pugi::xml_document &doc, const bool readOnly, const string libraryPath) { const bool readOnly,
const string libraryPath)
{
pugi::xml_node libraryNode = doc.child("library"); pugi::xml_node libraryNode = doc.child("library");
if (strlen(libraryNode.attribute("current").value())) if (strlen(libraryNode.attribute("current").value()))
@ -38,15 +40,17 @@ namespace kiwix {
string libraryVersion = libraryNode.attribute("version").value(); string libraryVersion = libraryNode.attribute("version").value();
for (pugi::xml_node bookNode = libraryNode.child("book"); bookNode; bookNode = bookNode.next_sibling("book")) { for (pugi::xml_node bookNode = libraryNode.child("book"); bookNode;
bookNode = bookNode.next_sibling("book")) {
bool ok = true; bool ok = true;
kiwix::Book book; kiwix::Book book;
book.readOnly = readOnly; book.readOnly = readOnly;
book.id = bookNode.attribute("id").value(); book.id = bookNode.attribute("id").value();
book.path = bookNode.attribute("path").value(); book.path = bookNode.attribute("path").value();
book.last = (std::string(bookNode.attribute("last").value()) != "undefined" ? book.last = (std::string(bookNode.attribute("last").value()) != "undefined"
bookNode.attribute("last").value() : ""); ? bookNode.attribute("last").value()
: "");
book.indexPath = bookNode.attribute("indexPath").value(); book.indexPath = bookNode.attribute("indexPath").value();
book.indexType = XAPIAN; book.indexType = XAPIAN;
book.title = bookNode.attribute("title").value(); book.title = bookNode.attribute("title").value();
@ -69,7 +73,8 @@ namespace kiwix {
this->checkAndCleanBookPaths(book, libraryPath); this->checkAndCleanBookPaths(book, libraryPath);
/* Update the book properties with the new importer */ /* Update the book properties with the new importer */
if (libraryVersion.empty() || atoi(libraryVersion.c_str()) <= atoi(KIWIX_LIBRARY_VERSION)) { if (libraryVersion.empty()
|| atoi(libraryVersion.c_str()) <= atoi(KIWIX_LIBRARY_VERSION)) {
if (!book.path.empty()) { if (!book.path.empty()) {
ok = this->readBookFromPath(book.pathAbsolute); ok = this->readBookFromPath(book.pathAbsolute);
} }
@ -81,24 +86,32 @@ namespace kiwix {
} }
return true; return true;
} }
bool Manager::readXml(const string xml, const bool readOnly, const string libraryPath) { bool Manager::readXml(const string xml,
const bool readOnly,
const string libraryPath)
{
pugi::xml_document doc; pugi::xml_document doc;
pugi::xml_parse_result result = doc.load_buffer_inplace((void*)xml.data(), xml.size()); pugi::xml_parse_result result
= doc.load_buffer_inplace((void*)xml.data(), xml.size());
if (result) { if (result) {
this->parseXmlDom(doc, readOnly, libraryPath); this->parseXmlDom(doc, readOnly, libraryPath);
} }
return true; return true;
} }
bool Manager::readFile(const string path, const bool readOnly) { bool Manager::readFile(const string path, const bool readOnly)
{
return this->readFile(path, path, readOnly); return this->readFile(path, path, readOnly);
} }
bool Manager::readFile(const string nativePath, const string UTF8Path, const bool readOnly) { bool Manager::readFile(const string nativePath,
const string UTF8Path,
const bool readOnly)
{
bool retVal = true; bool retVal = true;
pugi::xml_document doc; pugi::xml_document doc;
pugi::xml_parse_result result = doc.load_file(nativePath.c_str()); pugi::xml_parse_result result = doc.load_file(nativePath.c_str());
@ -117,9 +130,10 @@ namespace kiwix {
} }
return retVal; return retVal;
} }
bool Manager::writeFile(const string path) { bool Manager::writeFile(const string path)
{
pugi::xml_document doc; pugi::xml_document doc;
/* Add the library node */ /* Add the library node */
@ -134,16 +148,16 @@ namespace kiwix {
/* Add each book */ /* Add each book */
std::vector<kiwix::Book>::iterator itr; std::vector<kiwix::Book>::iterator itr;
for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) { for (itr = library.books.begin(); itr != library.books.end(); ++itr) {
if (!itr->readOnly) { if (!itr->readOnly) {
this->checkAndCleanBookPaths(*itr, path); this->checkAndCleanBookPaths(*itr, path);
pugi::xml_node bookNode = libraryNode.append_child("book"); pugi::xml_node bookNode = libraryNode.append_child("book");
bookNode.append_attribute("id") = itr->id.c_str(); bookNode.append_attribute("id") = itr->id.c_str();
if (!itr->path.empty()) if (!itr->path.empty()) {
bookNode.append_attribute("path") = itr->path.c_str(); bookNode.append_attribute("path") = itr->path.c_str();
}
if (!itr->last.empty() && itr->last != "undefined") { if (!itr->last.empty() && itr->last != "undefined") {
bookNode.append_attribute("last") = itr->last.c_str(); bookNode.append_attribute("last") = itr->last.c_str();
@ -153,9 +167,10 @@ namespace kiwix {
bookNode.append_attribute("indexPath") = itr->indexPath.c_str(); bookNode.append_attribute("indexPath") = itr->indexPath.c_str();
if (!itr->indexPath.empty() || !itr->indexPathAbsolute.empty()) { if (!itr->indexPath.empty() || !itr->indexPathAbsolute.empty()) {
if (itr->indexType == XAPIAN) if (itr->indexType == XAPIAN) {
bookNode.append_attribute("indexType") = "xapian"; bookNode.append_attribute("indexType") = "xapian";
} }
}
if (itr->origId.empty()) { if (itr->origId.empty()) {
if (!itr->title.empty()) if (!itr->title.empty())
@ -183,14 +198,17 @@ namespace kiwix {
bookNode.append_attribute("favicon") = itr->favicon.c_str(); bookNode.append_attribute("favicon") = itr->favicon.c_str();
if (!itr->faviconMimeType.empty()) if (!itr->faviconMimeType.empty())
bookNode.append_attribute("faviconMimeType") = itr->faviconMimeType.c_str(); bookNode.append_attribute("faviconMimeType")
= itr->faviconMimeType.c_str();
} }
if (!itr->date.empty()) if (!itr->date.empty()) {
bookNode.append_attribute("date") = itr->date.c_str(); bookNode.append_attribute("date") = itr->date.c_str();
}
if (!itr->url.empty()) if (!itr->url.empty()) {
bookNode.append_attribute("url") = itr->url.c_str(); bookNode.append_attribute("url") = itr->url.c_str();
}
if (!itr->origId.empty()) if (!itr->origId.empty())
bookNode.append_attribute("origId") = itr->origId.c_str(); bookNode.append_attribute("origId") = itr->origId.c_str();
@ -201,47 +219,58 @@ namespace kiwix {
if (!itr->mediaCount.empty()) if (!itr->mediaCount.empty())
bookNode.append_attribute("mediaCount") = itr->mediaCount.c_str(); bookNode.append_attribute("mediaCount") = itr->mediaCount.c_str();
if (!itr->size.empty()) if (!itr->size.empty()) {
bookNode.append_attribute("size") = itr->size.c_str(); bookNode.append_attribute("size") = itr->size.c_str();
} }
} }
}
/* saving file */ /* saving file */
doc.save_file(path.c_str()); doc.save_file(path.c_str());
return true; return true;
} }
bool Manager::setCurrentBookId(const string id) { bool Manager::setCurrentBookId(const string id)
{
if (library.current.empty() || library.current.top() != id) { if (library.current.empty() || library.current.top() != id) {
if (id.empty() && !library.current.empty()) if (id.empty() && !library.current.empty()) {
library.current.pop(); library.current.pop();
else } else {
library.current.push(id); library.current.push(id);
} }
}
return true; return true;
} }
/* Return the id on top of library.current, or "" when the stack is empty. */
string Manager::getCurrentBookId()
{
  if (library.current.empty()) {
    return "";
  }
  return library.current.top();
}
/* Add a book to the library. Return empty string if failed, book id otherwise */ /* Add a book to the library. Return empty string if failed, book id otherwise
string Manager::addBookFromPathAndGetId(const string pathToOpen, const string pathToSave, */
const string url, const bool checkMetaData) { string Manager::addBookFromPathAndGetId(const string pathToOpen,
const string pathToSave,
const string url,
const bool checkMetaData)
{
kiwix::Book book; kiwix::Book book;
if (this->readBookFromPath(pathToOpen, &book)) { if (this->readBookFromPath(pathToOpen, &book)) {
if (pathToSave != pathToOpen) { if (pathToSave != pathToOpen) {
book.path = pathToSave; book.path = pathToSave;
book.pathAbsolute = isRelativePath(pathToSave) ? book.pathAbsolute
computeAbsolutePath(removeLastPathElement(writableLibraryPath, true, false), pathToSave) : pathToSave; = isRelativePath(pathToSave)
? computeAbsolutePath(
removeLastPathElement(writableLibraryPath, true, false),
pathToSave)
: pathToSave;
} }
if (!checkMetaData || if (!checkMetaData
(checkMetaData && !book.title.empty() && !book.language.empty() && !book.date.empty())) { || (checkMetaData && !book.title.empty() && !book.language.empty()
&& !book.date.empty())) {
book.url = url; book.url = url;
library.addBook(book); library.addBook(book);
return book.id; return book.id;
@ -249,16 +278,24 @@ namespace kiwix {
} }
return ""; return "";
} }
/* Wrapper over Manager::addBookFromPath which return a bool instead of a string */ /* Wrapper over Manager::addBookFromPath which return a bool instead of a string
bool Manager::addBookFromPath(const string pathToOpen, const string pathToSave, const string url, const bool checkMetaData) { */
return !(this->addBookFromPathAndGetId(pathToOpen, pathToSave, url, checkMetaData).empty()); bool Manager::addBookFromPath(const string pathToOpen,
} const string pathToSave,
const string url,
const bool checkMetaData)
{
return !(
this->addBookFromPathAndGetId(pathToOpen, pathToSave, url, checkMetaData)
.empty());
}
bool Manager::readBookFromPath(const string path, kiwix::Book *book) { bool Manager::readBookFromPath(const string path, kiwix::Book* book)
{
try { try {
kiwix::Reader *reader = new kiwix::Reader(path); kiwix::Reader* reader = new kiwix::Reader(path);
if (book != NULL) { if (book != NULL) {
book->path = path; book->path = path;
@ -281,13 +318,16 @@ namespace kiwix {
mediaCountStream << reader->getMediaCount(); mediaCountStream << reader->getMediaCount();
book->mediaCount = mediaCountStream.str(); book->mediaCount = mediaCountStream.str();
ostringstream convert; convert << reader->getFileSize(); ostringstream convert;
convert << reader->getFileSize();
book->size = convert.str(); book->size = convert.str();
string favicon; string favicon;
string faviconMimeType; string faviconMimeType;
if (reader->getFavicon(favicon, faviconMimeType)) { if (reader->getFavicon(favicon, faviconMimeType)) {
book->favicon = base64_encode(reinterpret_cast<const unsigned char*>(favicon.c_str()), favicon.length()); book->favicon = base64_encode(
reinterpret_cast<const unsigned char*>(favicon.c_str()),
favicon.length());
book->faviconMimeType = faviconMimeType; book->faviconMimeType = faviconMimeType;
} }
} }
@ -299,30 +339,34 @@ namespace kiwix {
} }
return true; return true;
} }
bool Manager::removeBookByIndex(const unsigned int bookIndex) { bool Manager::removeBookByIndex(const unsigned int bookIndex)
{
return this->library.removeBookByIndex(bookIndex); return this->library.removeBookByIndex(bookIndex);
} }
bool Manager::removeBookById(const string id) { bool Manager::removeBookById(const string id)
{
unsigned int bookIndex = 0; unsigned int bookIndex = 0;
std::vector<kiwix::Book>::iterator itr; std::vector<kiwix::Book>::iterator itr;
for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) { for (itr = library.books.begin(); itr != library.books.end(); ++itr) {
if ( itr->id == id) { if (itr->id == id) {
return this->library.removeBookByIndex(bookIndex); return this->library.removeBookByIndex(bookIndex);
} }
bookIndex++; bookIndex++;
} }
return false; return false;
} }
vector<string> Manager::getBooksLanguages() { vector<string> Manager::getBooksLanguages()
{
std::vector<string> booksLanguages; std::vector<string> booksLanguages;
std::vector<kiwix::Book>::iterator itr; std::vector<kiwix::Book>::iterator itr;
std::map<string, bool> booksLanguagesMap; std::map<string, bool> booksLanguagesMap;
std::sort(library.books.begin(), library.books.end(), kiwix::Book::sortByLanguage); std::sort(
library.books.begin(), library.books.end(), kiwix::Book::sortByLanguage);
for (itr = library.books.begin(); itr != library.books.end(); ++itr) { for (itr = library.books.begin(); itr != library.books.end(); ++itr) {
if (booksLanguagesMap.find(itr->language) == booksLanguagesMap.end()) { if (booksLanguagesMap.find(itr->language) == booksLanguagesMap.end()) {
if (itr->origId.empty()) { if (itr->origId.empty()) {
@ -333,14 +377,16 @@ namespace kiwix {
} }
return booksLanguages; return booksLanguages;
} }
vector<string> Manager::getBooksCreators() { vector<string> Manager::getBooksCreators()
{
std::vector<string> booksCreators; std::vector<string> booksCreators;
std::vector<kiwix::Book>::iterator itr; std::vector<kiwix::Book>::iterator itr;
std::map<string, bool> booksCreatorsMap; std::map<string, bool> booksCreatorsMap;
std::sort(library.books.begin(), library.books.end(), kiwix::Book::sortByCreator); std::sort(
library.books.begin(), library.books.end(), kiwix::Book::sortByCreator);
for (itr = library.books.begin(); itr != library.books.end(); ++itr) { for (itr = library.books.begin(); itr != library.books.end(); ++itr) {
if (booksCreatorsMap.find(itr->creator) == booksCreatorsMap.end()) { if (booksCreatorsMap.find(itr->creator) == booksCreatorsMap.end()) {
if (itr->origId.empty()) { if (itr->origId.empty()) {
@ -351,27 +397,29 @@ namespace kiwix {
} }
return booksCreators; return booksCreators;
} }
/* Return the ids of all books, in the order they are stored in the library. */
vector<string> Manager::getBooksIds()
{
  std::vector<string> ids;

  for (size_t i = 0; i < library.books.size(); ++i) {
    ids.push_back(library.books[i].id);
  }

  return ids;
}
vector<string> Manager::getBooksPublishers() { vector<string> Manager::getBooksPublishers()
{
std::vector<string> booksPublishers; std::vector<string> booksPublishers;
std::vector<kiwix::Book>::iterator itr; std::vector<kiwix::Book>::iterator itr;
std::map<string, bool> booksPublishersMap; std::map<string, bool> booksPublishersMap;
std::sort(library.books.begin(), library.books.end(), kiwix::Book::sortByPublisher); std::sort(
for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) { library.books.begin(), library.books.end(), kiwix::Book::sortByPublisher);
for (itr = library.books.begin(); itr != library.books.end(); ++itr) {
if (booksPublishersMap.find(itr->publisher) == booksPublishersMap.end()) { if (booksPublishersMap.find(itr->publisher) == booksPublishersMap.end()) {
if (itr->origId.empty()) { if (itr->origId.empty()) {
booksPublishersMap[itr->publisher] = true; booksPublishersMap[itr->publisher] = true;
@ -381,13 +429,14 @@ namespace kiwix {
} }
return booksPublishers; return booksPublishers;
} }
/* Return a copy of the managed library (returned by value). */
kiwix::Library Manager::cloneLibrary()
{
  return library;
}
bool Manager::getCurrentBook(Book& book)
bool Manager::getCurrentBook(Book &book) { {
string currentBookId = getCurrentBookId(); string currentBookId = getCurrentBookId();
if (currentBookId.empty()) { if (currentBookId.empty()) {
return false; return false;
@ -395,138 +444,182 @@ namespace kiwix {
getBookById(currentBookId, book); getBookById(currentBookId, book);
return true; return true;
} }
} }
bool Manager::getBookById(const string id, Book &book) { bool Manager::getBookById(const string id, Book& book)
{
std::vector<kiwix::Book>::iterator itr; std::vector<kiwix::Book>::iterator itr;
for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) { for (itr = library.books.begin(); itr != library.books.end(); ++itr) {
if ( itr->id == id) { if (itr->id == id) {
book = *itr; book = *itr;
return true; return true;
} }
} }
return false; return false;
} }
bool Manager::updateBookLastOpenDateById(const string id) { bool Manager::updateBookLastOpenDateById(const string id)
{
std::vector<kiwix::Book>::iterator itr; std::vector<kiwix::Book>::iterator itr;
for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) { for (itr = library.books.begin(); itr != library.books.end(); ++itr) {
if ( itr->id == id) { if (itr->id == id) {
char unixdate[12]; char unixdate[12];
sprintf (unixdate, "%d", (int)time(NULL)); sprintf(unixdate, "%d", (int)time(NULL));
itr->last = unixdate; itr->last = unixdate;
return true; return true;
} }
} }
return false; return false;
} }
bool Manager::setBookIndex(const string id, const string path, const supportedIndexType type) { bool Manager::setBookIndex(const string id,
const string path,
const supportedIndexType type)
{
std::vector<kiwix::Book>::iterator itr; std::vector<kiwix::Book>::iterator itr;
for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) { for (itr = library.books.begin(); itr != library.books.end(); ++itr) {
if ( itr->id == id) { if (itr->id == id) {
itr->indexPath = path; itr->indexPath = path;
itr->indexPathAbsolute = isRelativePath(path) ? itr->indexPathAbsolute
computeAbsolutePath(removeLastPathElement(writableLibraryPath, true, false), path) : path; = isRelativePath(path)
? computeAbsolutePath(
removeLastPathElement(writableLibraryPath, true, false),
path)
: path;
itr->indexType = type; itr->indexType = type;
return true; return true;
} }
} }
return false; return false;
} }
bool Manager::setBookIndex(const string id, const string path) { bool Manager::setBookIndex(const string id, const string path)
{
return this->setBookIndex(id, path, XAPIAN); return this->setBookIndex(id, path, XAPIAN);
} }
bool Manager::setBookPath(const string id, const string path) { bool Manager::setBookPath(const string id, const string path)
{
std::vector<kiwix::Book>::iterator itr; std::vector<kiwix::Book>::iterator itr;
for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) { for (itr = library.books.begin(); itr != library.books.end(); ++itr) {
if ( itr->id == id) { if (itr->id == id) {
itr->path = path; itr->path = path;
itr->pathAbsolute = isRelativePath(path) ? itr->pathAbsolute
computeAbsolutePath(removeLastPathElement(writableLibraryPath, true, false), path) : path; = isRelativePath(path)
? computeAbsolutePath(
removeLastPathElement(writableLibraryPath, true, false),
path)
: path;
return true; return true;
} }
} }
return false; return false;
} }
void Manager::removeBookPaths() { void Manager::removeBookPaths()
{
std::vector<kiwix::Book>::iterator itr; std::vector<kiwix::Book>::iterator itr;
for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) { for (itr = library.books.begin(); itr != library.books.end(); ++itr) {
itr->path = ""; itr->path = "";
itr->pathAbsolute = ""; itr->pathAbsolute = "";
} }
} }
unsigned int Manager::getBookCount(const bool localBooks, const bool remoteBooks) { unsigned int Manager::getBookCount(const bool localBooks,
const bool remoteBooks)
{
unsigned int result = 0; unsigned int result = 0;
std::vector<kiwix::Book>::iterator itr; std::vector<kiwix::Book>::iterator itr;
for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) { for (itr = library.books.begin(); itr != library.books.end(); ++itr) {
if ((!itr->path.empty() && localBooks) || (itr->path.empty() && remoteBooks)) if ((!itr->path.empty() && localBooks)
|| (itr->path.empty() && remoteBooks)) {
result++; result++;
} }
return result;
} }
return result;
}
bool Manager::listBooks(const supportedListMode mode, const supportedListSortBy sortBy, bool Manager::listBooks(const supportedListMode mode,
const unsigned int maxSize, const string language, const string creator, const supportedListSortBy sortBy,
const string publisher, const string search) { const unsigned int maxSize,
const string language,
const string creator,
const string publisher,
const string search)
{
this->bookIdList.clear(); this->bookIdList.clear();
std::vector<kiwix::Book>::iterator itr; std::vector<kiwix::Book>::iterator itr;
/* Sort */ /* Sort */
if (sortBy == TITLE) { if (sortBy == TITLE) {
std::sort(library.books.begin(), library.books.end(), kiwix::Book::sortByTitle); std::sort(
library.books.begin(), library.books.end(), kiwix::Book::sortByTitle);
} else if (sortBy == SIZE) { } else if (sortBy == SIZE) {
std::sort(library.books.begin(), library.books.end(), kiwix::Book::sortBySize); std::sort(
library.books.begin(), library.books.end(), kiwix::Book::sortBySize);
} else if (sortBy == DATE) { } else if (sortBy == DATE) {
std::sort(library.books.begin(), library.books.end(), kiwix::Book::sortByDate); std::sort(
library.books.begin(), library.books.end(), kiwix::Book::sortByDate);
} else if (sortBy == CREATOR) { } else if (sortBy == CREATOR) {
std::sort(library.books.begin(), library.books.end(), kiwix::Book::sortByCreator); std::sort(
library.books.begin(), library.books.end(), kiwix::Book::sortByCreator);
} else if (sortBy == PUBLISHER) { } else if (sortBy == PUBLISHER) {
std::sort(library.books.begin(), library.books.end(), kiwix::Book::sortByPublisher); std::sort(library.books.begin(),
library.books.end(),
kiwix::Book::sortByPublisher);
} }
/* Special sort for LASTOPEN */ /* Special sort for LASTOPEN */
if (mode == LASTOPEN) { if (mode == LASTOPEN) {
std::sort(library.books.begin(), library.books.end(), kiwix::Book::sortByLastOpen); std::sort(library.books.begin(),
for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) { library.books.end(),
if (!itr->last.empty()) kiwix::Book::sortByLastOpen);
for (itr = library.books.begin(); itr != library.books.end(); ++itr) {
if (!itr->last.empty()) {
this->bookIdList.push_back(itr->id); this->bookIdList.push_back(itr->id);
} }
}
} else { } else {
/* Generate the list of book id */ /* Generate the list of book id */
for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) { for (itr = library.books.begin(); itr != library.books.end(); ++itr) {
bool ok = true; bool ok = true;
if (mode == LOCAL && itr->path.empty()) if (mode == LOCAL && itr->path.empty()) {
ok = false; ok = false;
}
if (ok == true && mode == REMOTE && (!itr->path.empty() || itr->url.empty())) if (ok == true && mode == REMOTE
&& (!itr->path.empty() || itr->url.empty())) {
ok = false; ok = false;
}
if (ok == true && maxSize != 0 && (unsigned int)atoi(itr->size.c_str()) > maxSize * 1024 * 1024) if (ok == true && maxSize != 0
&& (unsigned int)atoi(itr->size.c_str()) > maxSize * 1024 * 1024) {
ok = false; ok = false;
}
if (ok == true && !language.empty() && !matchRegex(itr->language, language)) if (ok == true && !language.empty()
&& !matchRegex(itr->language, language)) {
ok = false; ok = false;
}
if (ok == true && !creator.empty() && itr->creator != creator) if (ok == true && !creator.empty() && itr->creator != creator) {
ok = false; ok = false;
}
if (ok == true && !publisher.empty() && itr->publisher != publisher) if (ok == true && !publisher.empty() && itr->publisher != publisher) {
ok = false; ok = false;
}
if ((ok == true && !search.empty()) && !(matchRegex(itr->title, "\\Q" + search + "\\E") || if ((ok == true && !search.empty())
matchRegex(itr->description, "\\Q" + search + "\\E") || && !(matchRegex(itr->title, "\\Q" + search + "\\E")
matchRegex(itr->language, "\\Q" + search + "\\E") || matchRegex(itr->description, "\\Q" + search + "\\E")
)) || matchRegex(itr->language, "\\Q" + search + "\\E"))) {
ok = false; ok = false;
}
if (ok == true) { if (ok == true) {
this->bookIdList.push_back(itr->id); this->bookIdList.push_back(itr->id);
@ -535,28 +628,31 @@ namespace kiwix {
} }
return true; return true;
} }
/* Make the relative and absolute path fields of `book` consistent.
 *
 * For both the book path and the index path, when the stored path is
 * non-empty:
 *  - a relative path is kept and its absolute counterpart is computed from
 *    `libraryPath` with its last element removed;
 *  - any other path is copied to the absolute field and the stored path is
 *    replaced by its form relative to that same base.
 * Empty paths are left alone. */
void Manager::checkAndCleanBookPaths(Book& book, const string& libraryPath)
{
  if (!book.path.empty()) {
    if (isRelativePath(book.path)) {
      const string baseDir = removeLastPathElement(libraryPath, true, false);
      book.pathAbsolute = computeAbsolutePath(baseDir, book.path);
    } else {
      book.pathAbsolute = book.path;
      const string baseDir = removeLastPathElement(libraryPath, true, false);
      book.path = computeRelativePath(baseDir, book.pathAbsolute);
    }
  }

  if (!book.indexPath.empty()) {
    if (isRelativePath(book.indexPath)) {
      const string baseDir = removeLastPathElement(libraryPath, true, false);
      book.indexPathAbsolute = computeAbsolutePath(baseDir, book.indexPath);
    } else {
      book.indexPathAbsolute = book.indexPath;
      const string baseDir = removeLastPathElement(libraryPath, true, false);
      book.indexPath = computeRelativePath(baseDir, book.indexPathAbsolute);
    }
  }
}
} }

View File

@ -20,55 +20,63 @@
#include "reader.h" #include "reader.h"
#include <time.h> #include <time.h>
/* Return the lowercase hexadecimal digit for the upper four bits of `v`. */
inline char hi(char v)
{
  static const char hex[] = "0123456789abcdef";
  return hex[(v >> 4) & 0xf];
}

/* Return the lowercase hexadecimal digit for the lower four bits of `v`. */
inline char lo(char v)
{
  static const char hex[] = "0123456789abcdef";
  return hex[v & 0xf];
}

/* Render the first 16 bytes of `in` as a textual UUID
 * ("xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx", lowercase hex).
 *
 * Input shorter than 16 bytes is padded with zero bytes instead of being
 * indexed past its end; bytes beyond the 16th are ignored. */
std::string hexUUID(std::string in)
{
  const std::string::size_type uuidSize = 16;
  in.resize(uuidSize, '\0');

  std::string out;
  out.reserve(2 * uuidSize + 4);
  for (std::string::size_type n = 0; n < uuidSize; ++n) {
    /* Dashes separate the 4-2-2-2-6 byte groups */
    if (n == 4 || n == 6 || n == 8 || n == 10) {
      out.push_back('-');
    }
    out.push_back(hi(in[n]));
    out.push_back(lo(in[n]));
  }
  return out;
}
namespace kiwix { namespace kiwix
{
/* Constructor */ /* Constructor */
Reader::Reader(const string zimFilePath) Reader::Reader(const string zimFilePath) : zimFileHandler(NULL)
: zimFileHandler(NULL) { {
string tmpZimFilePath = zimFilePath; string tmpZimFilePath = zimFilePath;
/* Remove potential trailing zimaa */ /* Remove potential trailing zimaa */
size_t found = tmpZimFilePath.rfind("zimaa"); size_t found = tmpZimFilePath.rfind("zimaa");
if (found != string::npos && if (found != string::npos && tmpZimFilePath.size() > 5
tmpZimFilePath.size() > 5 && && found == tmpZimFilePath.size() - 5) {
found == tmpZimFilePath.size() - 5) {
tmpZimFilePath.resize(tmpZimFilePath.size() - 2); tmpZimFilePath.resize(tmpZimFilePath.size() - 2);
} }
this->zimFileHandler = new zim::File(tmpZimFilePath); this->zimFileHandler = new zim::File(tmpZimFilePath);
if (this->zimFileHandler != NULL) { if (this->zimFileHandler != NULL) {
this->firstArticleOffset = this->zimFileHandler->getNamespaceBeginOffset('A'); this->firstArticleOffset
= this->zimFileHandler->getNamespaceBeginOffset('A');
this->lastArticleOffset = this->zimFileHandler->getNamespaceEndOffset('A'); this->lastArticleOffset = this->zimFileHandler->getNamespaceEndOffset('A');
this->currentArticleOffset = this->firstArticleOffset; this->currentArticleOffset = this->firstArticleOffset;
this->nsACount = this->zimFileHandler->getNamespaceCount('A'); this->nsACount = this->zimFileHandler->getNamespaceCount('A');
@ -77,36 +85,38 @@ namespace kiwix {
} }
/* initialize random seed: */ /* initialize random seed: */
srand ( time(NULL) ); srand(time(NULL));
} }
/* Destructor: frees the zim::File held in zimFileHandler. */
Reader::~Reader()
{
  /* Deleting a null pointer is a no-op, so no explicit guard is required */
  delete this->zimFileHandler;
}
/* Give access to the zim::File pointer held by this reader. The reader
 * deletes it in its destructor, so callers must not free it. */
zim::File* Reader::getZimFileHandler() const
{
  return zimFileHandler;
}
/* Reset the cursor for GetNextArticle() */
/* Reset the cursor for GetNextArticle() */ void Reader::reset()
void Reader::reset() { {
this->currentArticleOffset = this->firstArticleOffset; this->currentArticleOffset = this->firstArticleOffset;
} }
std::map<const std::string, unsigned int> Reader::parseCounterMetadata() const
std::map<const std::string, unsigned int> Reader::parseCounterMetadata() const { {
std::map<const std::string, unsigned int> counters; std::map<const std::string, unsigned int> counters;
string mimeType, item, counterString; string mimeType, item, counterString;
unsigned int counter; unsigned int counter;
zim::Article article = this->zimFileHandler->getArticle('M',"Counter"); zim::Article article = this->zimFileHandler->getArticle('M', "Counter");
if ( article.good() ) { if (article.good()) {
stringstream ssContent(article.getData()); stringstream ssContent(article.getData());
while(getline(ssContent, item, ';')) { while (getline(ssContent, item, ';')) {
stringstream ssItem(item); stringstream ssItem(item);
getline(ssItem, mimeType, '='); getline(ssItem, mimeType, '=');
getline(ssItem, counterString, '='); getline(ssItem, counterString, '=');
@ -118,100 +128,113 @@ namespace kiwix {
} }
return counters; return counters;
} }
/* Get the count of articles which can be indexed/displayed.
 *
 * Without any counter metadata the stored namespace 'A' count is used;
 * otherwise the "text/html" counter is reported, or 0 when that entry is
 * absent. */
unsigned int Reader::getArticleCount() const
{
  const std::map<const std::string, unsigned int> counters
      = this->parseCounterMetadata();
  if (counters.empty()) {
    return this->nsACount;
  }

  const auto htmlEntry = counters.find("text/html");
  return htmlEntry != counters.end() ? htmlEntry->second : 0;
}

/* Get the count of media entries in the ZIM file.
 *
 * Without any counter metadata the stored nsICount value is used; otherwise
 * the "image/jpeg", "image/gif" and "image/png" counters are summed, missing
 * entries contributing nothing. */
unsigned int Reader::getMediaCount() const
{
  const std::map<const std::string, unsigned int> counters
      = this->parseCounterMetadata();
  if (counters.empty()) {
    return this->nsICount;
  }

  static const char* const imageMimeTypes[]
      = {"image/jpeg", "image/gif", "image/png"};

  unsigned int total = 0;
  for (const char* mimeType : imageMimeTypes) {
    const auto entry = counters.find(mimeType);
    if (entry != counters.end()) {
      total += entry->second;
    }
  }
  return total;
}
/* Get the total of all items of a ZIM file, redirects included */ /* Get the total of all items of a ZIM file, redirects included */
unsigned int Reader::getGlobalCount() const { unsigned int Reader::getGlobalCount() const
{
return this->zimFileHandler->getCountArticles(); return this->zimFileHandler->getCountArticles();
} }
/* Return the UID of the ZIM file: the UUID from the file header, rendered
 * through its stream insertion operator. */
string Reader::getId() const
{
  std::ostringstream idStream;
  idStream << zimFileHandler->getFileheader().getUuid();
  return idStream.str();
}
/* Return a page url from a title */ /* Return a page url from a title */
bool Reader::getPageUrlFromTitle(const string &title, string &url) const { bool Reader::getPageUrlFromTitle(const string& title, string& url) const
{
/* Extract the content from the zim file */ /* Extract the content from the zim file */
zim::Article article = this->zimFileHandler->getArticleByTitle('A', title); zim::Article article = this->zimFileHandler->getArticleByTitle('A', title);
if ( ! article.good() ) if (!article.good()) {
{
return false; return false;
} }
unsigned int loopCounter = 0; unsigned int loopCounter = 0;
while (article.isRedirect() && loopCounter++<42) { while (article.isRedirect() && loopCounter++ < 42) {
article = article.getRedirectArticle(); article = article.getRedirectArticle();
} }
url = article.getLongUrl(); url = article.getLongUrl();
return true; return true;
} }
/* Return the URL of a randomly chosen article.
 *
 * An index is drawn uniformly in [firstArticleOffset,
 * firstArticleOffset + nsACount) and re-drawn while it lands on the main
 * page. The number of draws is capped: when every draw yields the main page
 * (e.g. it is the only article available) the main page URL is returned
 * instead of looping forever. */
string Reader::getRandomPageUrl() const
{
  /* With two or more candidate articles the odds of hitting the main page
   * this many times in a row are negligible, so the cap only matters in the
   * degenerate case. */
  const unsigned int maxAttempts = 100;

  const std::string mainPageUrl = this->getMainPageUrl();
  std::string candidateUrl;
  unsigned int attempts = 0;

  do {
    const zim::size_type idx
        = this->firstArticleOffset
          + static_cast<zim::size_type>(
                static_cast<double>(rand())
                / (static_cast<double>(RAND_MAX) + 1) * this->nsACount);
    candidateUrl = zimFileHandler->getArticle(idx).getLongUrl();
  } while (candidateUrl == mainPageUrl && ++attempts < maxAttempts);

  return candidateUrl;
}
/* Return the welcome page URL */ /* Return the welcome page URL */
string Reader::getMainPageUrl() const { string Reader::getMainPageUrl() const
{
string url = ""; string url = "";
if (this->zimFileHandler->getFileheader().hasMainPage()) { if (this->zimFileHandler->getFileheader().hasMainPage()) {
zim::Article article = zimFileHandler->getArticle(this->zimFileHandler->getFileheader().getMainPage()); zim::Article article = zimFileHandler->getArticle(
this->zimFileHandler->getFileheader().getMainPage());
url = article.getLongUrl(); url = article.getLongUrl();
if (url.empty()) { if (url.empty()) {
@ -222,47 +245,44 @@ namespace kiwix {
} }
return url; return url;
} }
/* Fetch the favicon of the ZIM file.
 *
 * The known favicon locations are tried in order and the first one that
 * yields non-empty content wins; `content` and `mimeType` are filled by
 * getContentByUrl(). Returns true when some content was found. */
bool Reader::getFavicon(string& content, string& mimeType) const
{
  static const char* const candidateUrls[]
      = {"/-/favicon.png", "/I/favicon.png", "/I/favicon", "/-/favicon"};

  unsigned int contentLength = 0;
  for (const char* candidateUrl : candidateUrls) {
    this->getContentByUrl(candidateUrl, content, contentLength, mimeType);
    if (!content.empty()) {
      return true;
    }
  }

  return false;
}
/* Return the value of the zimFilePath member. */
string Reader::getZimFilePath() const
{
  return zimFilePath;
}
/* Return a metatag value */
/* Return a metatag value */ bool Reader::getMetatag(const string& name, string& value) const
bool Reader::getMetatag(const string &name, string &value) const { {
unsigned int contentLength = 0; unsigned int contentLength = 0;
string contentType = ""; string contentType = "";
return this->getContentByUrl( "/M/" + name, value, return this->getContentByUrl("/M/" + name, value, contentLength, contentType);
contentLength, contentType); }
}
string Reader::getTitle() const { string Reader::getTitle() const
{
string value; string value;
this->getMetatag("Title", value); this->getMetatag("Title", value);
if (value.empty()) { if (value.empty()) {
@ -272,21 +292,24 @@ namespace kiwix {
value = value.substr(0, pos); value = value.substr(0, pos);
} }
return value; return value;
} }
/* Return the "Name" metadata entry; the string stays empty when
 * getMetatag() does not fill it. */
string Reader::getName() const
{
  string name;
  getMetatag("Name", name);
  return name;
}
/* Return the "Tags" metadata entry; the string stays empty when
 * getMetatag() does not fill it. */
string Reader::getTags() const
{
  string tags;
  getMetatag("Tags", tags);
  return tags;
}
string Reader::getDescription() const{ string Reader::getDescription() const
{
string value; string value;
this->getMetatag("Description", value); this->getMetatag("Description", value);
@ -296,82 +319,90 @@ namespace kiwix {
} }
return value; return value;
} }
/* Return the "Language" metadata entry; the string stays empty when
 * getMetatag() does not fill it. */
string Reader::getLanguage() const
{
  string language;
  getMetatag("Language", language);
  return language;
}
/* Return the "Date" metadata entry; the string stays empty when
 * getMetatag() does not fill it. */
string Reader::getDate() const
{
  string date;
  getMetatag("Date", date);
  return date;
}
/* Return the "Creator" metadata entry; the string stays empty when
 * getMetatag() does not fill it. */
string Reader::getCreator() const
{
  string creator;
  getMetatag("Creator", creator);
  return creator;
}
/* Return the "Publisher" metadata entry; the string stays empty when
 * getMetatag() does not fill it. */
string Reader::getPublisher() const
{
  string publisher;
  getMetatag("Publisher", publisher);
  return publisher;
}
/* Return the original file id, formatted as a textual UUID.
 *
 * The "startfileuid" metadata entry is read as a sequence of
 * newline-terminated decimal numbers, each giving one byte of the id. Up to
 * 16 bytes are collected (missing ones stay zero, extra ones are ignored)
 * and rendered with hexUUID(). Returns an empty string when the metadata
 * entry is empty. */
string Reader::getOrigId() const
{
  string serialized;
  this->getMetatag("startfileuid", serialized);
  if (serialized.empty()) {
    return "";
  }

  const size_t uuidSize = 16;
  char uuidBytes[uuidSize] = "";
  size_t filled = 0;
  string number;

  /* Stop once the buffer is full: writing a 17th value would overflow it */
  for (size_t i = 0; i < serialized.size() && filled < uuidSize; ++i) {
    if (serialized[i] == '\n') {
      uuidBytes[filled++] = atoi(number.c_str());
      number.clear();
    } else {
      number += serialized[i];
    }
  }

  /* Pass the explicit length: the bytes are binary data, so building the
   * string as a C string would cut it at the first zero byte and would read
   * past the buffer when none of the 16 bytes is zero. */
  return hexUUID(std::string(uuidBytes, uuidSize));
}
/* Return the long URL of the article located at the beginning of
 * namespace 'A'. */
string Reader::getFirstPageUrl() const
{
  const zim::size_type firstOffset
      = zimFileHandler->getNamespaceBeginOffset('A');
  return zimFileHandler->getArticle(firstOffset).getLongUrl();
}
bool Reader::parseUrl(const string &url, char *ns, string &title) const { bool Reader::parseUrl(const string& url, char* ns, string& title) const
{
/* Offset to visit the url */ /* Offset to visit the url */
unsigned int urlLength = url.size(); unsigned int urlLength = url.size();
unsigned int offset = 0; unsigned int offset = 0;
/* Ignore the '/' */ /* Ignore the '/' */
while ((offset < urlLength) && (url[offset] == '/')) offset++; while ((offset < urlLength) && (url[offset] == '/')) {
offset++;
}
/* Get namespace */ /* Get namespace */
while ((offset < urlLength) && (url[offset] != '/')) { while ((offset < urlLength) && (url[offset] != '/')) {
*ns= url[offset]; *ns = url[offset];
offset++; offset++;
} }
/* Ignore the '/' */ /* Ignore the '/' */
while ((offset < urlLength) && (url[offset] == '/')) offset++; while ((offset < urlLength) && (url[offset] == '/')) {
offset++;
}
/* Get content title */ /* Get content title */
unsigned int titleOffset = offset; unsigned int titleOffset = offset;
@ -383,10 +414,12 @@ namespace kiwix {
title = url.substr(titleOffset, offset - titleOffset); title = url.substr(titleOffset, offset - titleOffset);
return true; return true;
} }
/* Return article by url */ /* Return article by url */
bool Reader::getArticleObjectByDecodedUrl(const string &url, zim::Article &article) const { bool Reader::getArticleObjectByDecodedUrl(const string& url,
zim::Article& article) const
{
if (this->zimFileHandler == NULL) { if (this->zimFileHandler == NULL) {
return false; return false;
} }
@ -404,10 +437,11 @@ namespace kiwix {
/* Extract the content from the zim file */ /* Extract the content from the zim file */
article = zimFileHandler->getArticle(ns, urlStr); article = zimFileHandler->getArticle(ns, urlStr);
return article.good(); return article.good();
} }
/* Return the mimeType without the content */ /* Return the mimeType without the content */
bool Reader::getMimeTypeByUrl(const string &url, string &mimeType) const { bool Reader::getMimeTypeByUrl(const string& url, string& mimeType) const
{
if (this->zimFileHandler == NULL) { if (this->zimFileHandler == NULL) {
return false; return false;
} }
@ -416,8 +450,9 @@ namespace kiwix {
if (this->getArticleObjectByDecodedUrl(url, article)) { if (this->getArticleObjectByDecodedUrl(url, article)) {
try { try {
mimeType = article.getMimeType(); mimeType = article.getMimeType();
} catch (exception &e) { } catch (exception& e) {
cerr << "Unable to get the mimetype for " << url << ":" << e.what() << endl; cerr << "Unable to get the mimetype for " << url << ":" << e.what()
<< endl;
mimeType = "application/octet-stream"; mimeType = "application/octet-stream";
} }
return true; return true;
@ -425,52 +460,86 @@ namespace kiwix {
mimeType = ""; mimeType = "";
return false; return false;
} }
} }
/* Get a content from a zim file */ /* Get a content from a zim file */
bool Reader::getContentByUrl(const string &url, string &content, unsigned int &contentLength, string &contentType) const { bool Reader::getContentByUrl(const string& url,
string& content,
unsigned int& contentLength,
string& contentType) const
{
return this->getContentByEncodedUrl(url, content, contentLength, contentType); return this->getContentByEncodedUrl(url, content, contentLength, contentType);
} }
bool Reader::getContentByEncodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType, string &baseUrl) const { bool Reader::getContentByEncodedUrl(const string& url,
return this->getContentByDecodedUrl(kiwix::urlDecode(url), content, contentLength, contentType, baseUrl); string& content,
} unsigned int& contentLength,
string& contentType,
string& baseUrl) const
{
return this->getContentByDecodedUrl(
kiwix::urlDecode(url), content, contentLength, contentType, baseUrl);
}
bool Reader::getContentByEncodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType) const { bool Reader::getContentByEncodedUrl(const string& url,
string& content,
unsigned int& contentLength,
string& contentType) const
{
std::string stubRedirectUrl; std::string stubRedirectUrl;
return this->getContentByEncodedUrl(kiwix::urlDecode(url), content, contentLength, contentType, stubRedirectUrl); return this->getContentByEncodedUrl(kiwix::urlDecode(url),
} content,
contentLength,
contentType,
stubRedirectUrl);
}
bool Reader::getContentByDecodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType) const { bool Reader::getContentByDecodedUrl(const string& url,
string& content,
unsigned int& contentLength,
string& contentType) const
{
std::string stubRedirectUrl; std::string stubRedirectUrl;
return this->getContentByDecodedUrl(kiwix::urlDecode(url), content, contentLength, contentType, stubRedirectUrl); return this->getContentByDecodedUrl(kiwix::urlDecode(url),
} content,
contentLength,
contentType,
stubRedirectUrl);
}
bool Reader::getContentByDecodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType, string &baseUrl) const { bool Reader::getContentByDecodedUrl(const string& url,
content=""; string& content,
contentType=""; unsigned int& contentLength,
string& contentType,
string& baseUrl) const
{
content = "";
contentType = "";
contentLength = 0; contentLength = 0;
zim::Article article; zim::Article article;
if ( ! this->getArticleObjectByDecodedUrl(url, article)) { if (!this->getArticleObjectByDecodedUrl(url, article)) {
return false; return false;
} }
/* If redirect */ /* If redirect */
unsigned int loopCounter = 0; unsigned int loopCounter = 0;
while (article.isRedirect() && loopCounter++<42) { while (article.isRedirect() && loopCounter++ < 42) {
article = article.getRedirectArticle(); article = article.getRedirectArticle();
} }
if (loopCounter < 42) { if (loopCounter < 42) {
/* Compute base url (might be different from the url if redirects */ /* Compute base url (might be different from the url if redirects */
baseUrl = "/" + std::string(1, article.getNamespace()) + "/" + article.getUrl(); baseUrl
= "/" + std::string(1, article.getNamespace()) + "/" + article.getUrl();
/* Get the content mime-type */ /* Get the content mime-type */
try { try {
contentType = string(article.getMimeType().data(), article.getMimeType().size()); contentType
} catch (exception &e) { = string(article.getMimeType().data(), article.getMimeType().size());
cerr << "Unable to get the mimetype for "<< baseUrl<< ":" << e.what() << endl; } catch (exception& e) {
cerr << "Unable to get the mimetype for " << baseUrl << ":" << e.what()
<< endl;
contentType = "application/octet-stream"; contentType = "application/octet-stream";
} }
@ -479,39 +548,48 @@ namespace kiwix {
} }
/* Try to set a stub HTML header/footer if necesssary */ /* Try to set a stub HTML header/footer if necesssary */
if (contentType.find("text/html") != string::npos && if (contentType.find("text/html") != string::npos
content.find("<body") == std::string::npos && && content.find("<body") == std::string::npos
content.find("<BODY") == std::string::npos) { && content.find("<BODY") == std::string::npos) {
content = "<html><head><title>" + article.getTitle() + "</title><meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" /></head><body>" + content + "</body></html>"; content = "<html><head><title>" + article.getTitle() +
"</title><meta http-equiv=\"Content-Type\" content=\"text/html; "
"charset=utf-8\" /></head><body>" +
content + "</body></html>";
} }
/* Get the data length */ /* Get the data length */
contentLength = article.getArticleSize(); contentLength = article.getArticleSize();
return true; return true;
} }
/* Check if an article exists */ /* Check if an article exists */
bool Reader::urlExists(const string &url) const { bool Reader::urlExists(const string& url) const
{
char ns = 0; char ns = 0;
string titleStr; string titleStr;
this->parseUrl(url, &ns, titleStr); this->parseUrl(url, &ns, titleStr);
titleStr = "/" + titleStr; titleStr = "/" + titleStr;
zim::File::const_iterator findItr = zimFileHandler->find(ns, titleStr); zim::File::const_iterator findItr = zimFileHandler->find(ns, titleStr);
return findItr != zimFileHandler->end() && findItr->getUrl() == titleStr; return findItr != zimFileHandler->end() && findItr->getUrl() == titleStr;
} }
/* Does the ZIM file has a fulltext index */ /* Does the ZIM file has a fulltext index */
bool Reader::hasFulltextIndex() const { bool Reader::hasFulltextIndex() const
{
return this->urlExists("/Z/fulltextIndex/xapian"); return this->urlExists("/Z/fulltextIndex/xapian");
} }
/* Search titles by prefix */ /* Search titles by prefix */
bool Reader::searchSuggestions(const string &prefix, unsigned int suggestionsCount, const bool reset) { bool Reader::searchSuggestions(const string& prefix,
unsigned int suggestionsCount,
const bool reset)
{
bool retVal = false; bool retVal = false;
zim::File::const_iterator articleItr; zim::File::const_iterator articleItr;
/* Reset the suggestions otherwise check if the suggestions number is less than the suggestionsCount */ /* Reset the suggestions otherwise check if the suggestions number is less
* than the suggestionsCount */
if (reset) { if (reset) {
this->suggestions.clear(); this->suggestions.clear();
this->suggestionsOffset = this->suggestions.begin(); this->suggestionsOffset = this->suggestions.begin();
@ -527,29 +605,31 @@ namespace kiwix {
} }
for (articleItr = zimFileHandler->findByTitle('A', prefix); for (articleItr = zimFileHandler->findByTitle('A', prefix);
articleItr != zimFileHandler->end() && articleItr != zimFileHandler->end()
articleItr->getTitle().compare(0, prefix.size(), prefix) == 0 && && articleItr->getTitle().compare(0, prefix.size(), prefix) == 0
this->suggestions.size() < suggestionsCount ; && this->suggestions.size() < suggestionsCount;
++articleItr) { ++articleItr) {
/* Extract the interesting part of article title & url */ /* Extract the interesting part of article title & url */
std::string normalizedArticleTitle = kiwix::normalize(articleItr->getTitle()); std::string normalizedArticleTitle
std::string articleFinalUrl = "/A/"+articleItr->getUrl(); = kiwix::normalize(articleItr->getTitle());
std::string articleFinalUrl = "/A/" + articleItr->getUrl();
if (articleItr->isRedirect()) { if (articleItr->isRedirect()) {
zim::Article article = *articleItr; zim::Article article = *articleItr;
unsigned int loopCounter = 0; unsigned int loopCounter = 0;
while (article.isRedirect() && loopCounter++<42) { while (article.isRedirect() && loopCounter++ < 42) {
article = article.getRedirectArticle(); article = article.getRedirectArticle();
} }
articleFinalUrl = "/A/"+article.getUrl(); articleFinalUrl = "/A/" + article.getUrl();
} }
/* Go through all already found suggestions and skip if this /* Go through all already found suggestions and skip if this
article is already in the suggestions list (with an other article is already in the suggestions list (with an other
title) */ title) */
bool insert = true; bool insert = true;
std::vector< std::vector<std::string> >::iterator suggestionItr; std::vector<std::vector<std::string>>::iterator suggestionItr;
for (suggestionItr = this->suggestions.begin(); suggestionItr != this->suggestions.end(); suggestionItr++) { for (suggestionItr = this->suggestions.begin();
suggestionItr != this->suggestions.end();
suggestionItr++) {
int result = normalizedArticleTitle.compare((*suggestionItr)[2]); int result = normalizedArticleTitle.compare((*suggestionItr)[2]);
if (result == 0 && articleFinalUrl.compare((*suggestionItr)[1]) == 0) { if (result == 0 && articleFinalUrl.compare((*suggestionItr)[1]) == 0) {
insert = false; insert = false;
@ -576,19 +656,23 @@ namespace kiwix {
this->suggestionsOffset = this->suggestions.begin(); this->suggestionsOffset = this->suggestions.begin();
return retVal; return retVal;
} }
std::vector<std::string> Reader::getTitleVariants(const std::string &title) const { std::vector<std::string> Reader::getTitleVariants(
const std::string& title) const
{
std::vector<std::string> variants; std::vector<std::string> variants;
variants.push_back(title); variants.push_back(title);
variants.push_back(kiwix::ucFirst(title)); variants.push_back(kiwix::ucFirst(title));
variants.push_back(kiwix::lcFirst(title)); variants.push_back(kiwix::lcFirst(title));
variants.push_back(kiwix::toTitle(title)); variants.push_back(kiwix::toTitle(title));
return variants; return variants;
} }
/* Try also a few variations of the prefix to have better results */ /* Try also a few variations of the prefix to have better results */
bool Reader::searchSuggestionsSmart(const string &prefix, unsigned int suggestionsCount) { bool Reader::searchSuggestionsSmart(const string& prefix,
unsigned int suggestionsCount)
{
std::vector<std::string> variants = this->getTitleVariants(prefix); std::vector<std::string> variants = this->getTitleVariants(prefix);
bool retVal; bool retVal;
@ -597,14 +681,16 @@ namespace kiwix {
for (std::vector<std::string>::iterator variantsItr = variants.begin(); for (std::vector<std::string>::iterator variantsItr = variants.begin();
variantsItr != variants.end(); variantsItr != variants.end();
variantsItr++) { variantsItr++) {
retVal = this->searchSuggestions(*variantsItr, suggestionsCount, false) || retVal; retVal = this->searchSuggestions(*variantsItr, suggestionsCount, false)
|| retVal;
} }
return retVal; return retVal;
} }
/* Get next suggestion */ /* Get next suggestion */
bool Reader::getNextSuggestion(string &title) { bool Reader::getNextSuggestion(string& title)
{
if (this->suggestionsOffset != this->suggestions.end()) { if (this->suggestionsOffset != this->suggestions.end()) {
/* title */ /* title */
title = (*(this->suggestionsOffset))[0]; title = (*(this->suggestionsOffset))[0];
@ -616,9 +702,10 @@ namespace kiwix {
} }
return false; return false;
} }
bool Reader::getNextSuggestion(string &title, string &url) { bool Reader::getNextSuggestion(string& title, string& url)
{
if (this->suggestionsOffset != this->suggestions.end()) { if (this->suggestionsOffset != this->suggestions.end()) {
/* title */ /* title */
title = (*(this->suggestionsOffset))[0]; title = (*(this->suggestionsOffset))[0];
@ -631,29 +718,33 @@ namespace kiwix {
} }
return false; return false;
} }
/* Check if the file has as checksum */ /* Check if the file has as checksum */
bool Reader::canCheckIntegrity() const { bool Reader::canCheckIntegrity() const
{
return this->zimFileHandler->getChecksum() != ""; return this->zimFileHandler->getChecksum() != "";
} }
/* Return true if corrupted, false otherwise */ /* Return true if corrupted, false otherwise */
bool Reader::isCorrupted() const { bool Reader::isCorrupted() const
{
try { try {
if (this->zimFileHandler->verify() == true) if (this->zimFileHandler->verify() == true) {
return false; return false;
} catch (exception &e) { }
} catch (exception& e) {
cerr << e.what() << endl; cerr << e.what() << endl;
return true; return true;
} }
return true; return true;
} }
/* Return the file size, works also for splitted files */ /* Return the file size, works also for splitted files */
unsigned int Reader::getFileSize() const { unsigned int Reader::getFileSize() const
zim::File *file = this->getZimFileHandler(); {
zim::File* file = this->getZimFileHandler();
zim::offset_type size = 0; zim::offset_type size = 0;
if (file != NULL) { if (file != NULL) {
@ -661,5 +752,5 @@ namespace kiwix {
} }
return (size / 1024); return (size / 1024);
} }
} }

View File

@ -18,9 +18,9 @@
*/ */
#include "searcher.h" #include "searcher.h"
#include "xapianSearcher.h"
#include "reader.h"
#include "kiwixlib-resources.h" #include "kiwixlib-resources.h"
#include "reader.h"
#include "xapianSearcher.h"
#include <zim/search.h> #include <zim/search.h>
@ -33,13 +33,13 @@
using namespace CTPP; using namespace CTPP;
#endif #endif
namespace kiwix
namespace kiwix { {
class _Result : public Result
class _Result : public Result { {
public: public:
_Result(Searcher* searcher, zim::Search::iterator& iterator); _Result(Searcher* searcher, zim::Search::iterator& iterator);
virtual ~_Result() {}; virtual ~_Result(){};
virtual std::string get_url(); virtual std::string get_url();
virtual std::string get_title(); virtual std::string get_title();
@ -51,30 +51,28 @@ namespace kiwix {
private: private:
Searcher* searcher; Searcher* searcher;
zim::Search::iterator iterator; zim::Search::iterator iterator;
}; };
struct SearcherInternal { struct SearcherInternal {
const zim::Search *_search; const zim::Search* _search;
XapianSearcher *_xapianSearcher; XapianSearcher* _xapianSearcher;
zim::Search::iterator current_iterator; zim::Search::iterator current_iterator;
SearcherInternal() : _search(NULL), _xapianSearcher(NULL) {}
SearcherInternal() : ~SearcherInternal()
_search(NULL), {
_xapianSearcher(NULL) if (_search != NULL) {
{}
~SearcherInternal() {
if ( _search != NULL )
delete _search; delete _search;
if ( _xapianSearcher != NULL ) }
if (_xapianSearcher != NULL) {
delete _xapianSearcher; delete _xapianSearcher;
} }
}
};
}; /* Constructor */
Searcher::Searcher(const string& xapianDirectoryPath, Reader* reader)
/* Constructor */ : reader(reader),
Searcher::Searcher(const string &xapianDirectoryPath, Reader* reader) :
reader(reader),
internal(new SearcherInternal()), internal(new SearcherInternal()),
searchPattern(""), searchPattern(""),
protocolPrefix("zim://"), protocolPrefix("zim://"),
@ -83,22 +81,25 @@ namespace kiwix {
estimatedResultCount(0), estimatedResultCount(0),
resultStart(0), resultStart(0),
resultEnd(0) resultEnd(0)
{ {
template_ct2 = RESOURCE::results_ct2; template_ct2 = RESOURCE::results_ct2;
loadICUExternalTables(); loadICUExternalTables();
if ( !reader || !reader->hasFulltextIndex() ) { if (!reader || !reader->hasFulltextIndex()) {
internal->_xapianSearcher = new XapianSearcher(xapianDirectoryPath, reader); internal->_xapianSearcher = new XapianSearcher(xapianDirectoryPath, reader);
} }
} }
/* Destructor: releases the private state allocated by the constructor. */
Searcher::~Searcher()
{
  delete this->internal;
}
/* Search strings in the database */
/* Search strings in the database */ void Searcher::search(std::string& search,
void Searcher::search(std::string &search, unsigned int resultStart, unsigned int resultStart,
unsigned int resultEnd, const bool verbose) { unsigned int resultEnd,
const bool verbose)
{
this->reset(); this->reset();
if (verbose == true) { if (verbose == true) {
@ -114,7 +115,6 @@ namespace kiwix {
/* Try to find results */ /* Try to find results */
if (resultStart != resultEnd) { if (resultStart != resultEnd) {
/* Avoid big researches */ /* Avoid big researches */
this->resultCountPerPage = resultEnd - resultStart; this->resultCountPerPage = resultEnd - resultStart;
if (this->resultCountPerPage > 70) { if (this->resultCountPerPage > 70) {
@ -127,29 +127,34 @@ namespace kiwix {
this->resultStart = resultStart; this->resultStart = resultStart;
this->resultEnd = resultEnd; this->resultEnd = resultEnd;
string unaccentedSearch = removeAccents(search); string unaccentedSearch = removeAccents(search);
if ( internal->_xapianSearcher ) { if (internal->_xapianSearcher) {
internal->_xapianSearcher->searchInIndex(unaccentedSearch, resultStart, resultEnd, verbose); internal->_xapianSearcher->searchInIndex(
this->estimatedResultCount = internal->_xapianSearcher->results.get_matches_estimated(); unaccentedSearch, resultStart, resultEnd, verbose);
this->estimatedResultCount
= internal->_xapianSearcher->results.get_matches_estimated();
} else { } else {
internal->_search = this->reader->getZimFileHandler()->search(unaccentedSearch, resultStart, resultEnd); internal->_search = this->reader->getZimFileHandler()->search(
unaccentedSearch, resultStart, resultEnd);
internal->current_iterator = internal->_search->begin(); internal->current_iterator = internal->_search->begin();
this->estimatedResultCount = internal->_search->get_matches_estimated(); this->estimatedResultCount = internal->_search->get_matches_estimated();
} }
} }
return; return;
} }
void Searcher::restart_search() { void Searcher::restart_search()
if ( internal->_xapianSearcher ) { {
if (internal->_xapianSearcher) {
internal->_xapianSearcher->restart_search(); internal->_xapianSearcher->restart_search();
} else { } else {
internal->current_iterator = internal->_search->begin(); internal->current_iterator = internal->_search->begin();
} }
} }
Result* Searcher::getNextResult() { Result* Searcher::getNextResult()
if ( internal->_xapianSearcher ) { {
if (internal->_xapianSearcher) {
return internal->_xapianSearcher->getNextResult(); return internal->_xapianSearcher->getNextResult();
} else if (internal->current_iterator != internal->_search->end()) { } else if (internal->current_iterator != internal->_search->end()) {
Result* result = new _Result(this, internal->current_iterator); Result* result = new _Result(this, internal->current_iterator);
@ -157,69 +162,72 @@ namespace kiwix {
return result; return result;
} }
return NULL; return NULL;
} }
/* Reset the results */
/* Reset the results */ void Searcher::reset()
void Searcher::reset() { {
this->estimatedResultCount = 0; this->estimatedResultCount = 0;
this->searchPattern = ""; this->searchPattern = "";
return; return;
} }
/* Return the result count estimation */ /* Return the result count estimation */
unsigned int Searcher::getEstimatedResultCount() { unsigned int Searcher::getEstimatedResultCount()
{
return this->estimatedResultCount; return this->estimatedResultCount;
} }
bool Searcher::setProtocolPrefix(const std::string prefix) { bool Searcher::setProtocolPrefix(const std::string prefix)
{
this->protocolPrefix = prefix; this->protocolPrefix = prefix;
return true; return true;
} }
bool Searcher::setSearchProtocolPrefix(const std::string prefix) { bool Searcher::setSearchProtocolPrefix(const std::string prefix)
{
this->searchProtocolPrefix = prefix; this->searchProtocolPrefix = prefix;
return true; return true;
} }
void Searcher::setContentHumanReadableId(const string &contentHumanReadableId) { void Searcher::setContentHumanReadableId(const string& contentHumanReadableId)
{
this->contentHumanReadableId = contentHumanReadableId; this->contentHumanReadableId = contentHumanReadableId;
} }
/* Remember the searcher that produced the result and keep a copy of the
 * iterator positioned on it. */
_Result::_Result(Searcher* searcher, zim::Search::iterator& iterator)
    : searcher(searcher),
      iterator(iterator)
{
}
std::string _Result::get_url() { std::string _Result::get_url()
{
return iterator.get_url(); return iterator.get_url();
} }
std::string _Result::get_title()
std::string _Result::get_title() { {
return iterator.get_title(); return iterator.get_title();
} }
int _Result::get_score()
int _Result::get_score() { {
return iterator.get_score(); return iterator.get_score();
} }
std::string _Result::get_snippet()
std::string _Result::get_snippet() { {
return iterator.get_snippet(); return iterator.get_snippet();
} }
int _Result::get_size()
int _Result::get_size() { {
return iterator.get_size(); return iterator.get_size();
} }
int _Result::get_wordCount()
int _Result::get_wordCount() { {
return iterator.get_wordCount(); return iterator.get_wordCount();
} }
#ifdef ENABLE_CTPP2 #ifdef ENABLE_CTPP2
string Searcher::getHtml() { string Searcher::getHtml()
{
SimpleVM oSimpleVM; SimpleVM oSimpleVM;
// Fill data // Fill data
@ -227,18 +235,20 @@ namespace kiwix {
CDT resultsCDT(CDT::ARRAY_VAL); CDT resultsCDT(CDT::ARRAY_VAL);
this->restart_search(); this->restart_search();
Result * p_result = NULL; Result* p_result = NULL;
while ( (p_result = this->getNextResult()) ) { while ((p_result = this->getNextResult())) {
CDT result; CDT result;
result["title"] = p_result->get_title(); result["title"] = p_result->get_title();
result["url"] = p_result->get_url(); result["url"] = p_result->get_url();
result["snippet"] = p_result->get_snippet(); result["snippet"] = p_result->get_snippet();
if (p_result->get_size() >= 0) if (p_result->get_size() >= 0) {
result["size"] = kiwix::beautifyInteger(p_result->get_size()); result["size"] = kiwix::beautifyInteger(p_result->get_size());
}
if (p_result->get_wordCount() >= 0) if (p_result->get_wordCount() >= 0) {
result["wordCount"] = kiwix::beautifyInteger(p_result->get_wordCount()); result["wordCount"] = kiwix::beautifyInteger(p_result->get_wordCount());
}
resultsCDT.PushBack(result); resultsCDT.PushBack(result);
delete p_result; delete p_result;
@ -249,22 +259,28 @@ namespace kiwix {
// pages // pages
CDT pagesCDT(CDT::ARRAY_VAL); CDT pagesCDT(CDT::ARRAY_VAL);
unsigned int pageStart = this->resultStart / this->resultCountPerPage >= 5 ? this->resultStart / this->resultCountPerPage - 4 : 0; unsigned int pageStart
unsigned int pageCount = this->estimatedResultCount / this->resultCountPerPage + 1 - pageStart; = this->resultStart / this->resultCountPerPage >= 5
? this->resultStart / this->resultCountPerPage - 4
: 0;
unsigned int pageCount
= this->estimatedResultCount / this->resultCountPerPage + 1 - pageStart;
if (pageCount > 10) if (pageCount > 10) {
pageCount = 10; pageCount = 10;
else if (pageCount == 1) } else if (pageCount == 1) {
pageCount = 0; pageCount = 0;
}
for (unsigned int i=pageStart; i<pageStart+pageCount; i++) { for (unsigned int i = pageStart; i < pageStart + pageCount; i++) {
CDT page; CDT page;
page["label"] = i + 1; page["label"] = i + 1;
page["start"] = i * this->resultCountPerPage; page["start"] = i * this->resultCountPerPage;
page["end"] = (i+1) * this->resultCountPerPage; page["end"] = (i + 1) * this->resultCountPerPage;
if (i * this->resultCountPerPage == this->resultStart) if (i * this->resultCountPerPage == this->resultStart) {
page["selected"] = true; page["selected"] = true;
}
pagesCDT.PushBack(page); pagesCDT.PushBack(page);
} }
@ -274,9 +290,14 @@ namespace kiwix {
oData["searchPattern"] = kiwix::encodeDiples(this->searchPattern); oData["searchPattern"] = kiwix::encodeDiples(this->searchPattern);
oData["searchPatternEncoded"] = urlEncode(this->searchPattern); oData["searchPatternEncoded"] = urlEncode(this->searchPattern);
oData["resultStart"] = this->resultStart + 1; oData["resultStart"] = this->resultStart + 1;
oData["resultEnd"] = (this->resultEnd > this->estimatedResultCount ? this->estimatedResultCount : this->resultEnd); oData["resultEnd"] = (this->resultEnd > this->estimatedResultCount
? this->estimatedResultCount
: this->resultEnd);
oData["resultRange"] = this->resultCountPerPage; oData["resultRange"] = this->resultCountPerPage;
oData["resultLastPageStart"] = this->estimatedResultCount > this->resultCountPerPage ? this->estimatedResultCount - this->resultCountPerPage : 0; oData["resultLastPageStart"]
= this->estimatedResultCount > this->resultCountPerPage
? this->estimatedResultCount - this->resultCountPerPage
: 0;
oData["protocolPrefix"] = this->protocolPrefix; oData["protocolPrefix"] = this->protocolPrefix;
oData["searchProtocolPrefix"] = this->searchProtocolPrefix; oData["searchProtocolPrefix"] = this->searchProtocolPrefix;
oData["contentId"] = this->contentHumanReadableId; oData["contentId"] = this->contentHumanReadableId;
@ -292,9 +313,7 @@ namespace kiwix {
oSimpleVM.Run(oData, oLoader, sResult, oLogger); oSimpleVM.Run(oData, oLoader, sResult, oLogger);
return sResult; return sResult;
}
}
#endif #endif
} }

View File

@ -18,58 +18,61 @@
*/ */
#include "xapianSearcher.h" #include "xapianSearcher.h"
#include "xapian/myhtmlparse.h" #include <sys/types.h>
#include <zim/zim.h> #include <unicode/locid.h>
#include <zim/file.h> #include <unistd.h>
#include <zim/article.h> #include <zim/article.h>
#include <zim/error.h> #include <zim/error.h>
#include <sys/types.h> #include <zim/file.h>
#include <unistd.h> #include <zim/zim.h>
#include <unicode/locid.h> #include "xapian/myhtmlparse.h"
#include <vector> #include <vector>
namespace kiwix { namespace kiwix
{
std::map<std::string, int> read_valuesmap(const std::string &s) { std::map<std::string, int> read_valuesmap(const std::string& s)
{
std::map<std::string, int> result; std::map<std::string, int> result;
std::vector<std::string> elems = split(s, ";"); std::vector<std::string> elems = split(s, ";");
for(std::vector<std::string>::iterator elem = elems.begin(); for (std::vector<std::string>::iterator elem = elems.begin();
elem != elems.end(); elem != elems.end();
elem++) elem++) {
{
std::vector<std::string> tmp_elems = split(*elem, ":"); std::vector<std::string> tmp_elems = split(*elem, ":");
result.insert( std::pair<std::string, int>(tmp_elems[0], atoi(tmp_elems[1].c_str())) ); result.insert(
std::pair<std::string, int>(tmp_elems[0], atoi(tmp_elems[1].c_str())));
} }
return result; return result;
} }
/* Constructor: keep the reader and open the Xapian index stored at
 * xapianDirectoryPath. */
XapianSearcher::XapianSearcher(const string& xapianDirectoryPath,
                               Reader* reader)
    : reader(reader)
{
  openIndex(xapianDirectoryPath);
}
/* Open Xapian readable database */ /* Open Xapian readable database */
void XapianSearcher::openIndex(const string &directoryPath) { void XapianSearcher::openIndex(const string& directoryPath)
{
this->readableDatabase = Xapian::Database(directoryPath); this->readableDatabase = Xapian::Database(directoryPath);
this->valuesmap = read_valuesmap(this->readableDatabase.get_metadata("valuesmap")); this->valuesmap
= read_valuesmap(this->readableDatabase.get_metadata("valuesmap"));
this->language = this->readableDatabase.get_metadata("language"); this->language = this->readableDatabase.get_metadata("language");
this->stopwords = this->readableDatabase.get_metadata("stopwords"); this->stopwords = this->readableDatabase.get_metadata("stopwords");
setup_queryParser(); setup_queryParser();
} }
/* Close the index. Currently a no-op: nothing is released here. */
void XapianSearcher::closeIndex()
{
}
void XapianSearcher::setup_queryParser()
void XapianSearcher::setup_queryParser() {
{
queryParser.set_database(readableDatabase); queryParser.set_database(readableDatabase);
if ( ! language.empty() ) if (!language.empty()) {
{
/* Build ICU Local object to retrieve ISO-639 language code (from /* Build ICU Local object to retrieve ISO-639 language code (from
ISO-639-3) */ ISO-639-3) */
icu::Locale languageLocale(language.c_str()); icu::Locale languageLocale(language.c_str());
@ -80,12 +83,12 @@ std::map<std::string, int> read_valuesmap(const std::string &s) {
queryParser.set_stemmer(stemmer); queryParser.set_stemmer(stemmer);
queryParser.set_stemming_strategy(Xapian::QueryParser::STEM_ALL); queryParser.set_stemming_strategy(Xapian::QueryParser::STEM_ALL);
} catch (...) { } catch (...) {
std::cout << "No steemming for language '" << languageLocale.getLanguage() << "'" << std::endl; std::cout << "No steemming for language '" << languageLocale.getLanguage()
<< "'" << std::endl;
} }
} }
if ( ! stopwords.empty() ) if (!stopwords.empty()) {
{
std::string stopWord; std::string stopWord;
std::istringstream file(this->stopwords); std::istringstream file(this->stopwords);
while (std::getline(file, stopWord, '\n')) { while (std::getline(file, stopWord, '\n')) {
@ -93,11 +96,14 @@ std::map<std::string, int> read_valuesmap(const std::string &s) {
} }
queryParser.set_stopper(&(this->stopper)); queryParser.set_stopper(&(this->stopper));
} }
} }
/* Search strings in the database */ /* Search strings in the database */
void XapianSearcher::searchInIndex(string &search, const unsigned int resultStart, void XapianSearcher::searchInIndex(string& search,
const unsigned int resultEnd, const bool verbose) { const unsigned int resultStart,
const unsigned int resultEnd,
const bool verbose)
{
/* Create the query */ /* Create the query */
Xapian::Query query = queryParser.parse_query(search); Xapian::Query query = queryParser.parse_query(search);
@ -108,66 +114,65 @@ std::map<std::string, int> read_valuesmap(const std::string &s) {
/* Get the results */ /* Get the results */
this->results = enquire.get_mset(resultStart, resultEnd - resultStart); this->results = enquire.get_mset(resultStart, resultEnd - resultStart);
this->current_result = this->results.begin(); this->current_result = this->results.begin();
} }
/* Get next result: returns a newly allocated XapianResult for the current
 * position and advances the cursor, or NULL once every result has been
 * returned. The object is created with new and not kept here, so the
 * caller has to delete it. */
Result* XapianSearcher::getNextResult()
{
  if (this->current_result == this->results.end()) {
    return NULL;
  }

  XapianResult* next = new XapianResult(this, this->current_result);
  ++this->current_result;
  return next;
}
void XapianSearcher::restart_search() { void XapianSearcher::restart_search()
{
this->current_result = this->results.begin(); this->current_result = this->results.begin();
} }
/* Keep the searcher, the iterator positioned on the match, and the
 * document that iterator refers to. */
XapianResult::XapianResult(XapianSearcher* searcher,
                           Xapian::MSetIterator& iterator)
    : searcher(searcher),
      iterator(iterator),
      document(iterator.get_document())
{
}
std::string XapianResult::get_url() { std::string XapianResult::get_url()
{
return document.get_data(); return document.get_data();
} }
std::string XapianResult::get_title()
std::string XapianResult::get_title() { {
if ( searcher->valuesmap.empty() ) if (searcher->valuesmap.empty()) {
{
/* This is the old legacy version. Guess and try */ /* This is the old legacy version. Guess and try */
return document.get_value(0); return document.get_value(0);
} } else if (searcher->valuesmap.find("title") != searcher->valuesmap.end()) {
else if ( searcher->valuesmap.find("title") != searcher->valuesmap.end() )
{
return document.get_value(searcher->valuesmap["title"]); return document.get_value(searcher->valuesmap["title"]);
} }
return ""; return "";
} }
int XapianResult::get_score() { int XapianResult::get_score()
{
return iterator.get_percent(); return iterator.get_percent();
} }
std::string XapianResult::get_snippet()
std::string XapianResult::get_snippet() { {
if ( searcher->valuesmap.empty() ) if (searcher->valuesmap.empty()) {
{
/* This is the old legacy version. Guess and try */ /* This is the old legacy version. Guess and try */
std::string stored_snippet = document.get_value(1); std::string stored_snippet = document.get_value(1);
if ( ! stored_snippet.empty() ) if (!stored_snippet.empty()) {
return stored_snippet; return stored_snippet;
/* Let's continue here, and see if we can genenate one */
} }
else if ( searcher->valuesmap.find("snippet") != searcher->valuesmap.end() ) /* Let's continue here, and see if we can genenate one */
{ } else if (searcher->valuesmap.find("snippet") != searcher->valuesmap.end()) {
return document.get_value(searcher->valuesmap["snippet"]); return document.get_value(searcher->valuesmap["snippet"]);
} }
/* No reader, no snippet */ /* No reader, no snippet */
if ( ! searcher->reader ) if (!searcher->reader) {
return ""; return "";
}
/* Get the content of the article to generate a snippet. /* Get the content of the article to generate a snippet.
We parse it and use the html dump to avoid remove html tags in the We parse it and use the html dump to avoid remove html tags in the
content and be able to nicely cut the text at random place. */ content and be able to nicely cut the text at random place. */
@ -175,39 +180,42 @@ std::map<std::string, int> read_valuesmap(const std::string &s) {
std::string content; std::string content;
unsigned int contentLength; unsigned int contentLength;
std::string contentType; std::string contentType;
searcher->reader->getContentByUrl(get_url(), content, contentLength, contentType); searcher->reader->getContentByUrl(
get_url(), content, contentLength, contentType);
try { try {
htmlParser.parse_html(content, "UTF-8", true); htmlParser.parse_html(content, "UTF-8", true);
} catch (...) {} } catch (...) {
}
return searcher->results.snippet(htmlParser.dump, 500); return searcher->results.snippet(htmlParser.dump, 500);
} }
int XapianResult::get_size() { int XapianResult::get_size()
if ( searcher->valuesmap.empty() ) {
{ if (searcher->valuesmap.empty()) {
/* This is the old legacy version. Guess and try */ /* This is the old legacy version. Guess and try */
return document.get_value(2).empty() == true ? -1 : atoi(document.get_value(2).c_str()); return document.get_value(2).empty() == true
} ? -1
else if ( searcher->valuesmap.find("size") != searcher->valuesmap.end() ) : atoi(document.get_value(2).c_str());
{ } else if (searcher->valuesmap.find("size") != searcher->valuesmap.end()) {
return atoi(document.get_value(searcher->valuesmap["size"]).c_str()); return atoi(document.get_value(searcher->valuesmap["size"]).c_str());
} }
/* The size is never used. Do we really want to get the content and /* The size is never used. Do we really want to get the content and
calculate the size ? */ calculate the size ? */
return -1; return -1;
} }
int XapianResult::get_wordCount() { int XapianResult::get_wordCount()
if ( searcher->valuesmap.empty() ) {
{ if (searcher->valuesmap.empty()) {
/* This is the old legacy version. Guess and try */ /* This is the old legacy version. Guess and try */
return document.get_value(3).empty() == true ? -1 : atoi(document.get_value(3).c_str()); return document.get_value(3).empty() == true
} ? -1
else if ( searcher->valuesmap.find("wordcount") != searcher->valuesmap.end() ) : atoi(document.get_value(3).c_str());
{ } else if (searcher->valuesmap.find("wordcount")
!= searcher->valuesmap.end()) {
return atoi(document.get_value(searcher->valuesmap["wordcount"]).c_str()); return atoi(document.get_value(searcher->valuesmap["wordcount"]).c_str());
} }
return -1; return -1;
} }
} // Kiwix namespace } // Kiwix namespace