From f76e9d2dbf39f75c7116e3002107aaa6d6199ad6 Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Wed, 5 Jul 2017 15:21:57 +0200 Subject: [PATCH] Format all the code using clang-format. Add a script `format_code.sh` to easily format the code. --- .clang-format | 12 + format_code.sh | 36 + include/common/networkTools.h | 17 +- include/common/otherTools.h | 5 +- include/common/pathTools.h | 43 +- include/common/regexTools.h | 12 +- include/common/stringTools.h | 52 +- include/kiwix.h | 1 - include/library.h | 129 +- include/manager.h | 114 +- include/reader.h | 154 ++- include/searcher.h | 106 +- include/xapianSearcher.h | 102 +- src/android/kiwix.cpp | 229 +-- src/android/org/kiwix/kiwixlib/JNIKiwix.java | 13 +- .../org/kiwix/kiwixlib/JNIKiwixBool.java | 4 +- .../org/kiwix/kiwixlib/JNIKiwixInt.java | 5 +- .../org/kiwix/kiwixlib/JNIKiwixString.java | 4 +- src/common/networkTools.cpp | 99 +- src/common/otherTools.cpp | 7 +- src/common/pathTools.cpp | 110 +- src/common/regexTools.cpp | 33 +- src/common/stringTools.cpp | 207 +-- src/library.cpp | 227 +-- src/manager.cpp | 1078 ++++++++------- src/reader.cpp | 1229 +++++++++-------- src/searcher.cpp | 473 ++++--- src/xapianSearcher.cpp | 336 ++--- 28 files changed, 2647 insertions(+), 2190 deletions(-) create mode 100644 .clang-format create mode 100755 format_code.sh diff --git a/.clang-format b/.clang-format new file mode 100644 index 000000000..d5bd238c4 --- /dev/null +++ b/.clang-format @@ -0,0 +1,12 @@ +BasedOnStyle: Google +BinPackArguments: false +BinPackParameters: false +BreakBeforeBinaryOperators: All +BreakBeforeBraces: Linux +DerivePointerAlignment: false +SpacesInContainerLiterals: false +Standard: Cpp11 + +AllowShortFunctionsOnASingleLine: Inline +AllowShortIfStatementsOnASingleLine: false +AllowShortLoopsOnASingleLine: false diff --git a/format_code.sh b/format_code.sh new file mode 100755 index 000000000..b844eec8c --- /dev/null +++ b/format_code.sh @@ -0,0 +1,36 @@ +#!/usr/bin/bash + +files=( 
+"include/library.h" +"include/common/stringTools.h" +"include/common/pathTools.h" +"include/common/otherTools.h" +"include/common/regexTools.h" +"include/common/networkTools.h" +"include/manager.h" +"include/reader.h" +"include/kiwix.h" +"include/xapianSearcher.h" +"include/searcher.h" +"src/library.cpp" +"src/android/kiwix.cpp" +"src/android/org/kiwix/kiwixlib/JNIKiwixBool.java" +"src/android/org/kiwix/kiwixlib/JNIKiwix.java" +"src/android/org/kiwix/kiwixlib/JNIKiwixString.java" +"src/android/org/kiwix/kiwixlib/JNIKiwixInt.java" +"src/searcher.cpp" +"src/common/pathTools.cpp" +"src/common/regexTools.cpp" +"src/common/otherTools.cpp" +"src/common/networkTools.cpp" +"src/common/stringTools.cpp" +"src/xapianSearcher.cpp" +"src/manager.cpp" +"src/reader.cpp" +) + +for i in "${files[@]}" +do + echo $i + clang-format -i -style=file $i +done diff --git a/include/common/networkTools.h b/include/common/networkTools.h index 8bf700078..6ab2e1e56 100644 --- a/include/common/networkTools.h +++ b/include/common/networkTools.h @@ -24,25 +24,26 @@ #include #include #else +#include +#include #include #include #include -#include #include #include #include -#include -#include +#include #endif #include -#include -#include #include +#include +#include -namespace kiwix { - std::map getNetworkInterfaces(); - std::string getBestPublicIp(); +namespace kiwix +{ +std::map getNetworkInterfaces(); +std::string getBestPublicIp(); } #endif diff --git a/include/common/otherTools.h b/include/common/otherTools.h index 6911049d8..9b3102f9b 100644 --- a/include/common/otherTools.h +++ b/include/common/otherTools.h @@ -26,8 +26,9 @@ #include #endif -namespace kiwix { - void sleep(unsigned int milliseconds); +namespace kiwix +{ +void sleep(unsigned int milliseconds); } #endif diff --git a/include/common/pathTools.h b/include/common/pathTools.h index 6e87e5af4..82f41e737 100644 --- a/include/common/pathTools.h +++ b/include/common/pathTools.h @@ -20,18 +20,18 @@ #ifndef KIWIX_PATHTOOLS_H #define 
KIWIX_PATHTOOLS_H +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #ifdef _WIN32 #include @@ -41,20 +41,21 @@ using namespace std; -bool isRelativePath(const string &path); +bool isRelativePath(const string& path); string computeAbsolutePath(const string path, const string relativePath); string computeRelativePath(const string path, const string absolutePath); -string removeLastPathElement(const string path, const bool removePreSeparator = false, - const bool removePostSeparator = false); -string appendToDirectory(const string &directoryPath, const string &filename); +string removeLastPathElement(const string path, + const bool removePreSeparator = false, + const bool removePostSeparator = false); +string appendToDirectory(const string& directoryPath, const string& filename); -unsigned int getFileSize(const string &path); -string getFileSizeAsString(const string &path); -bool fileExists(const string &path); -bool makeDirectory(const string &path); -bool copyFile(const string &sourcePath, const string &destPath); -string getLastPathElement(const string &path); +unsigned int getFileSize(const string& path); +string getFileSizeAsString(const string& path); +bool fileExists(const string& path); +bool makeDirectory(const string& path); +bool copyFile(const string& sourcePath, const string& destPath); +string getLastPathElement(const string& path); string getExecutablePath(); string getCurrentDirectory(); -bool writeTextFile(const string &path, const string &content); +bool writeTextFile(const string& path, const string& content); #endif diff --git a/include/common/regexTools.h b/include/common/regexTools.h index d87d3d28c..cff6aaf98 100644 --- a/include/common/regexTools.h +++ b/include/common/regexTools.h @@ -22,11 +22,15 @@ #include #include -#include #include +#include -bool matchRegex(const 
std::string &content, const std::string &regex); -std::string replaceRegex(const std::string &content, const std::string &replacement, const std::string &regex); -std::string appendToFirstOccurence(const std::string &content, const std::string regex, const std::string &replacement); +bool matchRegex(const std::string& content, const std::string& regex); +std::string replaceRegex(const std::string& content, + const std::string& replacement, + const std::string& regex); +std::string appendToFirstOccurence(const std::string& content, + const std::string regex, + const std::string& replacement); #endif diff --git a/include/common/stringTools.h b/include/common/stringTools.h index 179bfa02a..1f1449bf6 100644 --- a/include/common/stringTools.h +++ b/include/common/stringTools.h @@ -22,44 +22,46 @@ #include -#include -#include -#include #include +#include #include +#include +#include #include "pathTools.h" -namespace kiwix { - +namespace kiwix +{ #ifndef __ANDROID__ - std::string beautifyInteger(const unsigned int number); - std::string beautifyFileSize(const unsigned int number); - std::string urlEncode(const std::string &c); - void printStringInHexadecimal(const char *s); - void printStringInHexadecimal(UnicodeString s); - void stringReplacement(std::string& str, const std::string& oldStr, const std::string& newStr); - std::string encodeDiples(const std::string& str); +std::string beautifyInteger(const unsigned int number); +std::string beautifyFileSize(const unsigned int number); +std::string urlEncode(const std::string& c); +void printStringInHexadecimal(const char* s); +void printStringInHexadecimal(UnicodeString s); +void stringReplacement(std::string& str, + const std::string& oldStr, + const std::string& newStr); +std::string encodeDiples(const std::string& str); #endif - std::string removeAccents(const std::string &text); - void loadICUExternalTables(); - std::string urlDecode(const std::string &c); +std::string removeAccents(const std::string& text); +void 
loadICUExternalTables(); +std::string urlDecode(const std::string& c); - std::vector split(const std::string&, const std::string&); - std::vector split(const char*, const char*); - std::vector split(const std::string&, const char*); - std::vector split(const char*, const std::string&); +std::vector split(const std::string&, const std::string&); +std::vector split(const char*, const char*); +std::vector split(const std::string&, const char*); +std::vector split(const char*, const std::string&); - std::string ucAll(const std::string &word); - std::string lcAll(const std::string &word); - std::string ucFirst(const std::string &word); - std::string lcFirst(const std::string &word); - std::string toTitle(const std::string &word); +std::string ucAll(const std::string& word); +std::string lcAll(const std::string& word); +std::string ucFirst(const std::string& word); +std::string lcFirst(const std::string& word); +std::string toTitle(const std::string& word); - std::string normalize(const std::string &word); +std::string normalize(const std::string& word); } #endif diff --git a/include/kiwix.h b/include/kiwix.h index a83c3a9c5..16cd27b4a 100644 --- a/include/kiwix.h +++ b/include/kiwix.h @@ -22,5 +22,4 @@ #include "library.h" - #endif \ No newline at end of file diff --git a/include/library.h b/include/library.h index 1479fb690..90b443d7c 100644 --- a/include/library.h +++ b/include/library.h @@ -22,86 +22,85 @@ #include #include -#include #include -#include #include +#include +#include -#include "common/stringTools.h" #include "common/regexTools.h" +#include "common/stringTools.h" #define KIWIX_LIBRARY_VERSION "20110515" using namespace std; -namespace kiwix { +namespace kiwix +{ +enum supportedIndexType { UNKNOWN, XAPIAN }; - enum supportedIndexType { UNKNOWN, XAPIAN }; +class Book +{ + public: + Book(); + ~Book(); - class Book { + static bool sortByLastOpen(const Book& a, const Book& b); + static bool sortByTitle(const Book& a, const Book& b); + static bool 
sortBySize(const Book& a, const Book& b); + static bool sortByDate(const Book& a, const Book& b); + static bool sortByCreator(const Book& a, const Book& b); + static bool sortByPublisher(const Book& a, const Book& b); + static bool sortByLanguage(const Book& a, const Book& b); + string getHumanReadableIdFromPath(); - public: - Book(); - ~Book(); + string id; + string path; + string pathAbsolute; + string last; + string indexPath; + string indexPathAbsolute; + supportedIndexType indexType; + string title; + string description; + string language; + string creator; + string publisher; + string date; + string url; + string name; + string tags; + string origId; + string articleCount; + string mediaCount; + bool readOnly; + string size; + string favicon; + string faviconMimeType; +}; - static bool sortByLastOpen(const Book &a, const Book &b); - static bool sortByTitle(const Book &a, const Book &b); - static bool sortBySize(const Book &a, const Book &b); - static bool sortByDate(const Book &a, const Book &b); - static bool sortByCreator(const Book &a, const Book &b); - static bool sortByPublisher(const Book &a, const Book &b); - static bool sortByLanguage(const Book &a, const Book &b); - string getHumanReadableIdFromPath(); +class Library +{ + public: + Library(); + ~Library(); - string id; - string path; - string pathAbsolute; - string last; - string indexPath; - string indexPathAbsolute; - supportedIndexType indexType; - string title; - string description; - string language; - string creator; - string publisher; - string date; - string url; - string name; - string tags; - string origId; - string articleCount; - string mediaCount; - bool readOnly; - string size; - string favicon; - string faviconMimeType; - }; - - class Library { - - public: - Library(); - ~Library(); - - string version; - bool addBook(const Book &book); - bool removeBookByIndex(const unsigned int bookIndex); - vector books; - - /* - * 'current' is the variable storing the current content/book id - * in 
the library. This is used to be able to load per default a - * content. As Kiwix may work with many library XML files, you may - * have "current" defined many time with different values. The - * last XML file read has the priority, Although we do not have an - * library object for each file, we want to be able to fallback to - * an 'old' current book if the one which should be load - * failed. That is the reason why we need a stack here - */ - stack current; - }; + string version; + bool addBook(const Book& book); + bool removeBookByIndex(const unsigned int bookIndex); + vector books; + /* + * 'current' is the variable storing the current content/book id + * in the library. This is used to be able to load per default a + * content. As Kiwix may work with many library XML files, you may + * have "current" defined many time with different values. The + * last XML file read has the priority, Although we do not have an + * library object for each file, we want to be able to fallback to + * an 'old' current book if the one which should be load + * failed. 
That is the reason why we need a stack here + */ + stack current; +}; } #endif diff --git a/include/manager.h b/include/manager.h index 5c2a87ba4..ef1b357e2 100644 --- a/include/manager.h +++ b/include/manager.h @@ -20,73 +20,89 @@ #ifndef KIWIX_MANAGER_H #define KIWIX_MANAGER_H -#include -#include #include +#include +#include #include #include "common/base64.h" -#include "common/regexTools.h" #include "common/pathTools.h" +#include "common/regexTools.h" #include "library.h" #include "reader.h" using namespace std; -namespace kiwix { +namespace kiwix +{ +enum supportedListMode { LASTOPEN, REMOTE, LOCAL }; +enum supportedListSortBy { TITLE, SIZE, DATE, CREATOR, PUBLISHER }; - enum supportedListMode { LASTOPEN, REMOTE, LOCAL }; - enum supportedListSortBy { TITLE, SIZE, DATE, CREATOR, PUBLISHER }; +class Manager +{ + public: + Manager(); + ~Manager(); - class Manager { + bool readFile(const string path, const bool readOnly = true); + bool readFile(const string nativePath, + const string UTF8Path, + const bool readOnly = true); + bool readXml(const string xml, + const bool readOnly = true, + const string libraryPath = ""); + bool writeFile(const string path); + bool removeBookByIndex(const unsigned int bookIndex); + bool removeBookById(const string id); + bool setCurrentBookId(const string id); + string getCurrentBookId(); + bool setBookIndex(const string id, + const string path, + const supportedIndexType type); + bool setBookIndex(const string id, const string path); + bool setBookPath(const string id, const string path); + string addBookFromPathAndGetId(const string pathToOpen, + const string pathToSave = "", + const string url = "", + const bool checkMetaData = false); + bool addBookFromPath(const string pathToOpen, + const string pathToSave = "", + const string url = "", + const bool checkMetaData = false); + Library cloneLibrary(); + bool getBookById(const string id, Book& book); + bool getCurrentBook(Book& book); + unsigned int getBookCount(const bool 
localBooks, const bool remoteBooks); + bool updateBookLastOpenDateById(const string id); + void removeBookPaths(); + bool listBooks(const supportedListMode mode, + const supportedListSortBy sortBy, + const unsigned int maxSize, + const string language, + const string creator, + const string publisher, + const string search); + vector getBooksLanguages(); + vector getBooksCreators(); + vector getBooksPublishers(); + vector getBooksIds(); - public: - Manager(); - ~Manager(); + string writableLibraryPath; - bool readFile(const string path, const bool readOnly = true); - bool readFile(const string nativePath, const string UTF8Path, const bool readOnly = true); - bool readXml(const string xml, const bool readOnly = true, const string libraryPath = ""); - bool writeFile(const string path); - bool removeBookByIndex(const unsigned int bookIndex); - bool removeBookById(const string id); - bool setCurrentBookId(const string id); - string getCurrentBookId(); - bool setBookIndex(const string id, const string path, const supportedIndexType type); - bool setBookIndex(const string id, const string path); - bool setBookPath(const string id, const string path); - string addBookFromPathAndGetId(const string pathToOpen, const string pathToSave = "", const string url = "", - const bool checkMetaData = false); - bool addBookFromPath(const string pathToOpen, const string pathToSave = "", const string url = "", - const bool checkMetaData = false); - Library cloneLibrary(); - bool getBookById(const string id, Book &book); - bool getCurrentBook(Book &book); - unsigned int getBookCount(const bool localBooks, const bool remoteBooks); - bool updateBookLastOpenDateById(const string id); - void removeBookPaths(); - bool listBooks(const supportedListMode mode, const supportedListSortBy sortBy, const unsigned int maxSize, - const string language, const string creator, const string publisher, const string search); - vector getBooksLanguages(); - vector getBooksCreators(); - vector 
getBooksPublishers(); - vector getBooksIds(); + vector bookIdList; - string writableLibraryPath; + protected: + kiwix::Library library; - vector bookIdList; - - protected: - kiwix::Library library; - - bool readBookFromPath(const string path, Book *book = NULL); - bool parseXmlDom(const pugi::xml_document &doc, const bool readOnly, const string libraryPath); - - private: - void checkAndCleanBookPaths(Book &book, const string &libraryPath); - }; + bool readBookFromPath(const string path, Book* book = NULL); + bool parseXmlDom(const pugi::xml_document& doc, + const bool readOnly, + const string libraryPath); + private: + void checkAndCleanBookPaths(Book& book, const string& libraryPath); +}; } #endif diff --git a/include/reader.h b/include/reader.h index ccc2ef4c3..9e0609e79 100644 --- a/include/reader.h +++ b/include/reader.h @@ -20,85 +20,105 @@ #ifndef KIWIX_READER_H #define KIWIX_READER_H -#include -#include -#include -#include #include -#include +#include +#include +#include +#include #include -#include #include +#include +#include #include "common/pathTools.h" #include "common/stringTools.h" using namespace std; -namespace kiwix { +namespace kiwix +{ +class Reader +{ + public: + Reader(const string zimFilePath); + ~Reader(); - class Reader { + void reset(); + unsigned int getArticleCount() const; + unsigned int getMediaCount() const; + unsigned int getGlobalCount() const; + string getZimFilePath() const; + string getId() const; + string getRandomPageUrl() const; + string getFirstPageUrl() const; + string getMainPageUrl() const; + bool getMetatag(const string& url, string& content) const; + string getTitle() const; + string getDescription() const; + string getLanguage() const; + string getName() const; + string getTags() const; + string getDate() const; + string getCreator() const; + string getPublisher() const; + string getOrigId() const; + bool getFavicon(string& content, string& mimeType) const; + bool getPageUrlFromTitle(const string& title, string& url) 
const; + bool getMimeTypeByUrl(const string& url, string& mimeType) const; + bool getContentByUrl(const string& url, + string& content, + unsigned int& contentLength, + string& contentType) const; + bool getContentByEncodedUrl(const string& url, + string& content, + unsigned int& contentLength, + string& contentType, + string& baseUrl) const; + bool getContentByEncodedUrl(const string& url, + string& content, + unsigned int& contentLength, + string& contentType) const; + bool getContentByDecodedUrl(const string& url, + string& content, + unsigned int& contentLength, + string& contentType, + string& baseUrl) const; + bool getContentByDecodedUrl(const string& url, + string& content, + unsigned int& contentLength, + string& contentType) const; + bool searchSuggestions(const string& prefix, + unsigned int suggestionsCount, + const bool reset = true); + bool searchSuggestionsSmart(const string& prefix, + unsigned int suggestionsCount); + bool urlExists(const string& url) const; + bool hasFulltextIndex() const; + std::vector getTitleVariants(const std::string& title) const; + bool getNextSuggestion(string& title); + bool getNextSuggestion(string& title, string& url); + bool canCheckIntegrity() const; + bool isCorrupted() const; + bool parseUrl(const string& url, char* ns, string& title) const; + unsigned int getFileSize() const; + zim::File* getZimFileHandler() const; + bool getArticleObjectByDecodedUrl(const string& url, + zim::Article& article) const; - public: - Reader(const string zimFilePath); - ~Reader(); + protected: + zim::File* zimFileHandler; + zim::size_type firstArticleOffset; + zim::size_type lastArticleOffset; + zim::size_type currentArticleOffset; + zim::size_type nsACount; + zim::size_type nsICount; + std::string zimFilePath; - void reset(); - unsigned int getArticleCount() const; - unsigned int getMediaCount() const; - unsigned int getGlobalCount() const; - string getZimFilePath() const; - string getId() const; - string getRandomPageUrl() const; - string 
getFirstPageUrl() const; - string getMainPageUrl() const; - bool getMetatag(const string &url, string &content) const; - string getTitle() const; - string getDescription() const; - string getLanguage() const; - string getName() const; - string getTags() const; - string getDate() const; - string getCreator() const; - string getPublisher() const; - string getOrigId() const; - bool getFavicon(string &content, string &mimeType) const; - bool getPageUrlFromTitle(const string &title, string &url) const; - bool getMimeTypeByUrl(const string &url, string &mimeType) const; - bool getContentByUrl(const string &url, string &content, unsigned int &contentLength, string &contentType) const; - bool getContentByEncodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType, string &baseUrl) const; - bool getContentByEncodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType) const; - bool getContentByDecodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType, string &baseUrl) const; - bool getContentByDecodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType) const; - bool searchSuggestions(const string &prefix, unsigned int suggestionsCount, const bool reset = true); - bool searchSuggestionsSmart(const string &prefix, unsigned int suggestionsCount); - bool urlExists(const string &url) const; - bool hasFulltextIndex() const; - std::vector getTitleVariants(const std::string &title) const; - bool getNextSuggestion(string &title); - bool getNextSuggestion(string &title, string &url); - bool canCheckIntegrity() const; - bool isCorrupted() const; - bool parseUrl(const string &url, char *ns, string &title) const; - unsigned int getFileSize() const; - zim::File* getZimFileHandler() const; - bool getArticleObjectByDecodedUrl(const string &url, zim::Article &article) const; - - protected: - zim::File* zimFileHandler; - zim::size_type 
firstArticleOffset; - zim::size_type lastArticleOffset; - zim::size_type currentArticleOffset; - zim::size_type nsACount; - zim::size_type nsICount; - std::string zimFilePath; - - std::vector< std::vector > suggestions; - std::vector< std::vector >::iterator suggestionsOffset; - - private: - std::map parseCounterMetadata() const; - }; + std::vector> suggestions; + std::vector>::iterator suggestionsOffset; + private: + std::map parseCounterMetadata() const; +}; } #endif diff --git a/include/searcher.h b/include/searcher.h index 3a53e8604..89787873b 100644 --- a/include/searcher.h +++ b/include/searcher.h @@ -22,73 +22,77 @@ #include #include -#include +#include #include -#include -#include #include +#include +#include +#include #include #include "common/pathTools.h" #include "common/stringTools.h" -#include #include "kiwix_config.h" using namespace std; -namespace kiwix { - class Reader; - class Result { - public: - virtual ~Result() {}; - virtual std::string get_url() = 0; - virtual std::string get_title() = 0; - virtual int get_score() = 0; - virtual std::string get_snippet() = 0; - virtual int get_wordCount() = 0; - virtual int get_size() = 0; - }; +namespace kiwix +{ +class Reader; +class Result +{ + public: + virtual ~Result(){}; + virtual std::string get_url() = 0; + virtual std::string get_title() = 0; + virtual int get_score() = 0; + virtual std::string get_snippet() = 0; + virtual int get_wordCount() = 0; + virtual int get_size() = 0; +}; +struct SearcherInternal; +class Searcher +{ + public: + Searcher(const string& xapianDirectoryPath, Reader* reader); + ~Searcher(); - struct SearcherInternal; - class Searcher { - - public: - Searcher(const string &xapianDirectoryPath, Reader* reader); - ~Searcher(); - - void search(std::string &search, unsigned int resultStart, - unsigned int resultEnd, const bool verbose=false); - Result* getNextResult(); - void restart_search(); - unsigned int getEstimatedResultCount(); - bool setProtocolPrefix(const std::string 
prefix); - bool setSearchProtocolPrefix(const std::string prefix); - void reset(); - void setContentHumanReadableId(const string &contentHumanReadableId); + void search(std::string& search, + unsigned int resultStart, + unsigned int resultEnd, + const bool verbose = false); + Result* getNextResult(); + void restart_search(); + unsigned int getEstimatedResultCount(); + bool setProtocolPrefix(const std::string prefix); + bool setSearchProtocolPrefix(const std::string prefix); + void reset(); + void setContentHumanReadableId(const string& contentHumanReadableId); #ifdef ENABLE_CTPP2 - string getHtml(); + string getHtml(); #endif - - protected: - std::string beautifyInteger(const unsigned int number); - void closeIndex() ; - void searchInIndex(string &search, const unsigned int resultStart, - const unsigned int resultEnd, const bool verbose=false); - Reader* reader; - SearcherInternal* internal; - std::string searchPattern; - std::string protocolPrefix; - std::string searchProtocolPrefix; - std::string template_ct2; - unsigned int resultCountPerPage; - unsigned int estimatedResultCount; - unsigned int resultStart; - unsigned int resultEnd; - std::string contentHumanReadableId; - }; + protected: + std::string beautifyInteger(const unsigned int number); + void closeIndex(); + void searchInIndex(string& search, + const unsigned int resultStart, + const unsigned int resultEnd, + const bool verbose = false); + Reader* reader; + SearcherInternal* internal; + std::string searchPattern; + std::string protocolPrefix; + std::string searchProtocolPrefix; + std::string template_ct2; + unsigned int resultCountPerPage; + unsigned int estimatedResultCount; + unsigned int resultStart; + unsigned int resultEnd; + std::string contentHumanReadableId; +}; } #endif diff --git a/include/xapianSearcher.h b/include/xapianSearcher.h index 8604ae9c8..907ca733e 100644 --- a/include/xapianSearcher.h +++ b/include/xapianSearcher.h @@ -21,70 +21,76 @@ #define KIWIX_XAPIAN_SEARCHER_H #include 
-#include "searcher.h" #include "reader.h" +#include "searcher.h" #include #include using namespace std; -namespace kiwix { +namespace kiwix +{ +class XapianSearcher; - class XapianSearcher; +class XapianResult : public Result +{ + public: + XapianResult(XapianSearcher* searcher, Xapian::MSetIterator& iterator); + virtual ~XapianResult(){}; - class XapianResult : public Result { - public: - XapianResult(XapianSearcher* searcher, Xapian::MSetIterator& iterator); - virtual ~XapianResult() {}; + virtual std::string get_url(); + virtual std::string get_title(); + virtual int get_score(); + virtual std::string get_snippet(); + virtual int get_wordCount(); + virtual int get_size(); - virtual std::string get_url(); - virtual std::string get_title(); - virtual int get_score(); - virtual std::string get_snippet(); - virtual int get_wordCount(); - virtual int get_size(); + private: + XapianSearcher* searcher; + Xapian::MSetIterator iterator; + Xapian::Document document; +}; - private: - XapianSearcher* searcher; - Xapian::MSetIterator iterator; - Xapian::Document document; - }; +class NoXapianIndexInZim : public exception +{ + virtual const char* what() const throw() + { + return "There is no fulltext index in the zim file"; + } +}; - class NoXapianIndexInZim: public exception { - virtual const char* what() const throw() { - return "There is no fulltext index in the zim file"; - } - }; +class XapianSearcher +{ + friend class XapianResult; - class XapianSearcher { - friend class XapianResult; - public: - XapianSearcher(const string &xapianDirectoryPath, Reader* reader); - virtual ~XapianSearcher() {}; - void searchInIndex(string &search, const unsigned int resultStart, const unsigned int resultEnd, - const bool verbose=false); - virtual Result* getNextResult(); - void restart_search(); + public: + XapianSearcher(const string& xapianDirectoryPath, Reader* reader); + virtual ~XapianSearcher(){}; + void searchInIndex(string& search, + const unsigned int resultStart, + const 
unsigned int resultEnd, + const bool verbose = false); + virtual Result* getNextResult(); + void restart_search(); - Xapian::MSet results; + Xapian::MSet results; - protected: - void closeIndex(); - void openIndex(const string &xapianDirectoryPath); - void setup_queryParser(); - - Reader* reader; - Xapian::Database readableDatabase; - std::string language; - std::string stopwords; - Xapian::QueryParser queryParser; - Xapian::Stem stemmer; - Xapian::SimpleStopper stopper; - Xapian::MSetIterator current_result; - std::map valuesmap; - }; + protected: + void closeIndex(); + void openIndex(const string& xapianDirectoryPath); + void setup_queryParser(); + Reader* reader; + Xapian::Database readableDatabase; + std::string language; + std::string stopwords; + Xapian::QueryParser queryParser; + Xapian::Stem stemmer; + Xapian::SimpleStopper stopper; + Xapian::MSetIterator current_result; + std::map valuesmap; +}; } #endif diff --git a/src/android/kiwix.cpp b/src/android/kiwix.cpp index 279b0b21f..fa8daafb7 100644 --- a/src/android/kiwix.cpp +++ b/src/android/kiwix.cpp @@ -7,80 +7,87 @@ #include #include -#include "unicode/putil.h" +#include "common/base64.h" #include "reader.h" #include "searcher.h" -#include "common/base64.h" +#include "unicode/putil.h" #include -#define LOGI(...) __android_log_print(ANDROID_LOG_INFO, "kiwix", __VA_ARGS__) +#define LOGI(...) __android_log_print(ANDROID_LOG_INFO, "kiwix", __VA_ARGS__) #include -#include -#include #include #include +#include +#include /* global variables */ -kiwix::Reader *reader = NULL; -kiwix::Searcher *searcher = NULL; +kiwix::Reader* reader = NULL; +kiwix::Searcher* searcher = NULL; static pthread_mutex_t readerLock = PTHREAD_MUTEX_INITIALIZER; static pthread_mutex_t searcherLock = PTHREAD_MUTEX_INITIALIZER; /* c2jni type conversion functions */ -jboolean c2jni(const bool &val) { +jboolean c2jni(const bool& val) +{ return val ? 
JNI_TRUE : JNI_FALSE; } - -jstring c2jni(const std::string &val, JNIEnv *env) { +jstring c2jni(const std::string& val, JNIEnv* env) +{ return env->NewStringUTF(val.c_str()); } -jint c2jni(const int val) { +jint c2jni(const int val) +{ return (jint)val; } - -jint c2jni(const unsigned val) { +jint c2jni(const unsigned val) +{ return (unsigned)val; } - /* jni2c type conversion functions */ -bool jni2c(const jboolean &val) { +bool jni2c(const jboolean& val) +{ return val == JNI_TRUE; } - -std::string jni2c(const jstring &val, JNIEnv *env) { +std::string jni2c(const jstring& val, JNIEnv* env) +{ return std::string(env->GetStringUTFChars(val, 0)); } -int jni2c(const jint val) { +int jni2c(const jint val) +{ return (int)val; } - /* Method to deal with variable passed by reference */ -void setStringObjValue(const std::string &value, const jobject obj, JNIEnv *env) { +void setStringObjValue(const std::string& value, const jobject obj, JNIEnv* env) +{ jclass objClass = env->GetObjectClass(obj); jfieldID objFid = env->GetFieldID(objClass, "value", "Ljava/lang/String;"); env->SetObjectField(obj, objFid, c2jni(value, env)); } -void setIntObjValue(const int value, const jobject obj, JNIEnv *env) { +void setIntObjValue(const int value, const jobject obj, JNIEnv* env) +{ jclass objClass = env->GetObjectClass(obj); jfieldID objFid = env->GetFieldID(objClass, "value", "I"); env->SetIntField(obj, objFid, value); } -void setBoolObjValue(const bool value, const jobject obj, JNIEnv *env) { +void setBoolObjValue(const bool value, const jobject obj, JNIEnv* env) +{ jclass objClass = env->GetObjectClass(obj); jfieldID objFid = env->GetFieldID(objClass, "value", "Z"); env->SetIntField(obj, objFid, c2jni(value)); } /* Kiwix library functions */ -JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getMainPage(JNIEnv *env, jobject obj) { +JNIEXPORT jstring JNICALL +Java_org_kiwix_kiwixlib_JNIKiwix_getMainPage(JNIEnv* env, jobject obj) +{ jstring url; - + pthread_mutex_lock(&readerLock); 
if (reader != NULL) { try { @@ -91,13 +98,15 @@ JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getMainPage(JNIEnv *e } } pthread_mutex_unlock(&readerLock); - + return url; } -JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getId(JNIEnv *env, jobject obj) { +JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getId(JNIEnv* env, + jobject obj) +{ jstring id; - + pthread_mutex_lock(&readerLock); if (reader != NULL) { try { @@ -108,13 +117,15 @@ JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getId(JNIEnv *env, jo } } pthread_mutex_unlock(&readerLock); - + return id; } -JNIEXPORT jint JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getFileSize(JNIEnv *env, jobject obj) { +JNIEXPORT jint JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getFileSize(JNIEnv* env, + jobject obj) +{ jint size; - + pthread_mutex_lock(&readerLock); if (reader != NULL) { try { @@ -125,13 +136,15 @@ JNIEXPORT jint JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getFileSize(JNIEnv *env, } } pthread_mutex_unlock(&readerLock); - + return size; } -JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getCreator(JNIEnv *env, jobject obj) { +JNIEXPORT jstring JNICALL +Java_org_kiwix_kiwixlib_JNIKiwix_getCreator(JNIEnv* env, jobject obj) +{ jstring creator; - + pthread_mutex_lock(&readerLock); if (reader != NULL) { try { @@ -142,13 +155,15 @@ JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getCreator(JNIEnv *en } } pthread_mutex_unlock(&readerLock); - + return creator; } -JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getPublisher(JNIEnv *env, jobject obj) { +JNIEXPORT jstring JNICALL +Java_org_kiwix_kiwixlib_JNIKiwix_getPublisher(JNIEnv* env, jobject obj) +{ jstring publisher; - + pthread_mutex_lock(&readerLock); if (reader != NULL) { try { @@ -159,13 +174,15 @@ JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getPublisher(JNIEnv * } } pthread_mutex_unlock(&readerLock); - + return publisher; } -JNIEXPORT jstring JNICALL 
Java_org_kiwix_kiwixlib_JNIKiwix_getName(JNIEnv *env, jobject obj) { +JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getName(JNIEnv* env, + jobject obj) +{ jstring name; - + pthread_mutex_lock(&readerLock); if (reader != NULL) { try { @@ -176,33 +193,40 @@ JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getName(JNIEnv *env, } } pthread_mutex_unlock(&readerLock); - + return name; } - -JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getFavicon(JNIEnv *env, jobject obj) { +JNIEXPORT jstring JNICALL +Java_org_kiwix_kiwixlib_JNIKiwix_getFavicon(JNIEnv* env, jobject obj) +{ jstring favicon; - + pthread_mutex_lock(&readerLock); if (reader != NULL) { try { std::string cContent; std::string cMime; reader->getFavicon(cContent, cMime); - favicon = c2jni(base64_encode(reinterpret_cast(cContent.c_str()), cContent.length()), env); + favicon + = c2jni(base64_encode( + reinterpret_cast(cContent.c_str()), + cContent.length()), + env); } catch (...) { std::cerr << "Unable to get ZIM favicon" << std::endl; } } pthread_mutex_unlock(&readerLock); - + return favicon; } -JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getDate(JNIEnv *env, jobject obj) { +JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getDate(JNIEnv* env, + jobject obj) +{ jstring date; - + pthread_mutex_lock(&readerLock); if (reader != NULL) { try { @@ -213,13 +237,15 @@ JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getDate(JNIEnv *env, } } pthread_mutex_unlock(&readerLock); - + return date; } -JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getLanguage(JNIEnv *env, jobject obj) { +JNIEXPORT jstring JNICALL +Java_org_kiwix_kiwixlib_JNIKiwix_getLanguage(JNIEnv* env, jobject obj) +{ jstring language; - + pthread_mutex_lock(&readerLock); if (reader != NULL) { try { @@ -230,13 +256,15 @@ JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getLanguage(JNIEnv *e } } pthread_mutex_unlock(&readerLock); - + return language; } -JNIEXPORT 
jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getMimeType(JNIEnv *env, jobject obj, jstring url) { +JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getMimeType( + JNIEnv* env, jobject obj, jstring url) +{ jstring mimeType; - + pthread_mutex_lock(&readerLock); if (reader != NULL) { std::string cUrl = jni2c(url, env); @@ -249,17 +277,21 @@ JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getMimeType(JNIEnv *e } } pthread_mutex_unlock(&readerLock); - + return mimeType; } -JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_loadZIM(JNIEnv *env, jobject obj, jstring path) { +JNIEXPORT jboolean JNICALL +Java_org_kiwix_kiwixlib_JNIKiwix_loadZIM(JNIEnv* env, jobject obj, jstring path) +{ jboolean retVal = JNI_TRUE; std::string cPath = jni2c(path, env); pthread_mutex_lock(&readerLock); try { - if (reader != NULL) delete reader; + if (reader != NULL) { + delete reader; + } reader = new kiwix::Reader(cPath); } catch (...) { std::cerr << "Unable to load ZIM " << cPath << std::endl; @@ -271,8 +303,9 @@ JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_loadZIM(JNIEnv *env, return retVal; } -JNIEXPORT jbyteArray JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getContent(JNIEnv *env, jobject obj, jstring url, jobject mimeTypeObj, jobject sizeObj) { - +JNIEXPORT jbyteArray JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getContent( + JNIEnv* env, jobject obj, jstring url, jobject mimeTypeObj, jobject sizeObj) +{ /* Default values */ setStringObjValue("", mimeTypeObj, env); setIntObjValue(0, sizeObj, env); @@ -289,7 +322,8 @@ JNIEXPORT jbyteArray JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getContent(JNIEnv try { if (reader->getContentByUrl(cUrl, cData, cSize, cMimeType)) { data = env->NewByteArray(cSize); - env->SetByteArrayRegion(data, 0, cSize, reinterpret_cast(cData.c_str())); + env->SetByteArrayRegion( + data, 0, cSize, reinterpret_cast(cData.c_str())); setStringObjValue(cMimeType, mimeTypeObj, env); setIntObjValue(cSize, sizeObj, env); } @@ -298,12 
+332,13 @@ JNIEXPORT jbyteArray JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getContent(JNIEnv } pthread_mutex_unlock(&readerLock); } - + return data; } -JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_searchSuggestions -(JNIEnv *env, jobject obj, jstring prefix, jint count) { +JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_searchSuggestions( + JNIEnv* env, jobject obj, jstring prefix, jint count) +{ jboolean retVal = JNI_FALSE; std::string cPrefix = jni2c(prefix, env); unsigned int cCount = jni2c(count); @@ -316,15 +351,17 @@ JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_searchSuggestions } } } catch (...) { - std::cerr << "Unable to search suggestions for pattern " << cPrefix << std::endl; + std::cerr << "Unable to search suggestions for pattern " << cPrefix + << std::endl; } pthread_mutex_unlock(&readerLock); return retVal; } -JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getNextSuggestion -(JNIEnv *env, jobject obj, jobject titleObj) { +JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getNextSuggestion( + JNIEnv* env, jobject obj, jobject titleObj) +{ jboolean retVal = JNI_FALSE; std::string cTitle; @@ -344,8 +381,9 @@ JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getNextSuggestion return retVal; } -JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getPageUrlFromTitle -(JNIEnv *env, jobject obj, jstring title, jobject urlObj) { +JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getPageUrlFromTitle( + JNIEnv* env, jobject obj, jstring title, jobject urlObj) +{ jboolean retVal = JNI_FALSE; std::string cTitle = jni2c(title, env); std::string cUrl; @@ -362,12 +400,13 @@ JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getPageUrlFromTitle std::cerr << "Unable to get URL for title " << cTitle << std::endl; } pthread_mutex_unlock(&readerLock); - + return retVal; } -JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getTitle -(JNIEnv *env , jobject obj, 
jobject titleObj) { +JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getTitle( + JNIEnv* env, jobject obj, jobject titleObj) +{ jboolean retVal = JNI_FALSE; std::string cTitle; @@ -384,12 +423,13 @@ JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getTitle pthread_mutex_unlock(&readerLock); return retVal; - } -JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getDescription(JNIEnv *env, jobject obj) { +JNIEXPORT jstring JNICALL +Java_org_kiwix_kiwixlib_JNIKiwix_getDescription(JNIEnv* env, jobject obj) +{ jstring description; - + pthread_mutex_lock(&readerLock); if (reader != NULL) { try { @@ -400,12 +440,13 @@ JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getDescription(JNIEnv } } pthread_mutex_unlock(&readerLock); - + return description; } -JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getRandomPage -(JNIEnv *env, jobject obj, jobject urlObj) { +JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getRandomPage( + JNIEnv* env, jobject obj, jobject urlObj) +{ jboolean retVal = JNI_FALSE; std::string cUrl; @@ -424,11 +465,12 @@ JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getRandomPage return retVal; } -JNIEXPORT void JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_setDataDirectory - (JNIEnv *env, jobject obj, jstring dirStr) { +JNIEXPORT void JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_setDataDirectory( + JNIEnv* env, jobject obj, jstring dirStr) +{ std::string cPath = jni2c(dirStr, env); - pthread_mutex_lock(&readerLock); + pthread_mutex_lock(&readerLock); try { u_setDataDirectory(cPath.c_str()); } catch (...) 
{ @@ -437,14 +479,18 @@ JNIEXPORT void JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_setDataDirectory pthread_mutex_unlock(&readerLock); } -JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_loadFulltextIndex(JNIEnv *env, jobject obj, jstring path) { +JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_loadFulltextIndex( + JNIEnv* env, jobject obj, jstring path) +{ jboolean retVal = JNI_TRUE; std::string cPath = jni2c(path, env); pthread_mutex_lock(&searcherLock); searcher = NULL; try { - if (searcher != NULL) delete searcher; + if (searcher != NULL) { + delete searcher; + } searcher = new kiwix::Searcher(cPath, reader); } catch (...) { searcher = NULL; @@ -456,22 +502,23 @@ JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_loadFulltextIndex(JN return retVal; } -JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_indexedQuery - (JNIEnv *env, jclass obj, jstring query, jint count) { +JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_indexedQuery( + JNIEnv* env, jclass obj, jstring query, jint count) +{ std::string cQuery = jni2c(query, env); unsigned int cCount = jni2c(count); - kiwix::Result *p_result; + kiwix::Result* p_result; std::string result; - + pthread_mutex_lock(&searcherLock); try { if (searcher != NULL) { searcher->search(cQuery, 0, count); - while ( (p_result = searcher->getNextResult()) && - !(p_result->get_title().empty()) && - !(p_result->get_url().empty())) { - result += p_result->get_title() + "\n"; - delete p_result; + while ((p_result = searcher->getNextResult()) + && !(p_result->get_title().empty()) + && !(p_result->get_url().empty())) { + result += p_result->get_title() + "\n"; + delete p_result; } } } catch (...) 
{ @@ -481,5 +528,3 @@ JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_indexedQuery return env->NewStringUTF(result.c_str()); } - - diff --git a/src/android/org/kiwix/kiwixlib/JNIKiwix.java b/src/android/org/kiwix/kiwixlib/JNIKiwix.java index e925d65a0..41711009e 100644 --- a/src/android/org/kiwix/kiwixlib/JNIKiwix.java +++ b/src/android/org/kiwix/kiwixlib/JNIKiwix.java @@ -23,12 +23,9 @@ import org.kiwix.kiwixlib.JNIKiwixString; import org.kiwix.kiwixlib.JNIKiwixBool; import org.kiwix.kiwixlib.JNIKiwixInt; -public class JNIKiwix { - - static { - System.loadLibrary("kiwix"); - } - +public class JNIKiwix +{ + static { System.loadLibrary("kiwix"); } public native String getMainPage(); public native String getId(); @@ -39,8 +36,8 @@ public class JNIKiwix { public native boolean loadZIM(String path); - public native boolean loadFulltextIndex(String path); - + public native boolean loadFulltextIndex(String path); + public native byte[] getContent(String url, JNIKiwixString mimeType, JNIKiwixInt size); public native boolean searchSuggestions(String prefix, int count); diff --git a/src/android/org/kiwix/kiwixlib/JNIKiwixBool.java b/src/android/org/kiwix/kiwixlib/JNIKiwixBool.java index 7cf645460..0563f30a4 100644 --- a/src/android/org/kiwix/kiwixlib/JNIKiwixBool.java +++ b/src/android/org/kiwix/kiwixlib/JNIKiwixBool.java @@ -19,7 +19,7 @@ package org.kiwix.kiwixlib; -public class JNIKiwixBool { - +public class JNIKiwixBool +{ public boolean value; } diff --git a/src/android/org/kiwix/kiwixlib/JNIKiwixInt.java b/src/android/org/kiwix/kiwixlib/JNIKiwixInt.java index 49509c257..af8d77219 100644 --- a/src/android/org/kiwix/kiwixlib/JNIKiwixInt.java +++ b/src/android/org/kiwix/kiwixlib/JNIKiwixInt.java @@ -19,8 +19,7 @@ package org.kiwix.kiwixlib; -public class JNIKiwixInt { - +public class JNIKiwixInt +{ public int value; } - diff --git a/src/android/org/kiwix/kiwixlib/JNIKiwixString.java b/src/android/org/kiwix/kiwixlib/JNIKiwixString.java index 
c73e8b925..1e9876e59 100644 --- a/src/android/org/kiwix/kiwixlib/JNIKiwixString.java +++ b/src/android/org/kiwix/kiwixlib/JNIKiwixString.java @@ -19,7 +19,7 @@ package org.kiwix.kiwixlib; -public class JNIKiwixString { - +public class JNIKiwixString +{ public String value; } diff --git a/src/common/networkTools.cpp b/src/common/networkTools.cpp index 92c4211bc..639e995c0 100644 --- a/src/common/networkTools.cpp +++ b/src/common/networkTools.cpp @@ -19,44 +19,54 @@ #include -std::map kiwix::getNetworkInterfaces() { +std::map kiwix::getNetworkInterfaces() +{ std::map interfaces; #ifdef _WIN32 SOCKET sd = WSASocket(AF_INET, SOCK_DGRAM, 0, 0, 0, 0); if (sd == SOCKET_ERROR) { - std::cerr << "Failed to get a socket. Error " << WSAGetLastError() << - std::endl; + std::cerr << "Failed to get a socket. Error " << WSAGetLastError() + << std::endl; return interfaces; } INTERFACE_INFO InterfaceList[20]; unsigned long nBytesReturned; - if (WSAIoctl(sd, SIO_GET_INTERFACE_LIST, 0, 0, &InterfaceList, - sizeof(InterfaceList), &nBytesReturned, 0, 0) == SOCKET_ERROR) { - std::cerr << "Failed calling WSAIoctl: error " << WSAGetLastError() << - std::endl; + if (WSAIoctl(sd, + SIO_GET_INTERFACE_LIST, + 0, + 0, + &InterfaceList, + sizeof(InterfaceList), + &nBytesReturned, + 0, + 0) + == SOCKET_ERROR) { + std::cerr << "Failed calling WSAIoctl: error " << WSAGetLastError() + << std::endl; return interfaces; } int nNumInterfaces = nBytesReturned / sizeof(INTERFACE_INFO); for (int i = 0; i < nNumInterfaces; ++i) { - sockaddr_in *pAddress; - pAddress = (sockaddr_in *) & (InterfaceList[i].iiAddress); + sockaddr_in* pAddress; + pAddress = (sockaddr_in*)&(InterfaceList[i].iiAddress); /* Add to the map */ std::string interfaceName = std::string(inet_ntoa(pAddress->sin_addr)); std::string interfaceIp = std::string(inet_ntoa(pAddress->sin_addr)); - interfaces.insert(std::pair(interfaceName, interfaceIp)); + interfaces.insert( + std::pair(interfaceName, interfaceIp)); } #else /* Get Network 
interfaces information */ char buf[16384]; struct ifconf ifconf; int fd = socket(PF_INET, SOCK_DGRAM, 0); /* Only IPV4 */ - ifconf.ifc_len=sizeof buf; - ifconf.ifc_buf=buf; - if(ioctl(fd, SIOCGIFCONF, &ifconf)!=0) { + ifconf.ifc_len = sizeof buf; + ifconf.ifc_buf = buf; + if (ioctl(fd, SIOCGIFCONF, &ifconf) != 0) { perror("ioctl(SIOCGIFCONF)"); exit(EXIT_FAILURE); } @@ -64,73 +74,86 @@ std::map kiwix::getNetworkInterfaces() { /* Go through each interface */ int i; size_t len; - struct ifreq *ifreq; + struct ifreq* ifreq; ifreq = ifconf.ifc_req; - for (i = 0; i < ifconf.ifc_len; ) { + for (i = 0; i < ifconf.ifc_len;) { if (ifreq->ifr_addr.sa_family == AF_INET) { /* Get the network interface ip */ - char host[128] = { 0 }; - const int error = getnameinfo(&(ifreq->ifr_addr), sizeof ifreq->ifr_addr, - host, sizeof host, - 0, 0, NI_NUMERICHOST); + char host[128] = {0}; + const int error = getnameinfo(&(ifreq->ifr_addr), + sizeof ifreq->ifr_addr, + host, + sizeof host, + 0, + 0, + NI_NUMERICHOST); if (!error) { std::string interfaceName = std::string(ifreq->ifr_name); std::string interfaceIp = std::string(host); /* Add to the map */ - interfaces.insert(std::pair(interfaceName, interfaceIp)); + interfaces.insert( + std::pair(interfaceName, interfaceIp)); } else { perror("getnameinfo()"); } } - /* some systems have ifr_addr.sa_len and adjust the length that - * way, but not mine. weird */ +/* some systems have ifr_addr.sa_len and adjust the length that + * way, but not mine. 
weird */ #ifndef __linux__ - len=IFNAMSIZ + ifreq->ifr_addr.sa_len; + len = IFNAMSIZ + ifreq->ifr_addr.sa_len; #else - len=sizeof *ifreq; + len = sizeof *ifreq; #endif - ifreq=(struct ifreq*)((char*)ifreq+len); - i+=len; + ifreq = (struct ifreq*)((char*)ifreq + len); + i += len; } #endif return interfaces; } -std::string kiwix::getBestPublicIp() { +std::string kiwix::getBestPublicIp() +{ std::map interfaces = kiwix::getNetworkInterfaces(); #ifndef _WIN32 - const char* const prioritizedNames[] = - { "eth0", "eth1", "wlan0", "wlan1", "en0", "en1" }; + const char* const prioritizedNames[] + = {"eth0", "eth1", "wlan0", "wlan1", "en0", "en1"}; const int count = (sizeof prioritizedNames) / (sizeof prioritizedNames[0]); for (int i = 0; i < count; ++i) { - std::map::const_iterator it = - interfaces.find(prioritizedNames[i]); - if (it != interfaces.end()) + std::map::const_iterator it + = interfaces.find(prioritizedNames[i]); + if (it != interfaces.end()) { return it->second; + } } #endif for (std::map::iterator iter = interfaces.begin(); - iter != interfaces.end(); ++iter) { + iter != interfaces.end(); + ++iter) { std::string interfaceIp = iter->second; - if (interfaceIp.length() >= 7 && interfaceIp.substr(0, 7) == "192.168") + if (interfaceIp.length() >= 7 && interfaceIp.substr(0, 7) == "192.168") { return interfaceIp; + } } for (std::map::iterator iter = interfaces.begin(); - iter != interfaces.end(); ++iter) { + iter != interfaces.end(); + ++iter) { std::string interfaceIp = iter->second; - if (interfaceIp.length() >= 7 && interfaceIp.substr(0, 7) == "172.16.") + if (interfaceIp.length() >= 7 && interfaceIp.substr(0, 7) == "172.16.") { return interfaceIp; + } } for (std::map::iterator iter = interfaces.begin(); - iter != interfaces.end(); ++iter) { + iter != interfaces.end(); + ++iter) { std::string interfaceIp = iter->second; - if (interfaceIp.length() >= 3 && interfaceIp.substr(0, 3) == "10.") + if (interfaceIp.length() >= 3 && interfaceIp.substr(0, 3) == "10.") { 
return interfaceIp; + } } return "127.0.0.1"; diff --git a/src/common/otherTools.cpp b/src/common/otherTools.cpp index 4ec60b48c..0cb852376 100644 --- a/src/common/otherTools.cpp +++ b/src/common/otherTools.cpp @@ -19,10 +19,11 @@ #include -void kiwix::sleep(unsigned int milliseconds) { +void kiwix::sleep(unsigned int milliseconds) +{ #ifdef _WIN32 - Sleep(milliseconds); + Sleep(milliseconds); #else - usleep(1000 * milliseconds); + usleep(1000 * milliseconds); #endif } diff --git a/src/common/pathTools.cpp b/src/common/pathTools.cpp index 83182b7f5..1d46bf87a 100644 --- a/src/common/pathTools.cpp +++ b/src/common/pathTools.cpp @@ -20,13 +20,13 @@ #include #ifdef __APPLE__ -#include #include +#include #elif _WIN32 +#include #include #include "shlwapi.h" -#include -#define getcwd _getcwd // stupid MSFT "deprecation" warning +#define getcwd _getcwd // stupid MSFT "deprecation" warning #endif #ifdef _WIN32 @@ -47,7 +47,8 @@ #define PATH_MAX 1024 #endif -bool isRelativePath(const string &path) { +bool isRelativePath(const string& path) +{ #ifdef _WIN32 return path.empty() || path.substr(1, 2) == ":\\" ? 
false : true; #else @@ -55,19 +56,21 @@ bool isRelativePath(const string &path) { #endif } -string computeRelativePath(const string path, const string absolutePath) { +string computeRelativePath(const string path, const string absolutePath) +{ std::vector pathParts = kiwix::split(path, SEPARATOR); - std::vector absolutePathParts = kiwix::split(absolutePath, SEPARATOR); + std::vector absolutePathParts + = kiwix::split(absolutePath, SEPARATOR); unsigned int commonCount = 0; - while (commonCount < pathParts.size() && - commonCount < absolutePathParts.size() && - pathParts[commonCount] == absolutePathParts[commonCount]) { + while (commonCount < pathParts.size() + && commonCount < absolutePathParts.size() + && pathParts[commonCount] == absolutePathParts[commonCount]) { if (!pathParts[commonCount].empty()) { commonCount++; } } - + string relativePath; #ifdef _WIN32 /* On Windows you have a token more because the root is represented @@ -77,10 +80,10 @@ string computeRelativePath(const string path, const string absolutePath) { } #endif - for (unsigned int i = commonCount ; i < pathParts.size() ; i++) { + for (unsigned int i = commonCount; i < pathParts.size(); i++) { relativePath += "../"; } - for (unsigned int i = commonCount ; i < absolutePathParts.size() ; i++) { + for (unsigned int i = commonCount; i < absolutePathParts.size(); i++) { relativePath += absolutePathParts[i]; relativePath += i + 1 < absolutePathParts.size() ? 
"/" : ""; } @@ -89,11 +92,12 @@ string computeRelativePath(const string path, const string absolutePath) { } /* Warning: the relative path must be with slashes */ -string computeAbsolutePath(const string path, const string relativePath) { +string computeAbsolutePath(const string path, const string relativePath) +{ string absolutePath; if (path.empty()) { - char *path=NULL; + char* path = NULL; size_t size = 0; #ifdef _WIN32 @@ -104,15 +108,17 @@ string computeAbsolutePath(const string path, const string relativePath) { absolutePath = string(path) + SEPARATOR; } else { - absolutePath = path.substr(path.length() - 1, 1) == SEPARATOR ? path : path + SEPARATOR; + absolutePath = path.substr(path.length() - 1, 1) == SEPARATOR + ? path + : path + SEPARATOR; } #if _WIN32 - char *cRelativePath = _strdup(relativePath.c_str()); + char* cRelativePath = _strdup(relativePath.c_str()); #else - char *cRelativePath = strdup(relativePath.c_str()); + char* cRelativePath = strdup(relativePath.c_str()); #endif - char *token = strtok(cRelativePath, "/"); + char* token = strtok(cRelativePath, "/"); while (token != NULL) { if (string(token) == "..") { @@ -121,8 +127,9 @@ string computeAbsolutePath(const string path, const string relativePath) { } else if (strcmp(token, ".") && strcmp(token, "")) { absolutePath += string(token); token = strtok(NULL, "/"); - if (token != NULL) - absolutePath += SEPARATOR; + if (token != NULL) { + absolutePath += SEPARATOR; + } } else { token = strtok(NULL, "/"); } @@ -131,31 +138,38 @@ string computeAbsolutePath(const string path, const string relativePath) { return absolutePath; } -string removeLastPathElement(const string path, const bool removePreSeparator, const bool removePostSeparator) { +string removeLastPathElement(const string path, + const bool removePreSeparator, + const bool removePostSeparator) +{ string newPath = path; size_t offset = newPath.find_last_of(SEPARATOR); - if (removePreSeparator && + if (removePreSeparator && #ifndef _WIN32 - 
offset != newPath.find_first_of(SEPARATOR) && + offset != newPath.find_first_of(SEPARATOR) && #endif - offset == newPath.length()-1) { + offset == newPath.length() - 1) { newPath = newPath.substr(0, offset); offset = newPath.find_last_of(SEPARATOR); } - newPath = removePostSeparator ? newPath.substr(0, offset) : newPath.substr(0, offset+1); + newPath = removePostSeparator ? newPath.substr(0, offset) + : newPath.substr(0, offset + 1); return newPath; } -string appendToDirectory(const string &directoryPath, const string &filename) { +string appendToDirectory(const string& directoryPath, const string& filename) +{ string newPath = directoryPath + SEPARATOR + filename; return newPath; } -string getLastPathElement(const string &path) { +string getLastPathElement(const string& path) +{ return path.substr(path.find_last_of(SEPARATOR) + 1); } -unsigned int getFileSize(const string &path) { +unsigned int getFileSize(const string& path) +{ #ifdef _WIN32 struct _stat filestatus; _stat(path.c_str(), &filestatus); @@ -167,12 +181,15 @@ unsigned int getFileSize(const string &path) { return filestatus.st_size / 1024; } -string getFileSizeAsString(const string &path) { - ostringstream convert; convert << getFileSize(path); +string getFileSizeAsString(const string& path) +{ + ostringstream convert; + convert << getFileSize(path); return convert.str(); } -bool fileExists(const string &path) { +bool fileExists(const string& path) +{ #ifdef _WIN32 return PathFileExists(path.c_str()); #else @@ -187,7 +204,8 @@ bool fileExists(const string &path) { #endif } -bool makeDirectory(const string &path) { +bool makeDirectory(const string& path) +{ #ifdef _WIN32 int status = _mkdir(path.c_str()); #else @@ -197,18 +215,19 @@ bool makeDirectory(const string &path) { } /* Try to create a link and if does not work then make a copy */ -bool copyFile(const string &sourcePath, const string &destPath) { +bool copyFile(const string& sourcePath, const string& destPath) +{ try { #ifndef _WIN32 if 
(link(sourcePath.c_str(), destPath.c_str()) != 0) { #endif - std::ifstream infile(sourcePath.c_str(), std::ios_base::binary); - std::ofstream outfile(destPath.c_str(), std::ios_base::binary); - outfile << infile.rdbuf(); + std::ifstream infile(sourcePath.c_str(), std::ios_base::binary); + std::ofstream outfile(destPath.c_str(), std::ios_base::binary); + outfile << infile.rdbuf(); #ifndef _WIN32 } #endif - } catch (exception &e) { + } catch (exception& e) { cerr << e.what() << endl; return false; } @@ -216,18 +235,19 @@ bool copyFile(const string &sourcePath, const string &destPath) { return true; } -string getExecutablePath() { +string getExecutablePath() +{ char binRootPath[PATH_MAX]; - + #ifdef _WIN32 - GetModuleFileName( NULL, binRootPath, PATH_MAX); + GetModuleFileName(NULL, binRootPath, PATH_MAX); return std::string(binRootPath); #elif __APPLE__ uint32_t max = (uint32_t)PATH_MAX; _NSGetExecutablePath(binRootPath, &max); return std::string(binRootPath); #else - ssize_t size = readlink("/proc/self/exe", binRootPath, PATH_MAX); + ssize_t size = readlink("/proc/self/exe", binRootPath, PATH_MAX); if (size != -1) { return std::string(binRootPath, size); } @@ -236,7 +256,8 @@ string getExecutablePath() { return ""; } -bool writeTextFile(const string &path, const string &content) { +bool writeTextFile(const string& path, const string& content) +{ std::ofstream file; file.open(path.c_str()); file << content; @@ -244,8 +265,9 @@ bool writeTextFile(const string &path, const string &content) { return true; } -string getCurrentDirectory() { - char* a_cwd = getcwd(NULL,0); +string getCurrentDirectory() +{ + char* a_cwd = getcwd(NULL, 0); string s_cwd(a_cwd); free(a_cwd); return s_cwd; diff --git a/src/common/regexTools.cpp b/src/common/regexTools.cpp index 32f38dbea..d12522056 100644 --- a/src/common/regexTools.cpp +++ b/src/common/regexTools.cpp @@ -21,10 +21,11 @@ std::map regexCache; -RegexMatcher *buildRegex(const std::string ®ex) { - RegexMatcher *matcher; 
+RegexMatcher* buildRegex(const std::string& regex) +{ + RegexMatcher* matcher; std::map::iterator itr = regexCache.find(regex); - + /* Regex is in cache */ if (itr != regexCache.end()) { matcher = itr->second; @@ -42,22 +43,26 @@ RegexMatcher *buildRegex(const std::string ®ex) { } /* todo */ -void freeRegexCache() { +void freeRegexCache() +{ } - -bool matchRegex(const std::string &content, const std::string ®ex) { +bool matchRegex(const std::string& content, const std::string& regex) +{ ucnv_setDefaultName("UTF-8"); UnicodeString ucontent = UnicodeString(content.c_str()); - RegexMatcher *matcher = buildRegex(regex); + RegexMatcher* matcher = buildRegex(regex); matcher->reset(ucontent); return matcher->find(); } -std::string replaceRegex(const std::string &content, const std::string &replacement, const std::string ®ex) { +std::string replaceRegex(const std::string& content, + const std::string& replacement, + const std::string& regex) +{ ucnv_setDefaultName("UTF-8"); UnicodeString ucontent = UnicodeString(content.c_str()); UnicodeString ureplacement = UnicodeString(replacement.c_str()); - RegexMatcher *matcher = buildRegex(regex); + RegexMatcher* matcher = buildRegex(regex); matcher->reset(ucontent); UErrorCode status = U_ZERO_ERROR; UnicodeString uresult = matcher->replaceAll(ureplacement, status); @@ -66,16 +71,19 @@ std::string replaceRegex(const std::string &content, const std::string &replacem return tmp; } -std::string appendToFirstOccurence(const std::string &content, const std::string regex, const std::string &replacement) { +std::string appendToFirstOccurence(const std::string& content, + const std::string regex, + const std::string& replacement) +{ ucnv_setDefaultName("UTF-8"); UnicodeString ucontent = UnicodeString(content.c_str()); UnicodeString ureplacement = UnicodeString(replacement.c_str()); - RegexMatcher *matcher = buildRegex(regex); + RegexMatcher* matcher = buildRegex(regex); matcher->reset(ucontent); if (matcher->find()) { UErrorCode status = 
U_ZERO_ERROR; - ucontent.insert(matcher->end(status), ureplacement); + ucontent.insert(matcher->end(status), ureplacement); std::string tmp; ucontent.toUTF8String(tmp); return tmp; @@ -83,4 +91,3 @@ std::string appendToFirstOccurence(const std::string &content, const std::strin return content; } - diff --git a/src/common/stringTools.cpp b/src/common/stringTools.cpp index 120a9c6f8..6e9a8175d 100644 --- a/src/common/stringTools.cpp +++ b/src/common/stringTools.cpp @@ -19,32 +19,36 @@ #include -#include #include -#include #include -#include +#include #include +#include +#include /* tell ICU where to find its dat file (tables) */ -void kiwix::loadICUExternalTables() { +void kiwix::loadICUExternalTables() +{ #ifdef __APPLE__ - std::string executablePath = getExecutablePath(); - std::string executableDirectory = removeLastPathElement(executablePath); - std::string datPath = computeAbsolutePath(executableDirectory, "icudt49l.dat"); - try { - u_setDataDirectory(datPath.c_str()); - } catch (exception &e) { - std::cerr << e.what() << std::endl; - } + std::string executablePath = getExecutablePath(); + std::string executableDirectory = removeLastPathElement(executablePath); + std::string datPath + = computeAbsolutePath(executableDirectory, "icudt49l.dat"); + try { + u_setDataDirectory(datPath.c_str()); + } catch (exception& e) { + std::cerr << e.what() << std::endl; + } #endif } -std::string kiwix::removeAccents(const std::string &text) { +std::string kiwix::removeAccents(const std::string& text) +{ loadICUExternalTables(); ucnv_setDefaultName("UTF-8"); UErrorCode status = U_ZERO_ERROR; - Transliterator *removeAccentsTrans = Transliterator::createInstance("Lower; NFD; [:M:] remove; NFC", UTRANS_FORWARD, status); + Transliterator* removeAccentsTrans = Transliterator::createInstance( + "Lower; NFD; [:M:] remove; NFC", UTRANS_FORWARD, status); UnicodeString ustring = UnicodeString(text.c_str()); removeAccentsTrans->transliterate(ustring); delete removeAccentsTrans; @@ -56,7 
+60,8 @@ std::string kiwix::removeAccents(const std::string &text) { #ifndef __ANDROID__ /* Prepare integer for display */ -std::string kiwix::beautifyInteger(const unsigned int number) { +std::string kiwix::beautifyInteger(const unsigned int number) +{ std::stringstream numberStream; numberStream << number; std::string numberString = numberStream.str(); @@ -70,49 +75,58 @@ std::string kiwix::beautifyInteger(const unsigned int number) { return numberString; } -std::string kiwix::beautifyFileSize(const unsigned int number) { - if (number > 1024*1024) { - return kiwix::beautifyInteger(number/(1024*1024)) + " GB"; +std::string kiwix::beautifyFileSize(const unsigned int number) +{ + if (number > 1024 * 1024) { + return kiwix::beautifyInteger(number / (1024 * 1024)) + " GB"; } else { - return kiwix::beautifyInteger(number/1024 != - 0 ? number/1024 : 1) + " MB"; + return kiwix::beautifyInteger(number / 1024 != 0 ? number / 1024 : 1) + + " MB"; } } -void kiwix::printStringInHexadecimal(UnicodeString s) { +void kiwix::printStringInHexadecimal(UnicodeString s) +{ std::cout << std::showbase << std::hex; - for (int i=0; i", ">"); @@ -120,58 +134,68 @@ std::string kiwix::encodeDiples(const std::string& str) { } // Urlencode -//based on javascript encodeURIComponent() +// based on javascript encodeURIComponent() -std::string char2hex(char dec) { - char dig1 = (dec&0xF0)>>4; - char dig2 = (dec&0x0F); - if ( 0<= dig1 && dig1<= 9) dig1+=48; //0,48inascii - if (10<= dig1 && dig1<=15) dig1+=97-10; //a,97inascii - if ( 0<= dig2 && dig2<= 9) dig2+=48; - if (10<= dig2 && dig2<=15) dig2+=97-10; +std::string char2hex(char dec) +{ + char dig1 = (dec & 0xF0) >> 4; + char dig2 = (dec & 0x0F); + if (0 <= dig1 && dig1 <= 9) { + dig1 += 48; // 0,48inascii + } + if (10 <= dig1 && dig1 <= 15) { + dig1 += 97 - 10; // a,97inascii + } + if (0 <= dig2 && dig2 <= 9) { + dig2 += 48; + } + if (10 <= dig2 && dig2 <= 15) { + dig2 += 97 - 10; + } std::string r; - r.append( &dig1, 1); - r.append( &dig2, 
1); + r.append(&dig1, 1); + r.append(&dig2, 1); return r; } -std::string kiwix::urlEncode(const std::string &c) { - std::string escaped=""; +std::string kiwix::urlEncode(const std::string& c) +{ + std::string escaped = ""; int max = c.length(); - for(int i=0; i> std::hex >> Z; - return char (Z); + return char(Z); } -std::string kiwix::urlDecode(const std::string &originalUrl) { +std::string kiwix::urlDecode(const std::string& originalUrl) +{ std::string url = originalUrl; std::string::size_type pos = 0; - while ((pos = url.find('%', pos)) != std::string::npos && - pos + 2 < url.length()) { + while ((pos = url.find('%', pos)) != std::string::npos + && pos + 2 < url.length()) { url.replace(pos, 3, 1, charFromHex(url.substr(pos + 1, 2))); ++pos; } @@ -179,39 +203,43 @@ std::string kiwix::urlDecode(const std::string &originalUrl) { } /* Split string in a token array */ -std::vector kiwix::split(const std::string & str, - const std::string & delims=" *-") +std::vector kiwix::split(const std::string& str, + const std::string& delims = " *-") { std::string::size_type lastPos = str.find_first_not_of(delims, 0); std::string::size_type pos = str.find_first_of(delims, lastPos); std::vector tokens; - while (std::string::npos != pos || std::string::npos != lastPos) - { - tokens.push_back(str.substr(lastPos, pos - lastPos)); - lastPos = str.find_first_not_of(delims, pos); - pos = str.find_first_of(delims, lastPos); - } + while (std::string::npos != pos || std::string::npos != lastPos) { + tokens.push_back(str.substr(lastPos, pos - lastPos)); + lastPos = str.find_first_not_of(delims, pos); + pos = str.find_first_of(delims, lastPos); + } return tokens; } -std::vector kiwix::split(const char* lhs, const char* rhs){ - const std::string m1 (lhs), m2 (rhs); +std::vector kiwix::split(const char* lhs, const char* rhs) +{ + const std::string m1(lhs), m2(rhs); return split(m1, m2); } -std::vector kiwix::split(const char* lhs, const std::string& rhs){ +std::vector kiwix::split(const char* 
lhs, const std::string& rhs) +{ return split(lhs, rhs.c_str()); } -std::vector kiwix::split(const std::string& lhs, const char* rhs){ +std::vector kiwix::split(const std::string& lhs, const char* rhs) +{ return split(lhs.c_str(), rhs); } -std::string kiwix::ucFirst (const std::string &word) { - if (word.empty()) +std::string kiwix::ucFirst(const std::string& word) +{ + if (word.empty()) { return ""; + } std::string result; @@ -223,9 +251,11 @@ std::string kiwix::ucFirst (const std::string &word) { return result; } -std::string kiwix::ucAll (const std::string &word) { - if (word.empty()) +std::string kiwix::ucAll(const std::string& word) +{ + if (word.empty()) { return ""; + } std::string result; @@ -235,9 +265,11 @@ std::string kiwix::ucAll (const std::string &word) { return result; } -std::string kiwix::lcFirst (const std::string &word) { - if (word.empty()) +std::string kiwix::lcFirst(const std::string& word) +{ + if (word.empty()) { return ""; + } std::string result; @@ -249,9 +281,11 @@ std::string kiwix::lcFirst (const std::string &word) { return result; } -std::string kiwix::lcAll (const std::string &word) { - if (word.empty()) +std::string kiwix::lcAll(const std::string& word) +{ + if (word.empty()) { return ""; + } std::string result; @@ -261,9 +295,11 @@ std::string kiwix::lcAll (const std::string &word) { return result; } -std::string kiwix::toTitle (const std::string &word) { - if (word.empty()) +std::string kiwix::toTitle(const std::string& word) +{ + if (word.empty()) { return ""; + } std::string result; @@ -274,6 +310,7 @@ std::string kiwix::toTitle (const std::string &word) { return result; } -std::string kiwix::normalize (const std::string &word) { +std::string kiwix::normalize(const std::string& word) +{ return kiwix::lcAll(word); } diff --git a/src/library.cpp b/src/library.cpp index 2399d78d0..541e3c661 100644 --- a/src/library.cpp +++ b/src/library.cpp @@ -19,125 +19,136 @@ #include "library.h" -namespace kiwix { +namespace kiwix +{ +/* 
Constructor */ +Book::Book() : readOnly(false) +{ +} +/* Destructor */ +Book::~Book() +{ +} +/* Sort functions */ +bool Book::sortByLastOpen(const kiwix::Book& a, const kiwix::Book& b) +{ + return atoi(a.last.c_str()) > atoi(b.last.c_str()); +} - /* Constructor */ - Book::Book(): - readOnly(false) { - } - - /* Destructor */ - Book::~Book() { - } +bool Book::sortByTitle(const kiwix::Book& a, const kiwix::Book& b) +{ + return strcmp(a.title.c_str(), b.title.c_str()) < 0; +} - /* Sort functions */ - bool Book::sortByLastOpen(const kiwix::Book &a, const kiwix::Book &b) { - return atoi(a.last.c_str()) > atoi(b.last.c_str()); - } +bool Book::sortByDate(const kiwix::Book& a, const kiwix::Book& b) +{ + return strcmp(a.date.c_str(), b.date.c_str()) > 0; +} - bool Book::sortByTitle(const kiwix::Book &a, const kiwix::Book &b) { - return strcmp(a.title.c_str(), b.title.c_str()) < 0; - } +bool Book::sortBySize(const kiwix::Book& a, const kiwix::Book& b) +{ + return atoi(a.size.c_str()) < atoi(b.size.c_str()); +} - bool Book::sortByDate(const kiwix::Book &a, const kiwix::Book &b) { - return strcmp(a.date.c_str(), b.date.c_str()) > 0; - } +bool Book::sortByPublisher(const kiwix::Book& a, const kiwix::Book& b) +{ + return strcmp(a.publisher.c_str(), b.publisher.c_str()) < 0; +} - bool Book::sortBySize(const kiwix::Book &a, const kiwix::Book &b) { - return atoi(a.size.c_str()) < atoi(b.size.c_str()); - } +bool Book::sortByCreator(const kiwix::Book& a, const kiwix::Book& b) +{ + return strcmp(a.creator.c_str(), b.creator.c_str()) < 0; +} - bool Book::sortByPublisher(const kiwix::Book &a, const kiwix::Book &b) { - return strcmp(a.publisher.c_str(), b.publisher.c_str()) < 0; - } +bool Book::sortByLanguage(const kiwix::Book& a, const kiwix::Book& b) +{ + return strcmp(a.language.c_str(), b.language.c_str()) < 0; +} - bool Book::sortByCreator(const kiwix::Book &a, const kiwix::Book &b) { - return strcmp(a.creator.c_str(), b.creator.c_str()) < 0; - } - - bool Book::sortByLanguage(const 
kiwix::Book &a, const kiwix::Book &b) { - return strcmp(a.language.c_str(), b.language.c_str()) < 0; - } - - std::string Book::getHumanReadableIdFromPath() { - std::string id = pathAbsolute; - if (!id.empty()) { - kiwix::removeAccents(id); +std::string Book::getHumanReadableIdFromPath() +{ + std::string id = pathAbsolute; + if (!id.empty()) { + kiwix::removeAccents(id); #ifdef _WIN32 - id = replaceRegex(id, "", "^.*\\\\"); + id = replaceRegex(id, "", "^.*\\\\"); #else - id = replaceRegex(id, "", "^.*/"); + id = replaceRegex(id, "", "^.*/"); #endif - id = replaceRegex(id, "", "\\.zim[a-z]*$"); - id = replaceRegex(id, "_", " "); - id = replaceRegex(id, "plus", "\\+"); - } - return id; + id = replaceRegex(id, "", "\\.zim[a-z]*$"); + id = replaceRegex(id, "_", " "); + id = replaceRegex(id, "plus", "\\+"); } - - /* Constructor */ - Library::Library(): - version(KIWIX_LIBRARY_VERSION) { - } - - /* Destructor */ - Library::~Library() { - } - - bool Library::addBook(const Book &book) { - - /* Try to find it */ - std::vector::iterator itr; - for ( itr = this->books.begin(); itr != this->books.end(); ++itr ) { - if (itr->id == book.id) { - if (!itr->readOnly) { - itr->readOnly = book.readOnly; - - if (itr->path.empty()) - itr->path = book.path; - - if (itr->pathAbsolute.empty()) - itr->pathAbsolute = book.pathAbsolute; - - if (itr->url.empty()) - itr->url = book.url; - - if (itr->tags.empty()) - itr->tags = book.tags; - - if (itr->name.empty()) - itr->name = book.name; - - if (itr->indexPath.empty()) { - itr->indexPath = book.indexPath; - itr->indexType = book.indexType; - } - - if (itr->indexPathAbsolute.empty()) { - itr->indexPathAbsolute = book.indexPathAbsolute; - itr->indexType = book.indexType; - } - - if (itr->faviconMimeType.empty()) { - itr->favicon = book.favicon; - itr->faviconMimeType = book.faviconMimeType; - } - } - - return false; - } - } - - /* otherwise */ - this->books.push_back(book); - return true; - } - - bool Library::removeBookByIndex(const unsigned 
int bookIndex) { - books.erase(books.begin()+bookIndex); - return true; - } - + return id; +} + +/* Constructor */ +Library::Library() : version(KIWIX_LIBRARY_VERSION) +{ +} +/* Destructor */ +Library::~Library() +{ +} +bool Library::addBook(const Book& book) +{ + /* Try to find it */ + std::vector::iterator itr; + for (itr = this->books.begin(); itr != this->books.end(); ++itr) { + if (itr->id == book.id) { + if (!itr->readOnly) { + itr->readOnly = book.readOnly; + + if (itr->path.empty()) { + itr->path = book.path; + } + + if (itr->pathAbsolute.empty()) { + itr->pathAbsolute = book.pathAbsolute; + } + + if (itr->url.empty()) { + itr->url = book.url; + } + + if (itr->tags.empty()) { + itr->tags = book.tags; + } + + if (itr->name.empty()) { + itr->name = book.name; + } + + if (itr->indexPath.empty()) { + itr->indexPath = book.indexPath; + itr->indexType = book.indexType; + } + + if (itr->indexPathAbsolute.empty()) { + itr->indexPathAbsolute = book.indexPathAbsolute; + itr->indexType = book.indexType; + } + + if (itr->faviconMimeType.empty()) { + itr->favicon = book.favicon; + itr->faviconMimeType = book.faviconMimeType; + } + } + + return false; + } + } + + /* otherwise */ + this->books.push_back(book); + return true; +} + +bool Library::removeBookByIndex(const unsigned int bookIndex) +{ + books.erase(books.begin() + bookIndex); + return true; +} } diff --git a/src/manager.cpp b/src/manager.cpp index e7111c8a2..7446d9a90 100644 --- a/src/manager.cpp +++ b/src/manager.cpp @@ -19,544 +19,640 @@ #include "manager.h" -namespace kiwix { +namespace kiwix +{ +/* Constructor */ +Manager::Manager() : writableLibraryPath("") +{ +} +/* Destructor */ +Manager::~Manager() +{ +} +bool Manager::parseXmlDom(const pugi::xml_document& doc, + const bool readOnly, + const string libraryPath) +{ + pugi::xml_node libraryNode = doc.child("library"); - /* Constructor */ - Manager::Manager() : - writableLibraryPath("") { - } + if (strlen(libraryNode.attribute("current").value())) + 
this->setCurrentBookId(libraryNode.attribute("current").value()); - /* Destructor */ - Manager::~Manager() { - } + string libraryVersion = libraryNode.attribute("version").value(); - bool Manager::parseXmlDom(const pugi::xml_document &doc, const bool readOnly, const string libraryPath) { - pugi::xml_node libraryNode = doc.child("library"); - - if (strlen(libraryNode.attribute("current").value())) - this->setCurrentBookId(libraryNode.attribute("current").value()); - - string libraryVersion = libraryNode.attribute("version").value(); - - for (pugi::xml_node bookNode = libraryNode.child("book"); bookNode; bookNode = bookNode.next_sibling("book")) { - bool ok = true; - kiwix::Book book; - - book.readOnly = readOnly; - book.id = bookNode.attribute("id").value(); - book.path = bookNode.attribute("path").value(); - book.last = (std::string(bookNode.attribute("last").value()) != "undefined" ? - bookNode.attribute("last").value() : ""); - book.indexPath = bookNode.attribute("indexPath").value(); - book.indexType = XAPIAN; - book.title = bookNode.attribute("title").value(); - book.name = bookNode.attribute("name").value(); - book.tags = bookNode.attribute("tags").value(); - book.description = bookNode.attribute("description").value(); - book.language = bookNode.attribute("language").value(); - book.date = bookNode.attribute("date").value(); - book.creator = bookNode.attribute("creator").value(); - book.publisher = bookNode.attribute("publisher").value(); - book.url = bookNode.attribute("url").value(); - book.origId = bookNode.attribute("origId").value(); - book.articleCount = bookNode.attribute("articleCount").value(); - book.mediaCount = bookNode.attribute("mediaCount").value(); - book.size = bookNode.attribute("size").value(); - book.favicon = bookNode.attribute("favicon").value(); - book.faviconMimeType = bookNode.attribute("faviconMimeType").value(); - - /* Check absolute and relative paths */ - this->checkAndCleanBookPaths(book, libraryPath); - - /* Update the book 
properties with the new importer */ - if (libraryVersion.empty() || atoi(libraryVersion.c_str()) <= atoi(KIWIX_LIBRARY_VERSION)) { - if (!book.path.empty()) { - ok = this->readBookFromPath(book.pathAbsolute); - } - } - - if (ok) { - library.addBook(book); - } - } - - return true; - } - - bool Manager::readXml(const string xml, const bool readOnly, const string libraryPath) { - pugi::xml_document doc; - pugi::xml_parse_result result = doc.load_buffer_inplace((void*)xml.data(), xml.size()); - - if (result) { - this->parseXmlDom(doc, readOnly, libraryPath); - } - - return true; - } - - bool Manager::readFile(const string path, const bool readOnly) { - return this->readFile(path, path, readOnly); - } - - bool Manager::readFile(const string nativePath, const string UTF8Path, const bool readOnly) { - bool retVal = true; - pugi::xml_document doc; - pugi::xml_parse_result result = doc.load_file(nativePath.c_str()); - - if (result) { - this->parseXmlDom(doc, readOnly, UTF8Path); - } else { - retVal = false; - } - - /* This has to be set (although if the file does not exists) to be - * able to know where to save the library if new content are - * available */ - if (!readOnly) { - this->writableLibraryPath = UTF8Path; - } - - return retVal; - } - - bool Manager::writeFile(const string path) { - pugi::xml_document doc; - - /* Add the library node */ - pugi::xml_node libraryNode = doc.append_child("library"); - - if (!getCurrentBookId().empty()) { - libraryNode.append_attribute("current") = getCurrentBookId().c_str(); - } - - if (!library.version.empty()) - libraryNode.append_attribute("version") = library.version.c_str(); - - /* Add each book */ - std::vector::iterator itr; - for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) { - - if (!itr->readOnly) { - this->checkAndCleanBookPaths(*itr, path); - - pugi::xml_node bookNode = libraryNode.append_child("book"); - bookNode.append_attribute("id") = itr->id.c_str(); - - if (!itr->path.empty()) - 
bookNode.append_attribute("path") = itr->path.c_str(); - - if (!itr->last.empty() && itr->last != "undefined") { - bookNode.append_attribute("last") = itr->last.c_str(); - } - - if (!itr->indexPath.empty()) - bookNode.append_attribute("indexPath") = itr->indexPath.c_str(); - - if (!itr->indexPath.empty() || !itr->indexPathAbsolute.empty()) { - if (itr->indexType == XAPIAN) - bookNode.append_attribute("indexType") = "xapian"; - } - - if (itr->origId.empty()) { - if (!itr->title.empty()) - bookNode.append_attribute("title") = itr->title.c_str(); - - if (!itr->name.empty()) - bookNode.append_attribute("name") = itr->name.c_str(); - - if (!itr->tags.empty()) - bookNode.append_attribute("tags") = itr->tags.c_str(); - - if (!itr->description.empty()) - bookNode.append_attribute("description") = itr->description.c_str(); - - if (!itr->language.empty()) - bookNode.append_attribute("language") = itr->language.c_str(); - - if (!itr->creator.empty()) - bookNode.append_attribute("creator") = itr->creator.c_str(); - - if (!itr->publisher.empty()) - bookNode.append_attribute("publisher") = itr->publisher.c_str(); - - if (!itr->favicon.empty()) - bookNode.append_attribute("favicon") = itr->favicon.c_str(); - - if (!itr->faviconMimeType.empty()) - bookNode.append_attribute("faviconMimeType") = itr->faviconMimeType.c_str(); - } - - if (!itr->date.empty()) - bookNode.append_attribute("date") = itr->date.c_str(); - - if (!itr->url.empty()) - bookNode.append_attribute("url") = itr->url.c_str(); - - if (!itr->origId.empty()) - bookNode.append_attribute("origId") = itr->origId.c_str(); - - if (!itr->articleCount.empty()) - bookNode.append_attribute("articleCount") = itr->articleCount.c_str(); - - if (!itr->mediaCount.empty()) - bookNode.append_attribute("mediaCount") = itr->mediaCount.c_str(); - - if (!itr->size.empty()) - bookNode.append_attribute("size") = itr->size.c_str(); - } - } - - /* saving file */ - doc.save_file(path.c_str()); - - return true; - } - - bool 
Manager::setCurrentBookId(const string id) { - if (library.current.empty() || library.current.top() != id) { - if (id.empty() && !library.current.empty()) - library.current.pop(); - else - library.current.push(id); - } - return true; - } - - string Manager::getCurrentBookId() { - return library.current.empty() ? - "" : library.current.top(); - } - - /* Add a book to the library. Return empty string if failed, book id otherwise */ - string Manager::addBookFromPathAndGetId(const string pathToOpen, const string pathToSave, - const string url, const bool checkMetaData) { + for (pugi::xml_node bookNode = libraryNode.child("book"); bookNode; + bookNode = bookNode.next_sibling("book")) { + bool ok = true; kiwix::Book book; - if (this->readBookFromPath(pathToOpen, &book)) { + book.readOnly = readOnly; + book.id = bookNode.attribute("id").value(); + book.path = bookNode.attribute("path").value(); + book.last = (std::string(bookNode.attribute("last").value()) != "undefined" + ? bookNode.attribute("last").value() + : ""); + book.indexPath = bookNode.attribute("indexPath").value(); + book.indexType = XAPIAN; + book.title = bookNode.attribute("title").value(); + book.name = bookNode.attribute("name").value(); + book.tags = bookNode.attribute("tags").value(); + book.description = bookNode.attribute("description").value(); + book.language = bookNode.attribute("language").value(); + book.date = bookNode.attribute("date").value(); + book.creator = bookNode.attribute("creator").value(); + book.publisher = bookNode.attribute("publisher").value(); + book.url = bookNode.attribute("url").value(); + book.origId = bookNode.attribute("origId").value(); + book.articleCount = bookNode.attribute("articleCount").value(); + book.mediaCount = bookNode.attribute("mediaCount").value(); + book.size = bookNode.attribute("size").value(); + book.favicon = bookNode.attribute("favicon").value(); + book.faviconMimeType = bookNode.attribute("faviconMimeType").value(); - if (pathToSave != pathToOpen) { - 
book.path = pathToSave; - book.pathAbsolute = isRelativePath(pathToSave) ? - computeAbsolutePath(removeLastPathElement(writableLibraryPath, true, false), pathToSave) : pathToSave; - } + /* Check absolute and relative paths */ + this->checkAndCleanBookPaths(book, libraryPath); - if (!checkMetaData || - (checkMetaData && !book.title.empty() && !book.language.empty() && !book.date.empty())) { - book.url = url; - library.addBook(book); - return book.id; + /* Update the book properties with the new importer */ + if (libraryVersion.empty() + || atoi(libraryVersion.c_str()) <= atoi(KIWIX_LIBRARY_VERSION)) { + if (!book.path.empty()) { + ok = this->readBookFromPath(book.pathAbsolute); } } - return ""; + if (ok) { + library.addBook(book); + } } - /* Wrapper over Manager::addBookFromPath which return a bool instead of a string */ - bool Manager::addBookFromPath(const string pathToOpen, const string pathToSave, const string url, const bool checkMetaData) { - return !(this->addBookFromPathAndGetId(pathToOpen, pathToSave, url, checkMetaData).empty()); + return true; +} + +bool Manager::readXml(const string xml, + const bool readOnly, + const string libraryPath) +{ + pugi::xml_document doc; + pugi::xml_parse_result result + = doc.load_buffer_inplace((void*)xml.data(), xml.size()); + + if (result) { + this->parseXmlDom(doc, readOnly, libraryPath); } - bool Manager::readBookFromPath(const string path, kiwix::Book *book) { - try { - kiwix::Reader *reader = new kiwix::Reader(path); + return true; +} - if (book != NULL) { - book->path = path; - book->pathAbsolute = path; - book->id = reader->getId(); - book->description = reader->getDescription(); - book->language = reader->getLanguage(); - book->date = reader->getDate(); - book->creator = reader->getCreator(); - book->publisher = reader->getPublisher(); - book->title = reader->getTitle(); - book->name = reader->getName(); - book->tags = reader->getTags(); - book->origId = reader->getOrigId(); - std::ostringstream articleCountStream; 
- articleCountStream << reader->getArticleCount(); - book->articleCount = articleCountStream.str(); +bool Manager::readFile(const string path, const bool readOnly) +{ + return this->readFile(path, path, readOnly); +} - std::ostringstream mediaCountStream; - mediaCountStream << reader->getMediaCount(); - book->mediaCount = mediaCountStream.str(); +bool Manager::readFile(const string nativePath, + const string UTF8Path, + const bool readOnly) +{ + bool retVal = true; + pugi::xml_document doc; + pugi::xml_parse_result result = doc.load_file(nativePath.c_str()); - ostringstream convert; convert << reader->getFileSize(); - book->size = convert.str(); + if (result) { + this->parseXmlDom(doc, readOnly, UTF8Path); + } else { + retVal = false; + } - string favicon; - string faviconMimeType; - if (reader->getFavicon(favicon, faviconMimeType)) { - book->favicon = base64_encode(reinterpret_cast(favicon.c_str()), favicon.length()); - book->faviconMimeType = faviconMimeType; - } + /* This has to be set (although if the file does not exists) to be + * able to know where to save the library if new content are + * available */ + if (!readOnly) { + this->writableLibraryPath = UTF8Path; + } + + return retVal; +} + +bool Manager::writeFile(const string path) +{ + pugi::xml_document doc; + + /* Add the library node */ + pugi::xml_node libraryNode = doc.append_child("library"); + + if (!getCurrentBookId().empty()) { + libraryNode.append_attribute("current") = getCurrentBookId().c_str(); + } + + if (!library.version.empty()) + libraryNode.append_attribute("version") = library.version.c_str(); + + /* Add each book */ + std::vector::iterator itr; + for (itr = library.books.begin(); itr != library.books.end(); ++itr) { + if (!itr->readOnly) { + this->checkAndCleanBookPaths(*itr, path); + + pugi::xml_node bookNode = libraryNode.append_child("book"); + bookNode.append_attribute("id") = itr->id.c_str(); + + if (!itr->path.empty()) { + bookNode.append_attribute("path") = itr->path.c_str(); } - 
delete reader; - } catch (const std::exception& e) { - std::cerr << e.what() << std::endl; - return false; - } - - return true; - } - - bool Manager::removeBookByIndex(const unsigned int bookIndex) { - return this->library.removeBookByIndex(bookIndex); - } - - bool Manager::removeBookById(const string id) { - unsigned int bookIndex = 0; - std::vector::iterator itr; - for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) { - if ( itr->id == id) { - return this->library.removeBookByIndex(bookIndex); + if (!itr->last.empty() && itr->last != "undefined") { + bookNode.append_attribute("last") = itr->last.c_str(); + } + + if (!itr->indexPath.empty()) + bookNode.append_attribute("indexPath") = itr->indexPath.c_str(); + + if (!itr->indexPath.empty() || !itr->indexPathAbsolute.empty()) { + if (itr->indexType == XAPIAN) { + bookNode.append_attribute("indexType") = "xapian"; + } + } + + if (itr->origId.empty()) { + if (!itr->title.empty()) + bookNode.append_attribute("title") = itr->title.c_str(); + + if (!itr->name.empty()) + bookNode.append_attribute("name") = itr->name.c_str(); + + if (!itr->tags.empty()) + bookNode.append_attribute("tags") = itr->tags.c_str(); + + if (!itr->description.empty()) + bookNode.append_attribute("description") = itr->description.c_str(); + + if (!itr->language.empty()) + bookNode.append_attribute("language") = itr->language.c_str(); + + if (!itr->creator.empty()) + bookNode.append_attribute("creator") = itr->creator.c_str(); + + if (!itr->publisher.empty()) + bookNode.append_attribute("publisher") = itr->publisher.c_str(); + + if (!itr->favicon.empty()) + bookNode.append_attribute("favicon") = itr->favicon.c_str(); + + if (!itr->faviconMimeType.empty()) + bookNode.append_attribute("faviconMimeType") + = itr->faviconMimeType.c_str(); + } + + if (!itr->date.empty()) { + bookNode.append_attribute("date") = itr->date.c_str(); + } + + if (!itr->url.empty()) { + bookNode.append_attribute("url") = itr->url.c_str(); + } + + if 
(!itr->origId.empty()) + bookNode.append_attribute("origId") = itr->origId.c_str(); + + if (!itr->articleCount.empty()) + bookNode.append_attribute("articleCount") = itr->articleCount.c_str(); + + if (!itr->mediaCount.empty()) + bookNode.append_attribute("mediaCount") = itr->mediaCount.c_str(); + + if (!itr->size.empty()) { + bookNode.append_attribute("size") = itr->size.c_str(); } - bookIndex++; } + } + + /* saving file */ + doc.save_file(path.c_str()); + + return true; +} + +bool Manager::setCurrentBookId(const string id) +{ + if (library.current.empty() || library.current.top() != id) { + if (id.empty() && !library.current.empty()) { + library.current.pop(); + } else { + library.current.push(id); + } + } + return true; +} + +string Manager::getCurrentBookId() +{ + return library.current.empty() ? "" : library.current.top(); +} + +/* Add a book to the library. Return empty string if failed, book id otherwise + */ +string Manager::addBookFromPathAndGetId(const string pathToOpen, + const string pathToSave, + const string url, + const bool checkMetaData) +{ + kiwix::Book book; + + if (this->readBookFromPath(pathToOpen, &book)) { + if (pathToSave != pathToOpen) { + book.path = pathToSave; + book.pathAbsolute + = isRelativePath(pathToSave) + ? 
computeAbsolutePath( + removeLastPathElement(writableLibraryPath, true, false), + pathToSave) + : pathToSave; + } + + if (!checkMetaData + || (checkMetaData && !book.title.empty() && !book.language.empty() + && !book.date.empty())) { + book.url = url; + library.addBook(book); + return book.id; + } + } + + return ""; +} + +/* Wrapper over Manager::addBookFromPath which return a bool instead of a string + */ +bool Manager::addBookFromPath(const string pathToOpen, + const string pathToSave, + const string url, + const bool checkMetaData) +{ + return !( + this->addBookFromPathAndGetId(pathToOpen, pathToSave, url, checkMetaData) + .empty()); +} + +bool Manager::readBookFromPath(const string path, kiwix::Book* book) +{ + try { + kiwix::Reader* reader = new kiwix::Reader(path); + + if (book != NULL) { + book->path = path; + book->pathAbsolute = path; + book->id = reader->getId(); + book->description = reader->getDescription(); + book->language = reader->getLanguage(); + book->date = reader->getDate(); + book->creator = reader->getCreator(); + book->publisher = reader->getPublisher(); + book->title = reader->getTitle(); + book->name = reader->getName(); + book->tags = reader->getTags(); + book->origId = reader->getOrigId(); + std::ostringstream articleCountStream; + articleCountStream << reader->getArticleCount(); + book->articleCount = articleCountStream.str(); + + std::ostringstream mediaCountStream; + mediaCountStream << reader->getMediaCount(); + book->mediaCount = mediaCountStream.str(); + + ostringstream convert; + convert << reader->getFileSize(); + book->size = convert.str(); + + string favicon; + string faviconMimeType; + if (reader->getFavicon(favicon, faviconMimeType)) { + book->favicon = base64_encode( + reinterpret_cast(favicon.c_str()), + favicon.length()); + book->faviconMimeType = faviconMimeType; + } + } + + delete reader; + } catch (const std::exception& e) { + std::cerr << e.what() << std::endl; return false; } - vector Manager::getBooksLanguages() { - 
std::vector booksLanguages; - std::vector::iterator itr; - std::map booksLanguagesMap; + return true; +} - std::sort(library.books.begin(), library.books.end(), kiwix::Book::sortByLanguage); - for (itr = library.books.begin(); itr != library.books.end(); ++itr) { - if (booksLanguagesMap.find(itr->language) == booksLanguagesMap.end()) { - if (itr->origId.empty()) { - booksLanguagesMap[itr->language] = true; - booksLanguages.push_back(itr->language); - } +bool Manager::removeBookByIndex(const unsigned int bookIndex) +{ + return this->library.removeBookByIndex(bookIndex); +} + +bool Manager::removeBookById(const string id) +{ + unsigned int bookIndex = 0; + std::vector::iterator itr; + for (itr = library.books.begin(); itr != library.books.end(); ++itr) { + if (itr->id == id) { + return this->library.removeBookByIndex(bookIndex); + } + bookIndex++; + } + return false; +} + +vector Manager::getBooksLanguages() +{ + std::vector booksLanguages; + std::vector::iterator itr; + std::map booksLanguagesMap; + + std::sort( + library.books.begin(), library.books.end(), kiwix::Book::sortByLanguage); + for (itr = library.books.begin(); itr != library.books.end(); ++itr) { + if (booksLanguagesMap.find(itr->language) == booksLanguagesMap.end()) { + if (itr->origId.empty()) { + booksLanguagesMap[itr->language] = true; + booksLanguages.push_back(itr->language); } } - - return booksLanguages; } - vector Manager::getBooksCreators() { - std::vector booksCreators; - std::vector::iterator itr; - std::map booksCreatorsMap; + return booksLanguages; +} - std::sort(library.books.begin(), library.books.end(), kiwix::Book::sortByCreator); - for (itr = library.books.begin(); itr != library.books.end(); ++itr) { - if (booksCreatorsMap.find(itr->creator) == booksCreatorsMap.end()) { - if (itr->origId.empty()) { - booksCreatorsMap[itr->creator] = true; - booksCreators.push_back(itr->creator); - } +vector Manager::getBooksCreators() +{ + std::vector booksCreators; + std::vector::iterator itr; + 
std::map booksCreatorsMap; + + std::sort( + library.books.begin(), library.books.end(), kiwix::Book::sortByCreator); + for (itr = library.books.begin(); itr != library.books.end(); ++itr) { + if (booksCreatorsMap.find(itr->creator) == booksCreatorsMap.end()) { + if (itr->origId.empty()) { + booksCreatorsMap[itr->creator] = true; + booksCreators.push_back(itr->creator); } } - - return booksCreators; } + return booksCreators; +} - vector Manager::getBooksIds() { - std::vector booksIds; - std::vector::iterator itr; +vector Manager::getBooksIds() +{ + std::vector booksIds; + std::vector::iterator itr; - for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) { - booksIds.push_back(itr->id); - } - - return booksIds; + for (itr = library.books.begin(); itr != library.books.end(); ++itr) { + booksIds.push_back(itr->id); } - vector Manager::getBooksPublishers() { - std::vector booksPublishers; - std::vector::iterator itr; - std::map booksPublishersMap; + return booksIds; +} - std::sort(library.books.begin(), library.books.end(), kiwix::Book::sortByPublisher); - for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) { - if (booksPublishersMap.find(itr->publisher) == booksPublishersMap.end()) { - if (itr->origId.empty()) { - booksPublishersMap[itr->publisher] = true; - booksPublishers.push_back(itr->publisher); - } +vector Manager::getBooksPublishers() +{ + std::vector booksPublishers; + std::vector::iterator itr; + std::map booksPublishersMap; + + std::sort( + library.books.begin(), library.books.end(), kiwix::Book::sortByPublisher); + for (itr = library.books.begin(); itr != library.books.end(); ++itr) { + if (booksPublishersMap.find(itr->publisher) == booksPublishersMap.end()) { + if (itr->origId.empty()) { + booksPublishersMap[itr->publisher] = true; + booksPublishers.push_back(itr->publisher); } } - - return booksPublishers; } - kiwix::Library Manager::cloneLibrary() { - return this->library; - } + return booksPublishers; +} - bool 
Manager::getCurrentBook(Book &book) { - string currentBookId = getCurrentBookId(); - if (currentBookId.empty()) { - return false; - } else { - getBookById(currentBookId, book); +kiwix::Library Manager::cloneLibrary() +{ + return this->library; +} +bool Manager::getCurrentBook(Book& book) +{ + string currentBookId = getCurrentBookId(); + if (currentBookId.empty()) { + return false; + } else { + getBookById(currentBookId, book); + return true; + } +} + +bool Manager::getBookById(const string id, Book& book) +{ + std::vector::iterator itr; + for (itr = library.books.begin(); itr != library.books.end(); ++itr) { + if (itr->id == id) { + book = *itr; + return true; + } + } + return false; +} + +bool Manager::updateBookLastOpenDateById(const string id) +{ + std::vector::iterator itr; + for (itr = library.books.begin(); itr != library.books.end(); ++itr) { + if (itr->id == id) { + char unixdate[12]; + sprintf(unixdate, "%d", (int)time(NULL)); + itr->last = unixdate; return true; } } - bool Manager::getBookById(const string id, Book &book) { - std::vector::iterator itr; - for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) { - if ( itr->id == id) { - book = *itr; - return true; - } - } - return false; - } - - bool Manager::updateBookLastOpenDateById(const string id) { - std::vector::iterator itr; - for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) { - if ( itr->id == id) { - char unixdate[12]; - sprintf (unixdate, "%d", (int)time(NULL)); - itr->last = unixdate; - return true; - } - } - - return false; - } - - bool Manager::setBookIndex(const string id, const string path, const supportedIndexType type) { - std::vector::iterator itr; - for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) { - if ( itr->id == id) { - itr->indexPath = path; - itr->indexPathAbsolute = isRelativePath(path) ? 
- computeAbsolutePath(removeLastPathElement(writableLibraryPath, true, false), path) : path; - itr->indexType = type; - return true; - } - } - - return false; - } - - bool Manager::setBookIndex(const string id, const string path) { - return this->setBookIndex(id, path, XAPIAN); - } - - bool Manager::setBookPath(const string id, const string path) { - std::vector::iterator itr; - for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) { - if ( itr->id == id) { - itr->path = path; - itr->pathAbsolute = isRelativePath(path) ? - computeAbsolutePath(removeLastPathElement(writableLibraryPath, true, false), path) : path; - return true; - } - } - - return false; - } - - void Manager::removeBookPaths() { - std::vector::iterator itr; - for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) { - itr->path = ""; - itr->pathAbsolute = ""; - } - } - - unsigned int Manager::getBookCount(const bool localBooks, const bool remoteBooks) { - unsigned int result = 0; - std::vector::iterator itr; - for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) { - if ((!itr->path.empty() && localBooks) || (itr->path.empty() && remoteBooks)) - result++; - } - return result; - } - - bool Manager::listBooks(const supportedListMode mode, const supportedListSortBy sortBy, - const unsigned int maxSize, const string language, const string creator, - const string publisher, const string search) { - this->bookIdList.clear(); - std::vector::iterator itr; - - /* Sort */ - if (sortBy == TITLE) { - std::sort(library.books.begin(), library.books.end(), kiwix::Book::sortByTitle); - } else if (sortBy == SIZE) { - std::sort(library.books.begin(), library.books.end(), kiwix::Book::sortBySize); - } else if (sortBy == DATE) { - std::sort(library.books.begin(), library.books.end(), kiwix::Book::sortByDate); - } else if (sortBy == CREATOR) { - std::sort(library.books.begin(), library.books.end(), kiwix::Book::sortByCreator); - } else if (sortBy == PUBLISHER) { - 
std::sort(library.books.begin(), library.books.end(), kiwix::Book::sortByPublisher); - } - - /* Special sort for LASTOPEN */ - if (mode == LASTOPEN) { - std::sort(library.books.begin(), library.books.end(), kiwix::Book::sortByLastOpen); - for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) { - if (!itr->last.empty()) - this->bookIdList.push_back(itr->id); - } - } else { - /* Generate the list of book id */ - for ( itr = library.books.begin(); itr != library.books.end(); ++itr ) { - bool ok = true; - - if (mode == LOCAL && itr->path.empty()) - ok = false; - - if (ok == true && mode == REMOTE && (!itr->path.empty() || itr->url.empty())) - ok = false; - - if (ok == true && maxSize != 0 && (unsigned int)atoi(itr->size.c_str()) > maxSize * 1024 * 1024) - ok = false; - - if (ok == true && !language.empty() && !matchRegex(itr->language, language)) - ok = false; - - if (ok == true && !creator.empty() && itr->creator != creator) - ok = false; - - if (ok == true && !publisher.empty() && itr->publisher != publisher) - ok = false; - - if ((ok == true && !search.empty()) && !(matchRegex(itr->title, "\\Q" + search + "\\E") || - matchRegex(itr->description, "\\Q" + search + "\\E") || - matchRegex(itr->language, "\\Q" + search + "\\E") - )) - ok = false; - - if (ok == true) { - this->bookIdList.push_back(itr->id); - } - } - } - - return true; - } - - void Manager::checkAndCleanBookPaths(Book &book, const string &libraryPath) { - if (!book.path.empty()) { - if (isRelativePath(book.path)) { - book.pathAbsolute = computeAbsolutePath(removeLastPathElement(libraryPath, true, false), book.path); - } else { - book.pathAbsolute = book.path; - book.path = computeRelativePath(removeLastPathElement(libraryPath, true, false), book.pathAbsolute); - } - } - - if (!book.indexPath.empty()) { - if (isRelativePath(book.indexPath)) { - book.indexPathAbsolute = - computeAbsolutePath(removeLastPathElement(libraryPath, true, false), book.indexPath); - } else { - 
book.indexPathAbsolute = book.indexPath; - book.indexPath = - computeRelativePath(removeLastPathElement(libraryPath, true, false), book.indexPathAbsolute); - } - } - } - + return false; +} + +bool Manager::setBookIndex(const string id, + const string path, + const supportedIndexType type) +{ + std::vector::iterator itr; + for (itr = library.books.begin(); itr != library.books.end(); ++itr) { + if (itr->id == id) { + itr->indexPath = path; + itr->indexPathAbsolute + = isRelativePath(path) + ? computeAbsolutePath( + removeLastPathElement(writableLibraryPath, true, false), + path) + : path; + itr->indexType = type; + return true; + } + } + + return false; +} + +bool Manager::setBookIndex(const string id, const string path) +{ + return this->setBookIndex(id, path, XAPIAN); +} + +bool Manager::setBookPath(const string id, const string path) +{ + std::vector::iterator itr; + for (itr = library.books.begin(); itr != library.books.end(); ++itr) { + if (itr->id == id) { + itr->path = path; + itr->pathAbsolute + = isRelativePath(path) + ? 
computeAbsolutePath( + removeLastPathElement(writableLibraryPath, true, false), + path) + : path; + return true; + } + } + + return false; +} + +void Manager::removeBookPaths() +{ + std::vector::iterator itr; + for (itr = library.books.begin(); itr != library.books.end(); ++itr) { + itr->path = ""; + itr->pathAbsolute = ""; + } +} + +unsigned int Manager::getBookCount(const bool localBooks, + const bool remoteBooks) +{ + unsigned int result = 0; + std::vector::iterator itr; + for (itr = library.books.begin(); itr != library.books.end(); ++itr) { + if ((!itr->path.empty() && localBooks) + || (itr->path.empty() && remoteBooks)) { + result++; + } + } + return result; +} + +bool Manager::listBooks(const supportedListMode mode, + const supportedListSortBy sortBy, + const unsigned int maxSize, + const string language, + const string creator, + const string publisher, + const string search) +{ + this->bookIdList.clear(); + std::vector::iterator itr; + + /* Sort */ + if (sortBy == TITLE) { + std::sort( + library.books.begin(), library.books.end(), kiwix::Book::sortByTitle); + } else if (sortBy == SIZE) { + std::sort( + library.books.begin(), library.books.end(), kiwix::Book::sortBySize); + } else if (sortBy == DATE) { + std::sort( + library.books.begin(), library.books.end(), kiwix::Book::sortByDate); + } else if (sortBy == CREATOR) { + std::sort( + library.books.begin(), library.books.end(), kiwix::Book::sortByCreator); + } else if (sortBy == PUBLISHER) { + std::sort(library.books.begin(), + library.books.end(), + kiwix::Book::sortByPublisher); + } + + /* Special sort for LASTOPEN */ + if (mode == LASTOPEN) { + std::sort(library.books.begin(), + library.books.end(), + kiwix::Book::sortByLastOpen); + for (itr = library.books.begin(); itr != library.books.end(); ++itr) { + if (!itr->last.empty()) { + this->bookIdList.push_back(itr->id); + } + } + } else { + /* Generate the list of book id */ + for (itr = library.books.begin(); itr != library.books.end(); ++itr) { + bool ok 
= true; + + if (mode == LOCAL && itr->path.empty()) { + ok = false; + } + + if (ok == true && mode == REMOTE + && (!itr->path.empty() || itr->url.empty())) { + ok = false; + } + + if (ok == true && maxSize != 0 + && (unsigned int)atoi(itr->size.c_str()) > maxSize * 1024 * 1024) { + ok = false; + } + + if (ok == true && !language.empty() + && !matchRegex(itr->language, language)) { + ok = false; + } + + if (ok == true && !creator.empty() && itr->creator != creator) { + ok = false; + } + + if (ok == true && !publisher.empty() && itr->publisher != publisher) { + ok = false; + } + + if ((ok == true && !search.empty()) + && !(matchRegex(itr->title, "\\Q" + search + "\\E") + || matchRegex(itr->description, "\\Q" + search + "\\E") + || matchRegex(itr->language, "\\Q" + search + "\\E"))) { + ok = false; + } + + if (ok == true) { + this->bookIdList.push_back(itr->id); + } + } + } + + return true; +} + +void Manager::checkAndCleanBookPaths(Book& book, const string& libraryPath) +{ + if (!book.path.empty()) { + if (isRelativePath(book.path)) { + book.pathAbsolute = computeAbsolutePath( + removeLastPathElement(libraryPath, true, false), book.path); + } else { + book.pathAbsolute = book.path; + book.path = computeRelativePath( + removeLastPathElement(libraryPath, true, false), book.pathAbsolute); + } + } + + if (!book.indexPath.empty()) { + if (isRelativePath(book.indexPath)) { + book.indexPathAbsolute = computeAbsolutePath( + removeLastPathElement(libraryPath, true, false), book.indexPath); + } else { + book.indexPathAbsolute = book.indexPath; + book.indexPath + = computeRelativePath(removeLastPathElement(libraryPath, true, false), + book.indexPathAbsolute); + } + } +} } diff --git a/src/reader.cpp b/src/reader.cpp index d224bf6b3..c967b2473 100644 --- a/src/reader.cpp +++ b/src/reader.cpp @@ -20,646 +20,737 @@ #include "reader.h" #include -inline char hi(char v) { - char hex[] = "0123456789abcdef"; - return hex[(v >> 4) & 0xf]; +inline char hi(char v) +{ + char hex[] = 
"0123456789abcdef"; + return hex[(v >> 4) & 0xf]; } -inline char lo(char v) { - char hex[] = "0123456789abcdef"; - return hex[v & 0xf]; +inline char lo(char v) +{ + char hex[] = "0123456789abcdef"; + return hex[v & 0xf]; } -std::string hexUUID (std::string in) { - std::ostringstream out; - for (unsigned n = 0; n < 4; ++n) - out << hi(in[n]) << lo(in[n]); - out << '-'; - for (unsigned n = 4; n < 6; ++n) - out << hi(in[n]) << lo(in[n]); - out << '-'; - for (unsigned n = 6; n < 8; ++n) - out << hi(in[n]) << lo(in[n]); - out << '-'; - for (unsigned n = 8; n < 10; ++n) - out << hi(in[n]) << lo(in[n]); - out << '-'; - for (unsigned n = 10; n < 16; ++n) - out << hi(in[n]) << lo(in[n]); - std::string op=out.str(); - return op; +std::string hexUUID(std::string in) +{ + std::ostringstream out; + for (unsigned n = 0; n < 4; ++n) { + out << hi(in[n]) << lo(in[n]); + } + out << '-'; + for (unsigned n = 4; n < 6; ++n) { + out << hi(in[n]) << lo(in[n]); + } + out << '-'; + for (unsigned n = 6; n < 8; ++n) { + out << hi(in[n]) << lo(in[n]); + } + out << '-'; + for (unsigned n = 8; n < 10; ++n) { + out << hi(in[n]) << lo(in[n]); + } + out << '-'; + for (unsigned n = 10; n < 16; ++n) { + out << hi(in[n]) << lo(in[n]); + } + std::string op = out.str(); + return op; } -namespace kiwix { +namespace kiwix +{ +/* Constructor */ +Reader::Reader(const string zimFilePath) : zimFileHandler(NULL) +{ + string tmpZimFilePath = zimFilePath; - /* Constructor */ - Reader::Reader(const string zimFilePath) - : zimFileHandler(NULL) { - string tmpZimFilePath = zimFilePath; - - /* Remove potential trailing zimaa */ - size_t found = tmpZimFilePath.rfind("zimaa"); - if (found != string::npos && - tmpZimFilePath.size() > 5 && - found == tmpZimFilePath.size() - 5) { - tmpZimFilePath.resize(tmpZimFilePath.size() - 2); - } - - this->zimFileHandler = new zim::File(tmpZimFilePath); - - if (this->zimFileHandler != NULL) { - this->firstArticleOffset = this->zimFileHandler->getNamespaceBeginOffset('A'); - 
this->lastArticleOffset = this->zimFileHandler->getNamespaceEndOffset('A'); - this->currentArticleOffset = this->firstArticleOffset; - this->nsACount = this->zimFileHandler->getNamespaceCount('A'); - this->nsICount = this->zimFileHandler->getNamespaceCount('I'); - this->zimFilePath = zimFilePath; - } - - /* initialize random seed: */ - srand ( time(NULL) ); + /* Remove potential trailing zimaa */ + size_t found = tmpZimFilePath.rfind("zimaa"); + if (found != string::npos && tmpZimFilePath.size() > 5 + && found == tmpZimFilePath.size() - 5) { + tmpZimFilePath.resize(tmpZimFilePath.size() - 2); } - /* Destructor */ - Reader::~Reader() { - if (this->zimFileHandler != NULL) { - delete this->zimFileHandler; - } - } + this->zimFileHandler = new zim::File(tmpZimFilePath); - zim::File* Reader::getZimFileHandler() const { - return this->zimFileHandler; - } - - /* Reset the cursor for GetNextArticle() */ - void Reader::reset() { + if (this->zimFileHandler != NULL) { + this->firstArticleOffset + = this->zimFileHandler->getNamespaceBeginOffset('A'); + this->lastArticleOffset = this->zimFileHandler->getNamespaceEndOffset('A'); this->currentArticleOffset = this->firstArticleOffset; + this->nsACount = this->zimFileHandler->getNamespaceCount('A'); + this->nsICount = this->zimFileHandler->getNamespaceCount('I'); + this->zimFilePath = zimFilePath; } - std::map Reader::parseCounterMetadata() const { - std::map counters; - string mimeType, item, counterString; - unsigned int counter; + /* initialize random seed: */ + srand(time(NULL)); +} - zim::Article article = this->zimFileHandler->getArticle('M',"Counter"); +/* Destructor */ +Reader::~Reader() +{ + if (this->zimFileHandler != NULL) { + delete this->zimFileHandler; + } +} - if ( article.good() ) { - stringstream ssContent(article.getData()); +zim::File* Reader::getZimFileHandler() const +{ + return this->zimFileHandler; +} +/* Reset the cursor for GetNextArticle() */ +void Reader::reset() +{ + this->currentArticleOffset = 
this->firstArticleOffset; +} +std::map Reader::parseCounterMetadata() const +{ + std::map counters; + string mimeType, item, counterString; + unsigned int counter; - while(getline(ssContent, item, ';')) { - stringstream ssItem(item); - getline(ssItem, mimeType, '='); - getline(ssItem, counterString, '='); - if (!counterString.empty() && !mimeType.empty()) { - sscanf(counterString.c_str(), "%u", &counter); - counters.insert(pair(mimeType, counter)); - } + zim::Article article = this->zimFileHandler->getArticle('M', "Counter"); + + if (article.good()) { + stringstream ssContent(article.getData()); + + while (getline(ssContent, item, ';')) { + stringstream ssItem(item); + getline(ssItem, mimeType, '='); + getline(ssItem, counterString, '='); + if (!counterString.empty() && !mimeType.empty()) { + sscanf(counterString.c_str(), "%u", &counter); + counters.insert(pair(mimeType, counter)); } } - - return counters; } - /* Get the count of articles which can be indexed/displayed */ - unsigned int Reader::getArticleCount() const { - std::map counterMap = this->parseCounterMetadata(); - unsigned int counter = 0; + return counters; +} - if (counterMap.empty()) { - counter = this->nsACount; - } else { - auto it = counterMap.find("text/html"); - if (it != counterMap.end()) - counter = it->second; +/* Get the count of articles which can be indexed/displayed */ +unsigned int Reader::getArticleCount() const +{ + std::map counterMap + = this->parseCounterMetadata(); + unsigned int counter = 0; + + if (counterMap.empty()) { + counter = this->nsACount; + } else { + auto it = counterMap.find("text/html"); + if (it != counterMap.end()) { + counter = it->second; + } + } + + return counter; +} + +/* Get the count of medias content in the ZIM file */ +unsigned int Reader::getMediaCount() const +{ + std::map counterMap + = this->parseCounterMetadata(); + unsigned int counter = 0; + + if (counterMap.empty()) { + counter = this->nsICount; + } else { + auto it = counterMap.find("image/jpeg"); + 
if (it != counterMap.end()) { + counter += it->second; } - return counter; - } - - /* Get the count of medias content in the ZIM file */ - unsigned int Reader::getMediaCount() const { - std::map counterMap = this->parseCounterMetadata(); - unsigned int counter = 0; - - if (counterMap.empty()) - counter = this->nsICount; - else { - auto it = counterMap.find("image/jpeg"); - if (it != counterMap.end()) - counter += it->second; - - it = counterMap.find("image/gif"); - if (it != counterMap.end()) - counter += it->second; - - it = counterMap.find("image/png"); - if (it != counterMap.end()) - counter += it->second; + it = counterMap.find("image/gif"); + if (it != counterMap.end()) { + counter += it->second; } - return counter; - } - - /* Get the total of all items of a ZIM file, redirects included */ - unsigned int Reader::getGlobalCount() const { - return this->zimFileHandler->getCountArticles(); - } - - /* Return the UID of the ZIM file */ - string Reader::getId() const { - std::ostringstream s; - s << this->zimFileHandler->getFileheader().getUuid(); - return s.str(); - } - - /* Return a page url from a title */ - bool Reader::getPageUrlFromTitle(const string &title, string &url) const { - /* Extract the content from the zim file */ - zim::Article article = this->zimFileHandler->getArticleByTitle('A', title); - - if ( ! 
article.good() ) - { - return false; + it = counterMap.find("image/png"); + if (it != counterMap.end()) { + counter += it->second; } + } + return counter; +} - unsigned int loopCounter = 0; - while (article.isRedirect() && loopCounter++<42) { - article = article.getRedirectArticle(); - } +/* Get the total of all items of a ZIM file, redirects included */ +unsigned int Reader::getGlobalCount() const +{ + return this->zimFileHandler->getCountArticles(); +} +/* Return the UID of the ZIM file */ +string Reader::getId() const +{ + std::ostringstream s; + s << this->zimFileHandler->getFileheader().getUuid(); + return s.str(); +} + +/* Return a page url from a title */ +bool Reader::getPageUrlFromTitle(const string& title, string& url) const +{ + /* Extract the content from the zim file */ + zim::Article article = this->zimFileHandler->getArticleByTitle('A', title); + + if (!article.good()) { + return false; + } + + unsigned int loopCounter = 0; + while (article.isRedirect() && loopCounter++ < 42) { + article = article.getRedirectArticle(); + } + + url = article.getLongUrl(); + return true; +} + +/* Return an URL from a title */ +string Reader::getRandomPageUrl() const +{ + zim::Article article; + zim::size_type idx; + std::string mainPageUrl = this->getMainPageUrl(); + + do { + idx = this->firstArticleOffset + + (zim::size_type)((double)rand() / ((double)RAND_MAX + 1) + * this->nsACount); + article = zimFileHandler->getArticle(idx); + } while (article.getLongUrl() == mainPageUrl); + + return article.getLongUrl(); +} + +/* Return the welcome page URL */ +string Reader::getMainPageUrl() const +{ + string url = ""; + + if (this->zimFileHandler->getFileheader().hasMainPage()) { + zim::Article article = zimFileHandler->getArticle( + this->zimFileHandler->getFileheader().getMainPage()); url = article.getLongUrl(); - return true; - } - /* Return an URL from a title*/ - string Reader::getRandomPageUrl() const { - zim::Article article; - zim::size_type idx; - std::string 
mainPageUrl = this->getMainPageUrl(); - - do { - idx = this->firstArticleOffset + - (zim::size_type)((double)rand() / ((double)RAND_MAX + 1) * this->nsACount); - article = zimFileHandler->getArticle(idx); - } while (article.getLongUrl() == mainPageUrl); - - return article.getLongUrl(); - } - - /* Return the welcome page URL */ - string Reader::getMainPageUrl() const { - string url = ""; - - if (this->zimFileHandler->getFileheader().hasMainPage()) { - zim::Article article = zimFileHandler->getArticle(this->zimFileHandler->getFileheader().getMainPage()); - url = article.getLongUrl(); - - if (url.empty()) { - url = getFirstPageUrl(); - } - } else { - url = getFirstPageUrl(); + if (url.empty()) { + url = getFirstPageUrl(); } - - return url; + } else { + url = getFirstPageUrl(); } - bool Reader::getFavicon(string &content, string &mimeType) const { - unsigned int contentLength = 0; + return url; +} - this->getContentByUrl( "/-/favicon.png", content, - contentLength, mimeType); +bool Reader::getFavicon(string& content, string& mimeType) const +{ + unsigned int contentLength = 0; + + this->getContentByUrl("/-/favicon.png", content, contentLength, mimeType); + + if (content.empty()) { + this->getContentByUrl("/I/favicon.png", content, contentLength, mimeType); if (content.empty()) { - this->getContentByUrl( "/I/favicon.png", content, - contentLength, mimeType); - + this->getContentByUrl("/I/favicon", content, contentLength, mimeType); if (content.empty()) { - this->getContentByUrl( "/I/favicon", content, - contentLength, mimeType); - - if (content.empty()) { - this->getContentByUrl( "/-/favicon", content, - contentLength, mimeType); - } + this->getContentByUrl("/-/favicon", content, contentLength, mimeType); } } - - return content.empty() ? false : true; } - string Reader::getZimFilePath() const { - return this->zimFilePath; + return content.empty() ? 
false : true; +} + +string Reader::getZimFilePath() const +{ + return this->zimFilePath; +} +/* Return a metatag value */ +bool Reader::getMetatag(const string& name, string& value) const +{ + unsigned int contentLength = 0; + string contentType = ""; + + return this->getContentByUrl("/M/" + name, value, contentLength, contentType); +} + +string Reader::getTitle() const +{ + string value; + this->getMetatag("Title", value); + if (value.empty()) { + value = getLastPathElement(zimFileHandler->getFilename()); + std::replace(value.begin(), value.end(), '_', ' '); + size_t pos = value.find(".zim"); + value = value.substr(0, pos); + } + return value; +} + +string Reader::getName() const +{ + string value; + this->getMetatag("Name", value); + return value; +} + +string Reader::getTags() const +{ + string value; + this->getMetatag("Tags", value); + return value; +} + +string Reader::getDescription() const +{ + string value; + this->getMetatag("Description", value); + + /* Mediawiki Collection tends to use the "Subtitle" name */ + if (value.empty()) { + this->getMetatag("Subtitle", value); } - /* Return a metatag value */ - bool Reader::getMetatag(const string &name, string &value) const { - unsigned int contentLength = 0; - string contentType = ""; + return value; +} - return this->getContentByUrl( "/M/" + name, value, - contentLength, contentType); +string Reader::getLanguage() const +{ + string value; + this->getMetatag("Language", value); + return value; +} + +string Reader::getDate() const +{ + string value; + this->getMetatag("Date", value); + return value; +} + +string Reader::getCreator() const +{ + string value; + this->getMetatag("Creator", value); + return value; +} + +string Reader::getPublisher() const +{ + string value; + this->getMetatag("Publisher", value); + return value; +} + +string Reader::getOrigId() const +{ + string value; + this->getMetatag("startfileuid", value); + if (value.empty()) { + return ""; } - - string Reader::getTitle() const { - string 
value; - this->getMetatag("Title", value); - if (value.empty()) { - value = getLastPathElement(zimFileHandler->getFilename()); - std::replace(value.begin(), value.end(), '_', ' '); - size_t pos = value.find(".zim"); - value = value.substr(0, pos); - } - return value; - } - - string Reader::getName() const { - string value; - this->getMetatag("Name", value); - return value; - } - - string Reader::getTags() const { - string value; - this->getMetatag("Tags", value); - return value; - } - - string Reader::getDescription() const{ - string value; - this->getMetatag("Description", value); - - /* Mediawiki Collection tends to use the "Subtitle" name */ - if (value.empty()) { - this->getMetatag("Subtitle", value); - } - - return value; - } - - string Reader::getLanguage() const { - string value; - this->getMetatag("Language", value); - return value; - } - - string Reader::getDate() const { - string value; - this->getMetatag("Date", value); - return value; - } - - string Reader::getCreator() const { - string value; - this->getMetatag("Creator", value); - return value; - } - - string Reader::getPublisher() const { - string value; - this->getMetatag("Publisher", value); - return value; - } - - string Reader::getOrigId() const { - string value; - this->getMetatag("startfileuid", value); - if(value.empty()) - return ""; - std::string id=value; - std::string origID; - std::string temp=""; - unsigned int k=0; - char tempArray[16]=""; - for(unsigned int i=0; igetNamespaceBeginOffset('A'); - zim::Article article = zimFileHandler->getArticle(firstPageOffset); - return article.getLongUrl(); - } - - bool Reader::parseUrl(const string &url, char *ns, string &title) const { - /* Offset to visit the url */ - unsigned int urlLength = url.size(); - unsigned int offset = 0; - - /* Ignore the '/' */ - while ((offset < urlLength) && (url[offset] == '/')) offset++; - - /* Get namespace */ - while ((offset < urlLength) && (url[offset] != '/')) { - *ns= url[offset]; - offset++; - } - - /* Ignore 
the '/' */ - while ((offset < urlLength) && (url[offset] == '/')) offset++; - - /* Get content title */ - unsigned int titleOffset = offset; - while (offset < urlLength) { - offset++; - } - - /* unescape title */ - title = url.substr(titleOffset, offset - titleOffset); - - return true; - } - - /* Return article by url */ - bool Reader::getArticleObjectByDecodedUrl(const string &url, zim::Article &article) const { - if (this->zimFileHandler == NULL) { - return false; - } - - /* Parse the url */ - char ns = 0; - string urlStr; - this->parseUrl(url, &ns, urlStr); - - /* Main page */ - if (urlStr.empty() && ns == 0) { - this->parseUrl(this->getMainPageUrl(), &ns, urlStr); - } - - /* Extract the content from the zim file */ - article = zimFileHandler->getArticle(ns, urlStr); - return article.good(); - } - - /* Return the mimeType without the content */ - bool Reader::getMimeTypeByUrl(const string &url, string &mimeType) const { - if (this->zimFileHandler == NULL) { - return false; - } - - zim::Article article; - if (this->getArticleObjectByDecodedUrl(url, article)) { - try { - mimeType = article.getMimeType(); - } catch (exception &e) { - cerr << "Unable to get the mimetype for " << url << ":" << e.what() << endl; - mimeType = "application/octet-stream"; - } - return true; + std::string id = value; + std::string origID; + std::string temp = ""; + unsigned int k = 0; + char tempArray[16] = ""; + for (unsigned int i = 0; i < id.size(); i++) { + if (id[i] == '\n') { + tempArray[k] = atoi(temp.c_str()); + temp = ""; + k++; } else { - mimeType = ""; - return false; + temp += id[i]; } } + origID = hexUUID(tempArray); + return origID; +} - /* Get a content from a zim file */ - bool Reader::getContentByUrl(const string &url, string &content, unsigned int &contentLength, string &contentType) const { - return this->getContentByEncodedUrl(url, content, contentLength, contentType); +/* Return the first page URL */ +string Reader::getFirstPageUrl() const +{ + zim::size_type 
firstPageOffset = zimFileHandler->getNamespaceBeginOffset('A'); + zim::Article article = zimFileHandler->getArticle(firstPageOffset); + return article.getLongUrl(); +} + +bool Reader::parseUrl(const string& url, char* ns, string& title) const +{ + /* Offset to visit the url */ + unsigned int urlLength = url.size(); + unsigned int offset = 0; + + /* Ignore the '/' */ + while ((offset < urlLength) && (url[offset] == '/')) { + offset++; } - bool Reader::getContentByEncodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType, string &baseUrl) const { - return this->getContentByDecodedUrl(kiwix::urlDecode(url), content, contentLength, contentType, baseUrl); + /* Get namespace */ + while ((offset < urlLength) && (url[offset] != '/')) { + *ns = url[offset]; + offset++; } - bool Reader::getContentByEncodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType) const { - std::string stubRedirectUrl; - return this->getContentByEncodedUrl(kiwix::urlDecode(url), content, contentLength, contentType, stubRedirectUrl); + /* Ignore the '/' */ + while ((offset < urlLength) && (url[offset] == '/')) { + offset++; } - bool Reader::getContentByDecodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType) const { - std::string stubRedirectUrl; - return this->getContentByDecodedUrl(kiwix::urlDecode(url), content, contentLength, contentType, stubRedirectUrl); + /* Get content title */ + unsigned int titleOffset = offset; + while (offset < urlLength) { + offset++; } - bool Reader::getContentByDecodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType, string &baseUrl) const { - content=""; - contentType=""; - contentLength = 0; + /* unescape title */ + title = url.substr(titleOffset, offset - titleOffset); - zim::Article article; - if ( ! 
this->getArticleObjectByDecodedUrl(url, article)) { - return false; - } - - /* If redirect */ - unsigned int loopCounter = 0; - while (article.isRedirect() && loopCounter++<42) { - article = article.getRedirectArticle(); - } - - if (loopCounter < 42) { - /* Compute base url (might be different from the url if redirects */ - baseUrl = "/" + std::string(1, article.getNamespace()) + "/" + article.getUrl(); - - /* Get the content mime-type */ - try { - contentType = string(article.getMimeType().data(), article.getMimeType().size()); - } catch (exception &e) { - cerr << "Unable to get the mimetype for "<< baseUrl<< ":" << e.what() << endl; - contentType = "application/octet-stream"; - } - - /* Get the data */ - content = string(article.getData().data(), article.getArticleSize()); - } - - /* Try to set a stub HTML header/footer if necesssary */ - if (contentType.find("text/html") != string::npos && - content.find("" + content + ""; - } - - /* Get the data length */ - contentLength = article.getArticleSize(); - - return true; - } - - /* Check if an article exists */ - bool Reader::urlExists(const string &url) const { - char ns = 0; - string titleStr; - this->parseUrl(url, &ns, titleStr); - titleStr = "/" + titleStr; - zim::File::const_iterator findItr = zimFileHandler->find(ns, titleStr); - return findItr != zimFileHandler->end() && findItr->getUrl() == titleStr; - } - - /* Does the ZIM file has a fulltext index */ - bool Reader::hasFulltextIndex() const { - return this->urlExists("/Z/fulltextIndex/xapian"); - } - - /* Search titles by prefix */ - bool Reader::searchSuggestions(const string &prefix, unsigned int suggestionsCount, const bool reset) { - bool retVal = false; - zim::File::const_iterator articleItr; - - /* Reset the suggestions otherwise check if the suggestions number is less than the suggestionsCount */ - if (reset) { - this->suggestions.clear(); - this->suggestionsOffset = this->suggestions.begin(); - } else { - if (this->suggestions.size() > 
suggestionsCount) { - return false; - } - } - - /* Return if no prefix */ - if (prefix.size() == 0) { - return false; - } - - for (articleItr = zimFileHandler->findByTitle('A', prefix); - articleItr != zimFileHandler->end() && - articleItr->getTitle().compare(0, prefix.size(), prefix) == 0 && - this->suggestions.size() < suggestionsCount ; - ++articleItr) { - - /* Extract the interesting part of article title & url */ - std::string normalizedArticleTitle = kiwix::normalize(articleItr->getTitle()); - std::string articleFinalUrl = "/A/"+articleItr->getUrl(); - if (articleItr->isRedirect()) { - zim::Article article = *articleItr; - unsigned int loopCounter = 0; - while (article.isRedirect() && loopCounter++<42) { - article = article.getRedirectArticle(); - } - articleFinalUrl = "/A/"+article.getUrl(); - } - - /* Go through all already found suggestions and skip if this - article is already in the suggestions list (with an other - title) */ - bool insert = true; - std::vector< std::vector >::iterator suggestionItr; - for (suggestionItr = this->suggestions.begin(); suggestionItr != this->suggestions.end(); suggestionItr++) { - int result = normalizedArticleTitle.compare((*suggestionItr)[2]); - if (result == 0 && articleFinalUrl.compare((*suggestionItr)[1]) == 0) { - insert = false; - break; - } else if (result < 0) { - break; - } - } - - /* Insert if possible */ - if (insert) { - std::vector suggestion; - suggestion.push_back(articleItr->getTitle()); - suggestion.push_back(articleFinalUrl); - suggestion.push_back(normalizedArticleTitle); - this->suggestions.insert(suggestionItr, suggestion); - } - - /* Suggestions where found */ - retVal = true; - } - - /* Set the cursor to the begining */ - this->suggestionsOffset = this->suggestions.begin(); - - return retVal; - } - - std::vector Reader::getTitleVariants(const std::string &title) const { - std::vector variants; - variants.push_back(title); - variants.push_back(kiwix::ucFirst(title)); - 
variants.push_back(kiwix::lcFirst(title)); - variants.push_back(kiwix::toTitle(title)); - return variants; - } - - /* Try also a few variations of the prefix to have better results */ - bool Reader::searchSuggestionsSmart(const string &prefix, unsigned int suggestionsCount) { - std::vector variants = this->getTitleVariants(prefix); - bool retVal; - - this->suggestions.clear(); - this->suggestionsOffset = this->suggestions.begin(); - for (std::vector::iterator variantsItr = variants.begin(); - variantsItr != variants.end(); - variantsItr++) { - retVal = this->searchSuggestions(*variantsItr, suggestionsCount, false) || retVal; - } - - return retVal; - } - - /* Get next suggestion */ - bool Reader::getNextSuggestion(string &title) { - if (this->suggestionsOffset != this->suggestions.end()) { - /* title */ - title = (*(this->suggestionsOffset))[0]; - - /* increment the cursor for the next call */ - this->suggestionsOffset++; - - return true; - } + return true; +} +/* Return article by url */ +bool Reader::getArticleObjectByDecodedUrl(const string& url, + zim::Article& article) const +{ + if (this->zimFileHandler == NULL) { return false; } - bool Reader::getNextSuggestion(string &title, string &url) { - if (this->suggestionsOffset != this->suggestions.end()) { - /* title */ - title = (*(this->suggestionsOffset))[0]; - url = (*(this->suggestionsOffset))[1]; + /* Parse the url */ + char ns = 0; + string urlStr; + this->parseUrl(url, &ns, urlStr); - /* increment the cursor for the next call */ - this->suggestionsOffset++; + /* Main page */ + if (urlStr.empty() && ns == 0) { + this->parseUrl(this->getMainPageUrl(), &ns, urlStr); + } - return true; - } + /* Extract the content from the zim file */ + article = zimFileHandler->getArticle(ns, urlStr); + return article.good(); +} +/* Return the mimeType without the content */ +bool Reader::getMimeTypeByUrl(const string& url, string& mimeType) const +{ + if (this->zimFileHandler == NULL) { return false; } - /* Check if the file 
has as checksum */ - bool Reader::canCheckIntegrity() const { - return this->zimFileHandler->getChecksum() != ""; - } - - /* Return true if corrupted, false otherwise */ - bool Reader::isCorrupted() const { + zim::Article article; + if (this->getArticleObjectByDecodedUrl(url, article)) { try { - if (this->zimFileHandler->verify() == true) - return false; - } catch (exception &e) { - cerr << e.what() << endl; - return true; + mimeType = article.getMimeType(); + } catch (exception& e) { + cerr << "Unable to get the mimetype for " << url << ":" << e.what() + << endl; + mimeType = "application/octet-stream"; } - return true; - } - - /* Return the file size, works also for splitted files */ - unsigned int Reader::getFileSize() const { - zim::File *file = this->getZimFileHandler(); - zim::offset_type size = 0; - - if (file != NULL) { - size = file->getFilesize(); - } - - return (size / 1024); + } else { + mimeType = ""; + return false; } } + +/* Get a content from a zim file */ +bool Reader::getContentByUrl(const string& url, + string& content, + unsigned int& contentLength, + string& contentType) const +{ + return this->getContentByEncodedUrl(url, content, contentLength, contentType); +} + +bool Reader::getContentByEncodedUrl(const string& url, + string& content, + unsigned int& contentLength, + string& contentType, + string& baseUrl) const +{ + return this->getContentByDecodedUrl( + kiwix::urlDecode(url), content, contentLength, contentType, baseUrl); +} + +bool Reader::getContentByEncodedUrl(const string& url, + string& content, + unsigned int& contentLength, + string& contentType) const +{ + std::string stubRedirectUrl; + return this->getContentByEncodedUrl(kiwix::urlDecode(url), + content, + contentLength, + contentType, + stubRedirectUrl); +} + +bool Reader::getContentByDecodedUrl(const string& url, + string& content, + unsigned int& contentLength, + string& contentType) const +{ + std::string stubRedirectUrl; + return 
this->getContentByDecodedUrl(kiwix::urlDecode(url), + content, + contentLength, + contentType, + stubRedirectUrl); +} + +bool Reader::getContentByDecodedUrl(const string& url, + string& content, + unsigned int& contentLength, + string& contentType, + string& baseUrl) const +{ + content = ""; + contentType = ""; + contentLength = 0; + + zim::Article article; + if (!this->getArticleObjectByDecodedUrl(url, article)) { + return false; + } + + /* If redirect */ + unsigned int loopCounter = 0; + while (article.isRedirect() && loopCounter++ < 42) { + article = article.getRedirectArticle(); + } + + if (loopCounter < 42) { + /* Compute base url (might be different from the url if redirects */ + baseUrl + = "/" + std::string(1, article.getNamespace()) + "/" + article.getUrl(); + + /* Get the content mime-type */ + try { + contentType + = string(article.getMimeType().data(), article.getMimeType().size()); + } catch (exception& e) { + cerr << "Unable to get the mimetype for " << baseUrl << ":" << e.what() + << endl; + contentType = "application/octet-stream"; + } + + /* Get the data */ + content = string(article.getData().data(), article.getArticleSize()); + } + + /* Try to set a stub HTML header/footer if necesssary */ + if (contentType.find("text/html") != string::npos + && content.find("" + + content + ""; + } + + /* Get the data length */ + contentLength = article.getArticleSize(); + + return true; +} + +/* Check if an article exists */ +bool Reader::urlExists(const string& url) const +{ + char ns = 0; + string titleStr; + this->parseUrl(url, &ns, titleStr); + titleStr = "/" + titleStr; + zim::File::const_iterator findItr = zimFileHandler->find(ns, titleStr); + return findItr != zimFileHandler->end() && findItr->getUrl() == titleStr; +} + +/* Does the ZIM file has a fulltext index */ +bool Reader::hasFulltextIndex() const +{ + return this->urlExists("/Z/fulltextIndex/xapian"); +} + +/* Search titles by prefix */ +bool Reader::searchSuggestions(const string& prefix, + 
unsigned int suggestionsCount, + const bool reset) +{ + bool retVal = false; + zim::File::const_iterator articleItr; + + /* Reset the suggestions otherwise check if the suggestions number is less + * than the suggestionsCount */ + if (reset) { + this->suggestions.clear(); + this->suggestionsOffset = this->suggestions.begin(); + } else { + if (this->suggestions.size() > suggestionsCount) { + return false; + } + } + + /* Return if no prefix */ + if (prefix.size() == 0) { + return false; + } + + for (articleItr = zimFileHandler->findByTitle('A', prefix); + articleItr != zimFileHandler->end() + && articleItr->getTitle().compare(0, prefix.size(), prefix) == 0 + && this->suggestions.size() < suggestionsCount; + ++articleItr) { + /* Extract the interesting part of article title & url */ + std::string normalizedArticleTitle + = kiwix::normalize(articleItr->getTitle()); + std::string articleFinalUrl = "/A/" + articleItr->getUrl(); + if (articleItr->isRedirect()) { + zim::Article article = *articleItr; + unsigned int loopCounter = 0; + while (article.isRedirect() && loopCounter++ < 42) { + article = article.getRedirectArticle(); + } + articleFinalUrl = "/A/" + article.getUrl(); + } + + /* Go through all already found suggestions and skip if this + article is already in the suggestions list (with an other + title) */ + bool insert = true; + std::vector>::iterator suggestionItr; + for (suggestionItr = this->suggestions.begin(); + suggestionItr != this->suggestions.end(); + suggestionItr++) { + int result = normalizedArticleTitle.compare((*suggestionItr)[2]); + if (result == 0 && articleFinalUrl.compare((*suggestionItr)[1]) == 0) { + insert = false; + break; + } else if (result < 0) { + break; + } + } + + /* Insert if possible */ + if (insert) { + std::vector suggestion; + suggestion.push_back(articleItr->getTitle()); + suggestion.push_back(articleFinalUrl); + suggestion.push_back(normalizedArticleTitle); + this->suggestions.insert(suggestionItr, suggestion); + } + + /* 
Suggestions where found */ + retVal = true; + } + + /* Set the cursor to the begining */ + this->suggestionsOffset = this->suggestions.begin(); + + return retVal; +} + +std::vector Reader::getTitleVariants( + const std::string& title) const +{ + std::vector variants; + variants.push_back(title); + variants.push_back(kiwix::ucFirst(title)); + variants.push_back(kiwix::lcFirst(title)); + variants.push_back(kiwix::toTitle(title)); + return variants; +} + +/* Try also a few variations of the prefix to have better results */ +bool Reader::searchSuggestionsSmart(const string& prefix, + unsigned int suggestionsCount) +{ + std::vector variants = this->getTitleVariants(prefix); + bool retVal; + + this->suggestions.clear(); + this->suggestionsOffset = this->suggestions.begin(); + for (std::vector::iterator variantsItr = variants.begin(); + variantsItr != variants.end(); + variantsItr++) { + retVal = this->searchSuggestions(*variantsItr, suggestionsCount, false) + || retVal; + } + + return retVal; +} + +/* Get next suggestion */ +bool Reader::getNextSuggestion(string& title) +{ + if (this->suggestionsOffset != this->suggestions.end()) { + /* title */ + title = (*(this->suggestionsOffset))[0]; + + /* increment the cursor for the next call */ + this->suggestionsOffset++; + + return true; + } + + return false; +} + +bool Reader::getNextSuggestion(string& title, string& url) +{ + if (this->suggestionsOffset != this->suggestions.end()) { + /* title */ + title = (*(this->suggestionsOffset))[0]; + url = (*(this->suggestionsOffset))[1]; + + /* increment the cursor for the next call */ + this->suggestionsOffset++; + + return true; + } + + return false; +} + +/* Check if the file has as checksum */ +bool Reader::canCheckIntegrity() const +{ + return this->zimFileHandler->getChecksum() != ""; +} + +/* Return true if corrupted, false otherwise */ +bool Reader::isCorrupted() const +{ + try { + if (this->zimFileHandler->verify() == true) { + return false; + } + } catch (exception& e) { + 
cerr << e.what() << endl; + return true; + } + + return true; +} + +/* Return the file size, works also for splitted files */ +unsigned int Reader::getFileSize() const +{ + zim::File* file = this->getZimFileHandler(); + zim::offset_type size = 0; + + if (file != NULL) { + size = file->getFilesize(); + } + + return (size / 1024); +} +} diff --git a/src/searcher.cpp b/src/searcher.cpp index 074620840..5c4074f34 100644 --- a/src/searcher.cpp +++ b/src/searcher.cpp @@ -18,9 +18,9 @@ */ #include "searcher.h" -#include "xapianSearcher.h" -#include "reader.h" #include "kiwixlib-resources.h" +#include "reader.h" +#include "xapianSearcher.h" #include @@ -33,268 +33,287 @@ using namespace CTPP; #endif +namespace kiwix +{ +class _Result : public Result +{ + public: + _Result(Searcher* searcher, zim::Search::iterator& iterator); + virtual ~_Result(){}; -namespace kiwix { + virtual std::string get_url(); + virtual std::string get_title(); + virtual int get_score(); + virtual std::string get_snippet(); + virtual int get_wordCount(); + virtual int get_size(); - class _Result : public Result { - public: - _Result(Searcher* searcher, zim::Search::iterator& iterator); - virtual ~_Result() {}; + private: + Searcher* searcher; + zim::Search::iterator iterator; +}; - virtual std::string get_url(); - virtual std::string get_title(); - virtual int get_score(); - virtual std::string get_snippet(); - virtual int get_wordCount(); - virtual int get_size(); +struct SearcherInternal { + const zim::Search* _search; + XapianSearcher* _xapianSearcher; + zim::Search::iterator current_iterator; - private: - Searcher* searcher; - zim::Search::iterator iterator; - }; - - struct SearcherInternal { - const zim::Search *_search; - XapianSearcher *_xapianSearcher; - zim::Search::iterator current_iterator; - - - SearcherInternal() : - _search(NULL), - _xapianSearcher(NULL) - {} - ~SearcherInternal() { - if ( _search != NULL ) - delete _search; - if ( _xapianSearcher != NULL ) - delete _xapianSearcher; - } 
- - }; - - /* Constructor */ - Searcher::Searcher(const string &xapianDirectoryPath, Reader* reader) : - reader(reader), - internal(new SearcherInternal()), - searchPattern(""), - protocolPrefix("zim://"), - searchProtocolPrefix("search://?"), - resultCountPerPage(0), - estimatedResultCount(0), - resultStart(0), - resultEnd(0) + SearcherInternal() : _search(NULL), _xapianSearcher(NULL) {} + ~SearcherInternal() { - template_ct2 = RESOURCE::results_ct2; - loadICUExternalTables(); - if ( !reader || !reader->hasFulltextIndex() ) { - internal->_xapianSearcher = new XapianSearcher(xapianDirectoryPath, reader); + if (_search != NULL) { + delete _search; + } + if (_xapianSearcher != NULL) { + delete _xapianSearcher; } } - - /* Destructor */ - Searcher::~Searcher() { - delete internal; +}; + +/* Constructor */ +Searcher::Searcher(const string& xapianDirectoryPath, Reader* reader) + : reader(reader), + internal(new SearcherInternal()), + searchPattern(""), + protocolPrefix("zim://"), + searchProtocolPrefix("search://?"), + resultCountPerPage(0), + estimatedResultCount(0), + resultStart(0), + resultEnd(0) +{ + template_ct2 = RESOURCE::results_ct2; + loadICUExternalTables(); + if (!reader || !reader->hasFulltextIndex()) { + internal->_xapianSearcher = new XapianSearcher(xapianDirectoryPath, reader); } - - /* Search strings in the database */ - void Searcher::search(std::string &search, unsigned int resultStart, - unsigned int resultEnd, const bool verbose) { - this->reset(); +} - if (verbose == true) { - cout << "Performing query `" << search << "'" << endl; - } +/* Destructor */ +Searcher::~Searcher() +{ + delete internal; +} +/* Search strings in the database */ +void Searcher::search(std::string& search, + unsigned int resultStart, + unsigned int resultEnd, + const bool verbose) +{ + this->reset(); - /* If resultEnd & resultStart inverted */ - if (resultStart > resultEnd) { - resultEnd += resultStart; - resultStart = resultEnd - resultStart; - resultEnd -= resultStart; - } 
- - /* Try to find results */ - if (resultStart != resultEnd) { - - /* Avoid big researches */ - this->resultCountPerPage = resultEnd - resultStart; - if (this->resultCountPerPage > 70) { - resultEnd = resultStart + 70; - this->resultCountPerPage = 70; - } - - /* Perform the search */ - this->searchPattern = search; - this->resultStart = resultStart; - this->resultEnd = resultEnd; - string unaccentedSearch = removeAccents(search); - if ( internal->_xapianSearcher ) { - internal->_xapianSearcher->searchInIndex(unaccentedSearch, resultStart, resultEnd, verbose); - this->estimatedResultCount = internal->_xapianSearcher->results.get_matches_estimated(); - } else { - internal->_search = this->reader->getZimFileHandler()->search(unaccentedSearch, resultStart, resultEnd); - internal->current_iterator = internal->_search->begin(); - this->estimatedResultCount = internal->_search->get_matches_estimated(); - } - } - - return; + if (verbose == true) { + cout << "Performing query `" << search << "'" << endl; } - void Searcher::restart_search() { - if ( internal->_xapianSearcher ) { - internal->_xapianSearcher->restart_search(); + /* If resultEnd & resultStart inverted */ + if (resultStart > resultEnd) { + resultEnd += resultStart; + resultStart = resultEnd - resultStart; + resultEnd -= resultStart; + } + + /* Try to find results */ + if (resultStart != resultEnd) { + /* Avoid big researches */ + this->resultCountPerPage = resultEnd - resultStart; + if (this->resultCountPerPage > 70) { + resultEnd = resultStart + 70; + this->resultCountPerPage = 70; + } + + /* Perform the search */ + this->searchPattern = search; + this->resultStart = resultStart; + this->resultEnd = resultEnd; + string unaccentedSearch = removeAccents(search); + if (internal->_xapianSearcher) { + internal->_xapianSearcher->searchInIndex( + unaccentedSearch, resultStart, resultEnd, verbose); + this->estimatedResultCount + = internal->_xapianSearcher->results.get_matches_estimated(); } else { + internal->_search 
= this->reader->getZimFileHandler()->search( + unaccentedSearch, resultStart, resultEnd); internal->current_iterator = internal->_search->begin(); + this->estimatedResultCount = internal->_search->get_matches_estimated(); } } - Result* Searcher::getNextResult() { - if ( internal->_xapianSearcher ) { - return internal->_xapianSearcher->getNextResult(); - } else if (internal->current_iterator != internal->_search->end()) { - Result* result = new _Result(this, internal->current_iterator); - internal->current_iterator++; - return result; - } - return NULL; + return; +} + +void Searcher::restart_search() +{ + if (internal->_xapianSearcher) { + internal->_xapianSearcher->restart_search(); + } else { + internal->current_iterator = internal->_search->begin(); } +} - - /* Reset the results */ - void Searcher::reset() { - this->estimatedResultCount = 0; - this->searchPattern = ""; - return; +Result* Searcher::getNextResult() +{ + if (internal->_xapianSearcher) { + return internal->_xapianSearcher->getNextResult(); + } else if (internal->current_iterator != internal->_search->end()) { + Result* result = new _Result(this, internal->current_iterator); + internal->current_iterator++; + return result; } + return NULL; +} - /* Return the result count estimation */ - unsigned int Searcher::getEstimatedResultCount() { - return this->estimatedResultCount; - } +/* Reset the results */ +void Searcher::reset() +{ + this->estimatedResultCount = 0; + this->searchPattern = ""; + return; +} - bool Searcher::setProtocolPrefix(const std::string prefix) { - this->protocolPrefix = prefix; - return true; - } +/* Return the result count estimation */ +unsigned int Searcher::getEstimatedResultCount() +{ + return this->estimatedResultCount; +} - bool Searcher::setSearchProtocolPrefix(const std::string prefix) { - this->searchProtocolPrefix = prefix; - return true; - } +bool Searcher::setProtocolPrefix(const std::string prefix) +{ + this->protocolPrefix = prefix; + return true; +} - void 
Searcher::setContentHumanReadableId(const string &contentHumanReadableId) { - this->contentHumanReadableId = contentHumanReadableId; - } +bool Searcher::setSearchProtocolPrefix(const std::string prefix) +{ + this->searchProtocolPrefix = prefix; + return true; +} - _Result::_Result(Searcher* searcher, zim::Search::iterator& iterator): - searcher(searcher), - iterator(iterator) - { - } +void Searcher::setContentHumanReadableId(const string& contentHumanReadableId) +{ + this->contentHumanReadableId = contentHumanReadableId; +} - std::string _Result::get_url() { - return iterator.get_url(); - } - - std::string _Result::get_title() { - return iterator.get_title(); - } - - int _Result::get_score() { - return iterator.get_score(); - } - - std::string _Result::get_snippet() { - return iterator.get_snippet(); - } - - int _Result::get_size() { - return iterator.get_size(); - } - - int _Result::get_wordCount() { - return iterator.get_wordCount(); - } +_Result::_Result(Searcher* searcher, zim::Search::iterator& iterator) + : searcher(searcher), iterator(iterator) +{ +} +std::string _Result::get_url() +{ + return iterator.get_url(); +} +std::string _Result::get_title() +{ + return iterator.get_title(); +} +int _Result::get_score() +{ + return iterator.get_score(); +} +std::string _Result::get_snippet() +{ + return iterator.get_snippet(); +} +int _Result::get_size() +{ + return iterator.get_size(); +} +int _Result::get_wordCount() +{ + return iterator.get_wordCount(); +} #ifdef ENABLE_CTPP2 - - string Searcher::getHtml() { - SimpleVM oSimpleVM; +string Searcher::getHtml() +{ + SimpleVM oSimpleVM; - // Fill data - CDT oData; - CDT resultsCDT(CDT::ARRAY_VAL); + // Fill data + CDT oData; + CDT resultsCDT(CDT::ARRAY_VAL); - this->restart_search(); - Result * p_result = NULL; - while ( (p_result = this->getNextResult()) ) { - CDT result; - result["title"] = p_result->get_title(); - result["url"] = p_result->get_url(); - result["snippet"] = p_result->get_snippet(); + 
this->restart_search(); + Result* p_result = NULL; + while ((p_result = this->getNextResult())) { + CDT result; + result["title"] = p_result->get_title(); + result["url"] = p_result->get_url(); + result["snippet"] = p_result->get_snippet(); - if (p_result->get_size() >= 0) - result["size"] = kiwix::beautifyInteger(p_result->get_size()); - - if (p_result->get_wordCount() >= 0) - result["wordCount"] = kiwix::beautifyInteger(p_result->get_wordCount()); - - resultsCDT.PushBack(result); - delete p_result; + if (p_result->get_size() >= 0) { + result["size"] = kiwix::beautifyInteger(p_result->get_size()); } - this->restart_search(); - oData["results"] = resultsCDT; - // pages - CDT pagesCDT(CDT::ARRAY_VAL); - - unsigned int pageStart = this->resultStart / this->resultCountPerPage >= 5 ? this->resultStart / this->resultCountPerPage - 4 : 0; - unsigned int pageCount = this->estimatedResultCount / this->resultCountPerPage + 1 - pageStart; - - if (pageCount > 10) - pageCount = 10; - else if (pageCount == 1) - pageCount = 0; - - for (unsigned int i=pageStart; iresultCountPerPage; - page["end"] = (i+1) * this->resultCountPerPage; - - if (i * this->resultCountPerPage == this->resultStart) - page["selected"] = true; - - pagesCDT.PushBack(page); + if (p_result->get_wordCount() >= 0) { + result["wordCount"] = kiwix::beautifyInteger(p_result->get_wordCount()); } - oData["pages"] = pagesCDT; - - oData["count"] = kiwix::beautifyInteger(this->estimatedResultCount); - oData["searchPattern"] = kiwix::encodeDiples(this->searchPattern); - oData["searchPatternEncoded"] = urlEncode(this->searchPattern); - oData["resultStart"] = this->resultStart + 1; - oData["resultEnd"] = (this->resultEnd > this->estimatedResultCount ? this->estimatedResultCount : this->resultEnd); - oData["resultRange"] = this->resultCountPerPage; - oData["resultLastPageStart"] = this->estimatedResultCount > this->resultCountPerPage ? 
this->estimatedResultCount - this->resultCountPerPage : 0; - oData["protocolPrefix"] = this->protocolPrefix; - oData["searchProtocolPrefix"] = this->searchProtocolPrefix; - oData["contentId"] = this->contentHumanReadableId; - - VMStringLoader oLoader(template_ct2.c_str(), template_ct2.size()); - - FileLogger oLogger(stderr); - - // DEBUG only (write output to stdout) - // oSimpleVM.Run(oData, oLoader, stdout, oLogger); - - std::string sResult; - oSimpleVM.Run(oData, oLoader, sResult, oLogger); - - return sResult; + resultsCDT.PushBack(result); + delete p_result; } + this->restart_search(); + oData["results"] = resultsCDT; + + // pages + CDT pagesCDT(CDT::ARRAY_VAL); + + unsigned int pageStart + = this->resultStart / this->resultCountPerPage >= 5 + ? this->resultStart / this->resultCountPerPage - 4 + : 0; + unsigned int pageCount + = this->estimatedResultCount / this->resultCountPerPage + 1 - pageStart; + + if (pageCount > 10) { + pageCount = 10; + } else if (pageCount == 1) { + pageCount = 0; + } + + for (unsigned int i = pageStart; i < pageStart + pageCount; i++) { + CDT page; + page["label"] = i + 1; + page["start"] = i * this->resultCountPerPage; + page["end"] = (i + 1) * this->resultCountPerPage; + + if (i * this->resultCountPerPage == this->resultStart) { + page["selected"] = true; + } + + pagesCDT.PushBack(page); + } + oData["pages"] = pagesCDT; + + oData["count"] = kiwix::beautifyInteger(this->estimatedResultCount); + oData["searchPattern"] = kiwix::encodeDiples(this->searchPattern); + oData["searchPatternEncoded"] = urlEncode(this->searchPattern); + oData["resultStart"] = this->resultStart + 1; + oData["resultEnd"] = (this->resultEnd > this->estimatedResultCount + ? this->estimatedResultCount + : this->resultEnd); + oData["resultRange"] = this->resultCountPerPage; + oData["resultLastPageStart"] + = this->estimatedResultCount > this->resultCountPerPage + ? 
this->estimatedResultCount - this->resultCountPerPage + : 0; + oData["protocolPrefix"] = this->protocolPrefix; + oData["searchProtocolPrefix"] = this->searchProtocolPrefix; + oData["contentId"] = this->contentHumanReadableId; + + VMStringLoader oLoader(template_ct2.c_str(), template_ct2.size()); + + FileLogger oLogger(stderr); + + // DEBUG only (write output to stdout) + // oSimpleVM.Run(oData, oLoader, stdout, oLogger); + + std::string sResult; + oSimpleVM.Run(oData, oLoader, sResult, oLogger); + + return sResult; +} #endif - } diff --git a/src/xapianSearcher.cpp b/src/xapianSearcher.cpp index 1b9a6298f..aa0223d99 100644 --- a/src/xapianSearcher.cpp +++ b/src/xapianSearcher.cpp @@ -18,196 +18,204 @@ */ #include "xapianSearcher.h" -#include "xapian/myhtmlparse.h" -#include -#include +#include +#include +#include #include #include -#include -#include -#include +#include +#include +#include "xapian/myhtmlparse.h" #include -namespace kiwix { - -std::map read_valuesmap(const std::string &s) { - std::map result; - std::vector elems = split(s, ";"); - for(std::vector::iterator elem = elems.begin(); - elem != elems.end(); - elem++) - { - std::vector tmp_elems = split(*elem, ":"); - result.insert( std::pair(tmp_elems[0], atoi(tmp_elems[1].c_str())) ); - } - return result; +namespace kiwix +{ +std::map read_valuesmap(const std::string& s) +{ + std::map result; + std::vector elems = split(s, ";"); + for (std::vector::iterator elem = elems.begin(); + elem != elems.end(); + elem++) { + std::vector tmp_elems = split(*elem, ":"); + result.insert( + std::pair(tmp_elems[0], atoi(tmp_elems[1].c_str()))); + } + return result; } - /* Constructor */ - XapianSearcher::XapianSearcher(const string &xapianDirectoryPath, Reader* reader) +/* Constructor */ +XapianSearcher::XapianSearcher(const string& xapianDirectoryPath, + Reader* reader) : reader(reader) - { - this->openIndex(xapianDirectoryPath); - } +{ + this->openIndex(xapianDirectoryPath); +} - /* Open Xapian readable database */ - 
void XapianSearcher::openIndex(const string &directoryPath) { - this->readableDatabase = Xapian::Database(directoryPath); - this->valuesmap = read_valuesmap(this->readableDatabase.get_metadata("valuesmap")); - this->language = this->readableDatabase.get_metadata("language"); - this->stopwords = this->readableDatabase.get_metadata("stopwords"); - setup_queryParser(); - } - - /* Close Xapian writable database */ - void XapianSearcher::closeIndex() { - return; - } +/* Open Xapian readable database */ +void XapianSearcher::openIndex(const string& directoryPath) +{ + this->readableDatabase = Xapian::Database(directoryPath); + this->valuesmap + = read_valuesmap(this->readableDatabase.get_metadata("valuesmap")); + this->language = this->readableDatabase.get_metadata("language"); + this->stopwords = this->readableDatabase.get_metadata("stopwords"); + setup_queryParser(); +} - void XapianSearcher::setup_queryParser() - { - queryParser.set_database(readableDatabase); - if ( ! language.empty() ) - { - /* Build ICU Local object to retrieve ISO-639 language code (from - ISO-639-3) */ - icu::Locale languageLocale(language.c_str()); +/* Close Xapian writable database */ +void XapianSearcher::closeIndex() +{ + return; +} +void XapianSearcher::setup_queryParser() +{ + queryParser.set_database(readableDatabase); + if (!language.empty()) { + /* Build ICU Local object to retrieve ISO-639 language code (from + ISO-639-3) */ + icu::Locale languageLocale(language.c_str()); - /* Configuring language base steemming */ - try { - stemmer = Xapian::Stem(languageLocale.getLanguage()); - queryParser.set_stemmer(stemmer); - queryParser.set_stemming_strategy(Xapian::QueryParser::STEM_ALL); - } catch (...) { - std::cout << "No steemming for language '" << languageLocale.getLanguage() << "'" << std::endl; - } - } - - if ( ! 
stopwords.empty() ) - { - std::string stopWord; - std::istringstream file(this->stopwords); - while (std::getline(file, stopWord, '\n')) { - this->stopper.add(stopWord); - } - queryParser.set_stopper(&(this->stopper)); + /* Configuring language base steemming */ + try { + stemmer = Xapian::Stem(languageLocale.getLanguage()); + queryParser.set_stemmer(stemmer); + queryParser.set_stemming_strategy(Xapian::QueryParser::STEM_ALL); + } catch (...) { + std::cout << "No steemming for language '" << languageLocale.getLanguage() + << "'" << std::endl; } } - - /* Search strings in the database */ - void XapianSearcher::searchInIndex(string &search, const unsigned int resultStart, - const unsigned int resultEnd, const bool verbose) { - /* Create the query */ - Xapian::Query query = queryParser.parse_query(search); - /* Create the enquire object */ - Xapian::Enquire enquire(this->readableDatabase); - enquire.set_query(query); - - /* Get the results */ - this->results = enquire.get_mset(resultStart, resultEnd - resultStart); - this->current_result = this->results.begin(); - } - - /* Get next result */ - Result* XapianSearcher::getNextResult() { - if (this->current_result != this->results.end()) { - XapianResult* result = new XapianResult(this, this->current_result); - this->current_result++; - return result; + if (!stopwords.empty()) { + std::string stopWord; + std::istringstream file(this->stopwords); + while (std::getline(file, stopWord, '\n')) { + this->stopper.add(stopWord); } - return NULL; + queryParser.set_stopper(&(this->stopper)); } +} - void XapianSearcher::restart_search() { - this->current_result = this->results.begin(); +/* Search strings in the database */ +void XapianSearcher::searchInIndex(string& search, + const unsigned int resultStart, + const unsigned int resultEnd, + const bool verbose) +{ + /* Create the query */ + Xapian::Query query = queryParser.parse_query(search); + + /* Create the enquire object */ + Xapian::Enquire enquire(this->readableDatabase); + 
enquire.set_query(query); + + /* Get the results */ + this->results = enquire.get_mset(resultStart, resultEnd - resultStart); + this->current_result = this->results.begin(); +} + +/* Get next result */ +Result* XapianSearcher::getNextResult() +{ + if (this->current_result != this->results.end()) { + XapianResult* result = new XapianResult(this, this->current_result); + this->current_result++; + return result; } + return NULL; +} - XapianResult::XapianResult(XapianSearcher* searcher, Xapian::MSetIterator& iterator): - searcher(searcher), - iterator(iterator), - document(iterator.get_document()) - { +void XapianSearcher::restart_search() +{ + this->current_result = this->results.begin(); +} + +XapianResult::XapianResult(XapianSearcher* searcher, + Xapian::MSetIterator& iterator) + : searcher(searcher), iterator(iterator), document(iterator.get_document()) +{ +} + +std::string XapianResult::get_url() +{ + return document.get_data(); +} +std::string XapianResult::get_title() +{ + if (searcher->valuesmap.empty()) { + /* This is the old legacy version. Guess and try */ + return document.get_value(0); + } else if (searcher->valuesmap.find("title") != searcher->valuesmap.end()) { + return document.get_value(searcher->valuesmap["title"]); } + return ""; +} - std::string XapianResult::get_url() { - return document.get_data(); +int XapianResult::get_score() +{ + return iterator.get_percent(); +} +std::string XapianResult::get_snippet() +{ + if (searcher->valuesmap.empty()) { + /* This is the old legacy version. Guess and try */ + std::string stored_snippet = document.get_value(1); + if (!stored_snippet.empty()) { + return stored_snippet; + } + /* Let's continue here, and see if we can genenate one */ + } else if (searcher->valuesmap.find("snippet") != searcher->valuesmap.end()) { + return document.get_value(searcher->valuesmap["snippet"]); } - - std::string XapianResult::get_title() { - if ( searcher->valuesmap.empty() ) - { - /* This is the old legacy version. 
Guess and try */ - return document.get_value(0); - } - else if ( searcher->valuesmap.find("title") != searcher->valuesmap.end() ) - { - return document.get_value(searcher->valuesmap["title"]); - } - return ""; + /* No reader, no snippet */ + if (!searcher->reader) { + return ""; } - - int XapianResult::get_score() { - return iterator.get_percent(); + /* Get the content of the article to generate a snippet. + We parse it and use the html dump to avoid remove html tags in the + content and be able to nicely cut the text at random place. */ + MyHtmlParser htmlParser; + std::string content; + unsigned int contentLength; + std::string contentType; + searcher->reader->getContentByUrl( + get_url(), content, contentLength, contentType); + try { + htmlParser.parse_html(content, "UTF-8", true); + } catch (...) { } + return searcher->results.snippet(htmlParser.dump, 500); +} - std::string XapianResult::get_snippet() { - if ( searcher->valuesmap.empty() ) - { - /* This is the old legacy version. Guess and try */ - std::string stored_snippet = document.get_value(1); - if ( ! stored_snippet.empty() ) - return stored_snippet; - /* Let's continue here, and see if we can genenate one */ - } - else if ( searcher->valuesmap.find("snippet") != searcher->valuesmap.end() ) - { - return document.get_value(searcher->valuesmap["snippet"]); - } - /* No reader, no snippet */ - if ( ! searcher->reader ) - return ""; - /* Get the content of the article to generate a snippet. - We parse it and use the html dump to avoid remove html tags in the - content and be able to nicely cut the text at random place. */ - MyHtmlParser htmlParser; - std::string content; - unsigned int contentLength; - std::string contentType; - searcher->reader->getContentByUrl(get_url(), content, contentLength, contentType); - try { - htmlParser.parse_html(content, "UTF-8", true); - } catch (...) 
{} - return searcher->results.snippet(htmlParser.dump, 500); +int XapianResult::get_size() +{ + if (searcher->valuesmap.empty()) { + /* This is the old legacy version. Guess and try */ + return document.get_value(2).empty() == true + ? -1 + : atoi(document.get_value(2).c_str()); + } else if (searcher->valuesmap.find("size") != searcher->valuesmap.end()) { + return atoi(document.get_value(searcher->valuesmap["size"]).c_str()); } + /* The size is never used. Do we really want to get the content and + calculate the size ? */ + return -1; +} - int XapianResult::get_size() { - if ( searcher->valuesmap.empty() ) - { - /* This is the old legacy version. Guess and try */ - return document.get_value(2).empty() == true ? -1 : atoi(document.get_value(2).c_str()); - } - else if ( searcher->valuesmap.find("size") != searcher->valuesmap.end() ) - { - return atoi(document.get_value(searcher->valuesmap["size"]).c_str()); - } - /* The size is never used. Do we really want to get the content and - calculate the size ? */ - return -1; +int XapianResult::get_wordCount() +{ + if (searcher->valuesmap.empty()) { + /* This is the old legacy version. Guess and try */ + return document.get_value(3).empty() == true + ? -1 + : atoi(document.get_value(3).c_str()); + } else if (searcher->valuesmap.find("wordcount") + != searcher->valuesmap.end()) { + return atoi(document.get_value(searcher->valuesmap["wordcount"]).c_str()); } + return -1; +} - int XapianResult::get_wordCount() { - if ( searcher->valuesmap.empty() ) - { - /* This is the old legacy version. Guess and try */ - return document.get_value(3).empty() == true ? -1 : atoi(document.get_value(3).c_str()); - } - else if ( searcher->valuesmap.find("wordcount") != searcher->valuesmap.end() ) - { - return atoi(document.get_value(searcher->valuesmap["wordcount"]).c_str()); - } - return -1; - } - -} // Kiwix namespace +} // Kiwix namespace