From 82cba791799f414c3581cc6a045ece167b090fef Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Fri, 7 Oct 2016 17:21:09 +0200 Subject: [PATCH 1/5] Do not make unnecessary copy when serving binary content. Binary content does not need to be modified, so we don't need to copy it. We can directly serve it from the internal zim (cluster) buffer. The handle_content function now calls getArticleObjectByDecodedUrl instead of getContentByDecodedUrl. This is to get the mimetype of the article and copy the content only when needed (getContentByDecodedUrl always copies the content). Thus, handle_content is a bit more complex as it needs to do some manipulation previously done in getContentByDecodedUrl. The main change is that if the content is binary, we serve the content with a callback response that will get the content chunks directly from the blob buffer. --- src/common/kiwix/reader.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/common/kiwix/reader.h b/src/common/kiwix/reader.h index 79de7f18f..98901dbf8 100644 --- a/src/common/kiwix/reader.h +++ b/src/common/kiwix/reader.h @@ -81,6 +81,7 @@ namespace kiwix { bool parseUrl(const string &url, char *ns, string &title); unsigned int getFileSize(); zim::File* getZimFileHandler(); + bool getArticleObjectByDecodedUrl(const string &url, zim::Article &article); protected: zim::File* zimFileHandler; @@ -95,7 +96,6 @@ namespace kiwix { private: std::map parseCounterMetadata(); - bool getArticleObjectByDecodedUrl(const string &url, zim::Article &article); }; } From 01794e6e91adac3e9dc9c45fd335a83cadc7fc5a Mon Sep 17 00:00:00 2001 From: Kelson Date: Fri, 14 Oct 2016 13:11:14 +0200 Subject: [PATCH 2/5] New Reader::getZimFilePath() --- src/common/kiwix/reader.cpp | 5 +++++ src/common/kiwix/reader.h | 4 +++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/common/kiwix/reader.cpp b/src/common/kiwix/reader.cpp index 10526ba25..01a10b2e4 100644 --- a/src/common/kiwix/reader.cpp +++ 
b/src/common/kiwix/reader.cpp @@ -72,6 +72,7 @@ namespace kiwix { this->currentArticleOffset = this->firstArticleOffset; this->nsACount = this->zimFileHandler->getNamespaceCount('A'); this->nsICount = this->zimFileHandler->getNamespaceCount('I'); + this->zimFilePath = zimFilePath; } /* initialize random seed: */ @@ -252,6 +253,10 @@ namespace kiwix { return content.empty() ? false : true; } + string Reader::getZimFilePath() { + return this->zimFilePath; + } + /* Return a metatag value */ bool Reader::getMetatag(const string &name, string &value) { unsigned int contentLength = 0; diff --git a/src/common/kiwix/reader.h b/src/common/kiwix/reader.h index 98901dbf8..7d0325069 100644 --- a/src/common/kiwix/reader.h +++ b/src/common/kiwix/reader.h @@ -47,6 +47,7 @@ namespace kiwix { unsigned int getArticleCount(); unsigned int getMediaCount(); unsigned int getGlobalCount(); + string getZimFilePath(); string getId(); string getRandomPageUrl(); string getFirstPageUrl(); @@ -90,7 +91,8 @@ namespace kiwix { zim::size_type currentArticleOffset; zim::size_type nsACount; zim::size_type nsICount; - + std::string zimFilePath; + std::vector< std::vector > suggestions; std::vector< std::vector >::iterator suggestionsOffset; From 2889d7c651ee1e3015b230eeb7d3c11d324bae1e Mon Sep 17 00:00:00 2001 From: Kelson Date: Fri, 14 Oct 2016 13:12:26 +0200 Subject: [PATCH 3/5] Make fulltext searcher Android compatible --- src/common/kiwix/searcher.cpp | 13 +++++++------ src/common/kiwix/searcher.h | 13 +++++++++---- src/common/kiwix/xapianSearcher.h | 7 ++----- 3 files changed, 18 insertions(+), 15 deletions(-) diff --git a/src/common/kiwix/searcher.cpp b/src/common/kiwix/searcher.cpp index 1cc2613e5..05e88bcad 100644 --- a/src/common/kiwix/searcher.cpp +++ b/src/common/kiwix/searcher.cpp @@ -34,9 +34,11 @@ namespace kiwix { { template_ct2 = getResourceAsString("results.ct2"); loadICUExternalTables(); - } - + + /* Destructor */ + Searcher::~Searcher() {} + /* Search strings in the database */ 
void Searcher::search(std::string &search, unsigned int resultStart, unsigned int resultEnd, const bool verbose) { @@ -127,6 +129,8 @@ namespace kiwix { this->contentHumanReadableId = contentHumanReadableId; } +#ifndef __ANDROID__ + string Searcher::getHtml() { SimpleVM oSimpleVM; @@ -203,9 +207,6 @@ namespace kiwix { } - /* Destructor */ - Searcher::~Searcher() { - - } +#endif } diff --git a/src/common/kiwix/searcher.h b/src/common/kiwix/searcher.h index f85fcd13b..f8a2551e2 100644 --- a/src/common/kiwix/searcher.h +++ b/src/common/kiwix/searcher.h @@ -33,14 +33,16 @@ #include #include "unicode/putil.h" +#ifndef __ANDROID__ #include #include #include - #include "kiwix/ctpp2/CTPP2VMStringLoader.hpp" -using namespace std; using namespace CTPP; +#endif + +using namespace std; struct Result { @@ -58,6 +60,7 @@ namespace kiwix { public: Searcher(); + ~Searcher(); void search(std::string &search, unsigned int resultStart, unsigned int resultEnd, const bool verbose=false); @@ -65,11 +68,13 @@ namespace kiwix { unsigned int getEstimatedResultCount(); bool setProtocolPrefix(const std::string prefix); bool setSearchProtocolPrefix(const std::string prefix); - string getHtml(); void reset(); void setContentHumanReadableId(const string &contentHumanReadableId); - ~Searcher(); +#ifndef __ANDROID__ + string getHtml(); +#endif + protected: std::string beautifyInteger(const unsigned int number); virtual void closeIndex() = 0; diff --git a/src/common/kiwix/xapianSearcher.h b/src/common/kiwix/xapianSearcher.h index 3fb24b29e..2f21ed052 100644 --- a/src/common/kiwix/xapianSearcher.h +++ b/src/common/kiwix/xapianSearcher.h @@ -27,10 +27,8 @@ using namespace std; namespace kiwix { - class NoXapianIndexInZim: public exception - { - virtual const char* what() const throw() - { + class NoXapianIndexInZim: public exception { + virtual const char* what() const throw() { return "There is no fulltext index in the zim file"; } }; @@ -39,7 +37,6 @@ namespace kiwix { public: XapianSearcher(const 
string &xapianDirectoryPath); - void searchInIndex(string &search, const unsigned int resultStart, const unsigned int resultEnd, const bool verbose=false); From 75da598ba873d3dbbd3c665c00f65e723aece27c Mon Sep 17 00:00:00 2001 From: Kelson Date: Fri, 14 Oct 2016 16:59:32 +0200 Subject: [PATCH 4/5] Add kiwix::removeAccents for Android --- src/common/stringTools.cpp | 26 +++++++++++++------------- src/common/stringTools.h | 2 +- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/common/stringTools.cpp b/src/common/stringTools.cpp index 74aea6104..308765036 100644 --- a/src/common/stringTools.cpp +++ b/src/common/stringTools.cpp @@ -33,6 +33,19 @@ void kiwix::loadICUExternalTables() { #endif } +std::string kiwix::removeAccents(const std::string &text) { + loadICUExternalTables(); + ucnv_setDefaultName("UTF-8"); + UErrorCode status = U_ZERO_ERROR; + Transliterator *removeAccentsTrans = Transliterator::createInstance("Lower; NFD; [:M:] remove; NFC", UTRANS_FORWARD, status); + UnicodeString ustring = UnicodeString(text.c_str()); + removeAccentsTrans->transliterate(ustring); + delete removeAccentsTrans; + std::string unaccentedText; + ustring.toUTF8String(unaccentedText); + return unaccentedText; +} + #ifndef __ANDROID__ /* Prepare integer for display */ @@ -59,19 +72,6 @@ std::string kiwix::beautifyFileSize(const unsigned int number) { } } -std::string kiwix::removeAccents(const std::string &text) { - loadICUExternalTables(); - ucnv_setDefaultName("UTF-8"); - UErrorCode status = U_ZERO_ERROR; - Transliterator *removeAccentsTrans = Transliterator::createInstance("Lower; NFD; [:M:] remove; NFC", UTRANS_FORWARD, status); - UnicodeString ustring = UnicodeString(text.c_str()); - removeAccentsTrans->transliterate(ustring); - delete removeAccentsTrans; - std::string unaccentedText; - ustring.toUTF8String(unaccentedText); - return unaccentedText; -} - void kiwix::printStringInHexadecimal(UnicodeString s) { std::cout << std::showbase << std::hex; for (int i=0; i 
Date: Fri, 14 Oct 2016 17:04:47 +0200 Subject: [PATCH 5/5] Remove exec permission on source files --- src/common/resourceTools.cpp | 0 src/common/resourceTools.h | 0 2 files changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 src/common/resourceTools.cpp mode change 100755 => 100644 src/common/resourceTools.h diff --git a/src/common/resourceTools.cpp b/src/common/resourceTools.cpp old mode 100755 new mode 100644 diff --git a/src/common/resourceTools.h b/src/common/resourceTools.h old mode 100755 new mode 100644