From 45ad751d8949f835ce07748af6b00e87a93114fa Mon Sep 17 00:00:00 2001 From: Kelson42 Date: Mon, 8 Dec 2014 01:37:06 +0100 Subject: [PATCH 2/3] New function getMimeTypeByUrl() --- src/common/kiwix/reader.cpp | 76 +++++++++++++++++++++++++++---------- src/common/kiwix/reader.h | 3 +- 2 files changed, 59 insertions(+), 20 deletions(-) diff --git a/src/common/kiwix/reader.cpp b/src/common/kiwix/reader.cpp index 7dfb2afed..79c84feaf 100644 --- a/src/common/kiwix/reader.cpp +++ b/src/common/kiwix/reader.cpp @@ -376,6 +376,60 @@ namespace kiwix { return true; } + /* Return article by url */ + bool Reader::getArticleObjectByDecodedUrl(const string &url, zim::Article &article) { + bool retVal = false; + + if (this->zimFileHandler != NULL) { + + /* Parse the url */ + char ns = 0; + string titleStr; + this->parseUrl(url, &ns, titleStr); + + /* Main page */ + if (titleStr.empty() && ns == 0) { + this->parseUrl(this->getMainPageUrl(), &ns, titleStr); + } + + /* Extract the content from the zim file */ + std::pair<bool, zim::File::const_iterator> resultPair = zimFileHandler->findx(ns, titleStr); + + /* Test if the article was found */ + if (resultPair.first == true) { + article = zimFileHandler->getArticle(resultPair.second.getIndex()); + retVal = true; + } + + } + + return retVal; + } + + /* Return the mimeType without the content */ + bool Reader::getMimeTypeByUrl(const string &url, string &mimeType) { + bool retVal = false; + + if (this->zimFileHandler != NULL) { + + zim::Article article; + if (this->getArticleObjectByDecodedUrl(url, article)) { + try { + mimeType = string(article.getMimeType().data(), article.getMimeType().size()); + } catch (exception &e) { + cerr << "Unable to get the mimetype for "<< url << ":" << e.what() << endl; + mimeType = "application/octet-stream"; + } + retVal = true; + } else { + mimeType = ""; + } + + } + + return retVal; + } + /* Get a content from a zim file */ bool Reader::getContentByUrl(const string &url, string &content, unsigned int &contentLength, string &contentType) 
{ return this->getContentByEncodedUrl(url, content, contentLength, contentType); @@ -402,25 +456,9 @@ namespace kiwix { contentLength = 0; if (this->zimFileHandler != NULL) { - /* Parse the url */ - char ns = 0; - string titleStr; - this->parseUrl(url, &ns, titleStr); - - /* Main page */ - if (titleStr.empty() && ns == 0) { - this->parseUrl(this->getMainPageUrl(), &ns, titleStr); - } - - /* Extract the content from the zim file */ - std::pair<bool, zim::File::const_iterator> resultPair = zimFileHandler->findx(ns, titleStr); - - /* Test if the article was found */ - if (resultPair.first == true) { - - /* Get the article */ - zim::Article article = zimFileHandler->getArticle(resultPair.second.getIndex()); - + zim::Article article; + if (this->getArticleObjectByDecodedUrl(url, article)) { + /* If redirect */ unsigned int loopCounter = 0; while (article.isRedirect() && loopCounter++<42) { diff --git a/src/common/kiwix/reader.h b/src/common/kiwix/reader.h index 9d2982ebc..8dd77f84a 100644 --- a/src/common/kiwix/reader.h +++ b/src/common/kiwix/reader.h @@ -61,6 +61,7 @@ namespace kiwix { string getOrigId(); bool getFavicon(string &content, string &mimeType); bool getPageUrlFromTitle(const string &title, string &url); + bool getMimeTypeByUrl(const string &url, string &mimeType); bool getContentByUrl(const string &url, string &content, unsigned int &contentLength, string &contentType); bool getContentByEncodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType, string &baseUrl); bool getContentByEncodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType); @@ -89,7 +90,7 @@ namespace kiwix { private: std::map parseCounterMetadata(); - + bool getArticleObjectByDecodedUrl(const string &url, zim::Article &article); }; } From 8287a64172b59db43d5773903c1b9fc8ff0fe6bb Mon Sep 17 00:00:00 2001 From: Kelson42 Date: Thu, 8 Jan 2015 12:51:42 +0100 Subject: [PATCH 3/3] FIXED: kiwix-serve XSS attack vulnerability (#763) --- 
src/common/kiwix/searcher.cpp | 2 +- src/common/stringTools.cpp | 8 ++++++++ src/common/stringTools.h | 1 + 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/common/kiwix/searcher.cpp b/src/common/kiwix/searcher.cpp index 2ab1949cc..36d5b3529 100644 --- a/src/common/kiwix/searcher.cpp +++ b/src/common/kiwix/searcher.cpp @@ -180,7 +180,7 @@ namespace kiwix { oData["pages"] = pagesCDT; oData["count"] = kiwix::beautifyInteger(this->estimatedResultCount); - oData["searchPattern"] = this->searchPattern; + oData["searchPattern"] = kiwix::encodeDiples(this->searchPattern); oData["searchPatternEncoded"] = urlEncode(this->searchPattern); oData["resultStart"] = this->resultStart + 1; oData["resultEnd"] = (this->resultEnd > this->estimatedResultCount ? this->estimatedResultCount : this->resultEnd); diff --git a/src/common/stringTools.cpp b/src/common/stringTools.cpp index 15913b1cc..0b0ceeeba 100644 --- a/src/common/stringTools.cpp +++ b/src/common/stringTools.cpp @@ -104,6 +104,14 @@ void kiwix::stringReplacement(std::string& str, const std::string& oldStr, const } } +/* Encode string to avoid XSS attacks */ +std::string kiwix::encodeDiples(const std::string& str) { + std::string result = str; + kiwix::stringReplacement(result, "<", "&lt;"); + kiwix::stringReplacement(result, ">", "&gt;"); + return result; +} + // Urlencode //based on javascript encodeURIComponent() diff --git a/src/common/stringTools.h b/src/common/stringTools.h index 8a6683af5..2a2367b29 100644 --- a/src/common/stringTools.h +++ b/src/common/stringTools.h @@ -48,6 +48,7 @@ namespace kiwix { void printStringInHexadecimal(const char *s); void printStringInHexadecimal(UnicodeString s); void stringReplacement(std::string& str, const std::string& oldStr, const std::string& newStr); + std::string encodeDiples(const std::string& str); #endif