mirror of https://github.com/kiwix/libkiwix.git
Enter uriEncode()
This commit is contained in:
parent
b7a019469c
commit
cd9785fe85
|
@ -208,6 +208,43 @@ bool isHarmlessUriChar(char c)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Tells whether the character 'c' has to be percent-encoded when it is
// emitted into a URI component of the kind given by 'target'.
//
// Characters accepted by isHarmlessUriChar() never need encoding. A few
// URI delimiters are handled depending on the target component; every
// other character must be encoded.
bool mustBeUriEncodedFor(kiwix::URIComponentKind target, char c)
{
  if (isHarmlessUriChar(c))
    return false;

  // The path separator is left as is in the general case. It would have
  // to be encoded only inside a path component where its meaning as a
  // path separator must be suppressed.
  if (c == '/')
    return false;

  // '@' and ':' - left non-encoded in a relative URL such as abc@def/xyz
  //               or abc:def/xyz (with no / in abc) they would cause "abc"
  //               and "def" to be taken for something other than parts
  //               of the path.
  // '?'         - left non-encoded it acts as the separator between the
  //               path and query components.
  if (c == '@' || c == ':' || c == '?')
    return target == kiwix::URIComponentKind::PATH;

  // These characters need encoding only inside a query component.
  if (c == '&' || c == '=' || c == '+')
    return target == kiwix::URIComponentKind::QUERY;

  // '#' (which, if not encoded, would mark the beginning of the fragment
  // component in both a path and a query component) as well as any
  // remaining character must always be encoded.
  return true;
}
|
||||||
|
|
||||||
int hexToInt(char c) {
|
int hexToInt(char c) {
|
||||||
switch (c) {
|
switch (c) {
|
||||||
case '0': return 0;
|
case '0': return 0;
|
||||||
|
@ -247,6 +284,26 @@ std::string kiwix::urlEncode(const std::string& value)
|
||||||
return os.str();
|
return os.str();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
namespace kiwix
|
||||||
|
{
|
||||||
|
|
||||||
|
std::string uriEncode(URIComponentKind target, const std::string& value)
|
||||||
|
{
|
||||||
|
std::ostringstream os;
|
||||||
|
os << std::hex << std::uppercase;
|
||||||
|
for (const char c : value) {
|
||||||
|
if ( mustBeUriEncodedFor(target, c) ) {
|
||||||
|
const unsigned int charVal = static_cast<unsigned char>(c);
|
||||||
|
os << '%' << std::setw(2) << std::setfill('0') << charVal;
|
||||||
|
} else {
|
||||||
|
os << c;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return os.str();
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace kiwix
|
||||||
|
|
||||||
std::string kiwix::urlDecode(const std::string& value, bool component)
|
std::string kiwix::urlDecode(const std::string& value, bool component)
|
||||||
{
|
{
|
||||||
std::ostringstream os;
|
std::ostringstream os;
|
||||||
|
|
|
@ -60,6 +60,17 @@ private:
|
||||||
std::string urlEncode(const std::string& value);
|
std::string urlEncode(const std::string& value);
|
||||||
std::string urlDecode(const std::string& value, bool component = false);
|
std::string urlDecode(const std::string& value, bool component = false);
|
||||||
|
|
||||||
|
// Kinds of URI components that a value can be encoded for. The list is
// limited to the components that are of interest to libkiwix:
//   PATH  - the path component of a URI
//   QUERY - the query component of a URI
enum class URIComponentKind { PATH, QUERY };
|
||||||
|
|
||||||
|
// Encode 'value' for usage in a URI component specified by 'target'
|
||||||
|
std::string uriEncode(URIComponentKind target, const std::string& value);
|
||||||
|
|
||||||
std::string join(const std::vector<std::string>& list, const std::string& sep);
|
std::string join(const std::vector<std::string>& list, const std::string& sep);
|
||||||
|
|
||||||
std::string ucAll(const std::string& word);
|
std::string ucAll(const std::string& word);
|
||||||
|
|
|
@ -163,4 +163,35 @@ TEST(stringTools, urlDecode)
|
||||||
EXPECT_EQ(urlDecode(encodedUriDelimSymbols, false), encodedUriDelimSymbols);
|
EXPECT_EQ(urlDecode(encodedUriDelimSymbols, false), encodedUriDelimSymbols);
|
||||||
}
|
}
|
||||||
|
|
||||||
};
|
// Exercises uriEncode() for both supported component kinds over several
// classes of characters.
TEST(stringTools, uriEncode)
{
  const auto forPath = [](const std::string& s) {
    return uriEncode(URIComponentKind::PATH, s);
  };
  const auto forQuery = [](const std::string& s) {
    return uriEncode(URIComponentKind::QUERY, s);
  };

  // Letters are expected to come out unchanged
  const char letters[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
  EXPECT_EQ(forPath(letters), letters);
  EXPECT_EQ(forQuery(letters), letters);

  // Digits are expected to come out unchanged
  const char digits[] = "0123456789";
  EXPECT_EQ(forPath(digits), digits);
  EXPECT_EQ(forQuery(digits), digits);

  // These symbols are expected to come out unchanged too
  const char nonEncodableSymbols[] = ".-_~()*!/";
  EXPECT_EQ(forPath(nonEncodableSymbols), nonEncodableSymbols);
  EXPECT_EQ(forQuery(nonEncodableSymbols), nonEncodableSymbols);

  // The outcome for URI delimiters depends on the target component
  const char uriDelimSymbols[] = ":@?=+&#$;,";
  EXPECT_EQ(forPath(uriDelimSymbols), "%3A%40%3F=+&%23%24%3B%2C");
  EXPECT_EQ(forQuery(uriDelimSymbols), ":@?%3D%2B%26%23%24%3B%2C");

  // The remaining groups are encoded, and identically for both kinds
  const char otherSymbols[] = R"(`%^[]{}\|"<>)";
  EXPECT_EQ(forPath(otherSymbols), "%60%25%5E%5B%5D%7B%7D%5C%7C%22%3C%3E");
  EXPECT_EQ(forPath(otherSymbols), forQuery(otherSymbols));

  const char whitespace[] = " \n\t\r";
  EXPECT_EQ(forPath(whitespace), "%20%0A%09%0D");
  EXPECT_EQ(forPath(whitespace), forQuery(whitespace));

  const char someNonASCIIChars[] = "Σ♂♀ツ";
  EXPECT_EQ(forPath(someNonASCIIChars), "%CE%A3%E2%99%82%E2%99%80%E3%83%84");
  EXPECT_EQ(forPath(someNonASCIIChars), forQuery(someNonASCIIChars));
}
|
||||||
|
|
||||||
|
} // unnamed namespace
|
||||||
|
|
Loading…
Reference in New Issue