mirror of https://github.com/kiwix/libkiwix.git
Enter uriEncode()
This commit is contained in:
parent
b7a019469c
commit
cd9785fe85
|
@ -208,6 +208,43 @@ bool isHarmlessUriChar(char c)
|
|||
return false;
|
||||
}
|
||||
|
||||
// Tells whether character 'c' has to be percent-encoded when it is placed
// in a URI component of the kind given by 'target'.
bool mustBeUriEncodedFor(kiwix::URIComponentKind target, char c)
{
  // Characters accepted by isHarmlessUriChar() are never encoded.
  if (isHarmlessUriChar(c))
    return false;

  const bool forPath  = target == kiwix::URIComponentKind::PATH;
  const bool forQuery = target == kiwix::URIComponentKind::QUERY;

  switch (c) {
    // The path separator is left alone in the general case. It only needs
    // encoding inside a path component when its meaning as a separator
    // has to be suppressed.
    case '/':
      return false;

    // In a relative URL such as abc@def/xyz or abc:def/xyz (with no / in
    // abc) a raw '@' would cause "abc" and "def" to be read as username
    // and host, and a raw ':' would cause them to be read as host and
    // port. A raw '?' would end the path and start the query component.
    case '@':
    case ':':
    case '?':
      return forPath;

    // These characters are encoded only when the target is a query
    // component.
    case '&':
    case '=':
    case '+':
      return forQuery;

    // Everything else is encoded for both kinds of component. This
    // includes '#', which if left raw would mark the beginning of the
    // fragment component.
    default:
      return true;
  }
}
|
||||
|
||||
int hexToInt(char c) {
|
||||
switch (c) {
|
||||
case '0': return 0;
|
||||
|
@ -247,6 +284,26 @@ std::string kiwix::urlEncode(const std::string& value)
|
|||
return os.str();
|
||||
}
|
||||
|
||||
namespace kiwix
|
||||
{
|
||||
|
||||
std::string uriEncode(URIComponentKind target, const std::string& value)
|
||||
{
|
||||
std::ostringstream os;
|
||||
os << std::hex << std::uppercase;
|
||||
for (const char c : value) {
|
||||
if ( mustBeUriEncodedFor(target, c) ) {
|
||||
const unsigned int charVal = static_cast<unsigned char>(c);
|
||||
os << '%' << std::setw(2) << std::setfill('0') << charVal;
|
||||
} else {
|
||||
os << c;
|
||||
}
|
||||
}
|
||||
return os.str();
|
||||
}
|
||||
|
||||
} // namespace kiwix
|
||||
|
||||
std::string kiwix::urlDecode(const std::string& value, bool component)
|
||||
{
|
||||
std::ostringstream os;
|
||||
|
|
|
@ -60,6 +60,17 @@ private:
|
|||
std::string urlEncode(const std::string& value);
|
||||
std::string urlDecode(const std::string& value, bool component = false);
|
||||
|
||||
// Only URI components that are of interest to libkiwix
|
||||
// are included in the below enumeration type
|
||||
enum class URIComponentKind
|
||||
{
|
||||
PATH,  // the encoded value is destined for the path component of a URI
|
||||
QUERY  // the encoded value is destined for the query component of a URI
|
||||
};
|
||||
|
||||
// Encode 'value' for usage in a URI component specified by 'target'
|
||||
std::string uriEncode(URIComponentKind target, const std::string& value);
|
||||
|
||||
std::string join(const std::vector<std::string>& list, const std::string& sep);
|
||||
|
||||
std::string ucAll(const std::string& word);
|
||||
|
|
|
@ -163,4 +163,35 @@ TEST(stringTools, urlDecode)
|
|||
EXPECT_EQ(urlDecode(encodedUriDelimSymbols, false), encodedUriDelimSymbols);
|
||||
}
|
||||
|
||||
};
|
||||
// Checks uriEncode() for both supported component kinds over several
// classes of characters.
TEST(stringTools, uriEncode)
{
  // Shorthands for encoding a string as a path or as a query component.
  const auto asPath = [](const std::string& s) {
    return uriEncode(URIComponentKind::PATH, s);
  };
  const auto asQuery = [](const std::string& s) {
    return uriEncode(URIComponentKind::QUERY, s);
  };

  // Letters pass through unchanged.
  const std::string alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
  EXPECT_EQ(asPath(alphabet), alphabet);
  EXPECT_EQ(asQuery(alphabet), alphabet);

  // Digits pass through unchanged.
  const std::string numerals = "0123456789";
  EXPECT_EQ(asPath(numerals), numerals);
  EXPECT_EQ(asQuery(numerals), numerals);

  // Symbols that are left as is in both kinds of component.
  const std::string untouchedSymbols = ".-_~()*!/";
  EXPECT_EQ(asPath(untouchedSymbols), untouchedSymbols);
  EXPECT_EQ(asQuery(untouchedSymbols), untouchedSymbols);

  // URI delimiters: which ones get encoded depends on the component kind.
  const std::string delimiters = ":@?=+&#$;,";
  EXPECT_EQ(asPath(delimiters), "%3A%40%3F=+&%23%24%3B%2C");
  EXPECT_EQ(asQuery(delimiters), ":@?%3D%2B%26%23%24%3B%2C");

  // Remaining printable symbols are encoded the same way for both kinds.
  const std::string miscSymbols = R"(`%^[]{}\|"<>)";
  EXPECT_EQ(asPath(miscSymbols), "%60%25%5E%5B%5D%7B%7D%5C%7C%22%3C%3E");
  EXPECT_EQ(asPath(miscSymbols), asQuery(miscSymbols));

  // Whitespace is encoded the same way for both kinds.
  const std::string blanks = " \n\t\r";
  EXPECT_EQ(asPath(blanks), "%20%0A%09%0D");
  EXPECT_EQ(asPath(blanks), asQuery(blanks));

  // Non-ASCII text is encoded byte by byte, identically for both kinds.
  const std::string nonAscii = "Σ♂♀ツ";
  EXPECT_EQ(asPath(nonAscii), "%CE%A3%E2%99%82%E2%99%80%E3%83%84");
  EXPECT_EQ(asPath(nonAscii), asQuery(nonAscii));
}
|
||||
|
||||
} // unnamed namespace
|
||||
|
|
Loading…
Reference in New Issue