Enter uriEncode()

This commit is contained in:
Veloman Yunkan 2023-01-25 23:45:18 +04:00
parent b7a019469c
commit cd9785fe85
3 changed files with 100 additions and 1 deletions

View File

@ -208,6 +208,43 @@ bool isHarmlessUriChar(char c)
return false; return false;
} }
// Tells whether the character 'c' has to be percent-encoded when it is
// going to be placed in the URI component designated by 'target'.
//
// Characters accepted by isHarmlessUriChar() are never encoded. A handful
// of URI delimiters are encoded only for the component in which they would
// otherwise be misinterpreted. Every remaining character is encoded
// regardless of the target component.
bool mustBeUriEncodedFor(kiwix::URIComponentKind target, char c)
{
  if (isHarmlessUriChar(c))
    return false;

  const bool forPath  = (target == kiwix::URIComponentKind::PATH);
  const bool forQuery = (target == kiwix::URIComponentKind::QUERY);

  switch (c) {
    // In the general case the path separator is left as is. It needs
    // encoding only inside a path component where it must not act as a
    // separator, which is not a situation handled by this function.
    case '/':
      return false;

    // '@' : in a relative URL such as abc@def/xyz (no '/' in abc) a bare
    //       '@' would turn "abc" and "def" into username and host.
    // ':' : in a relative URL such as abc:def/xyz (no '/' in abc) a bare
    //       ':' would make the text around it be parsed as separate
    //       URI components rather than as part of the path.
    // '?' : a bare '?' terminates the path and starts the query.
    case '@':
    case ':':
    case '?':
      return forPath;

    // These are encoded only for the query component (presumably because
    // of the special role they play inside a query string).
    case '&':
    case '=':
    case '+':
      return forQuery;

    // Anything else is always encoded. This includes '#', which in either
    // the path or the query component would mark the beginning of the
    // fragment component.
    default:
      return true;
  }
}
int hexToInt(char c) { int hexToInt(char c) {
switch (c) { switch (c) {
case '0': return 0; case '0': return 0;
@ -247,6 +284,26 @@ std::string kiwix::urlEncode(const std::string& value)
return os.str(); return os.str();
} }
namespace kiwix
{

// Returns a copy of 'value' in which every character that must be encoded
// for the URI component 'target' (as decided by mustBeUriEncodedFor()) is
// replaced with '%' followed by the two-digit uppercase hexadecimal code
// of the corresponding byte. All other characters are copied unchanged.
std::string uriEncode(URIComponentKind target, const std::string& value)
{
  static const char hexDigits[] = "0123456789ABCDEF";

  std::string encoded;
  for (const char c : value) {
    if ( !mustBeUriEncodedFor(target, c) ) {
      encoded += c;
      continue;
    }

    // Go through unsigned char so that bytes with the high bit set
    // (e.g. those of UTF-8 sequences) are not sign-extended.
    const unsigned int byteVal = static_cast<unsigned char>(c);
    encoded += '%';
    encoded += hexDigits[byteVal >> 4];
    encoded += hexDigits[byteVal & 0x0F];
  }
  return encoded;
}

} // namespace kiwix
std::string kiwix::urlDecode(const std::string& value, bool component) std::string kiwix::urlDecode(const std::string& value, bool component)
{ {
std::ostringstream os; std::ostringstream os;

View File

@ -60,6 +60,17 @@ private:
std::string urlEncode(const std::string& value); std::string urlEncode(const std::string& value);
std::string urlDecode(const std::string& value, bool component = false); std::string urlDecode(const std::string& value, bool component = false);
// Identifies the URI component that a string is being encoded for
// (see uriEncode()).
// Only URI components that are of interest to libkiwix
// are included in the below enumeration type
enum class URIComponentKind
{
// The value is destined for the path component of a URI
PATH,
// The value is destined for the query component of a URI
QUERY
};
// Encode 'value' for usage in a URI component specified by 'target'.
// Returns the encoded string; characters that need encoding are replaced
// with '%' followed by two uppercase hexadecimal digits.
std::string uriEncode(URIComponentKind target, const std::string& value);
std::string join(const std::vector<std::string>& list, const std::string& sep); std::string join(const std::vector<std::string>& list, const std::string& sep);
std::string ucAll(const std::string& word); std::string ucAll(const std::string& word);

View File

@ -163,4 +163,35 @@ TEST(stringTools, urlDecode)
EXPECT_EQ(urlDecode(encodedUriDelimSymbols, false), encodedUriDelimSymbols); EXPECT_EQ(urlDecode(encodedUriDelimSymbols, false), encodedUriDelimSymbols);
} }
}; TEST(stringTools, uriEncode)
{
// Checks uriEncode() for both supported target components (PATH and QUERY)
// over several groups of characters.

// ASCII letters are passed through unchanged for either component
const char letters[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
EXPECT_EQ(uriEncode(URIComponentKind::PATH, letters), letters);
EXPECT_EQ(uriEncode(URIComponentKind::QUERY, letters), letters);
// Decimal digits are passed through unchanged for either component
const char digits[] = "0123456789";
EXPECT_EQ(uriEncode(URIComponentKind::PATH, digits), digits);
EXPECT_EQ(uriEncode(URIComponentKind::QUERY, digits), digits);
// These symbols (including the path separator '/') are never encoded
const char nonEncodableSymbols[] = ".-_~()*!/";
EXPECT_EQ(uriEncode(URIComponentKind::PATH, nonEncodableSymbols), nonEncodableSymbols);
EXPECT_EQ(uriEncode(URIComponentKind::QUERY, nonEncodableSymbols), nonEncodableSymbols);
// URI delimiters: ':', '@' and '?' are encoded only for PATH,
// '=', '+' and '&' only for QUERY, while '#', '$', ';' and ','
// are encoded for both components
const char uriDelimSymbols[] = ":@?=+&#$;,";
EXPECT_EQ(uriEncode(URIComponentKind::PATH, uriDelimSymbols), "%3A%40%3F=+&%23%24%3B%2C");
EXPECT_EQ(uriEncode(URIComponentKind::QUERY, uriDelimSymbols), ":@?%3D%2B%26%23%24%3B%2C");
// Other ASCII punctuation is always encoded, identically for both components
const char otherSymbols[] = R"(`%^[]{}\|"<>)";
EXPECT_EQ(uriEncode(URIComponentKind::PATH, otherSymbols), "%60%25%5E%5B%5D%7B%7D%5C%7C%22%3C%3E");
EXPECT_EQ(uriEncode(URIComponentKind::PATH, otherSymbols), uriEncode(URIComponentKind::QUERY, otherSymbols));
// Whitespace characters are always encoded, identically for both components
const char whitespace[] = " \n\t\r";
EXPECT_EQ(uriEncode(URIComponentKind::PATH, whitespace), "%20%0A%09%0D");
EXPECT_EQ(uriEncode(URIComponentKind::PATH, whitespace), uriEncode(URIComponentKind::QUERY, whitespace));
// Non-ASCII text is encoded byte by byte (the expected value below lists
// every byte of the multi-byte sequences), identically for both components
const char someNonASCIIChars[] = "Σ♂♀ツ";
EXPECT_EQ(uriEncode(URIComponentKind::PATH, someNonASCIIChars), "%CE%A3%E2%99%82%E2%99%80%E3%83%84");
EXPECT_EQ(uriEncode(URIComponentKind::PATH, someNonASCIIChars), uriEncode(URIComponentKind::QUERY, someNonASCIIChars));
}
} // unnamed namespace