mirror of
https://github.com/kiwix/libkiwix.git
synced 2025-06-26 10:11:30 +00:00
Better parsing of M/Counter
A mimetype may contain parameters, in which case it looks something like "text/html;foo=bar;foz=baz". It then contains `;` and `=`, which conflict with the separators we use between the items (`;`) and between a key and its count (`=`) in our list. We have to use a more advanced algorithm that takes the context into account. Fix #416
This commit is contained in:
@ -51,30 +51,55 @@ TEST(ParseCounterTest, simpleMimeType)
|
||||
ASSERT_EQ(parse(counterStr), counterMap) << counterStr;
|
||||
}
|
||||
}
|
||||
/*
|
||||
|
||||
TEST(ParseCounterTest, paramMimeType)
|
||||
{
|
||||
{
|
||||
std::string counterStr = "text/html;raw=true=1";
|
||||
CounterType counterMap = {{"foo", 1}};
|
||||
ASSERT_EQ(parse(counterStr), counterMap);
|
||||
CounterType counterMap = {{"text/html;raw=true", 1}};
|
||||
ASSERT_EQ(parse(counterStr), counterMap) << counterStr;
|
||||
}
|
||||
{
|
||||
std::string counterStr = "foo=1;text/html;raw=true=50;bar=2";
|
||||
CounterType counterMap = {{"foo", 1}, {"text/html;raw=true", 50}, {"bar", 2}};
|
||||
ASSERT_EQ(parse(counterStr), counterMap);
|
||||
ASSERT_EQ(parse(counterStr), counterMap) << counterStr;
|
||||
}
|
||||
{
|
||||
std::string counterStr = "foo=1;text/html;raw=true;param=value=50;bar=2";
|
||||
CounterType counterMap = {{"foo", 1}, {"text/html;raw=true;param=value", 50}, {"bar", 2}};
|
||||
ASSERT_EQ(parse(counterStr), counterMap);
|
||||
ASSERT_EQ(parse(counterStr), counterMap) << counterStr;
|
||||
}
|
||||
{
|
||||
std::string counterStr = "foo=1;text/html;raw=true=50;bar=2";
|
||||
CounterType counterMap = {{"foo", 1}, {"text/html;raw=true", 50}, {"bar", 2}};
|
||||
ASSERT_EQ(parse(counterStr), counterMap);
|
||||
ASSERT_EQ(parse(counterStr), counterMap) << counterStr;
|
||||
}
|
||||
}*/
|
||||
{
|
||||
std::string counterStr = "application/javascript=8;text/html=3;application/warc-headers=28364;text/html;raw=true=6336;text/css=47;text/javascript=98;image/png=968;image/webp=24;application/json=3694;image/gif=10274;image/jpeg=1582;font/woff2=25;text/plain=284;application/atom+xml=247;application/x-www-form-urlencoded=9;video/mp4=9;application/x-javascript=7;application/xml=1;image/svg+xml=5";
|
||||
CounterType counterMap = {
|
||||
{"application/javascript", 8},
|
||||
{"text/html", 3},
|
||||
{"application/warc-headers", 28364},
|
||||
{"text/html;raw=true", 6336},
|
||||
{"text/css", 47},
|
||||
{"text/javascript", 98},
|
||||
{"image/png", 968},
|
||||
{"image/webp", 24},
|
||||
{"application/json", 3694},
|
||||
{"image/gif", 10274},
|
||||
{"image/jpeg", 1582},
|
||||
{"font/woff2", 25},
|
||||
{"text/plain", 284},
|
||||
{"application/atom+xml", 247},
|
||||
{"application/x-www-form-urlencoded", 9},
|
||||
{"video/mp4", 9},
|
||||
{"application/x-javascript", 7},
|
||||
{"application/xml", 1},
|
||||
{"image/svg+xml", 5}
|
||||
};
|
||||
ASSERT_EQ(parse(counterStr), counterMap) << counterStr;
|
||||
}
|
||||
}
|
||||
|
||||
TEST(ParseCounterTest, wrongType)
|
||||
{
|
||||
@ -96,14 +121,14 @@ TEST(ParseCounterTest, wrongType)
|
||||
CounterType counterMap = {{"text/html", 50}};
|
||||
ASSERT_EQ(parse(counterStr), counterMap) << counterStr;
|
||||
}
|
||||
/*{
|
||||
{
|
||||
std::string counterStr = "text/html;foo=20";
|
||||
ASSERT_EQ(parse(counterStr), empty) << counterStr;
|
||||
}
|
||||
{
|
||||
std::string counterStr = "text/html;foo=20;";
|
||||
ASSERT_EQ(parse(counterStr), empty) << counterStr;
|
||||
}*/
|
||||
}
|
||||
{
|
||||
std::string counterStr = "text/html=50;;foo";
|
||||
CounterType counterMap = {{"text/html", 50}};
|
||||
|
@ -23,7 +23,7 @@
|
||||
|
||||
namespace kiwix {
|
||||
std::string join(const std::vector<std::string>& list, const std::string& sep);
|
||||
std::vector<std::string> split(const std::string& base, const std::string& sep, bool trimEmpty);
|
||||
std::vector<std::string> split(const std::string& base, const std::string& sep, bool trimEmpty, bool keepDelim);
|
||||
};
|
||||
|
||||
using namespace kiwix;
|
||||
@ -40,17 +40,22 @@ TEST(stringTools, join)
|
||||
TEST(stringTools, split)
|
||||
{
|
||||
std::vector<std::string> list1 = { "a", "b", "c" };
|
||||
ASSERT_EQ(split("a;b;c", ";", false), list1);
|
||||
ASSERT_EQ(split("a;b;c", ";", true), list1);
|
||||
ASSERT_EQ(split("a;b;c", ";", false, false), list1);
|
||||
ASSERT_EQ(split("a;b;c", ";", true, false), list1);
|
||||
std::vector<std::string> list2 = { "", "a", "b", "c" };
|
||||
ASSERT_EQ(split(";a;b;c", ";", false), list2);
|
||||
ASSERT_EQ(split(";a;b;c", ";", true), list1);
|
||||
ASSERT_EQ(split(";a;b;c", ";", false, false), list2);
|
||||
ASSERT_EQ(split(";a;b;c", ";", true, false), list1);
|
||||
std::vector<std::string> list3 = { "", "a", "b", "c", ""};
|
||||
ASSERT_EQ(split(";a;b;c;", ";", false), list3);
|
||||
ASSERT_EQ(split(";a;b;c;", ";", true), list1);
|
||||
ASSERT_EQ(split(";a;b;c;", ";", false, false), list3);
|
||||
ASSERT_EQ(split(";a;b;c;", ";", true, false), list1);
|
||||
std::vector<std::string> list4 = { "", "a", "b", "", "c", ""};
|
||||
ASSERT_EQ(split(";a;b;;c;", ";", false), list4);
|
||||
ASSERT_EQ(split(";a;b;;c;", ";", true), list1);
|
||||
ASSERT_EQ(split(";a;b;;c;", ";", false, false), list4);
|
||||
ASSERT_EQ(split(";a;b;;c;", ";", true, false), list1);
|
||||
|
||||
std::vector<std::string> list5 = { ";", "a", ";", "b", "=", ";", "c", "=", "d", ";"};
|
||||
ASSERT_EQ(split(";a;b=;c=d;", ";=", true, true), list5);
|
||||
std::vector<std::string> list6 = { "", ";", "a", ";", "b", "=", "", ";", "c", "=", "d", ";", ""};
|
||||
ASSERT_EQ(split(";a;b=;c=d;", ";=", false, true), list6);
|
||||
}
|
||||
|
||||
};
|
||||
|
Reference in New Issue
Block a user