Merge pull request #1067 from kiwix/stricter_namemapper

This commit is contained in:
Matthieu Gautier 2024-03-06 14:24:41 +01:00 committed by GitHub
commit a8368b3a0d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 97 additions and 47 deletions

View File

@ -54,6 +54,9 @@ class HumanReadableNameMapper : public NameMapper {
virtual ~HumanReadableNameMapper() = default; virtual ~HumanReadableNameMapper() = default;
virtual std::string getNameForId(const std::string& id) const; virtual std::string getNameForId(const std::string& id) const;
virtual std::string getIdForName(const std::string& name) const; virtual std::string getIdForName(const std::string& name) const;
private:
void mapName(const kiwix::Library& lib, std::string name, std::string id);
}; };
class UpdatableNameMapper : public NameMapper { class UpdatableNameMapper : public NameMapper {

View File

@ -29,25 +29,29 @@ HumanReadableNameMapper::HumanReadableNameMapper(kiwix::Library& library, bool w
auto& currentBook = library.getBookById(bookId); auto& currentBook = library.getBookById(bookId);
auto bookName = currentBook.getHumanReadableIdFromPath(); auto bookName = currentBook.getHumanReadableIdFromPath();
m_idToName[bookId] = bookName; m_idToName[bookId] = bookName;
m_nameToId[bookName] = bookId; mapName(library, bookName, bookId);
if (!withAlias) if (!withAlias)
continue; continue;
auto aliasName = replaceRegex(bookName, "", "_[[:digit:]]{4}-[[:digit:]]{2}$"); auto aliasName = replaceRegex(bookName, "", "_[[:digit:]]{4}-[[:digit:]]{2}$");
if (aliasName == bookName) { if (aliasName != bookName) {
continue; mapName(library, aliasName, bookId);
} }
if (m_nameToId.find(aliasName) == m_nameToId.end()) { }
m_nameToId[aliasName] = bookId; }
void HumanReadableNameMapper::mapName(const Library& library, std::string name, std::string bookId) {
if (m_nameToId.find(name) == m_nameToId.end()) {
m_nameToId[name] = bookId;
} else { } else {
auto alreadyPresentPath = library.getBookById(m_nameToId[aliasName]).getPath(); const auto& currentBook = library.getBookById(bookId);
std::cerr << "Path collision: " << alreadyPresentPath auto alreadyPresentPath = library.getBookById(m_nameToId[name]).getPath();
<< " and " << currentBook.getPath() std::cerr << "Path collision: '" << alreadyPresentPath
<< " can't share the same URL path '" << aliasName << "'." << "' and '" << currentBook.getPath()
<< " Therefore, only " << alreadyPresentPath << "' can't share the same URL path '" << name << "'."
<< " will be served." << std::endl; << " Therefore, only '" << alreadyPresentPath
} << "' will be served." << std::endl;
} }
} }

View File

@ -1,8 +1,8 @@
<library version="1.0"> <library version="1.0">
<book <book
id="raycharles" id="raycharles"
path="./zimfile.zim" path="./zimfile_raycharles.zim"
url="https://github.com/kiwix/libkiwix/raw/master/test/data/zimfile.zim" url="https://github.com/kiwix/libkiwix/raw/master/test/data/zimfile_raycharles.zim"
title="Ray Charles" title="Ray Charles"
description="Wikipedia articles about Ray Charles" description="Wikipedia articles about Ray Charles"
language="eng" language="eng"
@ -19,8 +19,8 @@
></book> ></book>
<book <book
id="raycharles_uncategorized" id="raycharles_uncategorized"
path="./zimfile.zim" path="./zimfile_raycharles_uncategorized.zim"
url="https://github.com/kiwix/libkiwix/raw/master/test/data/zimfile.zim" url="https://github.com/kiwix/libkiwix/raw/master/test/data/zimfile_raycharles_uncategorized.zim"
title="Ray (uncategorized) Charles" title="Ray (uncategorized) Charles"
description="No category is assigned to this library entry." description="No category is assigned to this library entry."
language="rus,eng" language="rus,eng"

View File

@ -0,0 +1 @@
zimfile.zim

View File

@ -0,0 +1 @@
zimfile.zim

View File

@ -129,11 +129,11 @@ std::string maskVariableOPDSFeedData(std::string s)
" href=\"/ROOT%23%3F/catalog/v2/illustration/raycharles/?size=48\"\n" \ " href=\"/ROOT%23%3F/catalog/v2/illustration/raycharles/?size=48\"\n" \
" type=\"image/png;width=48;height=48;scale=1\"/>\n ", \ " type=\"image/png;width=48;height=48;scale=1\"/>\n ", \
CONTENT_NAME, \ CONTENT_NAME, \
"zimfile", \ "zimfile_raycharles", \
"569344"\ "569344"\
) )
#define RAY_CHARLES_CATALOG_ENTRY _RAY_CHARLES_CATALOG_ENTRY("zimfile") #define RAY_CHARLES_CATALOG_ENTRY _RAY_CHARLES_CATALOG_ENTRY("zimfile_raycharles")
#define RAY_CHARLES_CATALOG_ENTRY_NO_MAPPER _RAY_CHARLES_CATALOG_ENTRY("raycharles") #define RAY_CHARLES_CATALOG_ENTRY_NO_MAPPER _RAY_CHARLES_CATALOG_ENTRY("raycharles")
#define UNCATEGORIZED_RAY_CHARLES_CATALOG_ENTRY CATALOG_ENTRY(\ #define UNCATEGORIZED_RAY_CHARLES_CATALOG_ENTRY CATALOG_ENTRY(\
@ -145,8 +145,8 @@ std::string maskVariableOPDSFeedData(std::string s)
"",\ "",\
"public_tag_with_a_value:value_of_a_public_tag;_private_tag_with_a_value:value_of_a_private_tag;wikipedia;_pictures:no;_videos:no;_details:no",\ "public_tag_with_a_value:value_of_a_public_tag;_private_tag_with_a_value:value_of_a_private_tag;wikipedia;_pictures:no;_videos:no;_details:no",\
"",\ "",\
"zimfile", \ "zimfile_raycharles_uncategorized", \
"zimfile", \ "zimfile_raycharles_uncategorized", \
"125952"\ "125952"\
) )
@ -1110,10 +1110,10 @@ TEST_F(LibraryServerTest, no_name_mapper_catalog_v2_individual_entry_access)
" <div class=\"book__link__wrapper\">\n" \ " <div class=\"book__link__wrapper\">\n" \
" <div class=\"book__icon\" style=background-image:url(/ROOT%23%3F/catalog/v2/illustration/raycharles/?size=48)></div>\n" \ " <div class=\"book__icon\" style=background-image:url(/ROOT%23%3F/catalog/v2/illustration/raycharles/?size=48)></div>\n" \
" <div class=\"book__header\">\n" \ " <div class=\"book__header\">\n" \
" <div id=\"book__title\"><a href=\"/ROOT%23%3F/content/zimfile\">Ray Charles</a></div>\n" \ " <div id=\"book__title\"><a href=\"/ROOT%23%3F/content/zimfile_raycharles\">Ray Charles</a></div>\n" \
" <div class=\"book__download\"><span><a href=\"/ROOT%23%3F/nojs/download/zimfile\">Download</a></span></div>\n" \ " <div class=\"book__download\"><span><a href=\"/ROOT%23%3F/nojs/download/zimfile_raycharles\">Download</a></span></div>\n" \
" </div>\n" \ " </div>\n" \
" <a class=\"book__link\" href=\"/ROOT%23%3F/content/zimfile\" title=\"Preview\" aria-label=\"Preview\">\n" \ " <a class=\"book__link\" href=\"/ROOT%23%3F/content/zimfile_raycharles\" title=\"Preview\" aria-label=\"Preview\">\n" \
" <div class=\"book__description\" title=\"Wikipedia articles about Ray Charles\">Wikipedia articles about Ray Charles</div>\n" \ " <div class=\"book__description\" title=\"Wikipedia articles about Ray Charles\">Wikipedia articles about Ray Charles</div>\n" \
" </a>\n" \ " </a>\n" \
" </div>\n" \ " </div>\n" \
@ -1130,10 +1130,10 @@ TEST_F(LibraryServerTest, no_name_mapper_catalog_v2_individual_entry_access)
" <div class=\"book__link__wrapper\">\n" \ " <div class=\"book__link__wrapper\">\n" \
" <div class=\"book__icon\" style=background-image:url(/ROOT%23%3F/catalog/v2/illustration/raycharles_uncategorized/?size=48)></div>\n" \ " <div class=\"book__icon\" style=background-image:url(/ROOT%23%3F/catalog/v2/illustration/raycharles_uncategorized/?size=48)></div>\n" \
" <div class=\"book__header\">\n" \ " <div class=\"book__header\">\n" \
" <div id=\"book__title\"><a href=\"/ROOT%23%3F/content/zimfile\">Ray (uncategorized) Charles</a></div>\n" \ " <div id=\"book__title\"><a href=\"/ROOT%23%3F/content/zimfile_raycharles_uncategorized\">Ray (uncategorized) Charles</a></div>\n" \
" <div class=\"book__download\"><span><a href=\"/ROOT%23%3F/nojs/download/zimfile\">Download</a></span></div>\n" \ " <div class=\"book__download\"><span><a href=\"/ROOT%23%3F/nojs/download/zimfile_raycharles_uncategorized\">Download</a></span></div>\n" \
" </div>\n" \ " </div>\n" \
" <a class=\"book__link\" href=\"/ROOT%23%3F/content/zimfile\" title=\"Preview\" aria-label=\"Preview\">\n" \ " <a class=\"book__link\" href=\"/ROOT%23%3F/content/zimfile_raycharles_uncategorized\" title=\"Preview\" aria-label=\"Preview\">\n" \
" <div class=\"book__description\" title=\"No category is assigned to this library entry.\">No category is assigned to this library entry.</div>\n" \ " <div class=\"book__description\" title=\"No category is assigned to this library entry.\">No category is assigned to this library entry.</div>\n" \
" </a>\n" \ " </a>\n" \
" </div>\n" \ " </div>\n" \
@ -1224,16 +1224,16 @@ TEST_F(LibraryServerTest, no_name_mapper_catalog_v2_individual_entry_access)
" <div class=\"downloadLinksTitle\">\n" \ " <div class=\"downloadLinksTitle\">\n" \
" Download links for <b><i>Ray (uncategorized) Charles</i></b>\n" \ " Download links for <b><i>Ray (uncategorized) Charles</i></b>\n" \
" </div>\n" \ " </div>\n" \
" <a href=\"https://github.com/kiwix/libkiwix/raw/master/test/data/zimfile.zim\" download>\n" \ " <a href=\"https://github.com/kiwix/libkiwix/raw/master/test/data/zimfile_raycharles_uncategorized.zim\" download>\n" \
" <div>Direct</div>\n" \ " <div>Direct</div>\n" \
" </a>\n" \ " </a>\n" \
" <a href=\"https://github.com/kiwix/libkiwix/raw/master/test/data/zimfile.zim.sha256\" download>\n" \ " <a href=\"https://github.com/kiwix/libkiwix/raw/master/test/data/zimfile_raycharles_uncategorized.zim.sha256\" download>\n" \
" <div>Sha256 hash</div>\n" \ " <div>Sha256 hash</div>\n" \
" </a>\n" \ " </a>\n" \
" <a href=\"https://github.com/kiwix/libkiwix/raw/master/test/data/zimfile.zim.magnet\" target=\"_blank\">\n" \ " <a href=\"https://github.com/kiwix/libkiwix/raw/master/test/data/zimfile_raycharles_uncategorized.zim.magnet\" target=\"_blank\">\n" \
" <div>Magnet link</div>\n" \ " <div>Magnet link</div>\n" \
" </a>\n" \ " </a>\n" \
" <a href=\"https://github.com/kiwix/libkiwix/raw/master/test/data/zimfile.zim.torrent\" download>\n" \ " <a href=\"https://github.com/kiwix/libkiwix/raw/master/test/data/zimfile_raycharles_uncategorized.zim.torrent\" download>\n" \
" <div>Torrent file</div>\n" \ " <div>Torrent file</div>\n" \
" </a>\n" \ " </a>\n" \
"</body>\n" \ "</body>\n" \
@ -1273,7 +1273,7 @@ TEST_F(LibraryServerTest, noJS) {
FINAL_HTML_TEXT); FINAL_HTML_TEXT);
// no_js_download // no_js_download
r = zfs1_->GET("/ROOT%23%3F/nojs/download/zimfile"); r = zfs1_->GET("/ROOT%23%3F/nojs/download/zimfile_raycharles_uncategorized");
EXPECT_EQ(r->status, 200); EXPECT_EQ(r->status, 200);
EXPECT_EQ(r->body, RAY_CHARLES_UNCTZ_DOWNLOAD); EXPECT_EQ(r->body, RAY_CHARLES_UNCTZ_DOWNLOAD);
} }

View File

@ -38,6 +38,8 @@ if gtest_dep.found() and not meson.is_cross_build()
'example.zim', 'example.zim',
'zimfile.zim', 'zimfile.zim',
'zimfile&other.zim', 'zimfile&other.zim',
'zimfile_raycharles.zim',
'zimfile_raycharles_uncategorized.zim',
'corner_cases#&.zim', 'corner_cases#&.zim',
'poor.zim', 'poor.zim',
'library.xml', 'library.xml',

View File

@ -14,6 +14,12 @@ const char libraryXML[] = R"(
<book id="03" path="/data/ZERO thrêë.zim"> </book> <book id="03" path="/data/ZERO thrêë.zim"> </book>
<book id="04-2021-10" path="/data/zero_four_2021-10.zim"></book> <book id="04-2021-10" path="/data/zero_four_2021-10.zim"></book>
<book id="04-2021-11" path="/data/zero_four_2021-11.zim"></book> <book id="04-2021-11" path="/data/zero_four_2021-11.zim"></book>
<book id="05-a" path="/data/zero_five-a.zim" name="zero_five"></book>
<book id="05-b" path="/data/zero_five-b.zim" name="zero_five"></book>
<book id="06+" path="/data/zërô + SIX.zim"></book>
<book id="06plus" path="/data/zero_plus_six.zim"></book>
<book id="07-super" path="/data/zero_seven.zim"></book>
<book id="07-sub" path="/data/subdir/zero_seven.zim"></book>
</library> </library>
)"; )";
@ -55,6 +61,31 @@ public:
operator std::string() const { return buffer.str(); } operator std::string() const { return buffer.str(); }
}; };
const std::string ZERO_FOUR_NAME_CONFLICT_MSG =
"Path collision: '/data/zero_four_2021-10.zim' and"
" '/data/zero_four_2021-11.zim' can't share the same URL path 'zero_four'."
" Therefore, only '/data/zero_four_2021-10.zim' will be served.\n";
const std::string ZERO_SIX_NAME_CONFLICT_MSG =
"Path collision: '/data/zërô + SIX.zim' and "
"'/data/zero_plus_six.zim' can't share the same URL path 'zero_plus_six'."
" Therefore, only '/data/zërô + SIX.zim' will be served.\n";
const std::string ZERO_SEVEN_NAME_CONFLICT_MSG =
"Path collision: '/data/subdir/zero_seven.zim' and"
" '/data/zero_seven.zim' can't share the same URL path 'zero_seven'."
" Therefore, only '/data/subdir/zero_seven.zim' will be served.\n";
// Name conflicts in the default mode (i.e. when the --nodatealiases option is off)
const std::string DEFAULT_NAME_CONFLICTS = ZERO_SIX_NAME_CONFLICT_MSG
+ ZERO_SEVEN_NAME_CONFLICT_MSG;
// Name conflicts in --nodatealiases mode
const std::string ALL_NAME_CONFLICTS = ZERO_FOUR_NAME_CONFLICT_MSG
+ ZERO_SIX_NAME_CONFLICT_MSG
+ ZERO_SEVEN_NAME_CONFLICT_MSG;
} // unnamed namespace } // unnamed namespace
void checkUnaliasedEntriesInNameMapper(const kiwix::NameMapper& nm) void checkUnaliasedEntriesInNameMapper(const kiwix::NameMapper& nm)
@ -64,19 +95,37 @@ void checkUnaliasedEntriesInNameMapper(const kiwix::NameMapper& nm)
EXPECT_EQ("zero_three", nm.getNameForId("03")); EXPECT_EQ("zero_three", nm.getNameForId("03"));
EXPECT_EQ("zero_four_2021-10", nm.getNameForId("04-2021-10")); EXPECT_EQ("zero_four_2021-10", nm.getNameForId("04-2021-10"));
EXPECT_EQ("zero_four_2021-11", nm.getNameForId("04-2021-11")); EXPECT_EQ("zero_four_2021-11", nm.getNameForId("04-2021-11"));
EXPECT_EQ("zero_five-a", nm.getNameForId("05-a"));
EXPECT_EQ("zero_five-b", nm.getNameForId("05-b"));
// unreported conflict
EXPECT_EQ("zero_plus_six", nm.getNameForId("06+"));
EXPECT_EQ("zero_plus_six", nm.getNameForId("06plus"));
// unreported conflict
EXPECT_EQ("zero_seven", nm.getNameForId("07-super"));
EXPECT_EQ("zero_seven", nm.getNameForId("07-sub"));
EXPECT_EQ("01", nm.getIdForName("zero_one")); EXPECT_EQ("01", nm.getIdForName("zero_one"));
EXPECT_EQ("02", nm.getIdForName("zero_two")); EXPECT_EQ("02", nm.getIdForName("zero_two"));
EXPECT_EQ("03", nm.getIdForName("zero_three")); EXPECT_EQ("03", nm.getIdForName("zero_three"));
EXPECT_EQ("04-2021-10", nm.getIdForName("zero_four_2021-10")); EXPECT_EQ("04-2021-10", nm.getIdForName("zero_four_2021-10"));
EXPECT_EQ("04-2021-11", nm.getIdForName("zero_four_2021-11")); EXPECT_EQ("04-2021-11", nm.getIdForName("zero_four_2021-11"));
// book name doesn't participate in name mapping
EXPECT_THROW(nm.getIdForName("zero_five"), std::out_of_range);
EXPECT_EQ("05-a", nm.getIdForName("zero_five-a"));
EXPECT_EQ("05-b", nm.getIdForName("zero_five-b"));
EXPECT_EQ("06+", nm.getIdForName("zero_plus_six"));
EXPECT_EQ("07-sub", nm.getIdForName("zero_seven"));
} }
TEST_F(NameMapperTest, HumanReadableNameMapperWithoutAliases) TEST_F(NameMapperTest, HumanReadableNameMapperWithoutAliases)
{ {
CapturedStderr stderror; CapturedStderr stderror;
kiwix::HumanReadableNameMapper nm(*lib, false); kiwix::HumanReadableNameMapper nm(*lib, false);
EXPECT_EQ("", std::string(stderror)); EXPECT_EQ(DEFAULT_NAME_CONFLICTS, std::string(stderror));
checkUnaliasedEntriesInNameMapper(nm); checkUnaliasedEntriesInNameMapper(nm);
EXPECT_THROW(nm.getIdForName("zero_four"), std::out_of_range); EXPECT_THROW(nm.getIdForName("zero_four"), std::out_of_range);
@ -91,12 +140,7 @@ TEST_F(NameMapperTest, HumanReadableNameMapperWithAliases)
{ {
CapturedStderr stderror; CapturedStderr stderror;
kiwix::HumanReadableNameMapper nm(*lib, true); kiwix::HumanReadableNameMapper nm(*lib, true);
EXPECT_EQ( EXPECT_EQ(ALL_NAME_CONFLICTS, std::string(stderror));
"Path collision: /data/zero_four_2021-10.zim and"
" /data/zero_four_2021-11.zim can't share the same URL path 'zero_four'."
" Therefore, only /data/zero_four_2021-10.zim will be served.\n"
, std::string(stderror)
);
checkUnaliasedEntriesInNameMapper(nm); checkUnaliasedEntriesInNameMapper(nm);
EXPECT_EQ("04-2021-10", nm.getIdForName("zero_four")); EXPECT_EQ("04-2021-10", nm.getIdForName("zero_four"));
@ -111,7 +155,7 @@ TEST_F(NameMapperTest, UpdatableNameMapperWithoutAliases)
{ {
CapturedStderr stderror; CapturedStderr stderror;
kiwix::UpdatableNameMapper nm(lib, false); kiwix::UpdatableNameMapper nm(lib, false);
EXPECT_EQ("", std::string(stderror)); EXPECT_EQ(DEFAULT_NAME_CONFLICTS, std::string(stderror));
checkUnaliasedEntriesInNameMapper(nm); checkUnaliasedEntriesInNameMapper(nm);
EXPECT_THROW(nm.getIdForName("zero_four"), std::out_of_range); EXPECT_THROW(nm.getIdForName("zero_four"), std::out_of_range);
@ -127,12 +171,7 @@ TEST_F(NameMapperTest, UpdatableNameMapperWithAliases)
{ {
CapturedStderr stderror; CapturedStderr stderror;
kiwix::UpdatableNameMapper nm(lib, true); kiwix::UpdatableNameMapper nm(lib, true);
EXPECT_EQ( EXPECT_EQ(ALL_NAME_CONFLICTS, std::string(stderror));
"Path collision: /data/zero_four_2021-10.zim and"
" /data/zero_four_2021-11.zim can't share the same URL path 'zero_four'."
" Therefore, only /data/zero_four_2021-10.zim will be served.\n"
, std::string(stderror)
);
checkUnaliasedEntriesInNameMapper(nm); checkUnaliasedEntriesInNameMapper(nm);
EXPECT_EQ("04-2021-10", nm.getIdForName("zero_four")); EXPECT_EQ("04-2021-10", nm.getIdForName("zero_four"));
@ -141,7 +180,7 @@ TEST_F(NameMapperTest, UpdatableNameMapperWithAliases)
CapturedStderr nmUpdateStderror; CapturedStderr nmUpdateStderror;
lib->removeBookById("04-2021-10"); lib->removeBookById("04-2021-10");
nm.update(); nm.update();
EXPECT_EQ("", std::string(nmUpdateStderror)); EXPECT_EQ(DEFAULT_NAME_CONFLICTS, std::string(nmUpdateStderror));
} }
EXPECT_EQ("04-2021-11", nm.getIdForName("zero_four")); EXPECT_EQ("04-2021-11", nm.getIdForName("zero_four"));
EXPECT_THROW(nm.getNameForId("04-2021-10"), std::out_of_range); EXPECT_THROW(nm.getNameForId("04-2021-10"), std::out_of_range);