From 0e48baf9f95d7b9c10f4f984a3e4609297a27b6c Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Wed, 20 Oct 2021 22:20:04 +0400 Subject: [PATCH 01/24] Simplified Library::getReaderById() Reused `Library::getArchiveById()` in `Library::getReaderById()`. --- src/library.cpp | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/src/library.cpp b/src/library.cpp index 1882574d6..c411f56f4 100644 --- a/src/library.cpp +++ b/src/library.cpp @@ -148,19 +148,11 @@ std::shared_ptr Library::getReaderById(const std::string& id) return m_readers.at(id); } catch (std::out_of_range& e) {} - try { - auto reader = make_shared(m_archives.at(id)); - m_readers[id] = reader; - return reader; - } catch (std::out_of_range& e) {} - - auto book = getBookById(id); - if (!book.isPathValid()) + const auto archive = getArchiveById(id); + if ( !archive ) return nullptr; - auto archive = make_shared(book.getPath()); - m_archives[id] = archive; - auto reader = make_shared(archive); + const auto reader = make_shared(archive); m_readers[id] = reader; return reader; } From 913a368a123743b691ea664938889f2cf3468c9e Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Sat, 20 Nov 2021 20:34:13 +0400 Subject: [PATCH 02/24] Made Manager's ctors explicit --- include/manager.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/manager.h b/include/manager.h index 49bc6c8d8..d9952a40a 100644 --- a/include/manager.h +++ b/include/manager.h @@ -62,8 +62,8 @@ class DefaultLibraryManipulator : public LibraryManipulator { class Manager { public: - Manager(LibraryManipulator* manipulator); - Manager(Library* library); + explicit Manager(LibraryManipulator* manipulator); + explicit Manager(Library* library); ~Manager(); /** From 571e417d1edaf7a11a630c1511d63c655c53b82b Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Sat, 20 Nov 2021 20:38:39 +0400 Subject: [PATCH 03/24] Manager is now safe to copy --- include/manager.h | 5 ++--- src/manager.cpp | 24 +++++++++++++----------- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/include/manager.h b/include/manager.h index d9952a40a..af12eac02 100644 --- a/include/manager.h +++ b/include/manager.h @@ -26,6 +26,7 @@ #include #include +#include namespace pugi { class xml_document; @@ -64,7 +65,6 @@ class Manager public: explicit Manager(LibraryManipulator* manipulator); explicit Manager(Library* library); - ~Manager(); /** * Read a `library.xml` and add book in the file to the library. @@ -150,8 +150,7 @@ class Manager uint64_t m_itemsPerPage = 0; protected: - kiwix::LibraryManipulator* manipulator; - bool mustDeleteManipulator; + std::shared_ptr manipulator; bool readBookFromPath(const std::string& path, Book* book); bool parseXmlDom(const pugi::xml_document& doc, diff --git a/src/manager.cpp b/src/manager.cpp index 550842cd0..dbd094c2b 100644 --- a/src/manager.cpp +++ b/src/manager.cpp @@ -26,28 +26,30 @@ namespace kiwix { + +namespace +{ + +struct NoDelete +{ + template void operator()(T*) {} +}; + +} // unnamed namespace + /* Constructor */ Manager::Manager(LibraryManipulator* manipulator): writableLibraryPath(""), - manipulator(manipulator), - mustDeleteManipulator(false) + manipulator(manipulator, NoDelete()) { } Manager::Manager(Library* library) : writableLibraryPath(""), - manipulator(new DefaultLibraryManipulator(library)), - mustDeleteManipulator(true) + manipulator(new DefaultLibraryManipulator(library)) { } -/* Destructor */ -Manager::~Manager() -{ - if (mustDeleteManipulator) { - delete manipulator; - } -} bool Manager::parseXmlDom(const pugi::xml_document& doc, bool readOnly, const std::string& libraryPath, From 3296a020a1d93e5e8a40fbbcb3c211fb5f3c225e Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Mon, 22 Nov 2021 17:38:33 +0400 Subject: [PATCH 04/24] Testing of Book::getHumanReadableIdFromPath() New unit test BookTest.getHumanReadableIdFromPath revealed a bug in `Book::getHumanReadableIdFromPath()`. --- test/book.cpp | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/test/book.cpp b/test/book.cpp index f477b6765..2de31cfe5 100644 --- a/test/book.cpp +++ b/test/book.cpp @@ -178,3 +178,32 @@ TEST(BookTest, updateTest) EXPECT_EQ(newBook.getFavicon(), book.getFavicon()); EXPECT_EQ(newBook.getFaviconMimeType(), book.getFaviconMimeType()); } + +namespace +{ + +std::string path2HumanReadableId(const std::string& path) +{ + const XMLDoc xml(""); + + kiwix::Book book; + book.updateFromXml(xml.child("book"), "/data/zim"); + return book.getHumanReadableIdFromPath(); +} + +} // unnamed namespace + +TEST(BookTest, getHumanReadableIdFromPath) +{ + EXPECT_EQ("abc", path2HumanReadableId("abc.zim")); + EXPECT_EQ("abc", path2HumanReadableId("ABC.zim")); + EXPECT_EQ("abc", path2HumanReadableId("âbç.zim")); + EXPECT_EQ("ancient", path2HumanReadableId("ancient.zimbabwe")); + EXPECT_EQ("ab_cd", path2HumanReadableId("ab cd.zim")); +#ifdef _WIN32 + EXPECT_EQ("abc", path2HumanReadableId("C:\\Data\\ZIM\\abc.zim")); +#else + EXPECT_EQ("abc", path2HumanReadableId("/Data/ZIM/abc.zim")); +#endif + EXPECT_EQ("3plus2", path2HumanReadableId("3+2.zim")); +} From 339f845fb033f9d7fd42cbde33a90e79277690af Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Mon, 22 Nov 2021 17:39:45 +0400 Subject: [PATCH 05/24] Bugfix in Book::getHumanReadableIdFromPath() --- src/book.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/book.cpp b/src/book.cpp index 63ebbec19..13128565e 100644 --- a/src/book.cpp +++ b/src/book.cpp @@ -190,7 +190,7 @@ std::string Book::getHumanReadableIdFromPath() const { std::string id = m_path; if (!id.empty()) { - kiwix::removeAccents(id); + id = kiwix::removeAccents(id); #ifdef _WIN32 id = replaceRegex(id, "", "^.*\\\\"); From d62c4fd5213a89ca8a116c5993685c95f11e8a8a Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Mon, 22 Nov 2021 18:24:25 +0400 Subject: [PATCH 06/24] Testing of HumanReadableNameMapper --- test/meson.build | 1 + test/name_mapper.cpp | 109 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 110 insertions(+) create mode 100644 test/name_mapper.cpp diff --git a/test/meson.build b/test/meson.build index 1502c12fb..30b19b8e6 100644 --- a/test/meson.build +++ b/test/meson.build @@ -8,6 +8,7 @@ tests = [ 'kiwixserve', 'book', 'manager', + 'name_mapper', 'opds_catalog', 'reader', 'searcher' diff --git a/test/name_mapper.cpp b/test/name_mapper.cpp new file mode 100644 index 000000000..c6b6d6c3f --- /dev/null +++ b/test/name_mapper.cpp @@ -0,0 +1,109 @@ +#include "../include/name_mapper.h" + +#include "../include/library.h" +#include "../include/manager.h" +#include "gtest/gtest.h" + +namespace +{ + +const char libraryXML[] = R"( + + + + + + + +)"; + +class NameMapperTest : public ::testing::Test { + protected: + void SetUp() override { + kiwix::Manager manager(&lib); + manager.readXml(libraryXML, true, "./library.xml", true); + for ( const std::string& id : lib.getBooksIds() ) { + lib.getBookById(id).setPathValid(true); + } + } + + kiwix::Library lib; +}; + +class CapturedStderr +{ + std::ostringstream buffer; + std::streambuf* const sbuf; +public: + CapturedStderr() + : sbuf(std::cerr.rdbuf()) + { + std::cerr.rdbuf(buffer.rdbuf()); + } + + CapturedStderr(const CapturedStderr&) = delete; + + ~CapturedStderr() + { + std::cerr.rdbuf(sbuf); + } + + operator std::string() const { return buffer.str(); } +}; + +} // unnamed namespace + +TEST_F(NameMapperTest, HumanReadableNameMapperWithoutAliases) +{ + CapturedStderr stderror; + kiwix::HumanReadableNameMapper nm(lib, false); + EXPECT_EQ("", std::string(stderror)); + + EXPECT_EQ("zero_one", nm.getNameForId("01")); + EXPECT_EQ("zero_two", nm.getNameForId("02")); + EXPECT_EQ("zero_three", nm.getNameForId("03")); + EXPECT_EQ("zero_four_2021-10", nm.getNameForId("04-2021-10")); + EXPECT_EQ("zero_four_2021-11", nm.getNameForId("04-2021-11")); + + EXPECT_EQ("01", nm.getIdForName("zero_one")); + EXPECT_EQ("02", nm.getIdForName("zero_two")); + EXPECT_EQ("03", nm.getIdForName("zero_three")); + EXPECT_EQ("04-2021-10", nm.getIdForName("zero_four_2021-10")); + EXPECT_EQ("04-2021-11", nm.getIdForName("zero_four_2021-11")); + EXPECT_THROW(nm.getIdForName("zero_four"), std::out_of_range); + + lib.removeBookById("04-2021-10"); + EXPECT_EQ("zero_four_2021-10", nm.getNameForId("04-2021-10")); + EXPECT_EQ("04-2021-10", nm.getIdForName("zero_four_2021-10")); + EXPECT_THROW(nm.getIdForName("zero_four"), std::out_of_range); +} + +TEST_F(NameMapperTest, HumanReadableNameMapperWithAliases) +{ + CapturedStderr stderror; + kiwix::HumanReadableNameMapper nm(lib, true); + EXPECT_EQ( + "Path collision: /data/zero_four_2021-10.zim and" + " /data/zero_four_2021-11.zim can't share the same URL path 'zero_four'." + " Therefore, only /data/zero_four_2021-10.zim will be served.\n" + , std::string(stderror) + ); + + EXPECT_EQ("zero_one", nm.getNameForId("01")); + EXPECT_EQ("zero_two", nm.getNameForId("02")); + EXPECT_EQ("zero_three", nm.getNameForId("03")); + EXPECT_EQ("zero_four_2021-10", nm.getNameForId("04-2021-10")); + EXPECT_EQ("zero_four_2021-11", nm.getNameForId("04-2021-11")); + + EXPECT_EQ("01", nm.getIdForName("zero_one")); + EXPECT_EQ("02", nm.getIdForName("zero_two")); + EXPECT_EQ("03", nm.getIdForName("zero_three")); + EXPECT_EQ("04-2021-10", nm.getIdForName("zero_four_2021-10")); + EXPECT_EQ("04-2021-11", nm.getIdForName("zero_four_2021-11")); + EXPECT_EQ("04-2021-10", nm.getIdForName("zero_four")); + + lib.removeBookById("04-2021-10"); + EXPECT_EQ("zero_four_2021-10", nm.getNameForId("04-2021-10")); + EXPECT_EQ("04-2021-10", nm.getIdForName("zero_four_2021-10")); + EXPECT_EQ("04-2021-10", nm.getIdForName("zero_four")); +} From 5f3c34ed93d6d14a08c701e2c4a2feedb0670414 Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Mon, 22 Nov 2021 21:06:27 +0400 Subject: [PATCH 07/24] NameMapper's API is now const --- include/name_mapper.h | 14 ++++++-------- src/name_mapper.cpp | 4 ++-- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/include/name_mapper.h b/include/name_mapper.h index 3df594c12..247a19b9f 100644 --- a/include/name_mapper.h +++ b/include/name_mapper.h @@ -31,15 +31,15 @@ class Library; class NameMapper { public: virtual ~NameMapper() = default; - virtual std::string getNameForId(const std::string& id) = 0; - virtual std::string getIdForName(const std::string& name) = 0; + virtual std::string getNameForId(const std::string& id) const = 0; + virtual std::string getIdForName(const std::string& name) const = 0; }; class IdNameMapper : public NameMapper { public: - virtual std::string getNameForId(const std::string& id) { return id; }; - virtual std::string getIdForName(const std::string& name) { return name; }; + virtual std::string getNameForId(const std::string& id) const { return id; }; + virtual std::string getIdForName(const std::string& name) const { return name; }; }; class HumanReadableNameMapper : public NameMapper { @@ -50,12 +50,10 @@ class HumanReadableNameMapper : public NameMapper { public: HumanReadableNameMapper(kiwix::Library& library, bool withAlias); virtual ~HumanReadableNameMapper() = default; - virtual std::string getNameForId(const std::string& id); - virtual std::string getIdForName(const std::string& name); + virtual std::string getNameForId(const std::string& id) const; + virtual std::string getIdForName(const std::string& name) const; }; - - } #endif diff --git a/src/name_mapper.cpp b/src/name_mapper.cpp index 394e329d0..d700f4310 100644 --- a/src/name_mapper.cpp +++ b/src/name_mapper.cpp @@ -51,11 +51,11 @@ HumanReadableNameMapper::HumanReadableNameMapper(kiwix::Library& library, bool w } } -std::string HumanReadableNameMapper::getNameForId(const std::string& id) { +std::string HumanReadableNameMapper::getNameForId(const std::string& id) const { return m_idToName.at(id); } -std::string HumanReadableNameMapper::getIdForName(const std::string& name) { +std::string HumanReadableNameMapper::getIdForName(const std::string& name) const { return m_nameToId.at(name); } From 4ccbdcb7404245bd7880da156f4e7b501afd0af0 Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Mon, 22 Nov 2021 18:51:32 +0400 Subject: [PATCH 08/24] Code deduplication in NameMapperTest --- test/name_mapper.cpp | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/test/name_mapper.cpp b/test/name_mapper.cpp index c6b6d6c3f..b27306861 100644 --- a/test/name_mapper.cpp +++ b/test/name_mapper.cpp @@ -53,12 +53,8 @@ public: } // unnamed namespace -TEST_F(NameMapperTest, HumanReadableNameMapperWithoutAliases) +void checkUnaliasedEntriesInNameMapper(const kiwix::NameMapper& nm) { - CapturedStderr stderror; - kiwix::HumanReadableNameMapper nm(lib, false); - EXPECT_EQ("", std::string(stderror)); - EXPECT_EQ("zero_one", nm.getNameForId("01")); EXPECT_EQ("zero_two", nm.getNameForId("02")); EXPECT_EQ("zero_three", nm.getNameForId("03")); @@ -70,6 +66,15 @@ TEST_F(NameMapperTest, HumanReadableNameMapperWithoutAliases) EXPECT_EQ("03", nm.getIdForName("zero_three")); EXPECT_EQ("04-2021-10", nm.getIdForName("zero_four_2021-10")); EXPECT_EQ("04-2021-11", nm.getIdForName("zero_four_2021-11")); +} + +TEST_F(NameMapperTest, HumanReadableNameMapperWithoutAliases) +{ + CapturedStderr stderror; + kiwix::HumanReadableNameMapper nm(lib, false); + EXPECT_EQ("", std::string(stderror)); + + checkUnaliasedEntriesInNameMapper(nm); EXPECT_THROW(nm.getIdForName("zero_four"), std::out_of_range); lib.removeBookById("04-2021-10"); @@ -89,17 +94,7 @@ TEST_F(NameMapperTest, HumanReadableNameMapperWithAliases) , std::string(stderror) ); - EXPECT_EQ("zero_one", nm.getNameForId("01")); - EXPECT_EQ("zero_two", nm.getNameForId("02")); - EXPECT_EQ("zero_three", nm.getNameForId("03")); - EXPECT_EQ("zero_four_2021-10", nm.getNameForId("04-2021-10")); - EXPECT_EQ("zero_four_2021-11", nm.getNameForId("04-2021-11")); - - EXPECT_EQ("01", nm.getIdForName("zero_one")); - EXPECT_EQ("02", nm.getIdForName("zero_two")); - EXPECT_EQ("03", nm.getIdForName("zero_three")); - EXPECT_EQ("04-2021-10", nm.getIdForName("zero_four_2021-10")); - EXPECT_EQ("04-2021-11", nm.getIdForName("zero_four_2021-11")); + checkUnaliasedEntriesInNameMapper(nm); EXPECT_EQ("04-2021-10", nm.getIdForName("zero_four")); lib.removeBookById("04-2021-10"); From 8fffa599742e609c5a837b7eac25ce98c93a4a44 Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Mon, 22 Nov 2021 21:08:01 +0400 Subject: [PATCH 09/24] Added NameMapperProxy from kiwix/kiwix-desktop#714 The right place for NameMapperProxy introduced by kiwix/kiwix-desktop#714 is in libkiwix (so that it can be reused in kiwix-serve). --- include/name_mapper.h | 21 +++++++++++++++++++++ src/name_mapper.cpp | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+) diff --git a/include/name_mapper.h b/include/name_mapper.h index 247a19b9f..e7770865b 100644 --- a/include/name_mapper.h +++ b/include/name_mapper.h @@ -22,6 +22,8 @@ #include #include +#include +#include namespace kiwix { @@ -54,6 +56,25 @@ class HumanReadableNameMapper : public NameMapper { virtual std::string getIdForName(const std::string& name) const; }; +class NameMapperProxy : public NameMapper { + typedef std::shared_ptr NameMapperHandle; + public: + explicit NameMapperProxy(Library& library); + + virtual std::string getNameForId(const std::string& id) const; + virtual std::string getIdForName(const std::string& name) const; + + void update(); + + private: + NameMapperHandle currentNameMapper() const; + + private: + mutable std::mutex mutex; + Library& library; + NameMapperHandle nameMapper; +}; + } #endif diff --git a/src/name_mapper.cpp b/src/name_mapper.cpp index d700f4310..b7ec24493 100644 --- a/src/name_mapper.cpp +++ b/src/name_mapper.cpp @@ -59,4 +59,45 @@ std::string HumanReadableNameMapper::getIdForName(const std::string& name) const return m_nameToId.at(name); } +//////////////////////////////////////////////////////////////////////////////// +// NameMapperProxy +//////////////////////////////////////////////////////////////////////////////// + +NameMapperProxy::NameMapperProxy(Library& lib) + : library(lib) +{ + update(); +} + +void NameMapperProxy::update() +{ + const auto newNameMapper = new HumanReadableNameMapper(library, false); + std::lock_guard lock(mutex); + nameMapper.reset(newNameMapper); +} + +NameMapperProxy::NameMapperHandle +NameMapperProxy::currentNameMapper() const +{ + // Return a copy of the handle to the current NameMapper object. It will + // ensure that the object survives any call to NameMapperProxy::update() + // made before the completion of any pending operation on that object. + std::lock_guard lock(mutex); + return nameMapper; +} + +std::string NameMapperProxy::getNameForId(const std::string& id) const +{ + // Ensure that the current nameMapper object survives a concurrent call + // to NameMapperProxy::update() + return currentNameMapper()->getNameForId(id); +} + +std::string NameMapperProxy::getIdForName(const std::string& name) const +{ + // Ensure that the current nameMapper object survives a concurrent call + // to NameMapperProxy::update() + return currentNameMapper()->getIdForName(name); +} + } From 6199c115051f9e8720e4610bddc203f5c66651a6 Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Sun, 21 Nov 2021 19:58:20 +0400 Subject: [PATCH 10/24] NameMapperProxy respects the withAlias flag --- include/name_mapper.h | 3 ++- src/name_mapper.cpp | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/include/name_mapper.h b/include/name_mapper.h index e7770865b..cba4bbdc8 100644 --- a/include/name_mapper.h +++ b/include/name_mapper.h @@ -59,7 +59,7 @@ class HumanReadableNameMapper : public NameMapper { class NameMapperProxy : public NameMapper { typedef std::shared_ptr NameMapperHandle; public: - explicit NameMapperProxy(Library& library); + NameMapperProxy(Library& library, bool withAlias); virtual std::string getNameForId(const std::string& id) const; virtual std::string getIdForName(const std::string& name) const; @@ -73,6 +73,7 @@ class NameMapperProxy : public NameMapper { mutable std::mutex mutex; Library& library; NameMapperHandle nameMapper; + const bool withAlias; }; } diff --git a/src/name_mapper.cpp b/src/name_mapper.cpp index b7ec24493..eb82e873d 100644 --- a/src/name_mapper.cpp +++ b/src/name_mapper.cpp @@ -63,15 +63,16 @@ std::string HumanReadableNameMapper::getIdForName(const std::string& name) const // NameMapperProxy //////////////////////////////////////////////////////////////////////////////// -NameMapperProxy::NameMapperProxy(Library& lib) +NameMapperProxy::NameMapperProxy(Library& lib, bool withAlias) : library(lib) + , withAlias(withAlias) { update(); } void NameMapperProxy::update() { - const auto newNameMapper = new HumanReadableNameMapper(library, false); + const auto newNameMapper = new HumanReadableNameMapper(library, withAlias); std::lock_guard lock(mutex); nameMapper.reset(newNameMapper); } From 2d6a7fe88d4a11d25fa7b83c867e8a282bad8284 Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Mon, 22 Nov 2021 18:52:25 +0400 Subject: [PATCH 11/24] Testing of NameMapperProxy --- test/name_mapper.cpp | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/test/name_mapper.cpp b/test/name_mapper.cpp index b27306861..7410e28ac 100644 --- a/test/name_mapper.cpp +++ b/test/name_mapper.cpp @@ -102,3 +102,44 @@ TEST_F(NameMapperTest, HumanReadableNameMapperWithAliases) EXPECT_EQ("04-2021-10", nm.getIdForName("zero_four_2021-10")); EXPECT_EQ("04-2021-10", nm.getIdForName("zero_four")); } + +TEST_F(NameMapperTest, NameMapperProxyWithoutAliases) +{ + CapturedStderr stderror; + kiwix::NameMapperProxy nm(lib, false); + EXPECT_EQ("", std::string(stderror)); + + checkUnaliasedEntriesInNameMapper(nm); + EXPECT_THROW(nm.getIdForName("zero_four"), std::out_of_range); + + lib.removeBookById("04-2021-10"); + nm.update(); + EXPECT_THROW(nm.getNameForId("04-2021-10"), std::out_of_range); + EXPECT_THROW(nm.getIdForName("zero_four_2021-10"), std::out_of_range); + EXPECT_THROW(nm.getIdForName("zero_four"), std::out_of_range); +} + +TEST_F(NameMapperTest, NameMapperProxyWithAliases) +{ + CapturedStderr stderror; + kiwix::NameMapperProxy nm(lib, true); + EXPECT_EQ( + "Path collision: /data/zero_four_2021-10.zim and" + " /data/zero_four_2021-11.zim can't share the same URL path 'zero_four'." + " Therefore, only /data/zero_four_2021-10.zim will be served.\n" + , std::string(stderror) + ); + + checkUnaliasedEntriesInNameMapper(nm); + EXPECT_EQ("04-2021-10", nm.getIdForName("zero_four")); + + { + CapturedStderr nmUpdateStderror; + lib.removeBookById("04-2021-10"); + nm.update(); + EXPECT_EQ("", std::string(nmUpdateStderror)); + } + EXPECT_EQ("04-2021-11", nm.getIdForName("zero_four")); + EXPECT_THROW(nm.getNameForId("04-2021-10"), std::out_of_range); + EXPECT_THROW(nm.getIdForName("zero_four_2021-10"), std::out_of_range); +} From 76a5e3a87766b274b55592c9ba1550b64a4a3650 Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Sat, 20 Nov 2021 22:08:12 +0400 Subject: [PATCH 12/24] Library::addBook() updates the reader cache --- include/library.h | 1 + src/library.cpp | 18 ++++++++++++++++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/include/library.h b/include/library.h index 5c44cd8a2..9191b47d2 100644 --- a/include/library.h +++ b/include/library.h @@ -357,6 +357,7 @@ private: // functions std::vector getBookPropValueSet(BookStrPropMemFn p) const; BookIdCollection filterViaBookDB(const Filter& filter) const; void updateBookDB(const Book& book); + void dropReader(const std::string& bookId); }; } diff --git a/src/library.cpp b/src/library.cpp index c411f56f4..5caacc64c 100644 --- a/src/library.cpp +++ b/src/library.cpp @@ -49,6 +49,13 @@ std::string normalizeText(const std::string& text) return removeAccents(text); } +bool booksReferToTheSameArchive(const Book& book1, const Book& book2) +{ + return book1.isPathValid() + && book2.isPathValid() + && book1.getPath() == book2.getPath(); +} + } // unnamed namespace class Library::BookDB : public Xapian::WritableDatabase @@ -79,6 +86,9 @@ bool Library::addBook(const Book& book) updateBookDB(book); try { auto& oldbook = m_books.at(book.getId()); + if ( ! booksReferToTheSameArchive(oldbook, book) ) { + dropReader(book.getId()); + } oldbook.update(book); return false; } catch (std::out_of_range&) { @@ -104,12 +114,16 @@ bool Library::removeBookmark(const std::string& zimId, const std::string& url) } +void Library::dropReader(const std::string& id) +{ + m_readers.erase(id); + m_archives.erase(id); +} bool Library::removeBookById(const std::string& id) { m_bookDB->delete_document("Q" + id); - m_readers.erase(id); - m_archives.erase(id); + dropReader(id); return m_books.erase(id) == 1; } From 226dac260411d48a3a1f1c6c5346a20d6e94c22a Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Sun, 21 Nov 2021 17:47:50 +0400 Subject: [PATCH 13/24] LibraryManipulator is now merely a notifier Originally `LibraryManipulator` was an abstract class completely decoupled from `Library`. Its `addBookToLibrary()` and `addBookmarkToLibrary()` methods could be defined in an arbitrary way. Now `LibraryManipulator` has to be bound to a library object, those methods are no longer virtual, they always update the library and allow for some additional actions via virtual functions `bookWasAddedToLibrary()` and `bookmarkWasAddedToLibrary()`. --- include/manager.h | 35 ++++++++++++++++------------------- src/manager.cpp | 40 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 55 insertions(+), 20 deletions(-) diff --git a/include/manager.h b/include/manager.h index af12eac02..f80421b73 100644 --- a/include/manager.h +++ b/include/manager.h @@ -35,26 +35,23 @@ class xml_document; namespace kiwix { -class LibraryManipulator { - public: - virtual ~LibraryManipulator() {} - virtual bool addBookToLibrary(Book book) = 0; - virtual void addBookmarkToLibrary(Bookmark bookmark) = 0; -}; +class LibraryManipulator +{ + public: // functions + explicit LibraryManipulator(Library* library); + virtual ~LibraryManipulator(); -class DefaultLibraryManipulator : public LibraryManipulator { - public: - DefaultLibraryManipulator(Library* library) : - library(library) {} - virtual ~DefaultLibraryManipulator() {} - bool addBookToLibrary(Book book) { - return library->addBook(book); - } - void addBookmarkToLibrary(Bookmark bookmark) { - library->addBookmark(bookmark); - } - private: - kiwix::Library* library; + Library& getLibrary() const { return library; } + + bool addBookToLibrary(const Book& book); + void addBookmarkToLibrary(const Bookmark& bookmark); + + protected: // overrides + virtual void bookWasAddedToLibrary(const Book& book); + virtual void bookmarkWasAddedToLibrary(const Bookmark& bookmark); + + private: // data + kiwix::Library& library; }; /** diff --git a/src/manager.cpp b/src/manager.cpp index dbd094c2b..f44ec9b53 100644 --- a/src/manager.cpp +++ b/src/manager.cpp @@ -37,6 +37,44 @@ struct NoDelete } // unnamed namespace +//////////////////////////////////////////////////////////////////////////////// +// LibraryManipulator +//////////////////////////////////////////////////////////////////////////////// + +LibraryManipulator::LibraryManipulator(Library* library) + : library(*library) +{} + +LibraryManipulator::~LibraryManipulator() +{} + +bool LibraryManipulator::addBookToLibrary(const Book& book) +{ + const auto ret = library.addBook(book); + if ( ret ) { + bookWasAddedToLibrary(book); + } + return ret; +} + +void LibraryManipulator::addBookmarkToLibrary(const Bookmark& bookmark) +{ + library.addBookmark(bookmark); + bookmarkWasAddedToLibrary(bookmark); +} + +void LibraryManipulator::bookWasAddedToLibrary(const Book& book) +{ +} + +void LibraryManipulator::bookmarkWasAddedToLibrary(const Bookmark& bookmark) +{ +} + +//////////////////////////////////////////////////////////////////////////////// +// Manager +//////////////////////////////////////////////////////////////////////////////// + /* Constructor */ Manager::Manager(LibraryManipulator* manipulator): writableLibraryPath(""), @@ -46,7 +84,7 @@ Manager::Manager(LibraryManipulator* manipulator): Manager::Manager(Library* library) : writableLibraryPath(""), - manipulator(new DefaultLibraryManipulator(library)) + manipulator(new LibraryManipulator(library)) { } From 3aeeeeee763053a412c3f9bea47f9821f756d13f Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Sat, 27 Nov 2021 21:40:12 +0400 Subject: [PATCH 14/24] Manager::reload() --- include/manager.h | 15 ++++++++++++++- src/manager.cpp | 14 ++++++++++++++ test/manager.cpp | 26 ++++++++++++++++++++++++++ 3 files changed, 54 insertions(+), 1 deletion(-) diff --git a/include/manager.h b/include/manager.h index f80421b73..13fe19f10 100644 --- a/include/manager.h +++ b/include/manager.h @@ -59,7 +59,10 @@ class LibraryManipulator */ class Manager { - public: + public: // types + typedef std::vector Paths; + + public: // functions explicit Manager(LibraryManipulator* manipulator); explicit Manager(Library* library); @@ -69,10 +72,20 @@ class Manager * @param path The (utf8) path to the `library.xml`. * @param readOnly Set if the libray path could be overwritten latter with * updated content. + * @param trustLibrary use book metadata coming from XML. * @return True if file has been properly parsed. */ bool readFile(const std::string& path, bool readOnly = true, bool trustLibrary = true); + /** + * Sync the contents of the library with one or more `library.xml` files. + * + * The metadata of the library files is trusted unconditionally. + * + * @param paths The (utf8) paths to the `library.xml` files. + */ + void reload(const Paths& paths); + /** * Load a library content store in the string. * diff --git a/src/manager.cpp b/src/manager.cpp index f44ec9b53..b65353542 100644 --- a/src/manager.cpp +++ b/src/manager.cpp @@ -289,4 +289,18 @@ bool Manager::readBookmarkFile(const std::string& path) return true; } +void Manager::reload(const Paths& paths) +{ + for (std::string path : paths) { + if (!path.empty()) { + if ( kiwix::isRelativePath(path) ) + path = kiwix::computeAbsolutePath(kiwix::getCurrentDirectory(), path); + + if (!readFile(path, false, true)) { + throw std::runtime_error("Failed to load the XML library file '" + path + "'."); + } + } + } +} + } diff --git a/test/manager.cpp b/test/manager.cpp index 09647c766..34774f630 100644 --- a/test/manager.cpp +++ b/test/manager.cpp @@ -67,3 +67,29 @@ TEST(ManagerTest, readXml) EXPECT_EQ(45U, book.getMediaCount()); EXPECT_EQ(678U*1024, book.getSize()); } + +TEST(Manager, reload) +{ + kiwix::Library lib; + kiwix::Manager manager(&lib); + + manager.reload({ "./test/library.xml" }); + EXPECT_EQ(lib.getBooksIds(), (kiwix::Library::BookIdCollection{ + "charlesray", + "raycharles", + "raycharles_uncategorized" + })); + + lib.removeBookById("raycharles"); + EXPECT_EQ(lib.getBooksIds(), (kiwix::Library::BookIdCollection{ + "charlesray", + "raycharles_uncategorized" + })); + + manager.reload({ "./test/library.xml" }); + EXPECT_EQ(lib.getBooksIds(), kiwix::Library::BookIdCollection({ + "charlesray", + "raycharles", + "raycharles_uncategorized" + })); +} From 298247ca9b7243de5d490f52b3a29d7faa52bf10 Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Sat, 27 Nov 2021 21:47:24 +0400 Subject: [PATCH 15/24] Renamed NameMapperProxy -> UpdatableNameMapper --- include/name_mapper.h | 4 ++-- src/name_mapper.cpp | 20 ++++++++++---------- test/name_mapper.cpp | 8 ++++---- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/include/name_mapper.h b/include/name_mapper.h index cba4bbdc8..4247c5c3c 100644 --- a/include/name_mapper.h +++ b/include/name_mapper.h @@ -56,10 +56,10 @@ class HumanReadableNameMapper : public NameMapper { virtual std::string getIdForName(const std::string& name) const; }; -class NameMapperProxy : public NameMapper { +class UpdatableNameMapper : public NameMapper { typedef std::shared_ptr NameMapperHandle; public: - NameMapperProxy(Library& library, bool withAlias); + UpdatableNameMapper(Library& library, bool withAlias); virtual std::string getNameForId(const std::string& id) const; virtual std::string getIdForName(const std::string& name) const; diff --git a/src/name_mapper.cpp b/src/name_mapper.cpp index eb82e873d..dccf40c9b 100644 --- a/src/name_mapper.cpp +++ b/src/name_mapper.cpp @@ -60,44 +60,44 @@ std::string HumanReadableNameMapper::getIdForName(const std::string& name) const } //////////////////////////////////////////////////////////////////////////////// -// NameMapperProxy +// UpdatableNameMapper //////////////////////////////////////////////////////////////////////////////// -NameMapperProxy::NameMapperProxy(Library& lib, bool withAlias) +UpdatableNameMapper::UpdatableNameMapper(Library& lib, bool withAlias) : library(lib) , withAlias(withAlias) { update(); } -void NameMapperProxy::update() +void UpdatableNameMapper::update() { const auto newNameMapper = new HumanReadableNameMapper(library, withAlias); std::lock_guard lock(mutex); nameMapper.reset(newNameMapper); } -NameMapperProxy::NameMapperHandle -NameMapperProxy::currentNameMapper() const +UpdatableNameMapper::NameMapperHandle +UpdatableNameMapper::currentNameMapper() const { // Return a copy of the handle to the current NameMapper object. It will - // ensure that the object survives any call to NameMapperProxy::update() + // ensure that the object survives any call to UpdatableNameMapper::update() // made before the completion of any pending operation on that object. std::lock_guard lock(mutex); return nameMapper; } -std::string NameMapperProxy::getNameForId(const std::string& id) const +std::string UpdatableNameMapper::getNameForId(const std::string& id) const { // Ensure that the current nameMapper object survives a concurrent call - // to NameMapperProxy::update() + // to UpdatableNameMapper::update() return currentNameMapper()->getNameForId(id); } -std::string NameMapperProxy::getIdForName(const std::string& name) const +std::string UpdatableNameMapper::getIdForName(const std::string& name) const { // Ensure that the current nameMapper object survives a concurrent call - // to NameMapperProxy::update() + // to UpdatableNameMapper::update() return currentNameMapper()->getIdForName(name); } diff --git a/test/name_mapper.cpp b/test/name_mapper.cpp index 7410e28ac..2a24f329b 100644 --- a/test/name_mapper.cpp +++ b/test/name_mapper.cpp @@ -103,10 +103,10 @@ TEST_F(NameMapperTest, HumanReadableNameMapperWithAliases) EXPECT_EQ("04-2021-10", nm.getIdForName("zero_four")); } -TEST_F(NameMapperTest, NameMapperProxyWithoutAliases) +TEST_F(NameMapperTest, UpdatableNameMapperWithoutAliases) { CapturedStderr stderror; - kiwix::NameMapperProxy nm(lib, false); + kiwix::UpdatableNameMapper nm(lib, false); EXPECT_EQ("", std::string(stderror)); checkUnaliasedEntriesInNameMapper(nm); @@ -119,10 +119,10 @@ TEST_F(NameMapperTest, NameMapperProxyWithoutAliases) EXPECT_THROW(nm.getIdForName("zero_four"), std::out_of_range); } -TEST_F(NameMapperTest, NameMapperProxyWithAliases) +TEST_F(NameMapperTest, UpdatableNameMapperWithAliases) { CapturedStderr stderror; - kiwix::NameMapperProxy nm(lib, true); + kiwix::UpdatableNameMapper nm(lib, true); EXPECT_EQ( "Path collision: /data/zero_four_2021-10.zim and" " /data/zero_four_2021-11.zim can't share the same URL path 'zero_four'." From b712c732f26e4c598650836dd9691c2885cd93c4 Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Sun, 28 Nov 2021 13:48:04 +0400 Subject: [PATCH 16/24] Dropped Library::getBookBy*() non-const functions --- include/library.h | 2 -- src/library.cpp | 12 ------------ test/library.cpp | 5 +++-- test/name_mapper.cpp | 6 ++++-- 4 files changed, 7 insertions(+), 18 deletions(-) diff --git a/include/library.h b/include/library.h index 9191b47d2..7211a843f 100644 --- a/include/library.h +++ b/include/library.h @@ -197,9 +197,7 @@ class Library bool removeBookmark(const std::string& zimId, const std::string& url); const Book& getBookById(const std::string& id) const; - Book& getBookById(const std::string& id); const Book& getBookByPath(const std::string& path) const; - Book& getBookByPath(const std::string& path); std::shared_ptr getReaderById(const std::string& id); std::shared_ptr getArchiveById(const std::string& id); diff --git a/src/library.cpp b/src/library.cpp index 5caacc64c..e7afad696 100644 --- a/src/library.cpp +++ b/src/library.cpp @@ -132,12 +132,6 @@ const Book& Library::getBookById(const std::string& id) const return m_books.at(id); } -Book& Library::getBookById(const std::string& id) -{ - const Library& const_self = *this; - return const_cast(const_self.getBookById(id)); -} - const Book& Library::getBookByPath(const std::string& path) const { for(auto& it: m_books) { @@ -150,12 +144,6 @@ const Book& Library::getBookByPath(const std::string& path) const throw std::out_of_range(ss.str()); } -Book& Library::getBookByPath(const std::string& path) -{ - const Library& const_self = *this; - return const_cast(const_self.getBookByPath(path)); -} - std::shared_ptr Library::getReaderById(const std::string& id) { try { diff --git a/test/library.cpp b/test/library.cpp index 77780ba7b..e06026318 100644 --- a/test/library.cpp +++ b/test/library.cpp @@ -232,7 +232,7 @@ class LibraryTest : public ::testing::Test { void SetUp() override { kiwix::Manager manager(&lib); manager.readOpds(sampleOpdsStream, "foo.urlHost"); - manager.readXml(sampleLibraryXML, true, "./test/library.xml", true); + manager.readXml(sampleLibraryXML, false, "./test/library.xml", true); } kiwix::Bookmark createBookmark(const std::string &id) { @@ -660,13 +660,14 @@ TEST_F(LibraryTest, filterByMultipleCriteria) TEST_F(LibraryTest, getBookByPath) { - auto& book = lib.getBookById(lib.getBooksIds()[0]); + kiwix::Book book = lib.getBookById(lib.getBooksIds()[0]); #ifdef _WIN32 auto path = "C:\\some\\abs\\path.zim"; #else auto path = "/some/abs/path.zim"; #endif book.setPath(path); + lib.addBook(book); EXPECT_EQ(lib.getBookByPath(path).getId(), book.getId()); EXPECT_THROW(lib.getBookByPath("non/existant/path.zim"), std::out_of_range); } diff --git a/test/name_mapper.cpp b/test/name_mapper.cpp index 2a24f329b..0bc60254a 100644 --- a/test/name_mapper.cpp +++ b/test/name_mapper.cpp @@ -21,9 +21,11 @@ class NameMapperTest : public ::testing::Test { protected: void SetUp() override { kiwix::Manager manager(&lib); - manager.readXml(libraryXML, true, "./library.xml", true); + manager.readXml(libraryXML, false, "./library.xml", true); for ( const std::string& id : lib.getBooksIds() ) { - lib.getBookById(id).setPathValid(true); + kiwix::Book bookCopy = lib.getBookById(id); + bookCopy.setPathValid(true); + lib.addBook(bookCopy); } } From c2927ce6f703bd8c050380049678d7bcee222bbc Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Sun, 28 Nov 2021 18:01:57 +0400 Subject: [PATCH 17/24] Library got a yet unused mutex Introducing a mutex in `Library` necessitates manually implementing the move constructor and assignment operator. It's better to still delegate that work to the compiler to eliminate any possibility of bugs when new data members are added to `Library`. The trick is to move the data into an auxiliary class `LibraryBase` and derive `Library` from it. --- include/library.h | 25 ++++++++++++++++++++++--- src/library.cpp | 28 +++++++++++++++++++++++----- 2 files changed, 45 insertions(+), 8 deletions(-) diff --git a/include/library.h b/include/library.h index 7211a843f..046329742 100644 --- a/include/library.h +++ b/include/library.h @@ -24,6 +24,7 @@ #include #include #include +#include #include #include "book.h" @@ -139,12 +140,14 @@ private: // functions bool accept(const Book& book) const; }; - /** - * A Library store several books. + * This class is not part of the libkiwix API. Its only purpose is + * to simplify the implementation of the Library's move operations + * and avoid bugs should new data members be added to Library. */ -class Library +class LibraryBase { +protected: // data std::map m_books; std::map> m_readers; std::map> m_archives; @@ -152,6 +155,22 @@ class Library class BookDB; std::unique_ptr m_bookDB; +protected: // functions + LibraryBase(); + ~LibraryBase(); + + LibraryBase(LibraryBase&& ); + LibraryBase& operator=(LibraryBase&& ); +}; + +/** + * A Library store several books. + */ +class Library : private LibraryBase +{ + // all data fields must be added in LibraryBase + mutable std::mutex m_mutex; + public: typedef std::vector BookIdCollection; typedef std::map AttributeCounts; diff --git a/src/library.cpp b/src/library.cpp index e7afad696..616e004b1 100644 --- a/src/library.cpp +++ b/src/library.cpp @@ -58,21 +58,39 @@ bool booksReferToTheSameArchive(const Book& book1, const Book& book2) } // unnamed namespace -class Library::BookDB : public Xapian::WritableDatabase +class LibraryBase::BookDB : public Xapian::WritableDatabase { public: BookDB() : Xapian::WritableDatabase("", Xapian::DB_BACKEND_INMEMORY) {} }; -/* Constructor */ -Library::Library() +LibraryBase::LibraryBase() : m_bookDB(new BookDB) { } -Library::Library(Library&& ) = default; +LibraryBase::~LibraryBase() +{ +} -Library& Library::operator=(Library&& ) = default; +LibraryBase::LibraryBase(LibraryBase&& ) = default; +LibraryBase& LibraryBase::operator=(LibraryBase&& ) = default; + +/* Constructor */ +Library::Library() +{ +} + +Library::Library(Library&& other) + : LibraryBase(std::move(other)) +{ +} + +Library& Library::operator=(Library&& other) +{ + LibraryBase::operator=(std::move(other)); + return *this; +} /* Destructor */ Library::~Library() From 02b9e32d18650b2670a89e8753fdef517718e22c Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Sun, 28 Nov 2021 18:50:07 +0400 Subject: [PATCH 18/24] Library became almost thread-safe Library became thread-safe with the exception of `getBookById()` and `getBookByPath()` methods - thread safety in those accessors is rendered meaningless by their return type (they return a reference to a book which can be removed any time later by another thread). --- include/library.h | 3 +++ src/library.cpp | 42 +++++++++++++++++++++++++++++++++++++++--- 2 files changed, 42 insertions(+), 3 deletions(-) diff --git a/include/library.h b/include/library.h index 046329742..f82f6c813 100644 --- a/include/library.h +++ b/include/library.h @@ -215,8 +215,11 @@ class Library : private LibraryBase */ bool removeBookmark(const std::string& zimId, const std::string& url); + // XXX: This is a non-thread-safe operation const Book& getBookById(const std::string& id) const; + // XXX: This is a non-thread-safe operation const Book& getBookByPath(const std::string& path) const; + std::shared_ptr getReaderById(const std::string& id); std::shared_ptr getArchiveById(const std::string& id); diff --git a/src/library.cpp b/src/library.cpp index 616e004b1..15c03a81e 100644 --- a/src/library.cpp +++ b/src/library.cpp @@ -100,6 +100,7 @@ Library::~Library() bool Library::addBook(const Book& book) { + std::lock_guard lock(m_mutex); /* Try to find it */ updateBookDB(book); try { @@ -117,11 +118,13 @@ bool Library::addBook(const Book& book) void Library::addBookmark(const Bookmark& bookmark) { + std::lock_guard lock(m_mutex); m_bookmarks.push_back(bookmark); } bool Library::removeBookmark(const std::string& zimId, const std::string& url) { + std::lock_guard lock(m_mutex); for(auto it=m_bookmarks.begin(); it!=m_bookmarks.end(); it++) { if (it->getBookId() == zimId && it->getUrl() == url) { m_bookmarks.erase(it); @@ -140,6 +143,7 @@ void Library::dropReader(const std::string& id) bool Library::removeBookById(const std::string& id) { + std::lock_guard lock(m_mutex); m_bookDB->delete_document("Q" + id); dropReader(id); return m_books.erase(id) == 1; @@ -147,11 +151,15 @@ bool Library::removeBookById(const std::string& id) const Book& Library::getBookById(const std::string& id) const { + // XXX: Doesn't make sense to lock this operation since it cannot + // XXX: guarantee thread-safety because of its return type return m_books.at(id); } const Book& Library::getBookByPath(const std::string& path) const { + // XXX: Doesn't make sense to lock this operation since it cannot + // XXX: guarantee thread-safety because of its return type for(auto& it: m_books) { auto& book = it.second; if (book.getPath() == path) @@ -165,6 +173,7 @@ const Book& Library::getBookByPath(const std::string& path) const std::shared_ptr Library::getReaderById(const std::string& id) { try { + std::lock_guard lock(m_mutex); return m_readers.at(id); } catch (std::out_of_range& e) {} @@ -173,12 +182,14 @@ std::shared_ptr Library::getReaderById(const std::string& id) return nullptr; const auto reader = make_shared(archive); + std::lock_guard lock(m_mutex); m_readers[id] = reader; return reader; } std::shared_ptr Library::getArchiveById(const std::string& id) { + std::lock_guard lock(m_mutex); try { return m_archives.at(id); } catch (std::out_of_range& e) {} @@ -195,6 +206,7 @@ std::shared_ptr Library::getArchiveById(const std::string& id) unsigned int Library::getBookCount(const bool localBooks, const bool remoteBooks) const { + std::lock_guard lock(m_mutex); unsigned int result = 0; for (auto& pair: m_books) { auto& book = pair.second; @@ -208,20 +220,33 @@ unsigned int Library::getBookCount(const bool localBooks, bool Library::writeToFile(const std::string& path) const { + const auto allBookIds = getBooksIds(); + auto baseDir = removeLastPathElement(path); LibXMLDumper dumper(this); dumper.setBaseDir(baseDir); - return writeTextFile(path, dumper.dumpLibXMLContent(getBooksIds())); + std::string xml; + { + std::lock_guard lock(m_mutex); + xml = dumper.dumpLibXMLContent(allBookIds); + }; + return writeTextFile(path, xml); } bool Library::writeBookmarksToFile(const std::string& path) const { LibXMLDumper dumper(this); - return writeTextFile(path, dumper.dumpLibXMLBookmark()); + std::string xml; + { + std::lock_guard lock(m_mutex); + xml = dumper.dumpLibXMLBookmark(); + }; + return writeTextFile(path, xml); } Library::AttributeCounts Library::getBookAttributeCounts(BookStrPropMemFn p) const { + std::lock_guard lock(m_mutex); AttributeCounts propValueCounts; for (const auto& pair: m_books) { @@ -254,6 +279,7 @@ Library::AttributeCounts Library::getBooksLanguagesWithCounts() const std::vector Library::getBooksCategories() const { + std::lock_guard lock(m_mutex); std::set categories; for (const auto& pair: m_books) { @@ -284,6 +310,7 @@ const std::vector Library::getBookmarks(bool onlyValidBookmarks } std::vector validBookmarks; auto booksId = getBooksIds(); + std::lock_guard lock(m_mutex); for(auto& bookmark:m_bookmarks) { if (std::find(booksId.begin(), booksId.end(), bookmark.getBookId()) != booksId.end()) { validBookmarks.push_back(bookmark); @@ -294,6 +321,7 @@ const std::vector Library::getBookmarks(bool onlyValidBookmarks Library::BookIdCollection Library::getBooksIds() const { + std::lock_guard lock(m_mutex); BookIdCollection bookIds; for (auto& pair: m_books) { @@ -483,6 +511,7 @@ Library::BookIdCollection Library::filterViaBookDB(const Filter& filter) const BookIdCollection bookIds; + std::lock_guard lock(m_mutex); Xapian::Enquire enquire(*m_bookDB); enquire.set_query(query); const auto results = enquire.get_mset(0, m_books.size()); @@ -496,7 +525,9 @@ Library::BookIdCollection Library::filterViaBookDB(const Filter& filter) const Library::BookIdCollection Library::filter(const Filter& filter) const { BookIdCollection result; - for(auto id : filterViaBookDB(filter)) { + const auto preliminaryResult = filterViaBookDB(filter); + std::lock_guard lock(m_mutex); + for(auto id : preliminaryResult) { if(filter.accept(m_books.at(id))) { result.push_back(id); } @@ -565,6 +596,11 @@ std::string Comparator::get_key(const std::string& id) void Library::sort(BookIdCollection& bookIds, supportedListSortBy sort, bool ascending) const { + // NOTE: Can reimplement this method in a way that doesn't require locking + // NOTE: for the entire duration of the sort. Will need to obtain (under a + // NOTE: lock) the required atributes from the books once, and then the + // NOTE: sorting will run on a copy of data without locking. + std::lock_guard lock(m_mutex); switch(sort) { case TITLE: std::sort(bookIds.begin(), bookIds.end(), Comparator(this, ascending)); From 473d2d2a6983a476466a7ec82140bd82ff19ff01 Mon Sep 17 00:00:00 2001 From: Veloman Yunkan <veloman.yunkan@gmail.com> Date: Sun, 28 Nov 2021 19:49:31 +0400 Subject: [PATCH 19/24] Introduced Library::getBookByIdThreadSafe() --- include/library.h | 2 ++ src/library.cpp | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/include/library.h b/include/library.h index f82f6c813..3ef7b5248 100644 --- a/include/library.h +++ b/include/library.h @@ -220,6 +220,8 @@ class Library : private LibraryBase // XXX: This is a non-thread-safe operation const Book& getBookByPath(const std::string& path) const; + Book getBookByIdThreadSafe(const std::string& id) const; + std::shared_ptr<Reader> getReaderById(const std::string& id); std::shared_ptr<zim::Archive> getArchiveById(const std::string& id); diff --git a/src/library.cpp b/src/library.cpp index 15c03a81e..55ed39cd2 100644 --- a/src/library.cpp +++ b/src/library.cpp @@ -156,6 +156,12 @@ const Book& Library::getBookById(const std::string& id) const return m_books.at(id); } +Book Library::getBookByIdThreadSafe(const std::string& id) const +{ + std::lock_guard<std::mutex> lock(m_mutex); + return getBookById(id); +} + const Book& Library::getBookByPath(const std::string& path) const { // XXX: Doesn't make sense to lock this operation since it cannot From ad2eb5255343fde825b396406a3f5a14a9b769c2 Mon Sep 17 00:00:00 2001 From: Veloman Yunkan <veloman.yunkan@gmail.com> Date: Sun, 28 Nov 2021 19:53:48 +0400 Subject: [PATCH 20/24] Thread safe dumping of the OPDS feed --- src/opds_dumper.cpp | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/opds_dumper.cpp b/src/opds_dumper.cpp index 7d463f714..3570633c2 100644 --- a/src/opds_dumper.cpp +++ b/src/opds_dumper.cpp @@ -108,10 +108,15 @@ BooksData getBooksData(const Library* library, const std::vector<std::string>& b { BooksData booksData; for ( const auto& bookId : bookIds ) { - const Book& book = library->getBookById(bookId); - booksData.push_back(kainjow::mustache::object{ - {"entry", getSingleBookEntryXML(book, false, endpointRoot, partial)} - }); + try { + const Book book = library->getBookByIdThreadSafe(bookId); + booksData.push_back(kainjow::mustache::object{ + {"entry", getSingleBookEntryXML(book, false, endpointRoot, partial)} + }); + } catch ( const std::out_of_range& ) { + // the book was removed from the library since its id was obtained + // ignore it + } } return booksData; From 1d5383435d5da63c00dbfb1e8f40e2d8d606e731 Mon Sep 17 00:00:00 2001 From: Veloman Yunkan <veloman.yunkan@gmail.com> Date: Sun, 28 Nov 2021 20:04:10 +0400 Subject: [PATCH 21/24] Noted a potential bug in Library::addBook() --- src/library.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/library.cpp b/src/library.cpp index 55ed39cd2..3518e2b0c 100644 --- a/src/library.cpp +++ b/src/library.cpp @@ -108,7 +108,9 @@ bool Library::addBook(const Book& book) if ( ! booksReferToTheSameArchive(oldbook, book) ) { dropReader(book.getId()); } - oldbook.update(book); + oldbook.update(book); // XXX: This may have no effect if oldbook is readonly + // XXX: Then m_bookDB will become out-of-sync with + // XXX: the real contents of the library. return false; } catch (std::out_of_range&) { m_books[book.getId()] = book; From 262e13845cd2c41abb9030b694a193ae3e30e273 Mon Sep 17 00:00:00 2001 From: Veloman Yunkan <veloman.yunkan@gmail.com> Date: Sun, 28 Nov 2021 20:40:52 +0400 Subject: [PATCH 22/24] Enter Library::removeBooksNotUpdatedSince() --- include/library.h | 33 ++++++++++++++++++++++++++++++++- src/library.cpp | 32 +++++++++++++++++++++++++++++++- test/library.cpp | 31 +++++++++++++++++++++++++++++++ 3 files changed, 94 insertions(+), 2 deletions(-) diff --git a/include/library.h b/include/library.h index 3ef7b5248..c5054dcc7 100644 --- a/include/library.h +++ b/include/library.h @@ -147,8 +147,20 @@ private: // functions */ class LibraryBase { +protected: // types + typedef uint64_t LibraryRevision; + + struct Entry : Book + { + LibraryRevision lastUpdatedRevision = 0; + + // May also keep the Archive and Reader pointers here and get + // rid of the m_readers and m_archives data members in Library + }; + protected: // data - std::map<std::string, kiwix::Book> m_books; + LibraryRevision m_revision; + std::map<std::string, Entry> m_books; std::map<std::string, std::shared_ptr<Reader>> m_readers; std::map<std::string, std::shared_ptr<zim::Archive>> m_archives; std::vector<kiwix::Bookmark> m_bookmarks; @@ -172,6 +184,7 @@ class Library : private LibraryBase mutable std::mutex m_mutex; public: + typedef LibraryRevision Revision; typedef std::vector<std::string> BookIdCollection; typedef std::map<std::string, int> AttributeCounts; @@ -368,6 +381,24 @@ class Library : private LibraryBase const std::vector<std::string>& tags = {}, size_t maxSize = 0) const; + /** + * Return the current revision of the library. + * + * The revision of the library is updated (incremented by one) only by + * the addBook() operation. + * + * @return Current revision of the library. + */ + LibraryRevision getRevision() const; + + /** + * Remove books that have not been updated since the specified revision. + * + * @param rev the library revision to use + * @return Count of books that were removed by this operation. + */ + uint32_t removeBooksNotUpdatedSince(LibraryRevision rev); + friend class OPDSDumper; friend class libXMLDumper; diff --git a/src/library.cpp b/src/library.cpp index 3518e2b0c..79ccbf044 100644 --- a/src/library.cpp +++ b/src/library.cpp @@ -101,6 +101,7 @@ Library::~Library() bool Library::addBook(const Book& book) { std::lock_guard<std::mutex> lock(m_mutex); + ++m_revision; /* Try to find it */ updateBookDB(book); try { @@ -111,9 +112,12 @@ bool Library::addBook(const Book& book) oldbook.update(book); // XXX: This may have no effect if oldbook is readonly // XXX: Then m_bookDB will become out-of-sync with // XXX: the real contents of the library. + oldbook.lastUpdatedRevision = m_revision; return false; } catch (std::out_of_range&) { - m_books[book.getId()] = book; + Entry& newEntry = m_books[book.getId()]; + static_cast<Book&>(newEntry) = book; + newEntry.lastUpdatedRevision = m_revision; return true; } } @@ -151,6 +155,32 @@ bool Library::removeBookById(const std::string& id) return m_books.erase(id) == 1; } +Library::Revision Library::getRevision() const +{ + std::lock_guard<std::mutex> lock(m_mutex); + return m_revision; +} + +uint32_t Library::removeBooksNotUpdatedSince(LibraryRevision libraryRevision) +{ + BookIdCollection booksToRemove; + { + std::lock_guard<std::mutex> lock(m_mutex); + for ( const auto& entry : m_books) { + if ( entry.second.lastUpdatedRevision <= libraryRevision ) { + booksToRemove.push_back(entry.first); + } + } + } + + uint32_t countOfRemovedBooks = 0; + for ( const auto& id : booksToRemove ) { + if ( removeBookById(id) ) + ++countOfRemovedBooks; + } + return countOfRemovedBooks; +} + const Book& Library::getBookById(const std::string& id) const { // XXX: Doesn't make sense to lock this operation since it cannot diff --git a/test/library.cpp b/test/library.cpp index e06026318..a8d99e896 100644 --- a/test/library.cpp +++ b/test/library.cpp @@ -707,4 +707,35 @@ TEST_F(LibraryTest, removeBookByIdUpdatesTheSearchDB) EXPECT_THROW(lib.getBookById("raycharles"), std::out_of_range); }; +TEST_F(LibraryTest, removeBooksNotUpdatedSince) +{ + EXPECT_FILTER_RESULTS(kiwix::Filter(), + "An example ZIM archive", + "Encyclopédie de la Tunisie", + "Granblue Fantasy Wiki", + "Géographie par Wikipédia", + "Islam Stack Exchange", + "Mathématiques", + "Movies & TV Stack Exchange", + "Mythology & Folklore Stack Exchange", + "Ray Charles", + "TED talks - Business", + "Tania Louis", + "Wikiquote" + ); + + const uint64_t rev = lib.getRevision(); + for ( const auto& id : lib.filter(kiwix::Filter().query("exchange")) ) { + lib.addBook(lib.getBookByIdThreadSafe(id)); + } + + EXPECT_EQ(9u, lib.removeBooksNotUpdatedSince(rev)); + + EXPECT_FILTER_RESULTS(kiwix::Filter(), + "Islam Stack Exchange", + "Movies & TV Stack Exchange", + "Mythology & Folklore Stack Exchange", + ); +}; + }; From 7161db8e2ab4b4a56b93fee72aad1f97fdd11614 Mon Sep 17 00:00:00 2001 From: Veloman Yunkan <veloman.yunkan@gmail.com> Date: Sun, 28 Nov 2021 20:55:09 +0400 Subject: [PATCH 23/24] Manager::reload() also removes books from Library --- include/manager.h | 4 ++++ src/manager.cpp | 16 ++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/include/manager.h b/include/manager.h index 13fe19f10..3023a81a2 100644 --- a/include/manager.h +++ b/include/manager.h @@ -45,10 +45,12 @@ class LibraryManipulator bool addBookToLibrary(const Book& book); void addBookmarkToLibrary(const Bookmark& bookmark); + uint32_t removeBooksNotUpdatedSince(Library::Revision rev); protected: // overrides virtual void bookWasAddedToLibrary(const Book& book); virtual void bookmarkWasAddedToLibrary(const Bookmark& bookmark); + virtual void booksWereRemovedFromLibrary(); private: // data kiwix::Library& library; @@ -81,6 +83,8 @@ class Manager * Sync the contents of the library with one or more `library.xml` files. * * The metadata of the library files is trusted unconditionally. + * Any books not present in the input library.xml files are removed + * from the library. * * @param paths The (utf8) paths to the `library.xml` files. */ diff --git a/src/manager.cpp b/src/manager.cpp index b65353542..3f24fad17 100644 --- a/src/manager.cpp +++ b/src/manager.cpp @@ -63,6 +63,15 @@ void LibraryManipulator::addBookmarkToLibrary(const Bookmark& bookmark) bookmarkWasAddedToLibrary(bookmark); } +uint32_t LibraryManipulator::removeBooksNotUpdatedSince(Library::Revision rev) +{ + const auto n = library.removeBooksNotUpdatedSince(rev); + if ( n != 0 ) { + booksWereRemovedFromLibrary(); + } + return n; +} + void LibraryManipulator::bookWasAddedToLibrary(const Book& book) { } @@ -71,6 +80,10 @@ void LibraryManipulator::bookmarkWasAddedToLibrary(const Bookmark& bookmark) { } +void LibraryManipulator::booksWereRemovedFromLibrary() +{ +} + //////////////////////////////////////////////////////////////////////////////// // Manager //////////////////////////////////////////////////////////////////////////////// @@ -291,6 +304,7 @@ bool Manager::readBookmarkFile(const std::string& path) void Manager::reload(const Paths& paths) { + const auto libRevision = manipulator->getLibrary().getRevision(); for (std::string path : paths) { if (!path.empty()) { if ( kiwix::isRelativePath(path) ) @@ -301,6 +315,8 @@ void Manager::reload(const Paths& paths) } } } + + manipulator->removeBooksNotUpdatedSince(libRevision); } } From 405ea2990043eb480fcc5b4ab07601d1d181d88b Mon Sep 17 00:00:00 2001 From: Veloman Yunkan <veloman.yunkan@gmail.com> Date: Mon, 29 Nov 2021 21:21:03 +0400 Subject: [PATCH 24/24] Added Library::addOrUpdateBook() alias --- include/library.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/library.h b/include/library.h index c5054dcc7..d54c8db4a 100644 --- a/include/library.h +++ b/include/library.h @@ -212,6 +212,11 @@ class Library : private LibraryBase */ bool addBook(const Book& book); + /** + * A self-explanatory alias for addBook() + */ + bool addOrUpdateBook(const Book& book) { return addBook(book); } + /** * Add a bookmark to the library. *