From 61c28f0e3dc37d15579504cd9b6552d15dbab368 Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Thu, 8 Aug 2019 12:09:52 +0200 Subject: [PATCH] Make the regexTool thread safe. --- include/tools/regexTools.h | 3 -- src/tools/regexTools.cpp | 62 ++++++++++++++++++++------------------ 2 files changed, 32 insertions(+), 33 deletions(-) diff --git a/include/tools/regexTools.h b/include/tools/regexTools.h index cff6aaf98..f5656eedd 100644 --- a/include/tools/regexTools.h +++ b/include/tools/regexTools.h @@ -20,9 +20,6 @@ #ifndef KIWIX_REGEXTOOLS_H #define KIWIX_REGEXTOOLS_H -#include -#include -#include #include bool matchRegex(const std::string& content, const std::string& regex); diff --git a/src/tools/regexTools.cpp b/src/tools/regexTools.cpp index 912be6cd4..c31ffd525 100644 --- a/src/tools/regexTools.cpp +++ b/src/tools/regexTools.cpp @@ -18,40 +18,45 @@ */ #include +#include -std::map<std::string, icu::RegexMatcher*> regexCache; +#include +#include -icu::RegexMatcher* buildRegex(const std::string& regex) +#include +#include +#include + +std::map<std::string, std::shared_ptr<icu::RegexPattern>> regexCache; +static pthread_mutex_t regexLock = PTHREAD_MUTEX_INITIALIZER; + +std::unique_ptr<icu::RegexMatcher> buildMatcher(const std::string& regex, const icu::UnicodeString& content) { - icu::RegexMatcher* matcher; - auto itr = regexCache.find(regex); - + std::shared_ptr<icu::RegexPattern> pattern; /* Regex is in cache */ - if (itr != regexCache.end()) { - matcher = itr->second; + try { + pattern = regexCache.at(regex); + } catch (std::out_of_range&) { + // Redo the search with a lock to avoid race condition. 
+ kiwix::Lock l(&regexLock); + try { + pattern = regexCache.at(regex); + } catch (std::out_of_range&) { + UErrorCode status = U_ZERO_ERROR; + UParseError pe; + icu::UnicodeString uregex(regex.c_str()); + pattern.reset(icu::RegexPattern::compile(uregex, UREGEX_CASE_INSENSITIVE, pe, status)); + regexCache[regex] = pattern; + } } - - /* Regex needs to be parsed (and cached) */ - else { - UErrorCode status = U_ZERO_ERROR; - icu::UnicodeString uregex(regex.c_str()); - matcher = new icu::RegexMatcher(uregex, UREGEX_CASE_INSENSITIVE, status); - regexCache[regex] = matcher; - } - - return matcher; + UErrorCode status = U_ZERO_ERROR; + return std::unique_ptr<icu::RegexMatcher>(pattern->matcher(content, status)); } -/* todo */ -void freeRegexCache() -{ -} bool matchRegex(const std::string& content, const std::string& regex) { ucnv_setDefaultName("UTF-8"); - icu::UnicodeString ucontent(content.c_str()); - auto matcher = buildRegex(regex); - matcher->reset(ucontent); + auto matcher = buildMatcher(regex, content.c_str()); return matcher->find(); } @@ -60,10 +65,9 @@ std::string replaceRegex(const std::string& content, const std::string& regex) { ucnv_setDefaultName("UTF-8"); - icu::UnicodeString ucontent(content.c_str()); icu::UnicodeString ureplacement(replacement.c_str()); - auto matcher = buildRegex(regex); - matcher->reset(ucontent); + icu::UnicodeString ucontent(content.c_str()); + auto matcher = buildMatcher(regex, ucontent); UErrorCode status = U_ZERO_ERROR; auto uresult = matcher->replaceAll(ureplacement, status); std::string tmp; @@ -78,9 +82,7 @@ std::string appendToFirstOccurence(const std::string& content, ucnv_setDefaultName("UTF-8"); icu::UnicodeString ucontent(content.c_str()); icu::UnicodeString ureplacement(replacement.c_str()); - auto matcher = buildRegex(regex); - matcher->reset(ucontent); - + auto matcher = buildMatcher(regex, ucontent); if (matcher->find()) { UErrorCode status = U_ZERO_ERROR; ucontent.insert(matcher->end(status), ureplacement);