Make the regexTool thread safe.

This commit is contained in:
Matthieu Gautier 2019-08-08 12:09:52 +02:00
parent c8e719101e
commit 61c28f0e3d
2 changed files with 32 additions and 33 deletions

View File

@ -20,9 +20,6 @@
#ifndef KIWIX_REGEXTOOLS_H #ifndef KIWIX_REGEXTOOLS_H
#define KIWIX_REGEXTOOLS_H #define KIWIX_REGEXTOOLS_H
#include <unicode/regex.h>
#include <unicode/ucnv.h>
#include <map>
#include <string> #include <string>
bool matchRegex(const std::string& content, const std::string& regex); bool matchRegex(const std::string& content, const std::string& regex);

View File

@ -18,40 +18,45 @@
*/ */
#include <tools/regexTools.h> #include <tools/regexTools.h>
#include <tools/lock.h>
std::map<std::string, icu::RegexMatcher*> regexCache; #include <unicode/regex.h>
#include <unicode/ucnv.h>
icu::RegexMatcher* buildRegex(const std::string& regex) #include <memory>
#include <map>
#include <pthread.h>
// Cache of compiled patterns, keyed by the regex source string passed to buildMatcher.
std::map<std::string, std::shared_ptr<icu::RegexPattern>> regexCache;
// Mutex taken (through kiwix::Lock) when buildMatcher adds an entry to regexCache.
static pthread_mutex_t regexLock = PTHREAD_MUTEX_INITIALIZER;
/*
 * Build a matcher for `regex` (compiled case-insensitively) over `content`.
 *
 * Compiled patterns are kept in `regexCache`, so each distinct regex string is
 * compiled only once; every call gets its own matcher object.
 *
 * regex:   the regular expression source.
 * content: the text to match against. The returned matcher keeps a reference
 *          to this string, so it must outlive the matcher.
 *
 * Returns a matcher owned by the caller, or a null pointer if the regex could
 * not be compiled or the matcher could not be created.
 */
std::unique_ptr<icu::RegexMatcher> buildMatcher(const std::string& regex, const icu::UnicodeString& content)
{
  std::shared_ptr<icu::RegexPattern> pattern;
  {
    // std::map is not safe for a lookup racing with an insertion, so the
    // lock has to cover the read as well as the write.
    kiwix::Lock l(&regexLock);
    auto itr = regexCache.find(regex);
    if (itr != regexCache.end()) {
      /* Regex is in cache */
      pattern = itr->second;
    } else {
      /* Regex needs to be parsed (and cached) */
      UErrorCode status = U_ZERO_ERROR;
      UParseError pe;
      icu::UnicodeString uregex(regex.c_str());
      pattern.reset(icu::RegexPattern::compile(uregex, UREGEX_CASE_INSENSITIVE, pe, status));
      if (U_FAILURE(status)) {
        pattern.reset();
      }
      // A failed compilation is cached too (as a null entry) so that an
      // invalid regex is not recompiled on every call.
      regexCache[regex] = pattern;
    }
  }

  if (!pattern) {
    // Invalid regex: report it with a null matcher instead of dereferencing
    // a null pattern.
    return nullptr;
  }

  // The matcher is created outside the lock to keep the critical section short.
  UErrorCode status = U_ZERO_ERROR;
  return std::unique_ptr<icu::RegexMatcher>(pattern->matcher(content, status));
}
/* TODO: not implemented yet, the body is empty.
 * NOTE(review): presumably meant to release the entries held in the regex
 * cache; confirm the intended behaviour before implementing. */
void freeRegexCache()
{
}
bool matchRegex(const std::string& content, const std::string& regex) bool matchRegex(const std::string& content, const std::string& regex)
{ {
ucnv_setDefaultName("UTF-8"); ucnv_setDefaultName("UTF-8");
icu::UnicodeString ucontent(content.c_str()); auto matcher = buildMatcher(regex, content.c_str());
auto matcher = buildRegex(regex);
matcher->reset(ucontent);
return matcher->find(); return matcher->find();
} }
@ -60,10 +65,9 @@ std::string replaceRegex(const std::string& content,
const std::string& regex) const std::string& regex)
{ {
ucnv_setDefaultName("UTF-8"); ucnv_setDefaultName("UTF-8");
icu::UnicodeString ucontent(content.c_str());
icu::UnicodeString ureplacement(replacement.c_str()); icu::UnicodeString ureplacement(replacement.c_str());
auto matcher = buildRegex(regex); icu::UnicodeString ucontent(content.c_str());
matcher->reset(ucontent); auto matcher = buildMatcher(regex, ucontent);
UErrorCode status = U_ZERO_ERROR; UErrorCode status = U_ZERO_ERROR;
auto uresult = matcher->replaceAll(ureplacement, status); auto uresult = matcher->replaceAll(ureplacement, status);
std::string tmp; std::string tmp;
@ -78,9 +82,7 @@ std::string appendToFirstOccurence(const std::string& content,
ucnv_setDefaultName("UTF-8"); ucnv_setDefaultName("UTF-8");
icu::UnicodeString ucontent(content.c_str()); icu::UnicodeString ucontent(content.c_str());
icu::UnicodeString ureplacement(replacement.c_str()); icu::UnicodeString ureplacement(replacement.c_str());
auto matcher = buildRegex(regex); auto matcher = buildMatcher(regex, ucontent);
matcher->reset(ucontent);
if (matcher->find()) { if (matcher->find()) {
UErrorCode status = U_ZERO_ERROR; UErrorCode status = U_ZERO_ERROR;
ucontent.insert(matcher->end(status), ureplacement); ucontent.insert(matcher->end(status), ureplacement);