mirror of https://github.com/kiwix/libkiwix.git
Make the regexTool thread safe.
This commit is contained in:
parent
c8e719101e
commit
61c28f0e3d
|
@ -20,9 +20,6 @@
|
||||||
#ifndef KIWIX_REGEXTOOLS_H
|
#ifndef KIWIX_REGEXTOOLS_H
|
||||||
#define KIWIX_REGEXTOOLS_H
|
#define KIWIX_REGEXTOOLS_H
|
||||||
|
|
||||||
#include <unicode/regex.h>
|
|
||||||
#include <unicode/ucnv.h>
|
|
||||||
#include <map>
|
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
bool matchRegex(const std::string& content, const std::string& regex);
|
bool matchRegex(const std::string& content, const std::string& regex);
|
||||||
|
|
|
@ -18,40 +18,45 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <tools/regexTools.h>
|
#include <tools/regexTools.h>
|
||||||
|
#include <tools/lock.h>
|
||||||
|
|
||||||
std::map<std::string, icu::RegexMatcher*> regexCache;
|
#include <unicode/regex.h>
|
||||||
|
#include <unicode/ucnv.h>
|
||||||
|
|
||||||
icu::RegexMatcher* buildRegex(const std::string& regex)
|
#include <memory>
|
||||||
|
#include <map>
|
||||||
|
#include <pthread.h>
|
||||||
|
|
||||||
|
std::map<std::string, std::shared_ptr<icu::RegexPattern>> regexCache;
|
||||||
|
static pthread_mutex_t regexLock = PTHREAD_MUTEX_INITIALIZER;
|
||||||
|
|
||||||
|
std::unique_ptr<icu::RegexMatcher> buildMatcher(const std::string& regex, const icu::UnicodeString& content)
|
||||||
{
|
{
|
||||||
icu::RegexMatcher* matcher;
|
std::shared_ptr<icu::RegexPattern> pattern;
|
||||||
auto itr = regexCache.find(regex);
|
|
||||||
|
|
||||||
/* Regex is in cache */
|
/* Regex is in cache */
|
||||||
if (itr != regexCache.end()) {
|
try {
|
||||||
matcher = itr->second;
|
pattern = regexCache.at(regex);
|
||||||
}
|
} catch (std::out_of_range&) {
|
||||||
|
// Redo the search with a lock to avoid race condition.
|
||||||
/* Regex needs to be parsed (and cached) */
|
kiwix::Lock l(&regexLock);
|
||||||
else {
|
try {
|
||||||
|
pattern = regexCache.at(regex);
|
||||||
|
} catch (std::out_of_range&) {
|
||||||
UErrorCode status = U_ZERO_ERROR;
|
UErrorCode status = U_ZERO_ERROR;
|
||||||
|
UParseError pe;
|
||||||
icu::UnicodeString uregex(regex.c_str());
|
icu::UnicodeString uregex(regex.c_str());
|
||||||
matcher = new icu::RegexMatcher(uregex, UREGEX_CASE_INSENSITIVE, status);
|
pattern.reset(icu::RegexPattern::compile(uregex, UREGEX_CASE_INSENSITIVE, pe, status));
|
||||||
regexCache[regex] = matcher;
|
regexCache[regex] = pattern;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
UErrorCode status = U_ZERO_ERROR;
|
||||||
|
return std::unique_ptr<icu::RegexMatcher>(pattern->matcher(content, status));
|
||||||
}
|
}
|
||||||
|
|
||||||
return matcher;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* todo */
|
|
||||||
void freeRegexCache()
|
|
||||||
{
|
|
||||||
}
|
|
||||||
bool matchRegex(const std::string& content, const std::string& regex)
|
bool matchRegex(const std::string& content, const std::string& regex)
|
||||||
{
|
{
|
||||||
ucnv_setDefaultName("UTF-8");
|
ucnv_setDefaultName("UTF-8");
|
||||||
icu::UnicodeString ucontent(content.c_str());
|
auto matcher = buildMatcher(regex, content.c_str());
|
||||||
auto matcher = buildRegex(regex);
|
|
||||||
matcher->reset(ucontent);
|
|
||||||
return matcher->find();
|
return matcher->find();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -60,10 +65,9 @@ std::string replaceRegex(const std::string& content,
|
||||||
const std::string& regex)
|
const std::string& regex)
|
||||||
{
|
{
|
||||||
ucnv_setDefaultName("UTF-8");
|
ucnv_setDefaultName("UTF-8");
|
||||||
icu::UnicodeString ucontent(content.c_str());
|
|
||||||
icu::UnicodeString ureplacement(replacement.c_str());
|
icu::UnicodeString ureplacement(replacement.c_str());
|
||||||
auto matcher = buildRegex(regex);
|
icu::UnicodeString ucontent(content.c_str());
|
||||||
matcher->reset(ucontent);
|
auto matcher = buildMatcher(regex, ucontent);
|
||||||
UErrorCode status = U_ZERO_ERROR;
|
UErrorCode status = U_ZERO_ERROR;
|
||||||
auto uresult = matcher->replaceAll(ureplacement, status);
|
auto uresult = matcher->replaceAll(ureplacement, status);
|
||||||
std::string tmp;
|
std::string tmp;
|
||||||
|
@ -78,9 +82,7 @@ std::string appendToFirstOccurence(const std::string& content,
|
||||||
ucnv_setDefaultName("UTF-8");
|
ucnv_setDefaultName("UTF-8");
|
||||||
icu::UnicodeString ucontent(content.c_str());
|
icu::UnicodeString ucontent(content.c_str());
|
||||||
icu::UnicodeString ureplacement(replacement.c_str());
|
icu::UnicodeString ureplacement(replacement.c_str());
|
||||||
auto matcher = buildRegex(regex);
|
auto matcher = buildMatcher(regex, ucontent);
|
||||||
matcher->reset(ucontent);
|
|
||||||
|
|
||||||
if (matcher->find()) {
|
if (matcher->find()) {
|
||||||
UErrorCode status = U_ZERO_ERROR;
|
UErrorCode status = U_ZERO_ERROR;
|
||||||
ucontent.insert(matcher->end(status), ureplacement);
|
ucontent.insert(matcher->end(status), ureplacement);
|
||||||
|
|
Loading…
Reference in New Issue