mirror of https://github.com/kiwix/libkiwix.git
parent
2e38aa796f
commit
9aaf82a36d
|
@ -38,7 +38,7 @@ namespace kiwix
|
||||||
std::string beautifyInteger(const unsigned int number);
|
std::string beautifyInteger(const unsigned int number);
|
||||||
std::string beautifyFileSize(const unsigned int number);
|
std::string beautifyFileSize(const unsigned int number);
|
||||||
void printStringInHexadecimal(const char* s);
|
void printStringInHexadecimal(const char* s);
|
||||||
void printStringInHexadecimal(UnicodeString s);
|
void printStringInHexadecimal(icu::UnicodeString s);
|
||||||
void stringReplacement(std::string& str,
|
void stringReplacement(std::string& str,
|
||||||
const std::string& oldStr,
|
const std::string& oldStr,
|
||||||
const std::string& newStr);
|
const std::string& newStr);
|
||||||
|
|
|
@ -19,12 +19,12 @@
|
||||||
|
|
||||||
#include <common/regexTools.h>
|
#include <common/regexTools.h>
|
||||||
|
|
||||||
std::map<std::string, RegexMatcher*> regexCache;
|
std::map<std::string, icu::RegexMatcher*> regexCache;
|
||||||
|
|
||||||
RegexMatcher* buildRegex(const std::string& regex)
|
icu::RegexMatcher* buildRegex(const std::string& regex)
|
||||||
{
|
{
|
||||||
RegexMatcher* matcher;
|
icu::RegexMatcher* matcher;
|
||||||
std::map<std::string, RegexMatcher*>::iterator itr = regexCache.find(regex);
|
auto itr = regexCache.find(regex);
|
||||||
|
|
||||||
/* Regex is in cache */
|
/* Regex is in cache */
|
||||||
if (itr != regexCache.end()) {
|
if (itr != regexCache.end()) {
|
||||||
|
@ -34,8 +34,8 @@ RegexMatcher* buildRegex(const std::string& regex)
|
||||||
/* Regex needs to be parsed (and cached) */
|
/* Regex needs to be parsed (and cached) */
|
||||||
else {
|
else {
|
||||||
UErrorCode status = U_ZERO_ERROR;
|
UErrorCode status = U_ZERO_ERROR;
|
||||||
UnicodeString uregex = UnicodeString(regex.c_str());
|
icu::UnicodeString uregex(regex.c_str());
|
||||||
matcher = new RegexMatcher(uregex, UREGEX_CASE_INSENSITIVE, status);
|
matcher = new icu::RegexMatcher(uregex, UREGEX_CASE_INSENSITIVE, status);
|
||||||
regexCache[regex] = matcher;
|
regexCache[regex] = matcher;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -49,8 +49,8 @@ void freeRegexCache()
|
||||||
bool matchRegex(const std::string& content, const std::string& regex)
|
bool matchRegex(const std::string& content, const std::string& regex)
|
||||||
{
|
{
|
||||||
ucnv_setDefaultName("UTF-8");
|
ucnv_setDefaultName("UTF-8");
|
||||||
UnicodeString ucontent = UnicodeString(content.c_str());
|
icu::UnicodeString ucontent(content.c_str());
|
||||||
RegexMatcher* matcher = buildRegex(regex);
|
auto matcher = buildRegex(regex);
|
||||||
matcher->reset(ucontent);
|
matcher->reset(ucontent);
|
||||||
return matcher->find();
|
return matcher->find();
|
||||||
}
|
}
|
||||||
|
@ -60,12 +60,12 @@ std::string replaceRegex(const std::string& content,
|
||||||
const std::string& regex)
|
const std::string& regex)
|
||||||
{
|
{
|
||||||
ucnv_setDefaultName("UTF-8");
|
ucnv_setDefaultName("UTF-8");
|
||||||
UnicodeString ucontent = UnicodeString(content.c_str());
|
icu::UnicodeString ucontent(content.c_str());
|
||||||
UnicodeString ureplacement = UnicodeString(replacement.c_str());
|
icu::UnicodeString ureplacement(replacement.c_str());
|
||||||
RegexMatcher* matcher = buildRegex(regex);
|
auto matcher = buildRegex(regex);
|
||||||
matcher->reset(ucontent);
|
matcher->reset(ucontent);
|
||||||
UErrorCode status = U_ZERO_ERROR;
|
UErrorCode status = U_ZERO_ERROR;
|
||||||
UnicodeString uresult = matcher->replaceAll(ureplacement, status);
|
auto uresult = matcher->replaceAll(ureplacement, status);
|
||||||
std::string tmp;
|
std::string tmp;
|
||||||
uresult.toUTF8String(tmp);
|
uresult.toUTF8String(tmp);
|
||||||
return tmp;
|
return tmp;
|
||||||
|
@ -76,9 +76,9 @@ std::string appendToFirstOccurence(const std::string& content,
|
||||||
const std::string& replacement)
|
const std::string& replacement)
|
||||||
{
|
{
|
||||||
ucnv_setDefaultName("UTF-8");
|
ucnv_setDefaultName("UTF-8");
|
||||||
UnicodeString ucontent = UnicodeString(content.c_str());
|
icu::UnicodeString ucontent(content.c_str());
|
||||||
UnicodeString ureplacement = UnicodeString(replacement.c_str());
|
icu::UnicodeString ureplacement(replacement.c_str());
|
||||||
RegexMatcher* matcher = buildRegex(regex);
|
auto matcher = buildRegex(regex);
|
||||||
matcher->reset(ucontent);
|
matcher->reset(ucontent);
|
||||||
|
|
||||||
if (matcher->find()) {
|
if (matcher->find()) {
|
||||||
|
|
|
@ -47,9 +47,9 @@ std::string kiwix::removeAccents(const std::string& text)
|
||||||
loadICUExternalTables();
|
loadICUExternalTables();
|
||||||
ucnv_setDefaultName("UTF-8");
|
ucnv_setDefaultName("UTF-8");
|
||||||
UErrorCode status = U_ZERO_ERROR;
|
UErrorCode status = U_ZERO_ERROR;
|
||||||
Transliterator* removeAccentsTrans = Transliterator::createInstance(
|
auto removeAccentsTrans = icu::Transliterator::createInstance(
|
||||||
"Lower; NFD; [:M:] remove; NFC", UTRANS_FORWARD, status);
|
"Lower; NFD; [:M:] remove; NFC", UTRANS_FORWARD, status);
|
||||||
UnicodeString ustring = UnicodeString(text.c_str());
|
icu::UnicodeString ustring(text.c_str());
|
||||||
removeAccentsTrans->transliterate(ustring);
|
removeAccentsTrans->transliterate(ustring);
|
||||||
delete removeAccentsTrans;
|
delete removeAccentsTrans;
|
||||||
std::string unaccentedText;
|
std::string unaccentedText;
|
||||||
|
@ -85,7 +85,7 @@ std::string kiwix::beautifyFileSize(const unsigned int number)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void kiwix::printStringInHexadecimal(UnicodeString s)
|
void kiwix::printStringInHexadecimal(icu::UnicodeString s)
|
||||||
{
|
{
|
||||||
std::cout << std::showbase << std::hex;
|
std::cout << std::showbase << std::hex;
|
||||||
for (int i = 0; i < s.length(); i++) {
|
for (int i = 0; i < s.length(); i++) {
|
||||||
|
@ -300,8 +300,8 @@ std::string kiwix::ucFirst(const std::string& word)
|
||||||
|
|
||||||
std::string result;
|
std::string result;
|
||||||
|
|
||||||
UnicodeString unicodeWord(word.c_str());
|
icu::UnicodeString unicodeWord(word.c_str());
|
||||||
UnicodeString unicodeFirstLetter = UnicodeString(unicodeWord, 0, 1).toUpper();
|
auto unicodeFirstLetter = icu::UnicodeString(unicodeWord, 0, 1).toUpper();
|
||||||
unicodeWord.replace(0, 1, unicodeFirstLetter);
|
unicodeWord.replace(0, 1, unicodeFirstLetter);
|
||||||
unicodeWord.toUTF8String(result);
|
unicodeWord.toUTF8String(result);
|
||||||
|
|
||||||
|
@ -316,7 +316,7 @@ std::string kiwix::ucAll(const std::string& word)
|
||||||
|
|
||||||
std::string result;
|
std::string result;
|
||||||
|
|
||||||
UnicodeString unicodeWord(word.c_str());
|
icu::UnicodeString unicodeWord(word.c_str());
|
||||||
unicodeWord.toUpper().toUTF8String(result);
|
unicodeWord.toUpper().toUTF8String(result);
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
|
@ -330,8 +330,8 @@ std::string kiwix::lcFirst(const std::string& word)
|
||||||
|
|
||||||
std::string result;
|
std::string result;
|
||||||
|
|
||||||
UnicodeString unicodeWord(word.c_str());
|
icu::UnicodeString unicodeWord(word.c_str());
|
||||||
UnicodeString unicodeFirstLetter = UnicodeString(unicodeWord, 0, 1).toLower();
|
auto unicodeFirstLetter = icu::UnicodeString(unicodeWord, 0, 1).toLower();
|
||||||
unicodeWord.replace(0, 1, unicodeFirstLetter);
|
unicodeWord.replace(0, 1, unicodeFirstLetter);
|
||||||
unicodeWord.toUTF8String(result);
|
unicodeWord.toUTF8String(result);
|
||||||
|
|
||||||
|
@ -346,7 +346,7 @@ std::string kiwix::lcAll(const std::string& word)
|
||||||
|
|
||||||
std::string result;
|
std::string result;
|
||||||
|
|
||||||
UnicodeString unicodeWord(word.c_str());
|
icu::UnicodeString unicodeWord(word.c_str());
|
||||||
unicodeWord.toLower().toUTF8String(result);
|
unicodeWord.toLower().toUTF8String(result);
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
|
@ -360,7 +360,7 @@ std::string kiwix::toTitle(const std::string& word)
|
||||||
|
|
||||||
std::string result;
|
std::string result;
|
||||||
|
|
||||||
UnicodeString unicodeWord(word.c_str());
|
icu::UnicodeString unicodeWord(word.c_str());
|
||||||
unicodeWord = unicodeWord.toTitle(0);
|
unicodeWord = unicodeWord.toTitle(0);
|
||||||
unicodeWord.toUTF8String(result);
|
unicodeWord.toUTF8String(result);
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue