mirror of https://github.com/kiwix/libkiwix.git
228 lines
5.9 KiB
C++
228 lines
5.9 KiB
C++
/*
|
|
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 3 of the License, or
|
|
* any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write to the Free Software
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
|
* MA 02110-1301, USA.
|
|
*/
|
|
|
|
#include "stringTools.h"
|
|
|
|
#ifndef __ANDROID__
|
|
|
|
/* Prepare integer for display */
|
|
std::string kiwix::beautifyInteger(const unsigned int number) {
|
|
std::stringstream numberStream;
|
|
numberStream << number;
|
|
std::string numberString = numberStream.str();
|
|
|
|
signed int offset = numberString.size() - 3;
|
|
while (offset > 0) {
|
|
numberString.insert(offset, ",");
|
|
offset -= 3;
|
|
}
|
|
|
|
return numberString;
|
|
}
|
|
|
|
std::string kiwix::removeAccents(const std::string &text) {
|
|
ucnv_setDefaultName("UTF-8");
|
|
UErrorCode status = U_ZERO_ERROR;
|
|
Transliterator *removeAccentsTrans = Transliterator::createInstance("Lower; NFD; [:M:] remove; NFC", UTRANS_FORWARD, status);
|
|
UnicodeString ustring = UnicodeString(text.c_str());
|
|
removeAccentsTrans->transliterate(ustring);
|
|
delete removeAccentsTrans;
|
|
std::string unaccentedText;
|
|
ustring.toUTF8String(unaccentedText);
|
|
return unaccentedText;
|
|
}
|
|
|
|
void kiwix::printStringInHexadecimal(UnicodeString s) {
|
|
std::cout << std::showbase << std::hex;
|
|
for (int i=0; i<s.length(); i++) {
|
|
char c = (char)((s.getTerminatedBuffer())[i]);
|
|
if (c & 0x80)
|
|
std::cout << (c & 0xffff) << " ";
|
|
else
|
|
std::cout << c << " ";
|
|
}
|
|
std::cout << std::endl;
|
|
}
|
|
|
|
void kiwix::printStringInHexadecimal(const char *s) {
|
|
std::cout << std::showbase << std::hex;
|
|
for (char const* pc = s; *pc; ++pc) {
|
|
if (*pc & 0x80)
|
|
std::cout << (*pc & 0xffff);
|
|
else
|
|
std::cout << *pc;
|
|
std::cout << ' ';
|
|
}
|
|
std::cout << std::endl;
|
|
}
|
|
|
|
void kiwix::stringReplacement(std::string& str, const std::string& oldStr, const std::string& newStr) {
|
|
size_t pos = 0;
|
|
while((pos = str.find(oldStr, pos)) != std::string::npos) {
|
|
str.replace(pos, oldStr.length(), newStr);
|
|
pos += newStr.length();
|
|
}
|
|
}
|
|
|
|
// Urlencode
|
|
//based on javascript encodeURIComponent()
|
|
|
|
std::string char2hex( char dec )
|
|
{
|
|
char dig1 = (dec&0xF0)>>4;
|
|
char dig2 = (dec&0x0F);
|
|
if ( 0<= dig1 && dig1<= 9) dig1+=48; //0,48inascii
|
|
if (10<= dig1 && dig1<=15) dig1+=97-10; //a,97inascii
|
|
if ( 0<= dig2 && dig2<= 9) dig2+=48;
|
|
if (10<= dig2 && dig2<=15) dig2+=97-10;
|
|
|
|
std::string r;
|
|
r.append( &dig1, 1);
|
|
r.append( &dig2, 1);
|
|
return r;
|
|
}
|
|
|
|
std::string kiwix::urlEncode(const std::string &c) {
|
|
std::string escaped="";
|
|
int max = c.length();
|
|
for(int i=0; i<max; i++)
|
|
{
|
|
if ( (48 <= c[i] && c[i] <= 57) ||//0-9
|
|
(65 <= c[i] && c[i] <= 90) ||//abc...xyz
|
|
(97 <= c[i] && c[i] <= 122) || //ABC...XYZ
|
|
(c[i]=='~' || c[i]=='!' || c[i]=='*' || c[i]=='(' || c[i]==')' || c[i]=='\'')
|
|
)
|
|
{
|
|
escaped.append( &c[i], 1);
|
|
}
|
|
else
|
|
{
|
|
escaped.append("%");
|
|
escaped.append( char2hex(c[i]) );//converts char 255 to string "ff"
|
|
}
|
|
}
|
|
return escaped;
|
|
}
|
|
|
|
std::string kiwix::urlDecode(const std::string &SRC) {
|
|
std::string ret;
|
|
char ch;
|
|
unsigned int i, ii;
|
|
for (i=0; i<SRC.length(); i++) {
|
|
if (int(SRC[i])==37) {
|
|
sscanf(SRC.substr(i+1,2).c_str(), "%x", &ii);
|
|
ch=static_cast<char>(ii);
|
|
ret+=ch;
|
|
i=i+2;
|
|
} else {
|
|
ret+=SRC[i];
|
|
}
|
|
}
|
|
return (ret);
|
|
}
|
|
|
|
#endif
|
|
|
|
/* Split string in a token array */
|
|
std::vector<std::string> kiwix::split(const std::string & str,
|
|
const std::string & delims=" *-")
|
|
{
|
|
std::string::size_type lastPos = str.find_first_not_of(delims, 0);
|
|
std::string::size_type pos = str.find_first_of(delims, lastPos);
|
|
std::vector<std::string> tokens;
|
|
|
|
while (std::string::npos != pos || std::string::npos != lastPos)
|
|
{
|
|
tokens.push_back(str.substr(lastPos, pos - lastPos));
|
|
lastPos = str.find_first_not_of(delims, pos);
|
|
pos = str.find_first_of(delims, lastPos);
|
|
}
|
|
|
|
return tokens;
|
|
}
|
|
|
|
std::vector<std::string> kiwix::split(const char* lhs, const char* rhs){
|
|
const std::string m1 (lhs), m2 (rhs);
|
|
return split(m1, m2);
|
|
}
|
|
|
|
std::vector<std::string> kiwix::split(const char* lhs, const std::string& rhs){
|
|
return split(lhs, rhs.c_str());
|
|
}
|
|
|
|
std::vector<std::string> kiwix::split(const std::string& lhs, const char* rhs){
|
|
return split(lhs.c_str(), rhs);
|
|
}
|
|
|
|
std::string kiwix::ucFirst (const std::string &word) {
|
|
if (word.empty())
|
|
return "";
|
|
|
|
std::string result;
|
|
|
|
#ifdef __ANDROID__
|
|
result = word;
|
|
result[0] = toupper(result[0]);
|
|
#else
|
|
UnicodeString unicodeWord(word.c_str());
|
|
UnicodeString unicodeFirstLetter = unicodeWord.tempSubString(0, 1).toUpper();
|
|
unicodeWord.replace(0, 1, unicodeFirstLetter);
|
|
unicodeWord.toUTF8String(result);
|
|
#endif
|
|
|
|
return result;
|
|
}
|
|
|
|
std::string kiwix::lcFirst (const std::string &word) {
|
|
if (word.empty())
|
|
return "";
|
|
|
|
std::string result;
|
|
|
|
#ifdef __ANDROID__
|
|
result = word;
|
|
result[0] = tolower(result[0]);
|
|
#else
|
|
UnicodeString unicodeWord(word.c_str());
|
|
UnicodeString unicodeFirstLetter = unicodeWord.tempSubString(0, 1).toLower();
|
|
unicodeWord.replace(0, 1, unicodeFirstLetter);
|
|
unicodeWord.toUTF8String(result);
|
|
#endif
|
|
|
|
return result;
|
|
}
|
|
|
|
|
|
std::string kiwix::toTitle (const std::string &word) {
|
|
if (word.empty())
|
|
return "";
|
|
|
|
std::string result;
|
|
|
|
#ifdef __ANDROID__
|
|
result = word;
|
|
#else
|
|
UnicodeString unicodeWord(word.c_str());
|
|
unicodeWord = unicodeWord.toTitle(0);
|
|
unicodeWord.toUTF8String(result);
|
|
#endif
|
|
|
|
return result;
|
|
}
|