mirror of
https://github.com/kiwix/libkiwix.git
synced 2025-06-27 21:39:37 +00:00
[API break] Move all the tools in the tools directory instead of common.
The `common` name is from the time where kiwix was only one repository for all the project (android, desktop, server...). Now we have split the repositories and kiwix-lib is the "common" repo, the "common" directory is somehow nonsense.
This commit is contained in:
124
src/tools/base64.cpp
Normal file
124
src/tools/base64.cpp
Normal file
@ -0,0 +1,124 @@
|
||||
/*
|
||||
base64.cpp and base64.h
|
||||
|
||||
Copyright (C) 2004-2008 René Nyffenegger
|
||||
|
||||
This source code is provided 'as-is', without any express or implied
|
||||
warranty. In no event will the author be held liable for any damages
|
||||
arising from the use of this software.
|
||||
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it
|
||||
freely, subject to the following restrictions:
|
||||
|
||||
1. The origin of this source code must not be misrepresented; you must not
|
||||
claim that you wrote the original source code. If you use this source code
|
||||
in a product, an acknowledgment in the product documentation would be
|
||||
appreciated but is not required.
|
||||
|
||||
2. Altered source versions must be plainly marked as such, and must not be
|
||||
misrepresented as being the original source code.
|
||||
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
|
||||
René Nyffenegger rene.nyffenegger@adp-gmbh.ch
|
||||
*/
|
||||
|
||||
#include <tools/base64.h>
|
||||
#include <iostream>
|
||||
|
||||
static const std::string base64_chars =
|
||||
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
||||
"abcdefghijklmnopqrstuvwxyz"
|
||||
"0123456789+/";
|
||||
|
||||
|
||||
static inline bool is_base64(unsigned char c) {
|
||||
return (isalnum(c) || (c == '+') || (c == '/'));
|
||||
}
|
||||
|
||||
std::string base64_encode(const std::string& inString) {
|
||||
std::string ret;
|
||||
auto in_len = inString.size();
|
||||
const unsigned char* bytes_to_encode = reinterpret_cast<const unsigned char*>(inString.data());
|
||||
int i = 0;
|
||||
int j = 0;
|
||||
unsigned char char_array_3[3];
|
||||
unsigned char char_array_4[4];
|
||||
|
||||
while (in_len--) {
|
||||
char_array_3[i++] = *(bytes_to_encode++);
|
||||
if (i == 3) {
|
||||
char_array_4[0] = (char_array_3[0] & 0xfc) >> 2;
|
||||
char_array_4[1] = ((char_array_3[0] & 0x03) << 4) + ((char_array_3[1] & 0xf0) >> 4);
|
||||
char_array_4[2] = ((char_array_3[1] & 0x0f) << 2) + ((char_array_3[2] & 0xc0) >> 6);
|
||||
char_array_4[3] = char_array_3[2] & 0x3f;
|
||||
|
||||
for(i = 0; (i <4) ; i++)
|
||||
ret += base64_chars[char_array_4[i]];
|
||||
i = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (i)
|
||||
{
|
||||
for(j = i; j < 3; j++)
|
||||
char_array_3[j] = '\0';
|
||||
|
||||
char_array_4[0] = (char_array_3[0] & 0xfc) >> 2;
|
||||
char_array_4[1] = ((char_array_3[0] & 0x03) << 4) + ((char_array_3[1] & 0xf0) >> 4);
|
||||
char_array_4[2] = ((char_array_3[1] & 0x0f) << 2) + ((char_array_3[2] & 0xc0) >> 6);
|
||||
char_array_4[3] = char_array_3[2] & 0x3f;
|
||||
|
||||
for (j = 0; (j < i + 1); j++)
|
||||
ret += base64_chars[char_array_4[j]];
|
||||
|
||||
while((i++ < 3))
|
||||
ret += '=';
|
||||
|
||||
}
|
||||
|
||||
return ret;
|
||||
|
||||
}
|
||||
|
||||
std::string base64_decode(std::string const& encoded_string) {
|
||||
int in_len = encoded_string.size();
|
||||
int i = 0;
|
||||
int j = 0;
|
||||
int in_ = 0;
|
||||
unsigned char char_array_4[4], char_array_3[3];
|
||||
std::string ret;
|
||||
|
||||
while (in_len-- && ( encoded_string[in_] != '=') && is_base64(encoded_string[in_])) {
|
||||
char_array_4[i++] = encoded_string[in_]; in_++;
|
||||
if (i ==4) {
|
||||
for (i = 0; i <4; i++)
|
||||
char_array_4[i] = base64_chars.find(char_array_4[i]);
|
||||
|
||||
char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4);
|
||||
char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
|
||||
char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
|
||||
|
||||
for (i = 0; (i < 3); i++)
|
||||
ret += char_array_3[i];
|
||||
i = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (i) {
|
||||
for (j = i; j <4; j++)
|
||||
char_array_4[j] = 0;
|
||||
|
||||
for (j = 0; j <4; j++)
|
||||
char_array_4[j] = base64_chars.find(char_array_4[j]);
|
||||
|
||||
char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4);
|
||||
char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
|
||||
char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
|
||||
|
||||
for (j = 0; (j < i - 1); j++) ret += char_array_3[j];
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
209
src/tools/networkTools.cpp
Normal file
209
src/tools/networkTools.cpp
Normal file
@ -0,0 +1,209 @@
|
||||
/*
|
||||
* Copyright 2012 Emmanuel Engelhart <kelson@kiwix.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include <tools/networkTools.h>
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <winsock2.h>
|
||||
#include <ws2tcpip.h>
|
||||
#else
|
||||
#include <net/if.h>
|
||||
#include <netdb.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/socket.h>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#include <curl/curl.h>
|
||||
|
||||
#include <sstream>
|
||||
#include <iostream>
|
||||
|
||||
|
||||
std::map<std::string, std::string> kiwix::getNetworkInterfaces()
|
||||
{
|
||||
std::map<std::string, std::string> interfaces;
|
||||
|
||||
#ifdef _WIN32
|
||||
SOCKET sd = WSASocket(AF_INET, SOCK_DGRAM, 0, 0, 0, 0);
|
||||
if (sd == (SOCKET)SOCKET_ERROR) {
|
||||
std::cerr << "Failed to get a socket. Error " << WSAGetLastError()
|
||||
<< std::endl;
|
||||
return interfaces;
|
||||
}
|
||||
|
||||
INTERFACE_INFO InterfaceList[20];
|
||||
unsigned long nBytesReturned;
|
||||
if (WSAIoctl(sd,
|
||||
SIO_GET_INTERFACE_LIST,
|
||||
0,
|
||||
0,
|
||||
&InterfaceList,
|
||||
sizeof(InterfaceList),
|
||||
&nBytesReturned,
|
||||
0,
|
||||
0)
|
||||
== SOCKET_ERROR) {
|
||||
std::cerr << "Failed calling WSAIoctl: error " << WSAGetLastError()
|
||||
<< std::endl;
|
||||
return interfaces;
|
||||
}
|
||||
|
||||
int nNumInterfaces = nBytesReturned / sizeof(INTERFACE_INFO);
|
||||
for (int i = 0; i < nNumInterfaces; ++i) {
|
||||
sockaddr_in* pAddress;
|
||||
pAddress = (sockaddr_in*)&(InterfaceList[i].iiAddress);
|
||||
|
||||
/* Add to the map */
|
||||
std::string interfaceName = std::string(inet_ntoa(pAddress->sin_addr));
|
||||
std::string interfaceIp = std::string(inet_ntoa(pAddress->sin_addr));
|
||||
interfaces.insert(
|
||||
std::pair<std::string, std::string>(interfaceName, interfaceIp));
|
||||
}
|
||||
#else
|
||||
/* Get Network interfaces information */
|
||||
char buf[16384];
|
||||
struct ifconf ifconf;
|
||||
int fd = socket(PF_INET, SOCK_DGRAM, 0); /* Only IPV4 */
|
||||
ifconf.ifc_len = sizeof buf;
|
||||
ifconf.ifc_buf = buf;
|
||||
if (ioctl(fd, SIOCGIFCONF, &ifconf) != 0) {
|
||||
perror("ioctl(SIOCGIFCONF)");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
/* Go through each interface */
|
||||
int i;
|
||||
size_t len;
|
||||
struct ifreq* ifreq;
|
||||
ifreq = ifconf.ifc_req;
|
||||
for (i = 0; i < ifconf.ifc_len;) {
|
||||
if (ifreq->ifr_addr.sa_family == AF_INET) {
|
||||
/* Get the network interface ip */
|
||||
char host[128] = {0};
|
||||
const int error = getnameinfo(&(ifreq->ifr_addr),
|
||||
sizeof ifreq->ifr_addr,
|
||||
host,
|
||||
sizeof host,
|
||||
0,
|
||||
0,
|
||||
NI_NUMERICHOST);
|
||||
if (!error) {
|
||||
std::string interfaceName = std::string(ifreq->ifr_name);
|
||||
std::string interfaceIp = std::string(host);
|
||||
/* Add to the map */
|
||||
interfaces.insert(
|
||||
std::pair<std::string, std::string>(interfaceName, interfaceIp));
|
||||
} else {
|
||||
perror("getnameinfo()");
|
||||
}
|
||||
}
|
||||
|
||||
/* some systems have ifr_addr.sa_len and adjust the length that
|
||||
* way, but not mine. weird */
|
||||
#ifndef __linux__
|
||||
len = IFNAMSIZ + ifreq->ifr_addr.sa_len;
|
||||
#else
|
||||
len = sizeof *ifreq;
|
||||
#endif
|
||||
ifreq = (struct ifreq*)((char*)ifreq + len);
|
||||
i += len;
|
||||
}
|
||||
#endif
|
||||
return interfaces;
|
||||
}
|
||||
|
||||
std::string kiwix::getBestPublicIp()
|
||||
{
|
||||
std::map<std::string, std::string> interfaces = kiwix::getNetworkInterfaces();
|
||||
|
||||
#ifndef _WIN32
|
||||
const char* const prioritizedNames[]
|
||||
= {"eth0", "eth1", "wlan0", "wlan1", "en0", "en1"};
|
||||
const int count = (sizeof prioritizedNames) / (sizeof prioritizedNames[0]);
|
||||
for (int i = 0; i < count; ++i) {
|
||||
std::map<std::string, std::string>::const_iterator it
|
||||
= interfaces.find(prioritizedNames[i]);
|
||||
if (it != interfaces.end()) {
|
||||
return it->second;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (std::map<std::string, std::string>::iterator iter = interfaces.begin();
|
||||
iter != interfaces.end();
|
||||
++iter) {
|
||||
std::string interfaceIp = iter->second;
|
||||
if (interfaceIp.length() >= 7 && interfaceIp.substr(0, 7) == "192.168") {
|
||||
return interfaceIp;
|
||||
}
|
||||
}
|
||||
|
||||
for (std::map<std::string, std::string>::iterator iter = interfaces.begin();
|
||||
iter != interfaces.end();
|
||||
++iter) {
|
||||
std::string interfaceIp = iter->second;
|
||||
if (interfaceIp.length() >= 7 && interfaceIp.substr(0, 7) == "172.16.") {
|
||||
return interfaceIp;
|
||||
}
|
||||
}
|
||||
|
||||
for (std::map<std::string, std::string>::iterator iter = interfaces.begin();
|
||||
iter != interfaces.end();
|
||||
++iter) {
|
||||
std::string interfaceIp = iter->second;
|
||||
if (interfaceIp.length() >= 3 && interfaceIp.substr(0, 3) == "10.") {
|
||||
return interfaceIp;
|
||||
}
|
||||
}
|
||||
|
||||
return "127.0.0.1";
|
||||
}
|
||||
|
||||
size_t write_callback_to_iss(char* ptr, size_t size, size_t nmemb, void* userdata)
|
||||
{
|
||||
auto str = static_cast<std::stringstream*>(userdata);
|
||||
str->write(ptr, nmemb);
|
||||
return nmemb;
|
||||
}
|
||||
|
||||
std::string kiwix::download(const std::string& url) {
|
||||
auto curl = curl_easy_init();
|
||||
std::stringstream ss;
|
||||
curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
|
||||
curl_easy_setopt(curl, CURLOPT_HTTPGET, 1L);
|
||||
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, &write_callback_to_iss);
|
||||
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &ss);
|
||||
auto res = curl_easy_perform(curl);
|
||||
if (res != CURLE_OK) {
|
||||
curl_easy_cleanup(curl);
|
||||
throw std::runtime_error("Cannot perform request");
|
||||
}
|
||||
long response_code;
|
||||
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &response_code);
|
||||
curl_easy_cleanup(curl);
|
||||
if (response_code != 200) {
|
||||
throw std::runtime_error("Invalid return code from server");
|
||||
}
|
||||
return ss.str();
|
||||
}
|
326
src/tools/otherTools.cpp
Normal file
326
src/tools/otherTools.cpp
Normal file
@ -0,0 +1,326 @@
|
||||
/*
|
||||
* Copyright 2014 Emmanuel Engelhart <kelson@kiwix.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include <tools/otherTools.h>
|
||||
#include <map>
|
||||
|
||||
static std::map<std::string, std::string> codeisomapping {
|
||||
//a
|
||||
{ "ad", "and" },
|
||||
{ "ae", "are" },
|
||||
{ "af", "afg" },
|
||||
{ "ag", "atg" },
|
||||
{ "ai", "aia" },
|
||||
{ "al", "alb" },
|
||||
{ "am", "arm" },
|
||||
{ "an", "ant" },
|
||||
{ "ao", "ago" },
|
||||
{ "aq", "ata" },
|
||||
{ "ar", "arg" },
|
||||
{ "as", "asm" },
|
||||
{ "at", "aut" },
|
||||
{ "au", "aus" },
|
||||
{ "aw", "abw" },
|
||||
{ "ax", "ala" },
|
||||
{ "az", "aze" },
|
||||
//b
|
||||
{ "ba", "bih" },
|
||||
{ "bb", "brb" },
|
||||
{ "bd", "bgd" },
|
||||
{ "be", "bel" },
|
||||
{ "bf", "bfa" },
|
||||
{ "bg", "bgr" },
|
||||
{ "bh", "bhr" },
|
||||
{ "bi", "bdi" },
|
||||
{ "bj", "ben" },
|
||||
{ "bl", "blm" },
|
||||
{ "bn", "brn" },
|
||||
{ "bm", "bmu" },
|
||||
{ "bo", "bol" },
|
||||
{ "br", "bra" },
|
||||
{ "bs", "bhs" },
|
||||
{ "bt", "btn" },
|
||||
{ "bv", "bvt" },
|
||||
{ "bw", "bwa" },
|
||||
{ "by", "blr" },
|
||||
{ "bz", "blz" },
|
||||
//c
|
||||
{ "ca", "can" },
|
||||
{ "cc", "cck" },
|
||||
{ "cd", "cod" },
|
||||
{ "cf", "caf" },
|
||||
{ "cg", "cog" },
|
||||
{ "ch", "che" },
|
||||
{ "ci", "civ" },
|
||||
{ "ck", "cok" },
|
||||
{ "cl", "chl" },
|
||||
{ "cm", "cmr" },
|
||||
{ "cn", "chn" },
|
||||
{ "co", "col" },
|
||||
{ "cr", "cri" },
|
||||
{ "cu", "cub" },
|
||||
{ "cv", "cpv" },
|
||||
{ "cx", "cxr" },
|
||||
{ "cy", "cyp" },
|
||||
{ "cz", "cze" },
|
||||
//d
|
||||
{ "de", "deu" },
|
||||
{ "dj", "dji" },
|
||||
{ "dk", "dnk" },
|
||||
{ "dm", "dma" },
|
||||
{ "do", "dom" },
|
||||
{ "dz", "dza" },
|
||||
//e
|
||||
{ "ec", "ecu" },
|
||||
{ "ee", "est" },
|
||||
{ "eg", "egy" },
|
||||
{ "eh", "esh" },
|
||||
{ "en", "eng" },
|
||||
{ "er", "eri" },
|
||||
{ "es", "esp" },
|
||||
{ "et", "eth" },
|
||||
//f
|
||||
{ "fi", "fin" },
|
||||
{ "fj", "fji" },
|
||||
{ "fk", "flk" },
|
||||
{ "fm", "fsm" },
|
||||
{ "fo", "fro" },
|
||||
{ "fr", "fra" },
|
||||
//g
|
||||
{ "ga", "gab" },
|
||||
{ "gb", "gbr" },
|
||||
{ "gd", "grd" },
|
||||
{ "ge", "geo" },
|
||||
{ "gf", "guf" },
|
||||
{ "gg", "ggy" },
|
||||
{ "gh", "gha" },
|
||||
{ "gi", "gib" },
|
||||
{ "gl", "grl" },
|
||||
{ "gm", "gmb" },
|
||||
{ "gn", "gin" },
|
||||
{ "gp", "glp" },
|
||||
{ "gq", "gnq" },
|
||||
{ "gr", "grc" },
|
||||
{ "gs", "sgs" },
|
||||
{ "gt", "gtm" },
|
||||
{ "gu", "gum" },
|
||||
{ "gw", "gnb" },
|
||||
{ "gy", "guy" },
|
||||
//h
|
||||
{ "hk", "hkg" },
|
||||
{ "hm", "hmd" },
|
||||
{ "hn", "hnd" },
|
||||
{ "hr", "hrv" },
|
||||
{ "ht", "hti" },
|
||||
{ "hu", "hun" },
|
||||
//i
|
||||
{ "id", "idn" },
|
||||
{ "ie", "irl" },
|
||||
{ "il", "isr" },
|
||||
{ "im", "imn" },
|
||||
{ "in", "ind" },
|
||||
{ "io", "iot" },
|
||||
{ "iq", "irq" },
|
||||
{ "ir", "irn" },
|
||||
{ "is", "isl" },
|
||||
{ "it", "ita" },
|
||||
//j
|
||||
{ "je", "jey" },
|
||||
{ "jm", "jam" },
|
||||
{ "jo", "jor" },
|
||||
{ "jp", "jpn" },
|
||||
//k
|
||||
{ "ke", "ken" },
|
||||
{ "kg", "kgz" },
|
||||
{ "kh", "khm" },
|
||||
{ "ki", "kir" },
|
||||
{ "km", "com" },
|
||||
{ "kn", "kna" },
|
||||
{ "kp", "prk" },
|
||||
{ "kr", "kor" },
|
||||
{ "kw", "kwt" },
|
||||
{ "ky", "cym" },
|
||||
{ "kz", "kaz" },
|
||||
//l
|
||||
{ "la", "lao" },
|
||||
{ "lb", "lbn" },
|
||||
{ "lc", "lca" },
|
||||
{ "li", "lie" },
|
||||
{ "lk", "lka" },
|
||||
{ "lr", "lbr" },
|
||||
{ "ls", "lso" },
|
||||
{ "lt", "ltu" },
|
||||
{ "lu", "lux" },
|
||||
{ "lv", "lva" },
|
||||
{ "ly", "lby" },
|
||||
//m
|
||||
{ "ma", "mar" },
|
||||
{ "mc", "mco" },
|
||||
{ "md", "mda" },
|
||||
{ "me", "mne" },
|
||||
{ "mf", "maf" },
|
||||
{ "mg", "mdg" },
|
||||
{ "mh", "mhl" },
|
||||
{ "mk", "mkd" },
|
||||
{ "ml", "mli" },
|
||||
{ "mm", "mmr" },
|
||||
{ "mn", "mng" },
|
||||
{ "mo", "mac" },
|
||||
{ "mp", "mnp" },
|
||||
{ "mq", "mtq" },
|
||||
{ "mr", "mrt" },
|
||||
{ "ms", "msr" },
|
||||
{ "mt", "mlt" },
|
||||
{ "mu", "mus" },
|
||||
{ "mv", "mdv" },
|
||||
{ "mw", "mwi" },
|
||||
{ "mx", "mex" },
|
||||
{ "my", "mys" },
|
||||
{ "mz", "moz" },
|
||||
//n
|
||||
{ "na", "nam" },
|
||||
{ "nc", "ncl" },
|
||||
{ "ne", "ner" },
|
||||
{ "nf", "nfk" },
|
||||
{ "ng", "nga" },
|
||||
{ "ni", "nic" },
|
||||
{ "nl", "nld" },
|
||||
{ "no", "nor" },
|
||||
{ "np", "npl" },
|
||||
{ "nr", "nru" },
|
||||
{ "nu", "niu" },
|
||||
{ "nz", "nzl" },
|
||||
//o
|
||||
{ "om", "omn" },
|
||||
//p
|
||||
{ "pa", "pan" },
|
||||
{ "pe", "per" },
|
||||
{ "pf", "pyf" },
|
||||
{ "pg", "png" },
|
||||
{ "ph", "phl" },
|
||||
{ "pk", "pak" },
|
||||
{ "pl", "pol" },
|
||||
{ "pm", "spm" },
|
||||
{ "pn", "pcn" },
|
||||
{ "pr", "pri" },
|
||||
{ "ps", "pse" },
|
||||
{ "pt", "prt" },
|
||||
{ "pw", "plw" },
|
||||
{ "py", "pry" },
|
||||
//q
|
||||
{ "qa", "qat" },
|
||||
//r
|
||||
{ "re", "reu" },
|
||||
{ "ro", "rou" },
|
||||
{ "rs", "srb" },
|
||||
{ "ru", "rus" },
|
||||
{ "rw", "rwa" },
|
||||
//s
|
||||
{ "sa", "sau" },
|
||||
{ "sb", "slb" },
|
||||
{ "sc", "syc" },
|
||||
{ "sd", "sdn" },
|
||||
{ "se", "swe" },
|
||||
{ "sg", "sgp" },
|
||||
{ "sh", "shn" },
|
||||
{ "si", "svn" },
|
||||
{ "sj", "sjm" },
|
||||
{ "sk", "svk" },
|
||||
{ "sl", "sle" },
|
||||
{ "sm", "smr" },
|
||||
{ "sn", "sen" },
|
||||
{ "so", "som" },
|
||||
{ "sr", "sur" },
|
||||
{ "ss", "ssd" },
|
||||
{ "st", "stp" },
|
||||
{ "sv", "slv" },
|
||||
{ "sy", "syr" },
|
||||
{ "sz", "swz" },
|
||||
//t
|
||||
{ "tc", "tca" },
|
||||
{ "td", "tcd" },
|
||||
{ "tf", "atf" },
|
||||
{ "tg", "tgo" },
|
||||
{ "th", "tha" },
|
||||
{ "tj", "tjk" },
|
||||
{ "tk", "tkl" },
|
||||
{ "tl", "tls" },
|
||||
{ "tm", "tkm" },
|
||||
{ "tn", "tun" },
|
||||
{ "to", "ton" },
|
||||
{ "tr", "tur" },
|
||||
{ "tt", "tto" },
|
||||
{ "tv", "tuv" },
|
||||
{ "tw", "twn" },
|
||||
{ "tz", "tza" },
|
||||
//u
|
||||
{ "ua", "ukr" },
|
||||
{ "ug", "uga" },
|
||||
{ "um", "umi" },
|
||||
{ "us", "usa" },
|
||||
{ "uy", "ury" },
|
||||
{ "uz", "uzb" },
|
||||
//v
|
||||
{ "va", "vat" },
|
||||
{ "vc", "vct" },
|
||||
{ "ve", "ven" },
|
||||
{ "vg", "vgb" },
|
||||
{ "vi", "vir" },
|
||||
{ "vn", "vnm" },
|
||||
{ "vu", "vut" },
|
||||
//w
|
||||
{ "wf", "wlf" },
|
||||
{ "ws", "wsm" },
|
||||
//y
|
||||
{ "ye", "yem" },
|
||||
{ "yt", "myt" },
|
||||
// z
|
||||
{ "za", "zaf" },
|
||||
{ "zm", "zmb" },
|
||||
{ "zw", "zwe" }
|
||||
};
|
||||
|
||||
void kiwix::sleep(unsigned int milliseconds)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
Sleep(milliseconds);
|
||||
#else
|
||||
usleep(1000 * milliseconds);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
struct XmlStringWriter: pugi::xml_writer
|
||||
{
|
||||
std::string result;
|
||||
virtual void write(const void* data, size_t size){
|
||||
result.append(static_cast<const char*>(data), size);
|
||||
}
|
||||
};
|
||||
|
||||
std::string kiwix::nodeToString(pugi::xml_node node)
|
||||
{
|
||||
XmlStringWriter writer;
|
||||
node.print(writer, " ");
|
||||
return writer.result;
|
||||
}
|
||||
|
||||
std::string kiwix::converta2toa3(const std::string& a2code){
|
||||
return codeisomapping.at(a2code);
|
||||
}
|
335
src/tools/pathTools.cpp
Normal file
335
src/tools/pathTools.cpp
Normal file
@ -0,0 +1,335 @@
|
||||
/*
|
||||
* Copyright 2011-2014 Emmanuel Engelhart <kelson@kiwix.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include <tools/pathTools.h>
|
||||
|
||||
#ifdef __APPLE__
|
||||
#include <limits.h>
|
||||
#include <mach-o/dyld.h>
|
||||
#elif _WIN32
|
||||
#include <direct.h>
|
||||
#include <windows.h>
|
||||
#include "shlwapi.h"
|
||||
#define getcwd _getcwd // stupid MSFT "deprecation" warning
|
||||
#endif
|
||||
|
||||
#ifdef _WIN32
|
||||
#else
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#ifdef _WIN32
|
||||
const std::string SEPARATOR("\\");
|
||||
#else
|
||||
const std::string SEPARATOR("/");
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#ifndef PATH_MAX
|
||||
#define PATH_MAX 1024
|
||||
#endif
|
||||
|
||||
bool isRelativePath(const string& path)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
return path.empty() || path.substr(1, 2) == ":\\" ? false : true;
|
||||
#else
|
||||
return path.empty() || path.substr(0, 1) == "/" ? false : true;
|
||||
#endif
|
||||
}
|
||||
|
||||
string computeRelativePath(const string path, const string absolutePath)
|
||||
{
|
||||
std::vector<std::string> pathParts = kiwix::split(path, SEPARATOR);
|
||||
std::vector<std::string> absolutePathParts
|
||||
= kiwix::split(absolutePath, SEPARATOR);
|
||||
|
||||
unsigned int commonCount = 0;
|
||||
while (commonCount < pathParts.size()
|
||||
&& commonCount < absolutePathParts.size()
|
||||
&& pathParts[commonCount] == absolutePathParts[commonCount]) {
|
||||
commonCount++;
|
||||
}
|
||||
|
||||
string relativePath;
|
||||
#ifdef _WIN32
|
||||
/* On Windows you have a token more because the root is represented
|
||||
by a letter */
|
||||
if (commonCount == 0) {
|
||||
relativePath = ".." + SEPARATOR;
|
||||
}
|
||||
#endif
|
||||
|
||||
for (unsigned int i = commonCount; i < pathParts.size(); i++) {
|
||||
relativePath += ".." + SEPARATOR;
|
||||
}
|
||||
for (unsigned int i = commonCount; i < absolutePathParts.size(); i++) {
|
||||
relativePath += absolutePathParts[i];
|
||||
relativePath += i + 1 < absolutePathParts.size() ? SEPARATOR : "";
|
||||
}
|
||||
return relativePath;
|
||||
}
|
||||
|
||||
/* Warning: the relative path must be with slashes */
|
||||
string computeAbsolutePath(const string path, const string relativePath)
|
||||
{
|
||||
string absolutePath;
|
||||
|
||||
if (path.empty()) {
|
||||
char* path = NULL;
|
||||
size_t size = 0;
|
||||
|
||||
#ifdef _WIN32
|
||||
path = _getcwd(path, size);
|
||||
#else
|
||||
path = getcwd(path, size);
|
||||
#endif
|
||||
|
||||
absolutePath = string(path) + SEPARATOR;
|
||||
} else {
|
||||
absolutePath = path.substr(path.length() - 1, 1) == SEPARATOR
|
||||
? path
|
||||
: path + SEPARATOR;
|
||||
}
|
||||
|
||||
#if _WIN32
|
||||
char* cRelativePath = _strdup(relativePath.c_str());
|
||||
#else
|
||||
char* cRelativePath = strdup(relativePath.c_str());
|
||||
#endif
|
||||
char* token = strtok(cRelativePath, "/");
|
||||
|
||||
while (token != NULL) {
|
||||
if (string(token) == "..") {
|
||||
absolutePath = removeLastPathElement(absolutePath, true, false);
|
||||
token = strtok(NULL, "/");
|
||||
} else if (strcmp(token, ".") && strcmp(token, "")) {
|
||||
absolutePath += string(token);
|
||||
token = strtok(NULL, "/");
|
||||
if (token != NULL) {
|
||||
absolutePath += SEPARATOR;
|
||||
}
|
||||
} else {
|
||||
token = strtok(NULL, "/");
|
||||
}
|
||||
}
|
||||
|
||||
return absolutePath;
|
||||
}
|
||||
|
||||
string removeLastPathElement(const string path,
|
||||
const bool removePreSeparator,
|
||||
const bool removePostSeparator)
|
||||
{
|
||||
string newPath = path;
|
||||
size_t offset = newPath.find_last_of(SEPARATOR);
|
||||
if (removePreSeparator &&
|
||||
#ifndef _WIN32
|
||||
offset != newPath.find_first_of(SEPARATOR) &&
|
||||
#endif
|
||||
offset == newPath.length() - 1) {
|
||||
newPath = newPath.substr(0, offset);
|
||||
offset = newPath.find_last_of(SEPARATOR);
|
||||
}
|
||||
newPath = removePostSeparator ? newPath.substr(0, offset)
|
||||
: newPath.substr(0, offset + 1);
|
||||
return newPath;
|
||||
}
|
||||
|
||||
string appendToDirectory(const string& directoryPath, const string& filename)
|
||||
{
|
||||
string newPath = directoryPath + SEPARATOR + filename;
|
||||
return newPath;
|
||||
}
|
||||
|
||||
string getLastPathElement(const string& path)
|
||||
{
|
||||
return path.substr(path.find_last_of(SEPARATOR) + 1);
|
||||
}
|
||||
|
||||
unsigned int getFileSize(const string& path)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
struct _stat filestatus;
|
||||
_stat(path.c_str(), &filestatus);
|
||||
#else
|
||||
struct stat filestatus;
|
||||
stat(path.c_str(), &filestatus);
|
||||
#endif
|
||||
|
||||
return filestatus.st_size / 1024;
|
||||
}
|
||||
|
||||
string getFileSizeAsString(const string& path)
|
||||
{
|
||||
ostringstream convert;
|
||||
convert << getFileSize(path);
|
||||
return convert.str();
|
||||
}
|
||||
|
||||
string getFileContent(const string& path)
|
||||
{
|
||||
std::ifstream f(path, std::ios::in|std::ios::ate);
|
||||
std::string content;
|
||||
if (f.is_open()) {
|
||||
auto size = f.tellg();
|
||||
content.reserve(size);
|
||||
f.seekg(0, std::ios::beg);
|
||||
content.assign((std::istreambuf_iterator<char>(f)),
|
||||
std::istreambuf_iterator<char>());
|
||||
}
|
||||
return content;
|
||||
}
|
||||
|
||||
bool fileExists(const string& path)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
return PathFileExists(path.c_str());
|
||||
#else
|
||||
bool flag = false;
|
||||
fstream fin;
|
||||
fin.open(path.c_str(), ios::in);
|
||||
if (fin.is_open()) {
|
||||
flag = true;
|
||||
}
|
||||
fin.close();
|
||||
return flag;
|
||||
#endif
|
||||
}
|
||||
|
||||
bool makeDirectory(const string& path)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
int status = _mkdir(path.c_str());
|
||||
#else
|
||||
int status = mkdir(path.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
|
||||
#endif
|
||||
return status == 0;
|
||||
}
|
||||
|
||||
string makeTmpDirectory()
|
||||
{
|
||||
#ifdef _WIN32
|
||||
char cbase[MAX_PATH+1];
|
||||
int base_len = GetTempPath(MAX_PATH+1, cbase);
|
||||
UUID uuid;
|
||||
UuidCreate(&uuid);
|
||||
char* dir_name;
|
||||
UuidToString(&uuid, reinterpret_cast<unsigned char**>(&dir_name));
|
||||
string dir(cbase, base_len);
|
||||
dir += dir_name;
|
||||
_mkdir(dir.c_str());
|
||||
RpcStringFree(reinterpret_cast<unsigned char**>(&dir_name));
|
||||
#else
|
||||
string base = "/tmp";
|
||||
auto _template = base + "/kiwix-lib_XXXXXX";
|
||||
char* _template_array = new char[_template.size()+1];
|
||||
memcpy(_template_array, _template.c_str(), _template.size());
|
||||
string dir = mkdtemp(_template_array);
|
||||
delete[] _template_array;
|
||||
#endif
|
||||
return dir;
|
||||
}
|
||||
|
||||
/* Try to create a link and if does not work then make a copy */
|
||||
bool copyFile(const string& sourcePath, const string& destPath)
|
||||
{
|
||||
try {
|
||||
#ifndef _WIN32
|
||||
if (link(sourcePath.c_str(), destPath.c_str()) != 0) {
|
||||
#endif
|
||||
std::ifstream infile(sourcePath.c_str(), std::ios_base::binary);
|
||||
std::ofstream outfile(destPath.c_str(), std::ios_base::binary);
|
||||
outfile << infile.rdbuf();
|
||||
#ifndef _WIN32
|
||||
}
|
||||
#endif
|
||||
} catch (exception& e) {
|
||||
cerr << e.what() << endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
string getExecutablePath()
|
||||
{
|
||||
char binRootPath[PATH_MAX];
|
||||
|
||||
#ifdef _WIN32
|
||||
GetModuleFileName(NULL, binRootPath, PATH_MAX);
|
||||
return std::string(binRootPath);
|
||||
#elif __APPLE__
|
||||
uint32_t max = (uint32_t)PATH_MAX;
|
||||
_NSGetExecutablePath(binRootPath, &max);
|
||||
return std::string(binRootPath);
|
||||
#else
|
||||
ssize_t size = readlink("/proc/self/exe", binRootPath, PATH_MAX);
|
||||
if (size != -1) {
|
||||
return std::string(binRootPath, size);
|
||||
}
|
||||
#endif
|
||||
|
||||
return "";
|
||||
}
|
||||
|
||||
bool writeTextFile(const string& path, const string& content)
|
||||
{
|
||||
std::ofstream file;
|
||||
file.open(path.c_str());
|
||||
file << content;
|
||||
file.close();
|
||||
return true;
|
||||
}
|
||||
|
||||
string getCurrentDirectory()
|
||||
{
|
||||
char* a_cwd = getcwd(NULL, 0);
|
||||
string s_cwd(a_cwd);
|
||||
free(a_cwd);
|
||||
return s_cwd;
|
||||
}
|
||||
|
||||
string getDataDirectory()
|
||||
{
|
||||
#ifdef _WIN32
|
||||
char* cDataDir = ::getenv("APPDATA");
|
||||
#else
|
||||
char* cDataDir = ::getenv("KIWIX_DATA_DIR");
|
||||
#endif
|
||||
std::string dataDir = cDataDir==nullptr ? "" : cDataDir;
|
||||
if (!dataDir.empty())
|
||||
return dataDir;
|
||||
#ifdef _WIN32
|
||||
cDataDir = ::getenv("USERPROFILE");
|
||||
dataDir = cDataDir==nullptr ? getCurrentDirectory() : cDataDir;
|
||||
#else
|
||||
cDataDir = ::getenv("XDG_DATA_HOME");
|
||||
dataDir = cDataDir==nullptr ? "" : cDataDir;
|
||||
if (dataDir.empty()) {
|
||||
cDataDir = ::getenv("HOME");
|
||||
dataDir = cDataDir==nullptr ? getCurrentDirectory() : cDataDir;
|
||||
dataDir = appendToDirectory(dataDir, ".local");
|
||||
dataDir = appendToDirectory(dataDir, "share");
|
||||
}
|
||||
#endif
|
||||
return appendToDirectory(dataDir, "kiwix");
|
||||
}
|
93
src/tools/regexTools.cpp
Normal file
93
src/tools/regexTools.cpp
Normal file
@ -0,0 +1,93 @@
|
||||
/*
|
||||
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include <tools/regexTools.h>
|
||||
|
||||
std::map<std::string, icu::RegexMatcher*> regexCache;
|
||||
|
||||
icu::RegexMatcher* buildRegex(const std::string& regex)
|
||||
{
|
||||
icu::RegexMatcher* matcher;
|
||||
auto itr = regexCache.find(regex);
|
||||
|
||||
/* Regex is in cache */
|
||||
if (itr != regexCache.end()) {
|
||||
matcher = itr->second;
|
||||
}
|
||||
|
||||
/* Regex needs to be parsed (and cached) */
|
||||
else {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
icu::UnicodeString uregex(regex.c_str());
|
||||
matcher = new icu::RegexMatcher(uregex, UREGEX_CASE_INSENSITIVE, status);
|
||||
regexCache[regex] = matcher;
|
||||
}
|
||||
|
||||
return matcher;
|
||||
}
|
||||
|
||||
/* todo */
|
||||
void freeRegexCache()
|
||||
{
|
||||
}
|
||||
bool matchRegex(const std::string& content, const std::string& regex)
|
||||
{
|
||||
ucnv_setDefaultName("UTF-8");
|
||||
icu::UnicodeString ucontent(content.c_str());
|
||||
auto matcher = buildRegex(regex);
|
||||
matcher->reset(ucontent);
|
||||
return matcher->find();
|
||||
}
|
||||
|
||||
std::string replaceRegex(const std::string& content,
|
||||
const std::string& replacement,
|
||||
const std::string& regex)
|
||||
{
|
||||
ucnv_setDefaultName("UTF-8");
|
||||
icu::UnicodeString ucontent(content.c_str());
|
||||
icu::UnicodeString ureplacement(replacement.c_str());
|
||||
auto matcher = buildRegex(regex);
|
||||
matcher->reset(ucontent);
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
auto uresult = matcher->replaceAll(ureplacement, status);
|
||||
std::string tmp;
|
||||
uresult.toUTF8String(tmp);
|
||||
return tmp;
|
||||
}
|
||||
|
||||
std::string appendToFirstOccurence(const std::string& content,
|
||||
const std::string regex,
|
||||
const std::string& replacement)
|
||||
{
|
||||
ucnv_setDefaultName("UTF-8");
|
||||
icu::UnicodeString ucontent(content.c_str());
|
||||
icu::UnicodeString ureplacement(replacement.c_str());
|
||||
auto matcher = buildRegex(regex);
|
||||
matcher->reset(ucontent);
|
||||
|
||||
if (matcher->find()) {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
ucontent.insert(matcher->end(status), ureplacement);
|
||||
std::string tmp;
|
||||
ucontent.toUTF8String(tmp);
|
||||
return tmp;
|
||||
}
|
||||
|
||||
return content;
|
||||
}
|
374
src/tools/stringTools.cpp
Normal file
374
src/tools/stringTools.cpp
Normal file
@ -0,0 +1,374 @@
|
||||
/*
|
||||
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include <tools/stringTools.h>
|
||||
|
||||
#include <unicode/normlzr.h>
|
||||
#include <unicode/rep.h>
|
||||
#include <unicode/translit.h>
|
||||
#include <unicode/ucnv.h>
|
||||
#include <unicode/uniset.h>
|
||||
#include <unicode/ustring.h>
|
||||
|
||||
/* tell ICU where to find its dat file (tables) */
|
||||
void kiwix::loadICUExternalTables()
|
||||
{
|
||||
#ifdef __APPLE__
|
||||
std::string executablePath = getExecutablePath();
|
||||
std::string executableDirectory = removeLastPathElement(executablePath);
|
||||
std::string datPath
|
||||
= computeAbsolutePath(executableDirectory, "icudt58l.dat");
|
||||
try {
|
||||
u_setDataDirectory(datPath.c_str());
|
||||
} catch (exception& e) {
|
||||
std::cerr << e.what() << std::endl;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
std::string kiwix::removeAccents(const std::string& text)
|
||||
{
|
||||
loadICUExternalTables();
|
||||
ucnv_setDefaultName("UTF-8");
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
auto removeAccentsTrans = icu::Transliterator::createInstance(
|
||||
"Lower; NFD; [:M:] remove; NFC", UTRANS_FORWARD, status);
|
||||
icu::UnicodeString ustring(text.c_str());
|
||||
removeAccentsTrans->transliterate(ustring);
|
||||
delete removeAccentsTrans;
|
||||
std::string unaccentedText;
|
||||
ustring.toUTF8String(unaccentedText);
|
||||
return unaccentedText;
|
||||
}
|
||||
|
||||
/* Prepare integer for display */
|
||||
std::string kiwix::beautifyInteger(uint64_t number)
|
||||
{
|
||||
std::stringstream numberStream;
|
||||
numberStream << number;
|
||||
std::string numberString = numberStream.str();
|
||||
|
||||
signed int offset = numberString.size() - 3;
|
||||
while (offset > 0) {
|
||||
numberString.insert(offset, ",");
|
||||
offset -= 3;
|
||||
}
|
||||
|
||||
return numberString;
|
||||
}
|
||||
|
||||
std::string kiwix::beautifyFileSize(uint64_t number)
|
||||
{
|
||||
std::stringstream ss;
|
||||
ss << std::fixed << std::setprecision(2);
|
||||
if (number>>30)
|
||||
ss << (number/(1024.0*1024*1024)) << " GB";
|
||||
else if (number>>20)
|
||||
ss << (number/(1024.0*1024)) << " MB";
|
||||
else if (number>>10)
|
||||
ss << (number/1024.0) << " KB";
|
||||
else
|
||||
ss << number << " B";
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
void kiwix::printStringInHexadecimal(icu::UnicodeString s)
|
||||
{
|
||||
std::cout << std::showbase << std::hex;
|
||||
for (int i = 0; i < s.length(); i++) {
|
||||
char c = (char)((s.getTerminatedBuffer())[i]);
|
||||
if (c & 0x80) {
|
||||
std::cout << (c & 0xffff) << " ";
|
||||
} else {
|
||||
std::cout << c << " ";
|
||||
}
|
||||
}
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
void kiwix::printStringInHexadecimal(const char* s)
|
||||
{
|
||||
std::cout << std::showbase << std::hex;
|
||||
for (char const* pc = s; *pc; ++pc) {
|
||||
if (*pc & 0x80) {
|
||||
std::cout << (*pc & 0xffff);
|
||||
} else {
|
||||
std::cout << *pc;
|
||||
}
|
||||
std::cout << ' ';
|
||||
}
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
void kiwix::stringReplacement(std::string& str,
|
||||
const std::string& oldStr,
|
||||
const std::string& newStr)
|
||||
{
|
||||
size_t pos = 0;
|
||||
while ((pos = str.find(oldStr, pos)) != std::string::npos) {
|
||||
str.replace(pos, oldStr.length(), newStr);
|
||||
pos += newStr.length();
|
||||
}
|
||||
}
|
||||
|
||||
/* Encode string to avoid XSS attacks */
|
||||
std::string kiwix::encodeDiples(const std::string& str)
|
||||
{
|
||||
std::string result = str;
|
||||
kiwix::stringReplacement(result, "<", "<");
|
||||
kiwix::stringReplacement(result, ">", ">");
|
||||
return result;
|
||||
}
|
||||
|
||||
/* urlEncode() based on javascript encodeURI() &
|
||||
encodeURIComponent(). Mostly code from rstudio/httpuv (GPLv3) */
|
||||
|
||||
bool isReservedUrlChar(char c)
|
||||
{
|
||||
switch (c) {
|
||||
case ';':
|
||||
case ',':
|
||||
case '/':
|
||||
case '?':
|
||||
case ':':
|
||||
case '@':
|
||||
case '&':
|
||||
case '=':
|
||||
case '+':
|
||||
case '$':
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool needsEscape(char c, bool encodeReserved)
|
||||
{
|
||||
if (c >= 'a' && c <= 'z')
|
||||
return false;
|
||||
if (c >= 'A' && c <= 'Z')
|
||||
return false;
|
||||
if (c >= '0' && c <= '9')
|
||||
return false;
|
||||
if (isReservedUrlChar(c))
|
||||
return encodeReserved;
|
||||
switch (c) {
|
||||
case '-':
|
||||
case '_':
|
||||
case '.':
|
||||
case '!':
|
||||
case '~':
|
||||
case '*':
|
||||
case '\'':
|
||||
case '(':
|
||||
case ')':
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
int hexToInt(char c) {
|
||||
switch (c) {
|
||||
case '0': return 0;
|
||||
case '1': return 1;
|
||||
case '2': return 2;
|
||||
case '3': return 3;
|
||||
case '4': return 4;
|
||||
case '5': return 5;
|
||||
case '6': return 6;
|
||||
case '7': return 7;
|
||||
case '8': return 8;
|
||||
case '9': return 9;
|
||||
case 'A': case 'a': return 10;
|
||||
case 'B': case 'b': return 11;
|
||||
case 'C': case 'c': return 12;
|
||||
case 'D': case 'd': return 13;
|
||||
case 'E': case 'e': return 14;
|
||||
case 'F': case 'f': return 15;
|
||||
default: return -1;
|
||||
}
|
||||
}
|
||||
|
||||
std::string kiwix::urlEncode(const std::string& value, bool encodeReserved)
|
||||
{
|
||||
std::ostringstream os;
|
||||
os << std::hex << std::uppercase;
|
||||
for (std::string::const_iterator it = value.begin();
|
||||
it != value.end();
|
||||
it++) {
|
||||
|
||||
if (!needsEscape(*it, encodeReserved)) {
|
||||
os << *it;
|
||||
} else {
|
||||
os << '%' << std::setw(2) << static_cast<unsigned int>(static_cast<unsigned char>(*it));
|
||||
}
|
||||
}
|
||||
return os.str();
|
||||
}
|
||||
|
||||
std::string kiwix::urlDecode(const std::string& value, bool component)
|
||||
{
|
||||
std::ostringstream os;
|
||||
for (std::string::const_iterator it = value.begin();
|
||||
it != value.end();
|
||||
it++) {
|
||||
|
||||
// If there aren't enough characters left for this to be a
|
||||
// valid escape code, just use the character and move on
|
||||
if (it > value.end() - 3) {
|
||||
os << *it;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (*it == '%') {
|
||||
char hi = *(++it);
|
||||
char lo = *(++it);
|
||||
int iHi = hexToInt(hi);
|
||||
int iLo = hexToInt(lo);
|
||||
if (iHi < 0 || iLo < 0) {
|
||||
// Invalid escape sequence
|
||||
os << '%' << hi << lo;
|
||||
continue;
|
||||
}
|
||||
char c = (char)(iHi << 4 | iLo);
|
||||
if (!component && isReservedUrlChar(c)) {
|
||||
os << '%' << hi << lo;
|
||||
} else {
|
||||
os << c;
|
||||
}
|
||||
} else {
|
||||
os << *it;
|
||||
}
|
||||
}
|
||||
|
||||
return os.str();
|
||||
}
|
||||
|
||||
/* Split string in a token array */
|
||||
std::vector<std::string> kiwix::split(const std::string& str,
|
||||
const std::string& delims = " *-")
|
||||
{
|
||||
std::string::size_type lastPos = str.find_first_not_of(delims, 0);
|
||||
std::string::size_type pos = str.find_first_of(delims, lastPos);
|
||||
std::vector<std::string> tokens;
|
||||
|
||||
while (std::string::npos != pos || std::string::npos != lastPos) {
|
||||
tokens.push_back(str.substr(lastPos, pos - lastPos));
|
||||
lastPos = str.find_first_not_of(delims, pos);
|
||||
pos = str.find_first_of(delims, lastPos);
|
||||
}
|
||||
|
||||
return tokens;
|
||||
}
|
||||
|
||||
std::vector<std::string> kiwix::split(const char* lhs, const char* rhs)
|
||||
{
|
||||
const std::string m1(lhs), m2(rhs);
|
||||
return split(m1, m2);
|
||||
}
|
||||
|
||||
std::vector<std::string> kiwix::split(const char* lhs, const std::string& rhs)
|
||||
{
|
||||
return split(lhs, rhs.c_str());
|
||||
}
|
||||
|
||||
std::vector<std::string> kiwix::split(const std::string& lhs, const char* rhs)
|
||||
{
|
||||
return split(lhs.c_str(), rhs);
|
||||
}
|
||||
|
||||
std::string kiwix::ucFirst(const std::string& word)
|
||||
{
|
||||
if (word.empty()) {
|
||||
return "";
|
||||
}
|
||||
|
||||
std::string result;
|
||||
|
||||
icu::UnicodeString unicodeWord(word.c_str());
|
||||
auto unicodeFirstLetter = icu::UnicodeString(unicodeWord, 0, 1).toUpper();
|
||||
unicodeWord.replace(0, 1, unicodeFirstLetter);
|
||||
unicodeWord.toUTF8String(result);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
std::string kiwix::ucAll(const std::string& word)
|
||||
{
|
||||
if (word.empty()) {
|
||||
return "";
|
||||
}
|
||||
|
||||
std::string result;
|
||||
|
||||
icu::UnicodeString unicodeWord(word.c_str());
|
||||
unicodeWord.toUpper().toUTF8String(result);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
std::string kiwix::lcFirst(const std::string& word)
|
||||
{
|
||||
if (word.empty()) {
|
||||
return "";
|
||||
}
|
||||
|
||||
std::string result;
|
||||
|
||||
icu::UnicodeString unicodeWord(word.c_str());
|
||||
auto unicodeFirstLetter = icu::UnicodeString(unicodeWord, 0, 1).toLower();
|
||||
unicodeWord.replace(0, 1, unicodeFirstLetter);
|
||||
unicodeWord.toUTF8String(result);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
std::string kiwix::lcAll(const std::string& word)
|
||||
{
|
||||
if (word.empty()) {
|
||||
return "";
|
||||
}
|
||||
|
||||
std::string result;
|
||||
|
||||
icu::UnicodeString unicodeWord(word.c_str());
|
||||
unicodeWord.toLower().toUTF8String(result);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
std::string kiwix::toTitle(const std::string& word)
|
||||
{
|
||||
if (word.empty()) {
|
||||
return "";
|
||||
}
|
||||
|
||||
std::string result;
|
||||
|
||||
icu::UnicodeString unicodeWord(word.c_str());
|
||||
unicodeWord = unicodeWord.toTitle(0);
|
||||
unicodeWord.toUTF8String(result);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
std::string kiwix::normalize(const std::string& word)
|
||||
{
|
||||
return kiwix::lcAll(word);
|
||||
}
|
Reference in New Issue
Block a user