[API break] Move all the tools in the tools directory instead of common.

The `common` name dates from the time when kiwix was a single repository
for all the projects (android, desktop, server...).

Now that we have split the repositories and kiwix-lib is itself the "common" repo,
the "common" directory no longer makes sense.
This commit is contained in:
Matthieu Gautier
2018-11-28 14:15:49 +01:00
parent ecb2a80baf
commit af7689e3e8
26 changed files with 42 additions and 43 deletions

124
src/tools/base64.cpp Normal file
View File

@ -0,0 +1,124 @@
/*
base64.cpp and base64.h
Copyright (C) 2004-2008 René Nyffenegger
This source code is provided 'as-is', without any express or implied
warranty. In no event will the author be held liable for any damages
arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:
1. The origin of this source code must not be misrepresented; you must not
claim that you wrote the original source code. If you use this source code
in a product, an acknowledgment in the product documentation would be
appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original source code.
3. This notice may not be removed or altered from any source distribution.
René Nyffenegger rene.nyffenegger@adp-gmbh.ch
*/
#include <tools/base64.h>
#include <iostream>
/* The 64-symbol alphabet shared by base64_encode() and base64_decode():
   a symbol's index in this string is the 6-bit value it stands for. */
static const std::string base64_chars =
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"abcdefghijklmnopqrstuvwxyz"
"0123456789+/";
/* Tell whether `c` may appear in the body of a base64 text: '+', '/' or any
   character that isalnum() classifies as alphanumeric. */
static inline bool is_base64(unsigned char c) {
  if (c == '+' || c == '/') {
    return true;
  }
  return isalnum(c) != 0;
}
/* Encode the bytes of `inString` as base64 text.
 *
 * Every group of three input bytes becomes four symbols of base64_chars; a
 * trailing group of one or two bytes is zero-filled, encoded, and completed
 * with '=' so that the output length is always a multiple of four.
 */
std::string base64_encode(const std::string& inString) {
  std::string encoded;
  const unsigned char* cursor =
      reinterpret_cast<const unsigned char*>(inString.data());
  auto remaining = inString.size();
  unsigned char triple[3];
  unsigned char sextets[4];
  int filled = 0;

  for (; remaining != 0; --remaining) {
    triple[filled++] = *cursor++;
    if (filled < 3) {
      continue;
    }
    /* A complete group: split its 24 bits into four 6-bit indexes. */
    sextets[0] = (triple[0] & 0xfc) >> 2;
    sextets[1] = ((triple[0] & 0x03) << 4) + ((triple[1] & 0xf0) >> 4);
    sextets[2] = ((triple[1] & 0x0f) << 2) + ((triple[2] & 0xc0) >> 6);
    sextets[3] = triple[2] & 0x3f;
    for (int k = 0; k < 4; ++k) {
      encoded += base64_chars[sextets[k]];
    }
    filled = 0;
  }

  if (filled != 0) {
    /* Incomplete last group: pad the missing bytes with zeros. */
    for (int k = filled; k < 3; ++k) {
      triple[k] = '\0';
    }
    sextets[0] = (triple[0] & 0xfc) >> 2;
    sextets[1] = ((triple[0] & 0x03) << 4) + ((triple[1] & 0xf0) >> 4);
    sextets[2] = ((triple[1] & 0x0f) << 2) + ((triple[2] & 0xc0) >> 6);
    sextets[3] = triple[2] & 0x3f;
    /* Only filled + 1 symbols carry data; the rest is '=' padding. */
    for (int k = 0; k <= filled; ++k) {
      encoded += base64_chars[sextets[k]];
    }
    for (int pad = filled; pad < 3; ++pad) {
      encoded += '=';
    }
  }
  return encoded;
}
/* Decode base64 text back into raw bytes.
 *
 * Reading stops at the first '=' or at the first character rejected by
 * is_base64(); whatever was decoded up to that point is returned.
 */
std::string base64_decode(std::string const& encoded_string) {
  std::string decoded;
  unsigned char quad[4], triple[3];
  int remaining = encoded_string.size();
  int pos = 0;
  int pending = 0;

  while (remaining-- != 0) {
    const char symbol = encoded_string[pos];
    if (symbol == '=' || !is_base64(symbol)) {
      break;
    }
    ++pos;
    quad[pending++] = symbol;
    if (pending == 4) {
      /* Map the four symbols to their 6-bit values, then repack as 3 bytes. */
      for (int k = 0; k < 4; ++k) {
        quad[k] = base64_chars.find(quad[k]);
      }
      triple[0] = (quad[0] << 2) + ((quad[1] & 0x30) >> 4);
      triple[1] = ((quad[1] & 0xf) << 4) + ((quad[2] & 0x3c) >> 2);
      triple[2] = ((quad[2] & 0x3) << 6) + quad[3];
      for (int k = 0; k < 3; ++k) {
        decoded += triple[k];
      }
      pending = 0;
    }
  }

  if (pending != 0) {
    /* Incomplete last group: the unused slots are zeroed before the lookup;
       their values never reach the output because only pending - 1 bytes
       are emitted. */
    for (int k = pending; k < 4; ++k) {
      quad[k] = 0;
    }
    for (int k = 0; k < 4; ++k) {
      quad[k] = base64_chars.find(quad[k]);
    }
    triple[0] = (quad[0] << 2) + ((quad[1] & 0x30) >> 4);
    triple[1] = ((quad[1] & 0xf) << 4) + ((quad[2] & 0x3c) >> 2);
    triple[2] = ((quad[2] & 0x3) << 6) + quad[3];
    for (int k = 0; k < pending - 1; ++k) {
      decoded += triple[k];
    }
  }
  return decoded;
}

209
src/tools/networkTools.cpp Normal file
View File

@ -0,0 +1,209 @@
/*
* Copyright 2012 Emmanuel Engelhart <kelson@kiwix.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#include <tools/networkTools.h>
#ifdef _WIN32
#include <winsock2.h>
#include <ws2tcpip.h>
#else
#include <net/if.h>
#include <netdb.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <unistd.h>
#endif
#include <curl/curl.h>
#include <sstream>
#include <iostream>
std::map<std::string, std::string> kiwix::getNetworkInterfaces()
{
std::map<std::string, std::string> interfaces;
#ifdef _WIN32
SOCKET sd = WSASocket(AF_INET, SOCK_DGRAM, 0, 0, 0, 0);
if (sd == (SOCKET)SOCKET_ERROR) {
std::cerr << "Failed to get a socket. Error " << WSAGetLastError()
<< std::endl;
return interfaces;
}
INTERFACE_INFO InterfaceList[20];
unsigned long nBytesReturned;
if (WSAIoctl(sd,
SIO_GET_INTERFACE_LIST,
0,
0,
&InterfaceList,
sizeof(InterfaceList),
&nBytesReturned,
0,
0)
== SOCKET_ERROR) {
std::cerr << "Failed calling WSAIoctl: error " << WSAGetLastError()
<< std::endl;
return interfaces;
}
int nNumInterfaces = nBytesReturned / sizeof(INTERFACE_INFO);
for (int i = 0; i < nNumInterfaces; ++i) {
sockaddr_in* pAddress;
pAddress = (sockaddr_in*)&(InterfaceList[i].iiAddress);
/* Add to the map */
std::string interfaceName = std::string(inet_ntoa(pAddress->sin_addr));
std::string interfaceIp = std::string(inet_ntoa(pAddress->sin_addr));
interfaces.insert(
std::pair<std::string, std::string>(interfaceName, interfaceIp));
}
#else
/* Get Network interfaces information */
char buf[16384];
struct ifconf ifconf;
int fd = socket(PF_INET, SOCK_DGRAM, 0); /* Only IPV4 */
ifconf.ifc_len = sizeof buf;
ifconf.ifc_buf = buf;
if (ioctl(fd, SIOCGIFCONF, &ifconf) != 0) {
perror("ioctl(SIOCGIFCONF)");
exit(EXIT_FAILURE);
}
/* Go through each interface */
int i;
size_t len;
struct ifreq* ifreq;
ifreq = ifconf.ifc_req;
for (i = 0; i < ifconf.ifc_len;) {
if (ifreq->ifr_addr.sa_family == AF_INET) {
/* Get the network interface ip */
char host[128] = {0};
const int error = getnameinfo(&(ifreq->ifr_addr),
sizeof ifreq->ifr_addr,
host,
sizeof host,
0,
0,
NI_NUMERICHOST);
if (!error) {
std::string interfaceName = std::string(ifreq->ifr_name);
std::string interfaceIp = std::string(host);
/* Add to the map */
interfaces.insert(
std::pair<std::string, std::string>(interfaceName, interfaceIp));
} else {
perror("getnameinfo()");
}
}
/* some systems have ifr_addr.sa_len and adjust the length that
* way, but not mine. weird */
#ifndef __linux__
len = IFNAMSIZ + ifreq->ifr_addr.sa_len;
#else
len = sizeof *ifreq;
#endif
ifreq = (struct ifreq*)((char*)ifreq + len);
i += len;
}
#endif
return interfaces;
}
/* Pick the address most likely to be reachable by other machines.
 *
 * Selection order:
 *  1. (non-Windows only) the address of the first interface found among
 *     eth0, eth1, wlan0, wlan1, en0, en1;
 *  2. the first address starting with "192.168";
 *  3. the first address starting with "172.16.";
 *  4. the first address starting with "10.";
 *  5. "127.0.0.1" when nothing else matched.
 */
std::string kiwix::getBestPublicIp()
{
  const std::map<std::string, std::string> interfaces
      = kiwix::getNetworkInterfaces();

#ifndef _WIN32
  const char* const prioritizedNames[]
      = {"eth0", "eth1", "wlan0", "wlan1", "en0", "en1"};
  for (const char* name : prioritizedNames) {
    const auto found = interfaces.find(name);
    if (found != interfaces.end()) {
      return found->second;
    }
  }
#endif

  /* Prefixes are tried in order; each one is checked against every
     interface before moving on to the next prefix. */
  const char* const addressPrefixes[] = {"192.168", "172.16.", "10."};
  for (const char* rawPrefix : addressPrefixes) {
    const std::string prefix(rawPrefix);
    for (const auto& entry : interfaces) {
      const std::string& interfaceIp = entry.second;
      if (interfaceIp.compare(0, prefix.length(), prefix) == 0) {
        return interfaceIp;
      }
    }
  }

  return "127.0.0.1";
}
/* libcurl write callback: append the received chunk to the
 * std::stringstream passed through `userdata`.
 *
 * ptr      - start of the chunk
 * size     - size of one element
 * nmemb    - number of elements, so the chunk holds size * nmemb bytes
 * userdata - must point to a std::stringstream
 *
 * Returns the number of bytes consumed (size * nmemb), which is the value
 * libcurl expects from a write callback that accepted the whole chunk.
 */
size_t write_callback_to_iss(char* ptr, size_t size, size_t nmemb, void* userdata)
{
  auto stream = static_cast<std::stringstream*>(userdata);
  const size_t byteCount = size * nmemb;
  stream->write(ptr, byteCount);
  return byteCount;
}
/* Fetch `url` with an HTTP GET and return the response body.
 *
 * Throws std::runtime_error when the curl handle cannot be created, when the
 * transfer fails, or when the server answers with a status other than 200.
 * The curl handle is released on every path.
 */
std::string kiwix::download(const std::string& url) {
  auto curl = curl_easy_init();
  if (curl == nullptr) {
    /* curl_easy_init() returns NULL on failure; using it would crash. */
    throw std::runtime_error("Cannot initialize curl");
  }

  std::stringstream ss;
  curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
  curl_easy_setopt(curl, CURLOPT_HTTPGET, 1L);
  curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, &write_callback_to_iss);
  curl_easy_setopt(curl, CURLOPT_WRITEDATA, &ss);

  auto res = curl_easy_perform(curl);
  if (res != CURLE_OK) {
    curl_easy_cleanup(curl);
    throw std::runtime_error("Cannot perform request");
  }

  long response_code;
  curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &response_code);
  curl_easy_cleanup(curl);
  if (response_code != 200) {
    throw std::runtime_error("Invalid return code from server");
  }
  return ss.str();
}

326
src/tools/otherTools.cpp Normal file
View File

@ -0,0 +1,326 @@
/*
* Copyright 2014 Emmanuel Engelhart <kelson@kiwix.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#include <tools/otherTools.h>
#include <map>
/* Lookup table from a two-letter code (key) to a three-letter code (value),
   read by kiwix::converta2toa3().
   NOTE(review): the entries look like ISO 3166-1 alpha-2 -> alpha-3 country
   codes, but at least "en" -> "eng" is a language code; confirm the intended
   coverage before relying on it for either purpose. */
static std::map<std::string, std::string> codeisomapping {
//a
{ "ad", "and" },
{ "ae", "are" },
{ "af", "afg" },
{ "ag", "atg" },
{ "ai", "aia" },
{ "al", "alb" },
{ "am", "arm" },
{ "an", "ant" },
{ "ao", "ago" },
{ "aq", "ata" },
{ "ar", "arg" },
{ "as", "asm" },
{ "at", "aut" },
{ "au", "aus" },
{ "aw", "abw" },
{ "ax", "ala" },
{ "az", "aze" },
//b
{ "ba", "bih" },
{ "bb", "brb" },
{ "bd", "bgd" },
{ "be", "bel" },
{ "bf", "bfa" },
{ "bg", "bgr" },
{ "bh", "bhr" },
{ "bi", "bdi" },
{ "bj", "ben" },
{ "bl", "blm" },
{ "bn", "brn" },
{ "bm", "bmu" },
{ "bo", "bol" },
{ "br", "bra" },
{ "bs", "bhs" },
{ "bt", "btn" },
{ "bv", "bvt" },
{ "bw", "bwa" },
{ "by", "blr" },
{ "bz", "blz" },
//c
{ "ca", "can" },
{ "cc", "cck" },
{ "cd", "cod" },
{ "cf", "caf" },
{ "cg", "cog" },
{ "ch", "che" },
{ "ci", "civ" },
{ "ck", "cok" },
{ "cl", "chl" },
{ "cm", "cmr" },
{ "cn", "chn" },
{ "co", "col" },
{ "cr", "cri" },
{ "cu", "cub" },
{ "cv", "cpv" },
{ "cx", "cxr" },
{ "cy", "cyp" },
{ "cz", "cze" },
//d
{ "de", "deu" },
{ "dj", "dji" },
{ "dk", "dnk" },
{ "dm", "dma" },
{ "do", "dom" },
{ "dz", "dza" },
//e
{ "ec", "ecu" },
{ "ee", "est" },
{ "eg", "egy" },
{ "eh", "esh" },
{ "en", "eng" },
{ "er", "eri" },
{ "es", "esp" },
{ "et", "eth" },
//f
{ "fi", "fin" },
{ "fj", "fji" },
{ "fk", "flk" },
{ "fm", "fsm" },
{ "fo", "fro" },
{ "fr", "fra" },
//g
{ "ga", "gab" },
{ "gb", "gbr" },
{ "gd", "grd" },
{ "ge", "geo" },
{ "gf", "guf" },
{ "gg", "ggy" },
{ "gh", "gha" },
{ "gi", "gib" },
{ "gl", "grl" },
{ "gm", "gmb" },
{ "gn", "gin" },
{ "gp", "glp" },
{ "gq", "gnq" },
{ "gr", "grc" },
{ "gs", "sgs" },
{ "gt", "gtm" },
{ "gu", "gum" },
{ "gw", "gnb" },
{ "gy", "guy" },
//h
{ "hk", "hkg" },
{ "hm", "hmd" },
{ "hn", "hnd" },
{ "hr", "hrv" },
{ "ht", "hti" },
{ "hu", "hun" },
//i
{ "id", "idn" },
{ "ie", "irl" },
{ "il", "isr" },
{ "im", "imn" },
{ "in", "ind" },
{ "io", "iot" },
{ "iq", "irq" },
{ "ir", "irn" },
{ "is", "isl" },
{ "it", "ita" },
//j
{ "je", "jey" },
{ "jm", "jam" },
{ "jo", "jor" },
{ "jp", "jpn" },
//k
{ "ke", "ken" },
{ "kg", "kgz" },
{ "kh", "khm" },
{ "ki", "kir" },
{ "km", "com" },
{ "kn", "kna" },
{ "kp", "prk" },
{ "kr", "kor" },
{ "kw", "kwt" },
{ "ky", "cym" },
{ "kz", "kaz" },
//l
{ "la", "lao" },
{ "lb", "lbn" },
{ "lc", "lca" },
{ "li", "lie" },
{ "lk", "lka" },
{ "lr", "lbr" },
{ "ls", "lso" },
{ "lt", "ltu" },
{ "lu", "lux" },
{ "lv", "lva" },
{ "ly", "lby" },
//m
{ "ma", "mar" },
{ "mc", "mco" },
{ "md", "mda" },
{ "me", "mne" },
{ "mf", "maf" },
{ "mg", "mdg" },
{ "mh", "mhl" },
{ "mk", "mkd" },
{ "ml", "mli" },
{ "mm", "mmr" },
{ "mn", "mng" },
{ "mo", "mac" },
{ "mp", "mnp" },
{ "mq", "mtq" },
{ "mr", "mrt" },
{ "ms", "msr" },
{ "mt", "mlt" },
{ "mu", "mus" },
{ "mv", "mdv" },
{ "mw", "mwi" },
{ "mx", "mex" },
{ "my", "mys" },
{ "mz", "moz" },
//n
{ "na", "nam" },
{ "nc", "ncl" },
{ "ne", "ner" },
{ "nf", "nfk" },
{ "ng", "nga" },
{ "ni", "nic" },
{ "nl", "nld" },
{ "no", "nor" },
{ "np", "npl" },
{ "nr", "nru" },
{ "nu", "niu" },
{ "nz", "nzl" },
//o
{ "om", "omn" },
//p
{ "pa", "pan" },
{ "pe", "per" },
{ "pf", "pyf" },
{ "pg", "png" },
{ "ph", "phl" },
{ "pk", "pak" },
{ "pl", "pol" },
{ "pm", "spm" },
{ "pn", "pcn" },
{ "pr", "pri" },
{ "ps", "pse" },
{ "pt", "prt" },
{ "pw", "plw" },
{ "py", "pry" },
//q
{ "qa", "qat" },
//r
{ "re", "reu" },
{ "ro", "rou" },
{ "rs", "srb" },
{ "ru", "rus" },
{ "rw", "rwa" },
//s
{ "sa", "sau" },
{ "sb", "slb" },
{ "sc", "syc" },
{ "sd", "sdn" },
{ "se", "swe" },
{ "sg", "sgp" },
{ "sh", "shn" },
{ "si", "svn" },
{ "sj", "sjm" },
{ "sk", "svk" },
{ "sl", "sle" },
{ "sm", "smr" },
{ "sn", "sen" },
{ "so", "som" },
{ "sr", "sur" },
{ "ss", "ssd" },
{ "st", "stp" },
{ "sv", "slv" },
{ "sy", "syr" },
{ "sz", "swz" },
//t
{ "tc", "tca" },
{ "td", "tcd" },
{ "tf", "atf" },
{ "tg", "tgo" },
{ "th", "tha" },
{ "tj", "tjk" },
{ "tk", "tkl" },
{ "tl", "tls" },
{ "tm", "tkm" },
{ "tn", "tun" },
{ "to", "ton" },
{ "tr", "tur" },
{ "tt", "tto" },
{ "tv", "tuv" },
{ "tw", "twn" },
{ "tz", "tza" },
//u
{ "ua", "ukr" },
{ "ug", "uga" },
{ "um", "umi" },
{ "us", "usa" },
{ "uy", "ury" },
{ "uz", "uzb" },
//v
{ "va", "vat" },
{ "vc", "vct" },
{ "ve", "ven" },
{ "vg", "vgb" },
{ "vi", "vir" },
{ "vn", "vnm" },
{ "vu", "vut" },
//w
{ "wf", "wlf" },
{ "ws", "wsm" },
//y
{ "ye", "yem" },
{ "yt", "myt" },
// z
{ "za", "zaf" },
{ "zm", "zmb" },
{ "zw", "zwe" }
};
/* Pause for `milliseconds` milliseconds, through Sleep() on Windows and
   usleep() (which takes microseconds) everywhere else. */
void kiwix::sleep(unsigned int milliseconds)
{
#ifndef _WIN32
  const unsigned int microseconds = 1000 * milliseconds;
  usleep(microseconds);
#else
  Sleep(milliseconds);
#endif
}
struct XmlStringWriter: pugi::xml_writer
{
std::string result;
virtual void write(const void* data, size_t size){
result.append(static_cast<const char*>(data), size);
}
};
/* Serialise `node` to a string by printing it into an XmlStringWriter,
   passing " " as the indentation argument of pugi::xml_node::print(). */
std::string kiwix::nodeToString(pugi::xml_node node)
{
  XmlStringWriter sink;
  node.print(sink, " ");
  return sink.result;
}
/* Return the three-letter code registered for `a2code` in codeisomapping.
   Throws std::out_of_range (from std::map::at) when the code is unknown. */
std::string kiwix::converta2toa3(const std::string& a2code){
  const std::string& a3code = codeisomapping.at(a2code);
  return a3code;
}

335
src/tools/pathTools.cpp Normal file
View File

@ -0,0 +1,335 @@
/*
* Copyright 2011-2014 Emmanuel Engelhart <kelson@kiwix.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#include <tools/pathTools.h>
#ifdef __APPLE__
#include <limits.h>
#include <mach-o/dyld.h>
#elif _WIN32
#include <direct.h>
#include <windows.h>
#include "shlwapi.h"
#define getcwd _getcwd // stupid MSFT "deprecation" warning
#endif
#ifdef _WIN32
#else
#include <unistd.h>
#endif
/* Path component separator used by the helpers below: a backslash on
   Windows, a forward slash everywhere else. */
#ifdef _WIN32
const std::string SEPARATOR("\\");
#else
const std::string SEPARATOR("/");
#include <unistd.h>
#endif
#include <stdlib.h>
#ifndef PATH_MAX
#define PATH_MAX 1024
#endif
/* Tell whether `path` is relative.
 *
 * An empty path is reported as not relative. Otherwise a path is absolute
 * when it starts with "/" (non-Windows) or when its second and third
 * characters are ":\" (Windows), and relative in every other case.
 */
bool isRelativePath(const string& path)
{
  if (path.empty()) {
    return false;
  }
#ifdef _WIN32
  return path.substr(1, 2) != ":\\";
#else
  return path.substr(0, 1) != "/";
#endif
}
/* Build the relative path that leads from `path` to `absolutePath`.
 *
 * Both arguments are split on SEPARATOR. The components they share at the
 * front are dropped, one ".." is emitted for each remaining component of
 * `path`, and the remaining components of `absolutePath` are appended
 * (joined by SEPARATOR, without a trailing one).
 */
string computeRelativePath(const string path, const string absolutePath)
{
  const std::vector<std::string> fromParts = kiwix::split(path, SEPARATOR);
  const std::vector<std::string> toParts
      = kiwix::split(absolutePath, SEPARATOR);

  /* Length of the leading run of identical components. */
  unsigned int shared = 0;
  for (; shared < fromParts.size() && shared < toParts.size(); ++shared) {
    if (fromParts[shared] != toParts[shared]) {
      break;
    }
  }

  string result;
#ifdef _WIN32
  /* On Windows you have a token more because the root is represented
     by a letter */
  if (shared == 0) {
    result = ".." + SEPARATOR;
  }
#endif

  for (unsigned int up = shared; up < fromParts.size(); ++up) {
    result += ".." + SEPARATOR;
  }

  for (unsigned int down = shared; down < toParts.size(); ++down) {
    result += toParts[down];
    if (down + 1 < toParts.size()) {
      result += SEPARATOR;
    }
  }
  return result;
}
/* Resolve `relativePath` against the directory `path`.
 *
 * path         - base directory; when empty, the current working directory
 *                is used (if it cannot be determined the base is reduced to
 *                a lone SEPARATOR).
 * relativePath - components separated by '/'. ".." removes the last
 *                component of the result, "." and empty components are
 *                skipped, anything else is appended.
 *
 * Warning: the relative path must be with slashes
 *
 * The path is tokenised without strtok()/strdup(), so nothing is leaked and
 * no hidden global tokeniser state is touched.
 */
string computeAbsolutePath(const string path, const string relativePath)
{
  string absolutePath;

  if (path.empty()) {
    /* getcwd(NULL, 0) hands back a malloc'ed buffer that we own. */
#ifdef _WIN32
    char* cwd = _getcwd(NULL, 0);
#else
    char* cwd = getcwd(NULL, 0);
#endif
    if (cwd != NULL) {
      absolutePath = cwd;
      free(cwd);
    }
    absolutePath += SEPARATOR;
  } else {
    absolutePath = path.substr(path.length() - 1, 1) == SEPARATOR
                       ? path
                       : path + SEPARATOR;
  }

  /* Like the former C-string based code, ignore anything after a NUL. */
  const string components(relativePath.c_str());

  string::size_type begin = components.find_first_not_of('/');
  while (begin != string::npos) {
    const string::size_type end = components.find_first_of('/', begin);
    const string token = components.substr(
        begin, end == string::npos ? string::npos : end - begin);
    /* Start of the following non-empty component, npos when there is none. */
    const string::size_type next = components.find_first_not_of('/', end);

    if (token == "..") {
      absolutePath = removeLastPathElement(absolutePath, true, false);
    } else if (token != ".") {
      absolutePath += token;
      if (next != string::npos) {
        absolutePath += SEPARATOR;
      }
    }
    begin = next;
  }

  return absolutePath;
}
/* Return `path` cut at its last SEPARATOR.
 *
 * removePreSeparator  - when the path ends with a separator, drop that
 *                       trailing separator first and cut at the one before
 *                       it (outside Windows this is skipped when the
 *                       trailing separator is also the first one).
 * removePostSeparator - when true the separator at the cut point is removed
 *                       too; when false it is kept at the end of the result.
 */
string removeLastPathElement(const string path,
                             const bool removePreSeparator,
                             const bool removePostSeparator)
{
  string trimmed = path;
  size_t lastSep = trimmed.find_last_of(SEPARATOR);

  bool dropTrailing = removePreSeparator && lastSep == trimmed.length() - 1;
#ifndef _WIN32
  dropTrailing = dropTrailing && lastSep != trimmed.find_first_of(SEPARATOR);
#endif

  if (dropTrailing) {
    trimmed = trimmed.substr(0, lastSep);
    lastSep = trimmed.find_last_of(SEPARATOR);
  }

  if (removePostSeparator) {
    return trimmed.substr(0, lastSep);
  }
  return trimmed.substr(0, lastSep + 1);
}
/* Join `directoryPath` and `filename` with SEPARATOR in between. The
   separator is always inserted, whatever `directoryPath` ends with. */
string appendToDirectory(const string& directoryPath, const string& filename)
{
  string joined(directoryPath);
  joined += SEPARATOR;
  joined += filename;
  return joined;
}
/* Return what follows the last SEPARATOR of `path`; the whole path when it
   contains no separator. */
string getLastPathElement(const string& path)
{
  const string::size_type lastSep = path.find_last_of(SEPARATOR);
  return path.substr(lastSep + 1);
}
/* Return the size of the file at `path` in units of 1024 bytes (rounded
 * down), or 0 when the file cannot be stat'ed.
 *
 * The stat result is checked so that a missing or unreadable file no longer
 * makes the function read an uninitialised structure.
 */
unsigned int getFileSize(const string& path)
{
#ifdef _WIN32
  struct _stat filestatus;
  if (_stat(path.c_str(), &filestatus) != 0) {
    return 0;
  }
#else
  struct stat filestatus;
  if (stat(path.c_str(), &filestatus) != 0) {
    return 0;
  }
#endif
  return filestatus.st_size / 1024;
}
/* Return the value of getFileSize(path) formatted as decimal text. */
string getFileSizeAsString(const string& path)
{
  const unsigned int size = getFileSize(path);
  ostringstream text;
  text << size;
  return text.str();
}
/* Read the whole file at `path` and return its content; an empty string is
 * returned when the file cannot be opened.
 *
 * The stream is opened at its end so that tellg() gives a size hint for
 * reserve(). The hint is only used when it is positive: tellg() reports
 * failure as -1, which must not reach reserve().
 */
string getFileContent(const string& path)
{
  std::string content;
  std::ifstream f(path, std::ios::in|std::ios::ate);
  if (!f.is_open()) {
    return content;
  }

  const std::streamoff size = f.tellg();
  if (size > 0) {
    content.reserve(static_cast<std::string::size_type>(size));
  }

  f.seekg(0, std::ios::beg);
  content.assign((std::istreambuf_iterator<char>(f)),
                 std::istreambuf_iterator<char>());
  return content;
}
/* Tell whether `path` exists: PathFileExists() on Windows; elsewhere the
   answer is whether the path can be opened for reading. */
bool fileExists(const string& path)
{
#ifdef _WIN32
  return PathFileExists(path.c_str());
#else
  fstream probe;
  probe.open(path.c_str(), ios::in);
  const bool opened = probe.is_open();
  probe.close();
  return opened;
#endif
}
/* Create the directory `path` and report whether the call succeeded.
   Outside Windows the requested mode is rwx for user and group, r-x for
   others. */
bool makeDirectory(const string& path)
{
#ifdef _WIN32
  return _mkdir(path.c_str()) == 0;
#else
  const mode_t mode = S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH;
  return mkdir(path.c_str(), mode) == 0;
#endif
}
/* Create a fresh, uniquely named temporary directory and return its path.
 *
 * Windows: the directory is named after a new UUID inside GetTempPath().
 * Elsewhere: mkdtemp() is used with the template /tmp/kiwix-lib_XXXXXX; an
 * empty string is returned when mkdtemp() fails.
 */
string makeTmpDirectory()
{
#ifdef _WIN32
  char cbase[MAX_PATH+1];
  int base_len = GetTempPath(MAX_PATH+1, cbase);
  UUID uuid;
  UuidCreate(&uuid);
  char* dir_name;
  UuidToString(&uuid, reinterpret_cast<unsigned char**>(&dir_name));
  string dir(cbase, base_len);
  dir += dir_name;
  _mkdir(dir.c_str());
  RpcStringFree(reinterpret_cast<unsigned char**>(&dir_name));
#else
  string base = "/tmp";
  /* mkdtemp() rewrites the trailing XXXXXX in place and needs a writable,
     NUL-terminated buffer; the std::string's own storage provides both. */
  string dir = base + "/kiwix-lib_XXXXXX";
  if (mkdtemp(&dir[0]) == NULL) {
    return "";
  }
#endif
  return dir;
}
/* Try to create a link and if does not work then make a copy.
 *
 * Outside Windows a hard link from `sourcePath` to `destPath` is attempted
 * first. When that fails (or on Windows) the content is copied through
 * binary file streams.
 *
 * Returns true when the link or the copy succeeded, false otherwise. File
 * streams do not throw by default, so their state is checked explicitly;
 * the destination is not created when the source cannot be opened.
 */
bool copyFile(const string& sourcePath, const string& destPath)
{
  try {
#ifndef _WIN32
    if (link(sourcePath.c_str(), destPath.c_str()) == 0) {
      return true;
    }
#endif
    std::ifstream infile(sourcePath.c_str(), std::ios_base::binary);
    if (!infile.is_open()) {
      return false;
    }
    std::ofstream outfile(destPath.c_str(), std::ios_base::binary);
    if (!outfile.is_open()) {
      return false;
    }
    /* Inserting an empty stream buffer sets failbit on the output, so an
       empty source is handled without performing the insertion. */
    if (infile.peek() != std::ifstream::traits_type::eof()) {
      outfile << infile.rdbuf();
    }
    outfile.flush();
    return !infile.bad() && !outfile.fail();
  } catch (const exception& e) {
    cerr << e.what() << endl;
    return false;
  }
}
/* Return the path of the running executable.
 *
 * Windows uses GetModuleFileName(), macOS _NSGetExecutablePath(), and every
 * other platform reads the /proc/self/exe link; in that last case an empty
 * string is returned when the link cannot be read.
 */
string getExecutablePath()
{
  char buffer[PATH_MAX];
#ifdef _WIN32
  GetModuleFileName(NULL, buffer, PATH_MAX);
  return std::string(buffer);
#elif __APPLE__
  uint32_t capacity = (uint32_t)PATH_MAX;
  _NSGetExecutablePath(buffer, &capacity);
  return std::string(buffer);
#else
  /* readlink() does not NUL-terminate, hence the explicit length. */
  const ssize_t length = readlink("/proc/self/exe", buffer, PATH_MAX);
  if (length == -1) {
    return "";
  }
  return std::string(buffer, length);
#endif
}
/* Write `content` to the file at `path`, replacing what it contained.
 *
 * Returns true when the file could be opened, written and closed without
 * error, false otherwise (previously true was returned unconditionally).
 */
bool writeTextFile(const string& path, const string& content)
{
  std::ofstream file(path.c_str());
  if (!file.is_open()) {
    return false;
  }
  file << content;
  /* close() flushes and raises failbit when that fails. */
  file.close();
  return !file.fail();
}
/* Return the current working directory, or an empty string when it cannot
 * be determined.
 *
 * getcwd(NULL, 0) returns a malloc'ed buffer (freed here) or NULL on
 * failure; NULL must not be turned into a std::string.
 */
string getCurrentDirectory()
{
  char* a_cwd = getcwd(NULL, 0);
  if (a_cwd == NULL) {
    return "";
  }
  string s_cwd(a_cwd);
  free(a_cwd);
  return s_cwd;
}
/* Work out the directory where kiwix data is stored.
 *
 * 1. A non-empty APPDATA (Windows) or KIWIX_DATA_DIR (elsewhere) is
 *    returned unchanged.
 * 2. Otherwise "kiwix" is appended to a base directory:
 *    - Windows: USERPROFILE, or the current directory when it is unset;
 *    - elsewhere: a non-empty XDG_DATA_HOME, else <HOME>/.local/share with
 *      the current directory standing in for an unset HOME.
 */
string getDataDirectory()
{
#ifdef _WIN32
  const char* const overrideVariable = "APPDATA";
#else
  const char* const overrideVariable = "KIWIX_DATA_DIR";
#endif
  const char* value = ::getenv(overrideVariable);
  if (value != nullptr && *value != '\0') {
    return value;
  }

  std::string baseDir;
#ifdef _WIN32
  value = ::getenv("USERPROFILE");
  baseDir = (value == nullptr) ? getCurrentDirectory() : std::string(value);
#else
  value = ::getenv("XDG_DATA_HOME");
  if (value != nullptr && *value != '\0') {
    baseDir = value;
  } else {
    value = ::getenv("HOME");
    baseDir = (value == nullptr) ? getCurrentDirectory() : std::string(value);
    baseDir = appendToDirectory(appendToDirectory(baseDir, ".local"), "share");
  }
#endif
  return appendToDirectory(baseDir, "kiwix");
}

93
src/tools/regexTools.cpp Normal file
View File

@ -0,0 +1,93 @@
/*
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#include <tools/regexTools.h>
/* Compiled matchers keyed by the regex source text; buildRegex() fills it
   and hands out the stored pointers. */
std::map<std::string, icu::RegexMatcher*> regexCache;
/* Return the case-insensitive matcher for `regex`, compiling it and storing
   it in regexCache on first use. The returned pointer is the cached one;
   the ICU status of the compilation is not inspected. */
icu::RegexMatcher* buildRegex(const std::string& regex)
{
  const auto cached = regexCache.find(regex);
  if (cached != regexCache.end()) {
    /* Regex is in cache */
    return cached->second;
  }

  /* Regex needs to be parsed (and cached) */
  UErrorCode status = U_ZERO_ERROR;
  const icu::UnicodeString pattern(regex.c_str());
  icu::RegexMatcher* const compiled
      = new icu::RegexMatcher(pattern, UREGEX_CASE_INSENSITIVE, status);
  regexCache[regex] = compiled;
  return compiled;
}
/* todo */
void freeRegexCache()
{
}
bool matchRegex(const std::string& content, const std::string& regex)
{
ucnv_setDefaultName("UTF-8");
icu::UnicodeString ucontent(content.c_str());
auto matcher = buildRegex(regex);
matcher->reset(ucontent);
return matcher->find();
}
std::string replaceRegex(const std::string& content,
const std::string& replacement,
const std::string& regex)
{
ucnv_setDefaultName("UTF-8");
icu::UnicodeString ucontent(content.c_str());
icu::UnicodeString ureplacement(replacement.c_str());
auto matcher = buildRegex(regex);
matcher->reset(ucontent);
UErrorCode status = U_ZERO_ERROR;
auto uresult = matcher->replaceAll(ureplacement, status);
std::string tmp;
uresult.toUTF8String(tmp);
return tmp;
}
std::string appendToFirstOccurence(const std::string& content,
const std::string regex,
const std::string& replacement)
{
ucnv_setDefaultName("UTF-8");
icu::UnicodeString ucontent(content.c_str());
icu::UnicodeString ureplacement(replacement.c_str());
auto matcher = buildRegex(regex);
matcher->reset(ucontent);
if (matcher->find()) {
UErrorCode status = U_ZERO_ERROR;
ucontent.insert(matcher->end(status), ureplacement);
std::string tmp;
ucontent.toUTF8String(tmp);
return tmp;
}
return content;
}

374
src/tools/stringTools.cpp Normal file
View File

@ -0,0 +1,374 @@
/*
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#include <tools/stringTools.h>
#include <unicode/normlzr.h>
#include <unicode/rep.h>
#include <unicode/translit.h>
#include <unicode/ucnv.h>
#include <unicode/uniset.h>
#include <unicode/ustring.h>
/* tell ICU where to find its dat file (tables)
 *
 * Only does something on Apple platforms, where the ICU data directory is
 * set to "icudt58l.dat" resolved against the directory of the executable.
 */
void kiwix::loadICUExternalTables()
{
#ifdef __APPLE__
  const std::string executableDirectory
      = removeLastPathElement(getExecutablePath());
  const std::string datPath
      = computeAbsolutePath(executableDirectory, "icudt58l.dat");
  try {
    u_setDataDirectory(datPath.c_str());
  } catch (exception& e) {
    std::cerr << e.what() << std::endl;
  }
#endif
}
std::string kiwix::removeAccents(const std::string& text)
{
loadICUExternalTables();
ucnv_setDefaultName("UTF-8");
UErrorCode status = U_ZERO_ERROR;
auto removeAccentsTrans = icu::Transliterator::createInstance(
"Lower; NFD; [:M:] remove; NFC", UTRANS_FORWARD, status);
icu::UnicodeString ustring(text.c_str());
removeAccentsTrans->transliterate(ustring);
delete removeAccentsTrans;
std::string unaccentedText;
ustring.toUTF8String(unaccentedText);
return unaccentedText;
}
/* Prepare integer for display: format `number` in decimal with a ','
   between each group of three digits, counted from the right. */
std::string kiwix::beautifyInteger(uint64_t number)
{
  std::stringstream numberStream;
  numberStream << number;
  std::string digits = numberStream.str();

  /* Walk leftwards from the last group of three digits. */
  for (int at = static_cast<int>(digits.size()) - 3; at > 0; at -= 3) {
    digits.insert(at, ",");
  }
  return digits;
}
/* Format a byte count for display: values below 1024 are printed as
   "<n> B"; larger ones are divided by the matching power of 1024 and
   printed with two decimals followed by " KB", " MB" or " GB". */
std::string kiwix::beautifyFileSize(uint64_t number)
{
  const uint64_t kilo = uint64_t(1) << 10;
  const uint64_t mega = uint64_t(1) << 20;
  const uint64_t giga = uint64_t(1) << 30;

  std::stringstream out;
  out << std::fixed << std::setprecision(2);
  if (number >= giga) {
    out << (number / (1024.0 * 1024 * 1024)) << " GB";
  } else if (number >= mega) {
    out << (number / (1024.0 * 1024)) << " MB";
  } else if (number >= kilo) {
    out << (number / 1024.0) << " KB";
  } else {
    out << number << " B";
  }
  return out.str();
}
/* Debug helper: dump the code units of `s` on std::cout, each followed by a
   space, and end the line. Every code unit is narrowed to char; values with
   bit 0x80 set are printed as numbers (std::cout is switched to showbase /
   hex and left that way), the others as characters. */
void kiwix::printStringInHexadecimal(icu::UnicodeString s)
{
  std::cout << std::showbase << std::hex;
  const auto* units = s.getTerminatedBuffer();
  const int count = s.length();
  for (int index = 0; index < count; ++index) {
    const char c = (char)(units[index]);
    if (c & 0x80) {
      std::cout << (c & 0xffff);
    } else {
      std::cout << c;
    }
    std::cout << " ";
  }
  std::cout << std::endl;
}
/* Debug helper: dump the bytes of the NUL-terminated string `s` on
   std::cout, each followed by a space, and end the line. Bytes with bit
   0x80 set are printed as numbers (std::cout is switched to showbase / hex
   and left that way), the others as characters. */
void kiwix::printStringInHexadecimal(const char* s)
{
  std::cout << std::showbase << std::hex;
  while (*s != '\0') {
    const char c = *s++;
    if (c & 0x80) {
      std::cout << (c & 0xffff);
    } else {
      std::cout << c;
    }
    std::cout << ' ';
  }
  std::cout << std::endl;
}
/* Replace, in place, every occurrence of `oldStr` in `str` with `newStr`.
 *
 * The search resumes after each inserted `newStr`, so replacements are never
 * rescanned. An empty `oldStr` leaves `str` untouched: it would otherwise
 * match at every position and the loop would never end.
 */
void kiwix::stringReplacement(std::string& str,
                              const std::string& oldStr,
                              const std::string& newStr)
{
  if (oldStr.empty()) {
    return;
  }

  size_t pos = 0;
  while ((pos = str.find(oldStr, pos)) != std::string::npos) {
    str.replace(pos, oldStr.length(), newStr);
    pos += newStr.length();
  }
}
/* Encode string to avoid XSS attacks: every '<' becomes "&lt;" and every
   '>' becomes "&gt;". All other characters (including '&' and quotes) are
   copied unchanged. */
std::string kiwix::encodeDiples(const std::string& str)
{
  std::string result;
  for (std::string::size_type i = 0; i < str.size(); ++i) {
    switch (str[i]) {
      case '<':
        result += "&lt;";
        break;
      case '>':
        result += "&gt;";
        break;
      default:
        result += str[i];
        break;
    }
  }
  return result;
}
/* urlEncode() based on javascript encodeURI() &
   encodeURIComponent(). Mostly code from rstudio/httpuv (GPLv3) */

/* Tell whether `c` is one of the reserved URL characters
   ; , / ? : @ & = + $ */
bool isReservedUrlChar(char c)
{
  return c == ';' || c == ',' || c == '/' || c == '?' || c == ':'
         || c == '@' || c == '&' || c == '=' || c == '+' || c == '$';
}
/* Tell whether `c` has to be percent-encoded.
 *
 * ASCII letters, digits and the marks - _ . ! ~ * ' ( ) never are. Reserved
 * URL characters (see isReservedUrlChar) are escaped only when
 * `encodeReserved` is true. Everything else is always escaped.
 */
bool needsEscape(char c, bool encodeReserved)
{
  const bool isLower = c >= 'a' && c <= 'z';
  const bool isUpper = c >= 'A' && c <= 'Z';
  const bool isDigit = c >= '0' && c <= '9';
  if (isLower || isUpper || isDigit) {
    return false;
  }

  if (isReservedUrlChar(c)) {
    return encodeReserved;
  }

  const bool isMark = c == '-' || c == '_' || c == '.' || c == '!'
                      || c == '~' || c == '*' || c == '\'' || c == '('
                      || c == ')';
  return !isMark;
}
/* Return the value (0-15) of the hexadecimal digit `c`, accepting both
   letter cases, or -1 when `c` is not a hexadecimal digit. */
int hexToInt(char c) {
  if (c >= '0' && c <= '9') {
    return c - '0';
  }
  if (c >= 'a' && c <= 'f') {
    return c - 'a' + 10;
  }
  if (c >= 'A' && c <= 'F') {
    return c - 'A' + 10;
  }
  return -1;
}
/* Percent-encode `value`.
 *
 * Characters for which needsEscape() answers false are copied as they are;
 * the others are written as '%' followed by exactly two uppercase
 * hexadecimal digits. `encodeReserved` is forwarded to needsEscape() and
 * decides whether reserved URL characters are escaped too.
 *
 * The fill character is set to '0' so that bytes below 0x10 come out as
 * e.g. "%0A" rather than being padded with a space.
 */
std::string kiwix::urlEncode(const std::string& value, bool encodeReserved)
{
  std::ostringstream os;
  os << std::hex << std::uppercase << std::setfill('0');
  for (std::string::const_iterator it = value.begin();
       it != value.end();
       ++it) {
    if (!needsEscape(*it, encodeReserved)) {
      os << *it;
    } else {
      /* Go through unsigned char so that bytes >= 0x80 stay two digits. */
      os << '%' << std::setw(2)
         << static_cast<unsigned int>(static_cast<unsigned char>(*it));
    }
  }
  return os.str();
}
/* Decode the percent-escapes of `value`.
 *
 * A '%' followed by two hexadecimal digits is replaced by the byte it
 * encodes, except that, when `component` is false, escapes standing for a
 * reserved URL character (see isReservedUrlChar) are kept as written.
 * Invalid escapes and a '%' too close to the end of the string are copied
 * unchanged.
 *
 * Positions are tracked with indexes: the former iterator arithmetic
 * (value.end() - 3) stepped before begin() for inputs shorter than three
 * characters.
 */
std::string kiwix::urlDecode(const std::string& value, bool component)
{
  std::ostringstream os;
  const std::string::size_type length = value.size();
  for (std::string::size_type i = 0; i < length; ++i) {
    const char current = value[i];

    // If there aren't enough characters left for this to be a
    // valid escape code, just use the character and move on
    if (current != '%' || length - i < 3) {
      os << current;
      continue;
    }

    const char hi = value[i + 1];
    const char lo = value[i + 2];
    i += 2;

    const int iHi = hexToInt(hi);
    const int iLo = hexToInt(lo);
    if (iHi < 0 || iLo < 0) {
      // Invalid escape sequence
      os << '%' << hi << lo;
      continue;
    }

    const char decoded = static_cast<char>(iHi << 4 | iLo);
    if (!component && isReservedUrlChar(decoded)) {
      os << '%' << hi << lo;
    } else {
      os << decoded;
    }
  }
  return os.str();
}
/* Split string in a token array.
 *
 * Every character of `delims` is a delimiter. Runs of delimiters count as
 * one, and leading/trailing delimiters are ignored, so no empty token is
 * ever produced.
 */
std::vector<std::string> kiwix::split(const std::string& str,
                                      const std::string& delims = " *-")
{
  std::vector<std::string> tokens;
  std::string::size_type tokenStart = str.find_first_not_of(delims, 0);
  while (tokenStart != std::string::npos) {
    /* npos here means the token runs to the end of the string. */
    const std::string::size_type tokenEnd
        = str.find_first_of(delims, tokenStart);
    tokens.push_back(str.substr(tokenStart, tokenEnd - tokenStart));
    tokenStart = str.find_first_not_of(delims, tokenEnd);
  }
  return tokens;
}
std::vector<std::string> kiwix::split(const char* lhs, const char* rhs)
{
const std::string m1(lhs), m2(rhs);
return split(m1, m2);
}
/* Mixed overload: split the C string `lhs` using the characters of `rhs` as
   delimiters, by delegating to the C-string overload. */
std::vector<std::string> kiwix::split(const char* lhs, const std::string& rhs)
{
  const char* delims = rhs.c_str();
  return split(lhs, delims);
}
/* Mixed overload: split `lhs` using the characters of the C string `rhs` as
   delimiters, by delegating to the C-string overload. */
std::vector<std::string> kiwix::split(const std::string& lhs, const char* rhs)
{
  const char* text = lhs.c_str();
  return split(text, rhs);
}
std::string kiwix::ucFirst(const std::string& word)
{
if (word.empty()) {
return "";
}
std::string result;
icu::UnicodeString unicodeWord(word.c_str());
auto unicodeFirstLetter = icu::UnicodeString(unicodeWord, 0, 1).toUpper();
unicodeWord.replace(0, 1, unicodeFirstLetter);
unicodeWord.toUTF8String(result);
return result;
}
std::string kiwix::ucAll(const std::string& word)
{
if (word.empty()) {
return "";
}
std::string result;
icu::UnicodeString unicodeWord(word.c_str());
unicodeWord.toUpper().toUTF8String(result);
return result;
}
std::string kiwix::lcFirst(const std::string& word)
{
if (word.empty()) {
return "";
}
std::string result;
icu::UnicodeString unicodeWord(word.c_str());
auto unicodeFirstLetter = icu::UnicodeString(unicodeWord, 0, 1).toLower();
unicodeWord.replace(0, 1, unicodeFirstLetter);
unicodeWord.toUTF8String(result);
return result;
}
std::string kiwix::lcAll(const std::string& word)
{
if (word.empty()) {
return "";
}
std::string result;
icu::UnicodeString unicodeWord(word.c_str());
unicodeWord.toLower().toUTF8String(result);
return result;
}
std::string kiwix::toTitle(const std::string& word)
{
if (word.empty()) {
return "";
}
std::string result;
icu::UnicodeString unicodeWord(word.c_str());
unicodeWord = unicodeWord.toTitle(0);
unicodeWord.toUTF8String(result);
return result;
}
/* Normalise `word`, which currently means lower-casing it with lcAll(). */
std::string kiwix::normalize(const std::string& word)
{
  const std::string lowered = kiwix::lcAll(word);
  return lowered;
}