[API break] Move all the tools into the `tools` directory instead of `common`.

The `common` name dates from the time when kiwix was a single repository for the whole project (android, desktop, server...). Now that the repositories have been split and kiwix-lib is itself the "common" repo, a "common" directory no longer makes sense.
2025-06-27 21:39:37 +00:00 · 2018-11-28 14:15:49 +01:00
parent ecb2a80baf
commit af7689e3e8
26 changed files with 42 additions and 43 deletions
--- a/src/tools/base64.cpp
+++ b/src/tools/base64.cpp
@ -0,0 +1,124 @@
+/* 
+   base64.cpp and base64.h
+
+   Copyright (C) 2004-2008 René Nyffenegger
+
+   This source code is provided 'as-is', without any express or implied
+   warranty. In no event will the author be held liable for any damages
+   arising from the use of this software.
+
+   Permission is granted to anyone to use this software for any purpose,
+   including commercial applications, and to alter it and redistribute it
+   freely, subject to the following restrictions:
+
+   1. The origin of this source code must not be misrepresented; you must not
+      claim that you wrote the original source code. If you use this source code
+      in a product, an acknowledgment in the product documentation would be
+      appreciated but is not required.
+
+   2. Altered source versions must be plainly marked as such, and must not be
+      misrepresented as being the original source code.
+
+   3. This notice may not be removed or altered from any source distribution.
+
+   René Nyffenegger rene.nyffenegger@adp-gmbh.ch
+*/
+
+#include <tools/base64.h>
+#include <iostream>
+
+/* The 64 output symbols; the position of a symbol in this string is the
+ * 6-bit value it stands for. */
+static const std::string base64_chars(
+    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+    "abcdefghijklmnopqrstuvwxyz"
+    "0123456789"
+    "+/");
+
+
+/* True when `c` is '+', '/' or a character accepted by isalnum(). */
+static inline bool is_base64(unsigned char c) {
+  if (c == '+' || c == '/') {
+    return true;
+  }
+  return isalnum(c) != 0;
+}
+
+/* Encodes the bytes of `inString` with the base64_chars alphabet.
+ * Every 3 input bytes produce 4 symbols; a trailing group of 1 or 2 bytes
+ * produces 2 or 3 symbols followed by '=' padding up to 4 characters. */
+std::string base64_encode(const std::string& inString) {
+  const unsigned char* src = reinterpret_cast<const unsigned char*>(inString.data());
+  const std::string::size_type total = inString.size();
+  const std::string::size_type fullEnd = total - (total % 3);
+  std::string out;
+
+  /* Complete 3-byte groups */
+  for (std::string::size_type pos = 0; pos < fullEnd; pos += 3) {
+    const unsigned char b0 = src[pos];
+    const unsigned char b1 = src[pos + 1];
+    const unsigned char b2 = src[pos + 2];
+    out += base64_chars[(b0 & 0xfc) >> 2];
+    out += base64_chars[((b0 & 0x03) << 4) + ((b1 & 0xf0) >> 4)];
+    out += base64_chars[((b1 & 0x0f) << 2) + ((b2 & 0xc0) >> 6)];
+    out += base64_chars[b2 & 0x3f];
+  }
+
+  /* Leftover of 1 or 2 bytes: missing bytes count as zero */
+  const std::string::size_type rest = total - fullEnd;
+  if (rest != 0) {
+    const unsigned char b0 = src[fullEnd];
+    const unsigned char b1 = (rest == 2) ? src[fullEnd + 1] : 0;
+    out += base64_chars[(b0 & 0xfc) >> 2];
+    out += base64_chars[((b0 & 0x03) << 4) + ((b1 & 0xf0) >> 4)];
+    if (rest == 2) {
+      out += base64_chars[(b1 & 0x0f) << 2];
+    }
+    out.append(3 - rest, '=');
+  }
+
+  return out;
+}
+
+/* Decodes base64 text back to bytes.
+ * Reading stops at the first '=' or at the first character rejected by
+ * is_base64(). Each complete group of 4 symbols yields 3 bytes; a trailing
+ * partial group of k symbols yields k-1 bytes. */
+std::string base64_decode(std::string const& encoded_string) {
+  unsigned char sextets[4];
+  unsigned char octets[3];
+  int filled = 0;
+  std::string decoded;
+
+  const std::string::size_type length = encoded_string.size();
+  for (std::string::size_type pos = 0; pos < length; ++pos) {
+    const char symbol = encoded_string[pos];
+    if (symbol == '=' || !is_base64(symbol)) {
+      break;
+    }
+
+    /* Store the 6-bit value (index in the alphabet) straight away */
+    sextets[filled++] = base64_chars.find(symbol);
+    if (filled < 4) {
+      continue;
+    }
+
+    octets[0] = (sextets[0] << 2) + ((sextets[1] & 0x30) >> 4);
+    octets[1] = ((sextets[1] & 0xf) << 4) + ((sextets[2] & 0x3c) >> 2);
+    octets[2] = ((sextets[2] & 0x3) << 6) + sextets[3];
+    for (int k = 0; k < 3; ++k) {
+      decoded += octets[k];
+    }
+    filled = 0;
+  }
+
+  /* Trailing partial group: only bytes fully covered by real symbols */
+  if (filled > 1) {
+    octets[0] = (sextets[0] << 2) + ((sextets[1] & 0x30) >> 4);
+    decoded += octets[0];
+    if (filled > 2) {
+      octets[1] = ((sextets[1] & 0xf) << 4) + ((sextets[2] & 0x3c) >> 2);
+      decoded += octets[1];
+    }
+  }
+
+  return decoded;
+}
--- a/src/tools/networkTools.cpp
+++ b/src/tools/networkTools.cpp
@ -0,0 +1,209 @@
+/*
+ * Copyright 2012 Emmanuel Engelhart <kelson@kiwix.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU  General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#include <tools/networkTools.h>
+
+#ifdef _WIN32
+#include <winsock2.h>
+#include <ws2tcpip.h>
+#else
+#include <net/if.h>
+#include <netdb.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <unistd.h>
+#endif
+
+#include <curl/curl.h>
+
+#include <sstream>
+#include <iostream>
+
+
+/*
+ * Lists the IPv4 addresses of the local network interfaces.
+ *
+ * Returns a map whose values are dotted IPv4 addresses. On non-Windows
+ * systems the key is the interface name (ifr_name); on Windows the key is
+ * the address itself, because only the address is read from the list.
+ *
+ * On any system call failure a message is written to stderr and the map
+ * collected so far (possibly empty) is returned. The probing socket is
+ * always closed before returning.
+ */
+std::map<std::string, std::string> kiwix::getNetworkInterfaces()
+{
+  std::map<std::string, std::string> interfaces;
+
+#ifdef _WIN32
+  SOCKET sd = WSASocket(AF_INET, SOCK_DGRAM, 0, 0, 0, 0);
+  if (sd == (SOCKET)SOCKET_ERROR) {
+    std::cerr << "Failed to get a socket. Error " << WSAGetLastError()
+              << std::endl;
+    return interfaces;
+  }
+
+  INTERFACE_INFO InterfaceList[20];
+  unsigned long nBytesReturned;
+  if (WSAIoctl(sd,
+               SIO_GET_INTERFACE_LIST,
+               0,
+               0,
+               &InterfaceList,
+               sizeof(InterfaceList),
+               &nBytesReturned,
+               0,
+               0)
+      == SOCKET_ERROR) {
+    /* Report the error before closing: closesocket() may overwrite it */
+    std::cerr << "Failed calling WSAIoctl: error " << WSAGetLastError()
+              << std::endl;
+    closesocket(sd);
+    return interfaces;
+  }
+  /* The list has been copied into InterfaceList; the socket is done */
+  closesocket(sd);
+
+  int nNumInterfaces = nBytesReturned / sizeof(INTERFACE_INFO);
+  for (int i = 0; i < nNumInterfaces; ++i) {
+    sockaddr_in* pAddress;
+    pAddress = (sockaddr_in*)&(InterfaceList[i].iiAddress);
+
+    /* Add to the map */
+    std::string interfaceName = std::string(inet_ntoa(pAddress->sin_addr));
+    std::string interfaceIp = std::string(inet_ntoa(pAddress->sin_addr));
+    interfaces.insert(
+        std::pair<std::string, std::string>(interfaceName, interfaceIp));
+  }
+#else
+  /* Get Network interfaces information */
+  char buf[16384];
+  struct ifconf ifconf;
+  int fd = socket(PF_INET, SOCK_DGRAM, 0); /* Only IPV4 */
+  if (fd < 0) {
+    perror("socket()");
+    return interfaces;
+  }
+  ifconf.ifc_len = sizeof buf;
+  ifconf.ifc_buf = buf;
+  if (ioctl(fd, SIOCGIFCONF, &ifconf) != 0) {
+    perror("ioctl(SIOCGIFCONF)");
+    ::close(fd);
+    /* A library must not terminate the host process: report "no
+     * interface" instead, as the Windows branch does. */
+    return interfaces;
+  }
+  /* The kernel has filled buf; the socket is not needed any more */
+  ::close(fd);
+
+  /* Go through each interface */
+  int i;
+  size_t len;
+  struct ifreq* ifreq;
+  ifreq = ifconf.ifc_req;
+  for (i = 0; i < ifconf.ifc_len;) {
+    if (ifreq->ifr_addr.sa_family == AF_INET) {
+      /* Get the network interface ip */
+      char host[128] = {0};
+      const int error = getnameinfo(&(ifreq->ifr_addr),
+                                    sizeof ifreq->ifr_addr,
+                                    host,
+                                    sizeof host,
+                                    0,
+                                    0,
+                                    NI_NUMERICHOST);
+      if (!error) {
+        std::string interfaceName = std::string(ifreq->ifr_name);
+        std::string interfaceIp = std::string(host);
+        /* Add to the map */
+        interfaces.insert(
+            std::pair<std::string, std::string>(interfaceName, interfaceIp));
+      } else {
+        perror("getnameinfo()");
+      }
+    }
+
+/* some systems have ifr_addr.sa_len and adjust the length that
+ * way, but not mine. weird */
+#ifndef __linux__
+    len = IFNAMSIZ + ifreq->ifr_addr.sa_len;
+#else
+    len = sizeof *ifreq;
+#endif
+    ifreq = (struct ifreq*)((char*)ifreq + len);
+    i += len;
+  }
+#endif
+  return interfaces;
+}
+
+/* True when `ip` is dotted IPv4 text inside 172.16.0.0/12, i.e. its first
+ * octet is 172 and its second octet is between 16 and 31. */
+static bool isIn172PrivateBlock(const std::string& ip)
+{
+  if (ip.compare(0, 4, "172.") != 0) {
+    return false;
+  }
+  const std::string::size_type dot = ip.find('.', 4);
+  if (dot == std::string::npos || dot == 4 || dot > 7) {
+    return false;
+  }
+  int secondOctet = 0;
+  for (std::string::size_type pos = 4; pos < dot; ++pos) {
+    if (ip[pos] < '0' || ip[pos] > '9') {
+      return false;
+    }
+    secondOctet = secondOctet * 10 + (ip[pos] - '0');
+  }
+  return secondOctet >= 16 && secondOctet <= 31;
+}
+
+/*
+ * Picks the address most likely to be reachable from the local network.
+ *
+ * Selection order:
+ *  1. (non-Windows only) the first of eth0, eth1, wlan0, wlan1, en0, en1
+ *     that is present among the interfaces;
+ *  2. the first address starting with "192.168";
+ *  3. the first address inside 172.16.0.0/12;
+ *  4. the first address starting with "10.";
+ *  5. "127.0.0.1" when nothing else matched.
+ * "First" follows the iteration order of the interface map.
+ */
+std::string kiwix::getBestPublicIp()
+{
+  const std::map<std::string, std::string> interfaces
+      = kiwix::getNetworkInterfaces();
+
+#ifndef _WIN32
+  const char* const prioritizedNames[]
+      = {"eth0", "eth1", "wlan0", "wlan1", "en0", "en1"};
+  const int count = (sizeof prioritizedNames) / (sizeof prioritizedNames[0]);
+  for (int i = 0; i < count; ++i) {
+    std::map<std::string, std::string>::const_iterator it
+        = interfaces.find(prioritizedNames[i]);
+    if (it != interfaces.end()) {
+      return it->second;
+    }
+  }
+#endif
+
+  /* One pass per private range, in priority order */
+  for (int pass = 0; pass < 3; ++pass) {
+    for (const auto& entry : interfaces) {
+      const std::string& interfaceIp = entry.second;
+      bool matches = false;
+      if (pass == 0) {
+        matches = interfaceIp.compare(0, 7, "192.168") == 0;
+      } else if (pass == 1) {
+        matches = isIn172PrivateBlock(interfaceIp);
+      } else {
+        matches = interfaceIp.compare(0, 3, "10.") == 0;
+      }
+      if (matches) {
+        return interfaceIp;
+      }
+    }
+  }
+
+  return "127.0.0.1";
+}
+
+/*
+ * Write callback with the signature expected by CURLOPT_WRITEFUNCTION.
+ *
+ * ptr      : received data (not NUL-terminated)
+ * size     : size of one element
+ * nmemb    : number of elements
+ * userdata : the std::stringstream* registered with CURLOPT_WRITEDATA
+ *
+ * Appends size * nmemb bytes to the stream and returns that byte count.
+ */
+size_t write_callback_to_iss(char* ptr, size_t size, size_t nmemb, void* userdata)
+{
+  auto str = static_cast<std::stringstream*>(userdata);
+  const size_t byteCount = size * nmemb;
+  str->write(ptr, byteCount);
+  return byteCount;
+}
+
+/*
+ * Fetches `url` with an HTTP GET through libcurl and returns the body.
+ *
+ * Throws std::runtime_error when the curl handle cannot be created, when
+ * the transfer fails, or when the response code is not 200. The curl
+ * handle is released on every path.
+ */
+std::string kiwix::download(const std::string& url) {
+  auto curl = curl_easy_init();
+  if (curl == nullptr) {
+    /* Passing a NULL handle to the calls below would be an error */
+    throw std::runtime_error("Cannot perform request");
+  }
+  std::stringstream ss;
+  curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
+  curl_easy_setopt(curl, CURLOPT_HTTPGET, 1L);
+  curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, &write_callback_to_iss);
+  curl_easy_setopt(curl, CURLOPT_WRITEDATA, &ss);
+  auto res = curl_easy_perform(curl);
+  if (res != CURLE_OK) {
+    curl_easy_cleanup(curl);
+    throw std::runtime_error("Cannot perform request");
+  }
+  /* Initialised so a failing getinfo is treated as "not 200" */
+  long response_code = 0;
+  curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &response_code);
+  curl_easy_cleanup(curl);
+  if (response_code != 200) {
+    throw std::runtime_error("Invalid return code from server");
+  }
+  return ss.str();
+}
--- a/src/tools/otherTools.cpp
+++ b/src/tools/otherTools.cpp
@ -0,0 +1,326 @@
+/*
+ * Copyright 2014 Emmanuel Engelhart <kelson@kiwix.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU  General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#include <tools/otherTools.h>
+#include <map>
+
+static std::map<std::string, std::string> codeisomapping {
+/* Lookup table read by kiwix::converta2toa3(): each key is a two-letter
+ * code and each value the matching three-letter code. Presumably ISO 3166-1
+ * alpha-2 -> alpha-3 country codes - TODO confirm; note that "en" -> "eng"
+ * looks like a language code rather than a country code.
+ * The single-letter comments mark groups of keys sharing a first letter
+ * (there is no x group). First group: a */
+  { "ad", "and" },
+  { "ae", "are" },
+  { "af", "afg" },
+  { "ag", "atg" },
+  { "ai", "aia" },
+  { "al", "alb" },
+  { "am", "arm" },
+  { "an", "ant" },
+  { "ao", "ago" },
+  { "aq", "ata" },
+  { "ar", "arg" },
+  { "as", "asm" },
+  { "at", "aut" },
+  { "au", "aus" },
+  { "aw", "abw" },
+  { "ax", "ala" },
+  { "az", "aze" },
+//b
+  { "ba", "bih" },
+  { "bb", "brb" },
+  { "bd", "bgd" },
+  { "be", "bel" },
+  { "bf", "bfa" },
+  { "bg", "bgr" },
+  { "bh", "bhr" },
+  { "bi", "bdi" },
+  { "bj", "ben" },
+  { "bl", "blm" },
+  { "bn", "brn" },
+  { "bm", "bmu" },
+  { "bo", "bol" },
+  { "br", "bra" },
+  { "bs", "bhs" },
+  { "bt", "btn" },
+  { "bv", "bvt" },
+  { "bw", "bwa" },
+  { "by", "blr" },
+  { "bz", "blz" },
+//c
+  { "ca", "can" },
+  { "cc", "cck" },
+  { "cd", "cod" },
+  { "cf", "caf" },
+  { "cg", "cog" },
+  { "ch", "che" },
+  { "ci", "civ" },
+  { "ck", "cok" },
+  { "cl", "chl" },
+  { "cm", "cmr" },
+  { "cn", "chn" },
+  { "co", "col" },
+  { "cr", "cri" },
+  { "cu", "cub" },
+  { "cv", "cpv" },
+  { "cx", "cxr" },
+  { "cy", "cyp" },
+  { "cz", "cze" },
+//d
+  { "de", "deu" },
+  { "dj", "dji" },
+  { "dk", "dnk" },
+  { "dm", "dma" },
+  { "do", "dom" },
+  { "dz", "dza" },
+//e
+  { "ec", "ecu" },
+  { "ee", "est" },
+  { "eg", "egy" },
+  { "eh", "esh" },
+  { "en", "eng" },
+  { "er", "eri" },
+  { "es", "esp" },
+  { "et", "eth" },
+//f
+  { "fi", "fin" },
+  { "fj", "fji" },
+  { "fk", "flk" },
+  { "fm", "fsm" },
+  { "fo", "fro" },
+  { "fr", "fra" },
+//g
+  { "ga", "gab" },
+  { "gb", "gbr" },
+  { "gd", "grd" },
+  { "ge", "geo" },
+  { "gf", "guf" },
+  { "gg", "ggy" },
+  { "gh", "gha" },
+  { "gi", "gib" },
+  { "gl", "grl" },
+  { "gm", "gmb" },
+  { "gn", "gin" },
+  { "gp", "glp" },
+  { "gq", "gnq" },
+  { "gr", "grc" },
+  { "gs", "sgs" },
+  { "gt", "gtm" },
+  { "gu", "gum" },
+  { "gw", "gnb" },
+  { "gy", "guy" },
+//h
+  { "hk", "hkg" },
+  { "hm", "hmd" },
+  { "hn", "hnd" },
+  { "hr", "hrv" },
+  { "ht", "hti" },
+  { "hu", "hun" },
+//i
+  { "id", "idn" },
+  { "ie", "irl" },
+  { "il", "isr" },
+  { "im", "imn" },
+  { "in", "ind" },
+  { "io", "iot" },
+  { "iq", "irq" },
+  { "ir", "irn" },
+  { "is", "isl" },
+  { "it", "ita" },
+//j
+  { "je", "jey" },
+  { "jm", "jam" },
+  { "jo", "jor" },
+  { "jp", "jpn" },
+//k
+  { "ke", "ken" },
+  { "kg", "kgz" },
+  { "kh", "khm" },
+  { "ki", "kir" },
+  { "km", "com" },
+  { "kn", "kna" },
+  { "kp", "prk" },
+  { "kr", "kor" },
+  { "kw", "kwt" },
+  { "ky", "cym" },
+  { "kz", "kaz" },
+//l
+  { "la", "lao" },
+  { "lb", "lbn" },
+  { "lc", "lca" },
+  { "li", "lie" },
+  { "lk", "lka" },
+  { "lr", "lbr" },
+  { "ls", "lso" },
+  { "lt", "ltu" },
+  { "lu", "lux" },
+  { "lv", "lva" },
+  { "ly", "lby" },
+//m
+  { "ma", "mar" },
+  { "mc", "mco" },
+  { "md", "mda" },
+  { "me", "mne" },
+  { "mf", "maf" },
+  { "mg", "mdg" },
+  { "mh", "mhl" },
+  { "mk", "mkd" },
+  { "ml", "mli" },
+  { "mm", "mmr" },
+  { "mn", "mng" },
+  { "mo", "mac" },
+  { "mp", "mnp" },
+  { "mq", "mtq" },
+  { "mr", "mrt" },
+  { "ms", "msr" },
+  { "mt", "mlt" },
+  { "mu", "mus" },
+  { "mv", "mdv" },
+  { "mw", "mwi" },
+  { "mx", "mex" },
+  { "my", "mys" },
+  { "mz", "moz" },
+//n
+  { "na", "nam" },
+  { "nc", "ncl" },
+  { "ne", "ner" },
+  { "nf", "nfk" },
+  { "ng", "nga" },
+  { "ni", "nic" },
+  { "nl", "nld" },
+  { "no", "nor" },
+  { "np", "npl" },
+  { "nr", "nru" },
+  { "nu", "niu" },
+  { "nz", "nzl" },
+//o
+  { "om", "omn" },
+//p
+  { "pa", "pan" },
+  { "pe", "per" },
+  { "pf", "pyf" },
+  { "pg", "png" },
+  { "ph", "phl" },
+  { "pk", "pak" },
+  { "pl", "pol" },
+  { "pm", "spm" },
+  { "pn", "pcn" },
+  { "pr", "pri" },
+  { "ps", "pse" },
+  { "pt", "prt" },
+  { "pw", "plw" },
+  { "py", "pry" },
+//q
+  { "qa", "qat" },
+//r
+  { "re", "reu" },
+  { "ro", "rou" },
+  { "rs", "srb" },
+  { "ru", "rus" },
+  { "rw", "rwa" },
+//s
+  { "sa", "sau" },
+  { "sb", "slb" },
+  { "sc", "syc" },
+  { "sd", "sdn" },
+  { "se", "swe" },
+  { "sg", "sgp" },
+  { "sh", "shn" },
+  { "si", "svn" },
+  { "sj", "sjm" },
+  { "sk", "svk" },
+  { "sl", "sle" },
+  { "sm", "smr" },
+  { "sn", "sen" },
+  { "so", "som" },
+  { "sr", "sur" },
+  { "ss", "ssd" },
+  { "st", "stp" },
+  { "sv", "slv" },
+  { "sy", "syr" },
+  { "sz", "swz" },
+//t
+  { "tc", "tca" },
+  { "td", "tcd" },
+  { "tf", "atf" },
+  { "tg", "tgo" },
+  { "th", "tha" },
+  { "tj", "tjk" },
+  { "tk", "tkl" },
+  { "tl", "tls" },
+  { "tm", "tkm" },
+  { "tn", "tun" },
+  { "to", "ton" },
+  { "tr", "tur" },
+  { "tt", "tto" },
+  { "tv", "tuv" },
+  { "tw", "twn" },
+  { "tz", "tza" },
+//u
+  { "ua", "ukr" },
+  { "ug", "uga" },
+  { "um", "umi" },
+  { "us", "usa" },
+  { "uy", "ury" },
+  { "uz", "uzb" },
+//v
+  { "va", "vat" },
+  { "vc", "vct" },
+  { "ve", "ven" },
+  { "vg", "vgb" },
+  { "vi", "vir" },
+  { "vn", "vnm" },
+  { "vu", "vut" },
+//w
+  { "wf", "wlf" },
+  { "ws", "wsm" },
+//y
+  { "ye", "yem" },
+  { "yt", "myt" },
+// z
+  { "za", "zaf" },
+  { "zm", "zmb" },
+  { "zw", "zwe" }
+};
+
+/*
+ * Suspends the calling thread for about `milliseconds` milliseconds.
+ *
+ * Uses Sleep() on Windows and usleep() elsewhere. POSIX only specifies
+ * usleep() for arguments below 1,000,000 microseconds, and the product
+ * 1000 * milliseconds can wrap around for long delays, so the delay is
+ * issued in slices shorter than one second.
+ */
+void kiwix::sleep(unsigned int milliseconds)
+{
+#ifdef _WIN32
+  Sleep(milliseconds);
+#else
+  const unsigned int sliceMs = 999;
+  while (milliseconds > sliceMs) {
+    usleep(1000 * sliceMs);
+    milliseconds -= sliceMs;
+  }
+  usleep(1000 * milliseconds);
+#endif
+}
+
+
+/* pugi::xml_writer implementation that accumulates every chunk it
+ * receives into the `result` string. */
+struct XmlStringWriter: pugi::xml_writer
+{
+  std::string result;
+
+  /* Appends the `size` bytes starting at `data` to `result` */
+  virtual void write(const void* data, size_t size){
+    const char* chunk = static_cast<const char*>(data);
+    result.append(chunk, size);
+  }
+};
+
+/* Serialises `node` through pugi's print(), using two spaces as the
+ * indentation string, and returns the produced text. */
+std::string kiwix::nodeToString(pugi::xml_node node)
+{
+  XmlStringWriter sink;
+  node.print(sink, "  ");
+  return sink.result;
+}
+
+/* Returns the three-letter code stored in codeisomapping for `a2code`.
+ * The lookup uses std::map::at(), so an unknown code raises
+ * std::out_of_range. */
+std::string kiwix::converta2toa3(const std::string& a2code){
+  const std::string& a3code = codeisomapping.at(a2code);
+  return a3code;
+}
--- a/src/tools/pathTools.cpp
+++ b/src/tools/pathTools.cpp
@ -0,0 +1,335 @@
+/*
+ * Copyright 2011-2014 Emmanuel Engelhart <kelson@kiwix.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU  General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#include <tools/pathTools.h>
+
+#ifdef __APPLE__
+#include <limits.h>
+#include <mach-o/dyld.h>
+#elif _WIN32
+#include <direct.h>
+#include <windows.h>
+#include "shlwapi.h"
+#define getcwd _getcwd  // stupid MSFT "deprecation" warning
+#endif
+
+#ifdef _WIN32
+#else
+#include <unistd.h>
+#endif
+
+#ifdef _WIN32
+const std::string SEPARATOR("\\");
+#else
+const std::string SEPARATOR("/");
+#include <unistd.h>
+#endif
+
+#include <stdlib.h>
+
+#ifndef PATH_MAX
+#define PATH_MAX 1024
+#endif
+
+/* Tells whether `path` is relative.
+ * An empty path is reported as not relative. Otherwise a path is absolute
+ * when it starts with "/" (non-Windows) or when its 2nd and 3rd characters
+ * are ":\" (Windows); everything else is relative. */
+bool isRelativePath(const string& path)
+{
+  if (path.empty()) {
+    return false;
+  }
+#ifdef _WIN32
+  const bool absolute = (path.substr(1, 2) == ":\\");
+#else
+  const bool absolute = (path.substr(0, 1) == "/");
+#endif
+  return !absolute;
+}
+
+/* Builds the path that leads from `path` to `absolutePath`.
+ * Both arguments are split on SEPARATOR; the leading components they have
+ * in common are dropped, one ".." is emitted per remaining component of
+ * `path`, then the remaining components of `absolutePath` are appended. */
+string computeRelativePath(const string path, const string absolutePath)
+{
+  const std::vector<std::string> fromParts = kiwix::split(path, SEPARATOR);
+  const std::vector<std::string> toParts
+      = kiwix::split(absolutePath, SEPARATOR);
+
+  /* Length of the common leading run of components */
+  unsigned int shared = 0;
+  for (; shared < fromParts.size() && shared < toParts.size(); ++shared) {
+    if (fromParts[shared] != toParts[shared]) {
+      break;
+    }
+  }
+
+  string result;
+#ifdef _WIN32
+  /* On Windows you have a token more because the root is represented
+     by a letter */
+  if (shared == 0) {
+    result = ".." + SEPARATOR;
+  }
+#endif
+
+  for (unsigned int k = shared; k < fromParts.size(); ++k) {
+    result += ".." + SEPARATOR;
+  }
+  for (unsigned int k = shared; k < toParts.size(); ++k) {
+    if (k > shared) {
+      result += SEPARATOR;
+    }
+    result += toParts[k];
+  }
+  return result;
+}
+
+/*
+ * Resolves `relativePath` against the directory `path`.
+ *
+ * Warning: the relative path must be with slashes.
+ *
+ * When `path` is empty the current working directory is used as the base.
+ * ".." components remove the last element of the result, "." components
+ * are skipped, and every other component is appended using SEPARATOR.
+ * The temporary C buffers (working directory, tokenised copy of
+ * `relativePath`) are released before returning. If the working directory
+ * cannot be obtained the base is left empty.
+ */
+string computeAbsolutePath(const string path, const string relativePath)
+{
+  string absolutePath;
+
+  if (path.empty()) {
+    /* With (NULL, 0) the buffer is allocated for us and must be freed */
+#ifdef _WIN32
+    char* cwd = _getcwd(NULL, 0);
+#else
+    char* cwd = getcwd(NULL, 0);
+#endif
+    if (cwd != NULL) {
+      absolutePath = string(cwd) + SEPARATOR;
+      free(cwd);
+    }
+  } else {
+    absolutePath = path.substr(path.length() - 1, 1) == SEPARATOR
+                       ? path
+                       : path + SEPARATOR;
+  }
+
+  /* strtok() modifies its input, so work on a private copy */
+#ifdef _WIN32
+  char* cRelativePath = _strdup(relativePath.c_str());
+#else
+  char* cRelativePath = strdup(relativePath.c_str());
+#endif
+  if (cRelativePath == NULL) {
+    return absolutePath;
+  }
+
+  char* token = strtok(cRelativePath, "/");
+  while (token != NULL) {
+    const string part(token);
+    /* Fetch the next token now: it tells whether `part` is the last one */
+    token = strtok(NULL, "/");
+    if (part == "..") {
+      absolutePath = removeLastPathElement(absolutePath, true, false);
+    } else if (part != "." && !part.empty()) {
+      absolutePath += part;
+      if (token != NULL) {
+        absolutePath += SEPARATOR;
+      }
+    }
+  }
+
+  free(cRelativePath);
+  return absolutePath;
+}
+
+/* Returns `path` without its last element.
+ * removePreSeparator  : when the path ends with SEPARATOR, drop that
+ *                       trailing separator first (on non-Windows systems
+ *                       only if it is not also the first separator).
+ * removePostSeparator : when true the separator preceding the removed
+ *                       element is removed too, otherwise it is kept. */
+string removeLastPathElement(const string path,
+                             const bool removePreSeparator,
+                             const bool removePostSeparator)
+{
+  string trimmed = path;
+  size_t cut = trimmed.find_last_of(SEPARATOR);
+
+  bool dropTrailing = removePreSeparator && cut == trimmed.length() - 1;
+#ifndef _WIN32
+  dropTrailing = dropTrailing && cut != trimmed.find_first_of(SEPARATOR);
+#endif
+  if (dropTrailing) {
+    trimmed = trimmed.substr(0, cut);
+    cut = trimmed.find_last_of(SEPARATOR);
+  }
+
+  const size_t keep = removePostSeparator ? cut : cut + 1;
+  return trimmed.substr(0, keep);
+}
+
+/* Joins `directoryPath` and `filename` with SEPARATOR in between.
+ * The separator is always inserted, whatever `directoryPath` ends with. */
+string appendToDirectory(const string& directoryPath, const string& filename)
+{
+  string joined(directoryPath);
+  joined += SEPARATOR;
+  joined += filename;
+  return joined;
+}
+
+/* Returns what follows the last SEPARATOR in `path`, or the whole path
+ * when it contains no separator. */
+string getLastPathElement(const string& path)
+{
+  const size_t lastSeparator = path.find_last_of(SEPARATOR);
+  /* npos + 1 wraps to 0, which selects the whole string */
+  return path.substr(lastSeparator + 1);
+}
+
+/* Returns the size of the file at `path` divided by 1024 (integer
+ * division of st_size), or 0 when the file cannot be stat'ed. */
+unsigned int getFileSize(const string& path)
+{
+#ifdef _WIN32
+  struct _stat filestatus;
+  if (_stat(path.c_str(), &filestatus) != 0) {
+    return 0;
+  }
+#else
+  struct stat filestatus;
+  if (stat(path.c_str(), &filestatus) != 0) {
+    /* On failure filestatus is not filled in: do not read it */
+    return 0;
+  }
+#endif
+
+  return filestatus.st_size / 1024;
+}
+
+/* Same value as getFileSize(path), formatted as decimal text. */
+string getFileSizeAsString(const string& path)
+{
+  const unsigned int sizeValue = getFileSize(path);
+  ostringstream buffer;
+  buffer << sizeValue;
+  return buffer.str();
+}
+
+/* Reads the whole file at `path` and returns its content; returns an
+ * empty string when the file cannot be opened. */
+string getFileContent(const string& path)
+{
+  std::ifstream f(path, std::ios::in|std::ios::ate);
+  std::string content;
+  if (f.is_open()) {
+    /* The stream was opened at the end, so the position is the size.
+     * It is only a capacity hint: tellg() can report failure (-1) or an
+     * implausibly large value, and reserve() would throw on those. */
+    const std::streamoff size = f.tellg();
+    if (size > 0
+        && static_cast<unsigned long long>(size) <= content.max_size()) {
+      content.reserve(static_cast<std::string::size_type>(size));
+    }
+    f.seekg(0, std::ios::beg);
+    content.assign((std::istreambuf_iterator<char>(f)),
+                    std::istreambuf_iterator<char>());
+  }
+  return content;
+}
+
+/* Tells whether `path` exists: PathFileExists() on Windows; elsewhere the
+ * answer is whether the path can be opened for reading. */
+bool fileExists(const string& path)
+{
+#ifdef _WIN32
+  return PathFileExists(path.c_str());
+#else
+  fstream probe(path.c_str(), ios::in);
+  const bool opened = probe.is_open();
+  probe.close();
+  return opened;
+#endif
+}
+
+/* Creates the directory `path` and returns true on success.
+ * On non-Windows systems the requested mode is rwx for user and group and
+ * r-x for others. */
+bool makeDirectory(const string& path)
+{
+#ifdef _WIN32
+  return _mkdir(path.c_str()) == 0;
+#else
+  const mode_t permissions = S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH;
+  return mkdir(path.c_str(), permissions) == 0;
+#endif
+}
+
+/*
+ * Creates a fresh temporary directory and returns its path.
+ *
+ * Windows: a directory named after a new UUID inside GetTempPath().
+ * Elsewhere: mkdtemp() on the template "/tmp/kiwix-lib_XXXXXX"; an empty
+ * string is returned when mkdtemp() fails.
+ */
+string makeTmpDirectory()
+{
+#ifdef _WIN32
+  char cbase[MAX_PATH+1];
+  int base_len = GetTempPath(MAX_PATH+1, cbase);
+  UUID uuid;
+  UuidCreate(&uuid);
+  char* dir_name;
+  UuidToString(&uuid, reinterpret_cast<unsigned char**>(&dir_name));
+  string dir(cbase, base_len);
+  dir += dir_name;
+  _mkdir(dir.c_str());
+  RpcStringFree(reinterpret_cast<unsigned char**>(&dir_name));
+#else
+  string dir = "/tmp/kiwix-lib_XXXXXX";
+  /* mkdtemp() needs a writable, NUL-terminated buffer and rewrites the
+   * XXXXXX suffix in place; the string's own storage provides both. */
+  if (mkdtemp(&dir[0]) == NULL) {
+    dir.clear();
+  }
+#endif
+  return dir;
+}
+
+/*
+ * Try to create a link and if does not work then make a copy.
+ *
+ * Returns true when `destPath` now refers to the content of `sourcePath`
+ * (hard link on non-Windows systems, byte copy otherwise) and false when
+ * the source cannot be opened, the destination cannot be created, closing
+ * the destination fails, or an exception is raised.
+ */
+bool copyFile(const string& sourcePath, const string& destPath)
+{
+  try {
+#ifndef _WIN32
+    if (link(sourcePath.c_str(), destPath.c_str()) == 0) {
+      return true;
+    }
+#endif
+    std::ifstream infile(sourcePath.c_str(), std::ios_base::binary);
+    if (!infile.is_open()) {
+      /* Checked first so that no empty destination file is left behind */
+      return false;
+    }
+    std::ofstream outfile(destPath.c_str(), std::ios_base::binary);
+    if (!outfile.is_open()) {
+      return false;
+    }
+    /* Streaming an empty buffer sets failbit on the output, so an empty
+     * source is handled by simply not streaming anything. */
+    if (infile.peek() != std::ifstream::traits_type::eof()) {
+      outfile << infile.rdbuf();
+    }
+    outfile.close();
+    return !outfile.fail() && !infile.bad();
+  } catch (exception& e) {
+    cerr << e.what() << endl;
+    return false;
+  }
+}
+
+/* Returns the path of the running executable.
+ * Windows: GetModuleFileName(); macOS: _NSGetExecutablePath(); otherwise
+ * the target of the /proc/self/exe link, or "" when it cannot be read. */
+string getExecutablePath()
+{
+  char buffer[PATH_MAX];
+
+#ifdef _WIN32
+  GetModuleFileName(NULL, buffer, PATH_MAX);
+  return std::string(buffer);
+#elif __APPLE__
+  uint32_t capacity = (uint32_t)PATH_MAX;
+  _NSGetExecutablePath(buffer, &capacity);
+  return std::string(buffer);
+#else
+  /* readlink() does not NUL-terminate: use the returned length */
+  const ssize_t length = readlink("/proc/self/exe", buffer, PATH_MAX);
+  if (length == -1) {
+    return "";
+  }
+  return std::string(buffer, length);
+#endif
+}
+
+/* Writes `content` to the file at `path`, replacing any previous content.
+ * Returns true when the file could be opened, written and closed, false
+ * otherwise. */
+bool writeTextFile(const string& path, const string& content)
+{
+  std::ofstream file;
+  file.open(path.c_str());
+  if (!file.is_open()) {
+    return false;
+  }
+  file << content;
+  /* close() flushes; a failed flush is reported through the stream state */
+  file.close();
+  return !file.fail();
+}
+
+/* Returns the current working directory, or an empty string when it
+ * cannot be determined. */
+string getCurrentDirectory()
+{
+  /* With (NULL, 0) getcwd allocates the buffer; we own and free it */
+  char* a_cwd = getcwd(NULL, 0);
+  if (a_cwd == NULL) {
+    return "";
+  }
+  string s_cwd(a_cwd);
+  free(a_cwd);
+  return s_cwd;
+}
+
+/*
+ * Returns the directory where kiwix data is stored.
+ *
+ * Windows    : %APPDATA% as is when set and non-empty; otherwise "kiwix"
+ *              under %USERPROFILE% (or under the current directory).
+ * Elsewhere  : $KIWIX_DATA_DIR as is when set and non-empty; otherwise
+ *              "kiwix" under $XDG_DATA_HOME when that is set and
+ *              non-empty, else under $HOME/.local/share (the current
+ *              directory replaces $HOME when it is unset).
+ */
+string getDataDirectory()
+{
+#ifdef _WIN32
+  const char* env = ::getenv("APPDATA");
+#else
+  const char* env = ::getenv("KIWIX_DATA_DIR");
+#endif
+  if (env != nullptr && env[0] != '\0') {
+    return env;
+  }
+
+  std::string base;
+#ifdef _WIN32
+  env = ::getenv("USERPROFILE");
+  base = (env != nullptr) ? std::string(env) : getCurrentDirectory();
+#else
+  env = ::getenv("XDG_DATA_HOME");
+  if (env != nullptr && env[0] != '\0') {
+    base = env;
+  } else {
+    env = ::getenv("HOME");
+    base = (env != nullptr) ? std::string(env) : getCurrentDirectory();
+    base = appendToDirectory(appendToDirectory(base, ".local"), "share");
+  }
+#endif
+  return appendToDirectory(base, "kiwix");
+}
--- a/src/tools/regexTools.cpp
+++ b/src/tools/regexTools.cpp
@ -0,0 +1,93 @@
+/*
+ * Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU  General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#include <tools/regexTools.h>
+
+/* Compiled matchers, keyed by the pattern text they were built from */
+std::map<std::string, icu::RegexMatcher*> regexCache;
+
+/* Returns the case-insensitive matcher for `regex`, compiling it and
+ * storing it in regexCache on first use. The returned pointer is owned by
+ * the cache. */
+icu::RegexMatcher* buildRegex(const std::string& regex)
+{
+  /* Regex is in cache */
+  const auto cached = regexCache.find(regex);
+  if (cached != regexCache.end()) {
+    return cached->second;
+  }
+
+  /* Regex needs to be parsed (and cached) */
+  UErrorCode status = U_ZERO_ERROR;
+  icu::UnicodeString uregex(regex.c_str());
+  icu::RegexMatcher* const compiled
+      = new icu::RegexMatcher(uregex, UREGEX_CASE_INSENSITIVE, status);
+  regexCache[regex] = compiled;
+  return compiled;
+}
+
+/* Destroys every matcher held in regexCache and empties the cache.
+ * Pointers previously returned by buildRegex() are invalid afterwards;
+ * the next buildRegex() call compiles its pattern again. */
+void freeRegexCache()
+{
+  for (auto& entry : regexCache) {
+    delete entry.second;
+  }
+  regexCache.clear();
+}
+/* Tells whether `regex` (case-insensitive, cached through buildRegex())
+ * matches somewhere in `content`. */
+bool matchRegex(const std::string& content, const std::string& regex)
+{
+  ucnv_setDefaultName("UTF-8");
+  icu::RegexMatcher* const matcher = buildRegex(regex);
+  icu::UnicodeString subject(content.c_str());
+  matcher->reset(subject);
+  return matcher->find();
+}
+
+/* Returns `content` with every match of `regex` replaced by
+ * `replacement`, encoded as UTF-8. */
+std::string replaceRegex(const std::string& content,
+                         const std::string& replacement,
+                         const std::string& regex)
+{
+  ucnv_setDefaultName("UTF-8");
+  icu::UnicodeString subject(content.c_str());
+  icu::UnicodeString substitute(replacement.c_str());
+
+  icu::RegexMatcher* const matcher = buildRegex(regex);
+  matcher->reset(subject);
+
+  UErrorCode status = U_ZERO_ERROR;
+  const icu::UnicodeString replaced = matcher->replaceAll(substitute, status);
+
+  std::string utf8;
+  replaced.toUTF8String(utf8);
+  return utf8;
+}
+
+/* Inserts `replacement` right after the first match of `regex` in
+ * `content` and returns the result as UTF-8. When nothing matches,
+ * `content` is returned unchanged. */
+std::string appendToFirstOccurence(const std::string& content,
+                                   const std::string regex,
+                                   const std::string& replacement)
+{
+  ucnv_setDefaultName("UTF-8");
+  icu::UnicodeString ucontent(content.c_str());
+  icu::UnicodeString ureplacement(replacement.c_str());
+  icu::RegexMatcher* const matcher = buildRegex(regex);
+  matcher->reset(ucontent);
+
+  if (!matcher->find()) {
+    return content;
+  }
+
+  /* end() is the offset just past the match: insert there */
+  UErrorCode status = U_ZERO_ERROR;
+  const int32_t insertAt = matcher->end(status);
+  ucontent.insert(insertAt, ureplacement);
+
+  std::string utf8;
+  ucontent.toUTF8String(utf8);
+  return utf8;
+}
--- a/src/tools/stringTools.cpp
+++ b/src/tools/stringTools.cpp
@ -0,0 +1,374 @@
+/*
+ * Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU  General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#include <tools/stringTools.h>
+
+#include <unicode/normlzr.h>
+#include <unicode/rep.h>
+#include <unicode/translit.h>
+#include <unicode/ucnv.h>
+#include <unicode/uniset.h>
+#include <unicode/ustring.h>
+
+/*
+ * Tell ICU where to find its dat file (tables).
+ *
+ * Only has an effect when built for Apple platforms; everywhere else the
+ * body is compiled out. The path handed to ICU is "icudt58l.dat" resolved
+ * against the directory derived from the executable path (presumably the
+ * folder containing the running binary -- see the path helpers).
+ */
+void kiwix::loadICUExternalTables()
+{
+#ifdef __APPLE__
+  std::string binaryPath = getExecutablePath();
+  std::string binaryDirectory = removeLastPathElement(binaryPath);
+  std::string icuDataPath
+      = computeAbsolutePath(binaryDirectory, "icudt58l.dat");
+  try {
+    u_setDataDirectory(icuDataPath.c_str());
+  } catch (const std::exception& error) {
+    std::cerr << error.what() << std::endl;
+  }
+#endif
+}
+
+/*
+ * Return `text` lower-cased and stripped of its combining marks.
+ *
+ * The work is done by the ICU transliterator
+ * "Lower; NFD; [:M:] remove; NFC": lower-case, decompose, drop every mark
+ * character, then recompose. The input is read as UTF-8 and the result is
+ * produced as UTF-8.
+ *
+ * If ICU cannot create the transliterator (for instance because its data
+ * tables are unavailable), `text` is returned unchanged instead of
+ * dereferencing a null pointer.
+ */
+std::string kiwix::removeAccents(const std::string& text)
+{
+  loadICUExternalTables();
+  ucnv_setDefaultName("UTF-8");
+  UErrorCode status = U_ZERO_ERROR;
+  auto removeAccentsTrans = icu::Transliterator::createInstance(
+      "Lower; NFD; [:M:] remove; NFC", UTRANS_FORWARD, status);
+
+  /* createInstance() reports failure through `status` and a null result. */
+  if (U_FAILURE(status) || removeAccentsTrans == nullptr) {
+    delete removeAccentsTrans;
+    return text;
+  }
+
+  icu::UnicodeString ustring(text.c_str());
+  removeAccentsTrans->transliterate(ustring);
+  delete removeAccentsTrans;
+
+  std::string unaccentedText;
+  ustring.toUTF8String(unaccentedText);
+  return unaccentedText;
+}
+
+/*
+ * Prepare an integer for display: a comma is inserted between each group of
+ * three digits, counting from the right (1234567 -> "1,234,567").
+ */
+std::string kiwix::beautifyInteger(uint64_t number)
+{
+  std::stringstream digitStream;
+  digitStream << number;
+  std::string digits = digitStream.str();
+
+  /* Walk leftwards from the last group; stop before the first digit so that
+     no leading comma is produced. */
+  for (signed int pos = static_cast<signed int>(digits.size()) - 3;
+       pos > 0;
+       pos -= 3) {
+    digits.insert(pos, ",");
+  }
+
+  return digits;
+}
+
+/*
+ * Render a byte count as a human readable size.
+ *
+ * Values of at least 2^30, 2^20 or 2^10 bytes are shown in "GB", "MB" or
+ * "KB" respectively (1024-based) with two decimals; smaller values are
+ * shown as a plain integer followed by " B".
+ */
+std::string kiwix::beautifyFileSize(uint64_t number)
+{
+  const double kilo = 1024.0;
+  const double mega = kilo * 1024;
+  const double giga = mega * 1024;
+
+  std::stringstream out;
+  out << std::fixed << std::setprecision(2);
+
+  if ((number >> 30) != 0) {
+    out << (number / giga) << " GB";
+  } else if ((number >> 20) != 0) {
+    out << (number / mega) << " MB";
+  } else if ((number >> 10) != 0) {
+    out << (number / kilo) << " KB";
+  } else {
+    out << number << " B";
+  }
+
+  return out.str();
+}
+
+/*
+ * Debug helper: dump the UTF-16 code units of `s` on standard output.
+ *
+ * ASCII units (below 0x80) are written as the character itself, every other
+ * unit as its hexadecimal value with a "0x" prefix. Units are separated by a
+ * space and the line is terminated with std::endl.
+ *
+ * The full 16-bit value of each unit is printed (it is not narrowed to a
+ * `char` first), and the formatting flags of std::cout are restored before
+ * returning so that later output is not left in hexadecimal mode.
+ */
+void kiwix::printStringInHexadecimal(icu::UnicodeString s)
+{
+  const std::ios_base::fmtflags savedFlags = std::cout.flags();
+  std::cout << std::showbase << std::hex;
+
+  /* Fetch the buffer once instead of on every iteration. */
+  const auto* units = s.getTerminatedBuffer();
+  const int length = s.length();
+  for (int i = 0; i < length; i++) {
+    const unsigned int unit = static_cast<unsigned int>(units[i]) & 0xffffu;
+    if (unit >= 0x80u) {
+      std::cout << unit << " ";
+    } else {
+      std::cout << static_cast<char>(unit) << " ";
+    }
+  }
+  std::cout << std::endl;
+
+  std::cout.flags(savedFlags);
+}
+
+/*
+ * Debug helper: dump the bytes of the NUL-terminated string `s` on standard
+ * output.
+ *
+ * Bytes below 0x80 are written as the character itself, all others as their
+ * hexadecimal value with a "0x" prefix. Every byte is followed by a space
+ * and the line is terminated with std::endl.
+ *
+ * Each byte is read as unsigned, so a high byte prints as e.g. 0xc3 even
+ * where plain `char` is signed. The formatting flags of std::cout are
+ * restored before returning.
+ */
+void kiwix::printStringInHexadecimal(const char* s)
+{
+  const std::ios_base::fmtflags savedFlags = std::cout.flags();
+  std::cout << std::showbase << std::hex;
+
+  for (char const* pc = s; *pc; ++pc) {
+    const unsigned char byte = static_cast<unsigned char>(*pc);
+    if (byte & 0x80) {
+      std::cout << static_cast<unsigned int>(byte);
+    } else {
+      std::cout << *pc;
+    }
+    std::cout << ' ';
+  }
+  std::cout << std::endl;
+
+  std::cout.flags(savedFlags);
+}
+
+/*
+ * Replace, in place, every occurrence of `oldStr` in `str` by `newStr`.
+ *
+ * The scan resumes right after each inserted `newStr`, so text introduced by
+ * a replacement is never searched again. An empty `oldStr` matches at every
+ * position and would never terminate, so in that case `str` is left
+ * untouched.
+ */
+void kiwix::stringReplacement(std::string& str,
+                              const std::string& oldStr,
+                              const std::string& newStr)
+{
+  if (oldStr.empty()) {
+    return;
+  }
+
+  size_t pos = 0;
+  while ((pos = str.find(oldStr, pos)) != std::string::npos) {
+    str.replace(pos, oldStr.length(), newStr);
+    pos += newStr.length();
+  }
+}
+
+/*
+ * Encode string to avoid XSS attacks: every '<' becomes "&lt;" and every
+ * '>' becomes "&gt;". No other character (including '&' and quotes) is
+ * altered.
+ */
+std::string kiwix::encodeDiples(const std::string& str)
+{
+  std::string escaped;
+  for (const char c : str) {
+    if (c == '<') {
+      escaped += "&lt;";
+    } else if (c == '>') {
+      escaped += "&gt;";
+    } else {
+      escaped += c;
+    }
+  }
+  return escaped;
+}
+
+/* urlEncode() based on javascript encodeURI() &
+   encodeURIComponent(). Mostly code from rstudio/httpuv (GPLv3) */
+
+/* Tell whether `c` is one of the reserved URL characters
+   ; , / ? : @ & = + $ */
+bool isReservedUrlChar(char c)
+{
+  static const char reservedChars[]
+      = {';', ',', '/', '?', ':', '@', '&', '=', '+', '$'};
+
+  for (const char reserved : reservedChars) {
+    if (c == reserved) {
+      return true;
+    }
+  }
+  return false;
+}
+
+/*
+ * Tell whether `c` has to be percent-encoded in a URL.
+ *
+ * ASCII letters, digits and the marks - _ . ! ~ * ' ( ) are never escaped.
+ * Reserved URL characters (see isReservedUrlChar()) are escaped only when
+ * `encodeReserved` is true. Everything else is always escaped.
+ */
+bool needsEscape(char c, bool encodeReserved)
+{
+  const bool isLetter = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
+  const bool isDigit = (c >= '0' && c <= '9');
+  if (isLetter || isDigit) {
+    return false;
+  }
+
+  if (isReservedUrlChar(c)) {
+    return encodeReserved;
+  }
+
+  const bool isUnreservedMark = c == '-' || c == '_' || c == '.'
+                                || c == '!' || c == '~' || c == '*'
+                                || c == '\'' || c == '(' || c == ')';
+  return !isUnreservedMark;
+}
+
+/* Return the value (0-15) of the hexadecimal digit `c`, upper or lower
+   case, or -1 when `c` is not a hexadecimal digit. */
+int hexToInt(char c) {
+  if (c >= '0' && c <= '9') {
+    return c - '0';
+  }
+  if (c >= 'a' && c <= 'f') {
+    return c - 'a' + 10;
+  }
+  if (c >= 'A' && c <= 'F') {
+    return c - 'A' + 10;
+  }
+  return -1;
+}
+
+/*
+ * Percent-encode `value` for use in a URL.
+ *
+ * Characters for which needsEscape() answers false are copied as they are;
+ * every other byte is written as '%' followed by exactly two upper-case
+ * hexadecimal digits. With `encodeReserved` set, the reserved URL characters
+ * (; , / ? : @ & = + $) are encoded as well.
+ *
+ * The fill character is set to '0' so that bytes below 0x10 come out as
+ * e.g. "%0A" rather than being padded with a space.
+ */
+std::string kiwix::urlEncode(const std::string& value, bool encodeReserved)
+{
+  std::ostringstream os;
+  os << std::hex << std::uppercase << std::setfill('0');
+  for (const char c : value) {
+    if (!needsEscape(c, encodeReserved)) {
+      os << c;
+    } else {
+      // Go through unsigned char so that high bytes are not sign-extended.
+      os << '%' << std::setw(2)
+         << static_cast<unsigned int>(static_cast<unsigned char>(c));
+    }
+  }
+  return os.str();
+}
+
+/*
+ * Decode the percent-escapes of `value`.
+ *
+ * A '%' followed by two hexadecimal digits is replaced by the byte they
+ * denote. When `component` is false, escapes that denote a reserved URL
+ * character (see isReservedUrlChar()) are kept in their escaped form.
+ * A '%' that is not followed by two more characters, or whose two following
+ * characters are not both hexadecimal digits, is copied through verbatim.
+ *
+ * The scan is index based so that inputs shorter than three characters never
+ * lead to iterator arithmetic before the beginning of the string.
+ */
+std::string kiwix::urlDecode(const std::string& value, bool component)
+{
+  std::ostringstream os;
+  const std::string::size_type length = value.size();
+
+  for (std::string::size_type i = 0; i < length; ++i) {
+    const char current = value[i];
+
+    // If there aren't enough characters left for this to be a
+    // valid escape code, just use the character and move on
+    if (current != '%' || length - i < 3) {
+      os << current;
+      continue;
+    }
+
+    const char hi = value[++i];
+    const char lo = value[++i];
+    const int iHi = hexToInt(hi);
+    const int iLo = hexToInt(lo);
+    if (iHi < 0 || iLo < 0) {
+      // Invalid escape sequence
+      os << '%' << hi << lo;
+      continue;
+    }
+
+    const char decoded = static_cast<char>(iHi << 4 | iLo);
+    if (!component && isReservedUrlChar(decoded)) {
+      os << '%' << hi << lo;
+    } else {
+      os << decoded;
+    }
+  }
+
+  return os.str();
+}
+
+/*
+ * Split string in a token array.
+ *
+ * Every character of `delims` acts as a separator. Runs of separators are
+ * collapsed and leading/trailing separators are ignored, so no empty token
+ * is ever produced.
+ */
+std::vector<std::string> kiwix::split(const std::string& str,
+                                      const std::string& delims = " *-")
+{
+  std::vector<std::string> tokens;
+
+  std::string::size_type tokenStart = str.find_first_not_of(delims, 0);
+  while (tokenStart != std::string::npos) {
+    /* npos here means the token runs to the end of the string. */
+    const std::string::size_type tokenEnd
+        = str.find_first_of(delims, tokenStart);
+    tokens.push_back(str.substr(tokenStart, tokenEnd - tokenStart));
+    tokenStart = str.find_first_not_of(delims, tokenEnd);
+  }
+
+  return tokens;
+}
+
+/* Convenience overload: split the C string `lhs` using the characters of the
+   C string `rhs` as delimiters. */
+std::vector<std::string> kiwix::split(const char* lhs, const char* rhs)
+{
+  const std::string text(lhs);
+  const std::string delimiters(rhs);
+  return split(text, delimiters);
+}
+
+/* Convenience overload: split the C string `lhs` using the characters of
+   `rhs` as delimiters. */
+std::vector<std::string> kiwix::split(const char* lhs, const std::string& rhs)
+{
+  const char* const delimiters = rhs.c_str();
+  return split(lhs, delimiters);
+}
+
+/* Convenience overload: split `lhs` using the characters of the C string
+   `rhs` as delimiters. */
+std::vector<std::string> kiwix::split(const std::string& lhs, const char* rhs)
+{
+  const char* const text = lhs.c_str();
+  return split(text, rhs);
+}
+
+/*
+ * Return `word` with its first UTF-16 code unit converted to upper case
+ * (through ICU); the rest of the text is left as it is. An empty input
+ * yields an empty string.
+ */
+std::string kiwix::ucFirst(const std::string& word)
+{
+  std::string capitalised;
+
+  if (!word.empty()) {
+    icu::UnicodeString text(word.c_str());
+    icu::UnicodeString head(text, 0, 1);
+    head.toUpper();
+    text.replace(0, 1, head);
+    text.toUTF8String(capitalised);
+  }
+
+  return capitalised;
+}
+
+/*
+ * Return `word` entirely converted to upper case (through ICU). An empty
+ * input yields an empty string.
+ */
+std::string kiwix::ucAll(const std::string& word)
+{
+  std::string upper;
+
+  if (!word.empty()) {
+    icu::UnicodeString text(word.c_str());
+    text.toUpper();
+    text.toUTF8String(upper);
+  }
+
+  return upper;
+}
+
+/*
+ * Return `word` with its first UTF-16 code unit converted to lower case
+ * (through ICU); the rest of the text is left as it is. An empty input
+ * yields an empty string.
+ */
+std::string kiwix::lcFirst(const std::string& word)
+{
+  std::string uncapitalised;
+
+  if (!word.empty()) {
+    icu::UnicodeString text(word.c_str());
+    icu::UnicodeString head(text, 0, 1);
+    head.toLower();
+    text.replace(0, 1, head);
+    text.toUTF8String(uncapitalised);
+  }
+
+  return uncapitalised;
+}
+
+/*
+ * Return `word` entirely converted to lower case (through ICU). An empty
+ * input yields an empty string.
+ */
+std::string kiwix::lcAll(const std::string& word)
+{
+  std::string lower;
+
+  if (!word.empty()) {
+    icu::UnicodeString text(word.c_str());
+    text.toLower();
+    text.toUTF8String(lower);
+  }
+
+  return lower;
+}
+
+/*
+ * Return `word` converted to title case by ICU's UnicodeString::toTitle(),
+ * called with a null break iterator. An empty input yields an empty string.
+ */
+std::string kiwix::toTitle(const std::string& word)
+{
+  std::string titled;
+
+  if (!word.empty()) {
+    icu::UnicodeString text(word.c_str());
+    /* toTitle() converts the string in place. */
+    text.toTitle(0);
+    text.toUTF8String(titled);
+  }
+
+  return titled;
+}
+
+/* Return the normalized form of `word`, which is simply its lower-cased
+   version as produced by lcAll(). */
+std::string kiwix::normalize(const std::string& word)
+{
+  std::string normalized = kiwix::lcAll(word);
+  return normalized;
+}