From 68cf5e49774bc1165ecf54c344b0ed9db25d9445 Mon Sep 17 00:00:00 2001 From: kelson42 Date: Tue, 14 Aug 2012 10:58:03 +0000 Subject: [PATCH] + move unaccent.[h|cpp] to stringTools.[h|cpp] + remove unaccent.[h|cpp] + put stringTools.[h|cpp] methods in the "kiwix" namespace + modify code & compilation script to keep everything compiling --- src/common/kiwix/indexer.cpp | 7 ++--- src/common/kiwix/indexer.h | 2 +- src/common/kiwix/library.cpp | 2 +- src/common/kiwix/library.h | 2 +- src/common/kiwix/searcher.cpp | 6 ++-- src/common/kiwix/searcher.h | 1 - src/common/stringTools.cpp | 57 +++++++++++++++++++++++++++++------ src/common/stringTools.h | 32 +++++++++++++++----- src/common/unaccent.cpp | 56 ---------------------------------- src/common/unaccent.h | 39 ------------------------ 10 files changed, 81 insertions(+), 123 deletions(-) delete mode 100644 src/common/unaccent.cpp delete mode 100644 src/common/unaccent.h diff --git a/src/common/kiwix/indexer.cpp b/src/common/kiwix/indexer.cpp index f05e0f8d0..1fc99de13 100644 --- a/src/common/kiwix/indexer.cpp +++ b/src/common/kiwix/indexer.cpp @@ -154,11 +154,10 @@ namespace kiwix { token.size = sizeStringStream.str(); /* Remove accent */ - token.title = removeAccents(token.accentedTitle); - token.keywords = removeAccents(htmlParser.keywords); - token.content = removeAccents(htmlParser.dump); + token.title = kiwix::removeAccents(token.accentedTitle); + token.keywords = kiwix::removeAccents(htmlParser.keywords); + token.content = kiwix::removeAccents(htmlParser.dump); self->pushToIndexQueue(token); - } /* Test if the thread should be cancelled */ diff --git a/src/common/kiwix/indexer.h b/src/common/kiwix/indexer.h index 46ae5f304..49650038b 100644 --- a/src/common/kiwix/indexer.h +++ b/src/common/kiwix/indexer.h @@ -35,7 +35,7 @@ #endif #include -#include +#include #include #include #include diff --git a/src/common/kiwix/library.cpp b/src/common/kiwix/library.cpp index 4285e2458..389a5bd33 100644 --- a/src/common/kiwix/library.cpp +++ b/src/common/kiwix/library.cpp @@ -62,7 +62,7 @@ namespace kiwix { std::string Book::getHumanReadableIdFromPath() { std::string id = path; if (!id.empty()) { - removeAccents(id); + kiwix::removeAccents(id); id = replaceRegex(id, "", "^.*/"); id = replaceRegex(id, "", "\\.zim[a-z]*$"); id = replaceRegex(id, "_", " "); diff --git a/src/common/kiwix/library.h b/src/common/kiwix/library.h index 8523bc4b1..130890a8d 100644 --- a/src/common/kiwix/library.h +++ b/src/common/kiwix/library.h @@ -27,7 +27,7 @@ #include #include -#include +#include #include #define KIWIX_LIBRARY_VERSION "20110515" diff --git a/src/common/kiwix/searcher.cpp b/src/common/kiwix/searcher.cpp index 624101fb2..a5709dff4 100644 --- a/src/common/kiwix/searcher.cpp +++ b/src/common/kiwix/searcher.cpp @@ -160,10 +160,10 @@ namespace kiwix { result["snippet"] = this->resultOffset->snippet; if (this->resultOffset->size >= 0) - result["size"] = ::beautifyInteger(this->resultOffset->size); + result["size"] = kiwix::beautifyInteger(this->resultOffset->size); if (this->resultOffset->wordCount >= 0) - result["wordCount"] = ::beautifyInteger(this->resultOffset->wordCount); + result["wordCount"] = kiwix::beautifyInteger(this->resultOffset->wordCount); resultsCDT.PushBack(result); this->resultOffset++; @@ -195,7 +195,7 @@ namespace kiwix { } oData["pages"] = pagesCDT; - oData["count"] = ::beautifyInteger(this->estimatedResultCount); + oData["count"] = kiwix::beautifyInteger(this->estimatedResultCount); oData["searchPattern"] = this->searchPattern; oData["resultStart"] = this->resultStart + 1; oData["resultEnd"] = (this->resultEnd > this->estimatedResultCount ? this->estimatedResultCount : this->resultEnd); diff --git a/src/common/kiwix/searcher.h b/src/common/kiwix/searcher.h index 37cc8e901..f72bffb85 100644 --- a/src/common/kiwix/searcher.h +++ b/src/common/kiwix/searcher.h @@ -28,7 +28,6 @@ #include #include #include -#include #include #include diff --git a/src/common/stringTools.cpp b/src/common/stringTools.cpp index 217038670..0457c2f6a 100644 --- a/src/common/stringTools.cpp +++ b/src/common/stringTools.cpp @@ -20,8 +20,8 @@ #include "stringTools.h" /* Prepare integer for display */ -std::string beautifyInteger(const unsigned int number) { - stringstream numberStream; +std::string kiwix::beautifyInteger(const unsigned int number) { + std::stringstream numberStream; numberStream << number; std::string numberString = numberStream.str(); @@ -35,14 +35,14 @@ std::string beautifyInteger(const unsigned int number) { } /* Split string in a token array */ -std::vector split(const std::string & str, +std::vector kiwix::split(const std::string & str, const std::string & delims=" *-") { - string::size_type lastPos = str.find_first_not_of(delims, 0); - string::size_type pos = str.find_first_of(delims, lastPos); - vector tokens; + std::string::size_type lastPos = str.find_first_not_of(delims, 0); + std::string::size_type pos = str.find_first_of(delims, lastPos); + std::vector tokens; - while (string::npos != pos || string::npos != lastPos) + while (std::string::npos != pos || std::string::npos != lastPos) { tokens.push_back(str.substr(lastPos, pos - lastPos)); lastPos = str.find_first_not_of(delims, pos); @@ -52,15 +52,52 @@ std::vector split(const std::string & str, return tokens; } -std::vector split(const char* lhs, const char* rhs){ +std::vector kiwix::split(const char* lhs, const char* rhs){ const std::string m1 (lhs), m2 (rhs); return split(m1, m2); } -std::vector split(const char* lhs, const std::string& rhs){ +std::vector kiwix::split(const char* lhs, const std::string& rhs){ return split(lhs, rhs.c_str()); } -std::vector split(const std::string& lhs, const char* rhs){ +std::vector kiwix::split(const std::string& lhs, const char* rhs){ return split(lhs.c_str(), rhs); } + +std::string kiwix::removeAccents(const std::string &text) { + ucnv_setDefaultName("UTF-8"); + UErrorCode status = U_ZERO_ERROR; + Transliterator *removeAccentsTrans = Transliterator::createInstance("Lower; NFD; [:M:] remove; NFC", UTRANS_FORWARD, status); + UnicodeString ustring = UnicodeString(text.c_str()); + removeAccentsTrans->transliterate(ustring); + std::string unaccentedText; + ustring.toUTF8String(unaccentedText); + return unaccentedText; +} + +void kiwix::printStringInHexadecimal(UnicodeString s) { + std::cout << std::showbase << std::hex; + for (int i=0; i + * Copyright 2011-2012 Emmanuel Engelhart * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -17,16 +17,34 @@ * MA 02110-1301, USA. */ +#ifndef KIWIX_STRINGTOOLS_H +#define KIWIX_STRINGTOOLS_H + +#include +#include +#include +#include +#include +#include +#include +#include + #include #include #include #include #include -using namespace std; +namespace kiwix { + std::string removeAccents(const std::string &text); + std::string beautifyInteger(const unsigned int number); + std::vector split(const std::string&, const std::string&); + std::vector split(const char*, const char*); + std::vector split(const std::string&, const char*); + std::vector split(const char*, const std::string&); -std::string beautifyInteger(const unsigned int number); -std::vector split(const std::string&, const std::string&); -std::vector split(const char*, const char*); -std::vector split(const std::string&, const char*); -std::vector split(const char*, std::string&); + void printStringInHexadecimal(const char *s); + void printStringInHexadecimal(UnicodeString s); +} + +#endif diff --git a/src/common/unaccent.cpp b/src/common/unaccent.cpp deleted file mode 100644 index 0e7f865d7..000000000 --- a/src/common/unaccent.cpp +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright 2011 Emmanuel Engelhart - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - * MA 02110-1301, USA. - */ - -#include "unaccent.h" - -UErrorCode status = U_ZERO_ERROR; -Transliterator *trans = Transliterator::createInstance("Lower; NFD; [:M:] remove; NFC", UTRANS_FORWARD, status); - -std::string removeAccents(const std::string &text) { - ucnv_setDefaultName("UTF-8"); - UnicodeString ustring = UnicodeString(text.c_str()); - trans->transliterate(ustring); - std::string unaccentedText; - ustring.toUTF8String(unaccentedText); - return unaccentedText; -} - -void printStringInHexadecimal(UnicodeString s) { - std::cout << std::showbase << std::hex; - for (int i=0; i - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - * MA 02110-1301, USA. - */ - -#ifndef KIWIX_UNACCENT_H -#define KIWIX_UNACCENT_H - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -std::string removeAccents(const std::string &text); -void printStringInHexadecimal(const char *s); -void printStringInHexadecimal(UnicodeString s); - -#endif