From 31dc311f2a1b87c66eb0638d886c1d0d8a36bbfa Mon Sep 17 00:00:00 2001 From: kelson42 Date: Fri, 16 Apr 2010 13:33:50 +0000 Subject: [PATCH] + improved version of removeAccents() --- src/common/kiwix/indexer.cpp | 6 +++--- src/common/kiwix/searcher.cpp | 2 +- src/common/unaccent.cpp | 21 +++++---------------- src/common/unaccent.h | 2 +- 4 files changed, 10 insertions(+), 21 deletions(-) diff --git a/src/common/kiwix/indexer.cpp b/src/common/kiwix/indexer.cpp index 33b00f4f4..d2de96b23 100644 --- a/src/common/kiwix/indexer.cpp +++ b/src/common/kiwix/indexer.cpp @@ -123,19 +123,19 @@ namespace kiwix { /* Index the title */ if (!this->htmlParser.title.empty()) { - indexer.index_text_without_positions(removeAccents(this->htmlParser.title.c_str(), this->htmlParser.title.size()), + indexer.index_text_without_positions(removeAccents(this->htmlParser.title), ((this->htmlParser.dump.size() / 100) + 1) / countWords(this->htmlParser.title) ); } /* Index the keywords */ if (!this->htmlParser.keywords.empty()) { - indexer.index_text_without_positions(removeAccents(this->htmlParser.keywords.c_str(), this->htmlParser.keywords.size()), 3); + indexer.index_text_without_positions(removeAccents(this->htmlParser.keywords), 3); } /* Index the content */ if (!this->htmlParser.dump.empty()) { - indexer.index_text_without_positions(removeAccents(this->htmlParser.dump.c_str(), this->htmlParser.dump.size())); + indexer.index_text_without_positions(removeAccents(this->htmlParser.dump)); } /* add to the database */ diff --git a/src/common/kiwix/searcher.cpp b/src/common/kiwix/searcher.cpp index 817940978..92a2579a1 100644 --- a/src/common/kiwix/searcher.cpp +++ b/src/common/kiwix/searcher.cpp @@ -36,7 +36,7 @@ namespace kiwix { /* Create the query term vector */ /* I have the doublequote " because bug ID: 2939690 */ - std::vector queryTerms = split(removeAccents(search.c_str(), search.size()), " #@%$0/\\_-*()[]{},;:\"´`'"); + std::vector queryTerms = split(removeAccents(search), " 
#@%$0/\\_-*()[]{},;:\"´`'"); /* Create query object */ Xapian::Query query(Xapian::Query::OP_OR, queryTerms.begin(), queryTerms.end()); diff --git a/src/common/unaccent.cpp b/src/common/unaccent.cpp index 0dd27057c..b025ef361 100644 --- a/src/common/unaccent.cpp +++ b/src/common/unaccent.cpp @@ -57,23 +57,12 @@ void UnaccentTransliterator::handleTransliterate(Replaceable& text, /* Remove accents from a String */ UnaccentTransliterator unaccent; -char *unaccentedString = NULL; -unsigned unaccentedStringSize=0; UnicodeString unicodeAccentedString; -const char* removeAccents(const char *accentedString, const unsigned size) { - - /* Realloc memory if necessary */ - if (size > unaccentedStringSize) { - unaccentedString = (char*)realloc(unaccentedString, size+1); - unaccentedStringSize = size+1; - } - - /* Transcode the String */ - unicodeAccentedString = UnicodeString(accentedString); +std::string &removeAccents(std::string &text) { + unicodeAccentedString = UnicodeString(text.c_str()); unaccent.transliterate(unicodeAccentedString); - - /* Extract and return the result */ - unicodeAccentedString.extract(0, size, unaccentedString, size, "UTF-8"); - return unaccentedString; + text.clear(); + unicodeAccentedString.toUTF8String(text); + return text; } diff --git a/src/common/unaccent.h b/src/common/unaccent.h index c9de5309c..2d096717a 100644 --- a/src/common/unaccent.h +++ b/src/common/unaccent.h @@ -95,4 +95,4 @@ private: static const char fgClassID; }; -const char* removeAccents(const char *, const unsigned); +std::string &removeAccents(std::string &text);