+ improved version of removeAccents()

2010-04-16 13:33:50 +00:00 · 2010-04-16 13:33:50 +00:00 · 31dc311f2a
parent 27ee27bbe9
commit 31dc311f2a
4 changed files with 10 additions and 21 deletions
--- a/src/common/kiwix/indexer.cpp
+++ b/src/common/kiwix/indexer.cpp
@@ -123,19 +123,19 @@ namespace kiwix {
 	  /* Index the title */
 	  if (!this->htmlParser.title.empty()) {
-	    indexer.index_text_without_positions(removeAccents(this->htmlParser.title.c_str(), this->htmlParser.title.size()), 
+	    indexer.index_text_without_positions(removeAccents(this->htmlParser.title), 
 						 ((this->htmlParser.dump.size() / 100) + 1) / 
 						 countWords(this->htmlParser.title) );
 	  }
 	  /* Index the keywords */
 	  if (!this->htmlParser.keywords.empty()) {
-	    indexer.index_text_without_positions(removeAccents(this->htmlParser.keywords.c_str(), this->htmlParser.keywords.size()), 3);
+	    indexer.index_text_without_positions(removeAccents(this->htmlParser.keywords), 3);
 	  }
 	  /* Index the content */
 	  if (!this->htmlParser.dump.empty()) {
-	    indexer.index_text_without_positions(removeAccents(this->htmlParser.dump.c_str(), this->htmlParser.dump.size()));
+	    indexer.index_text_without_positions(removeAccents(this->htmlParser.dump));
 	  }
 	  /* add to the database */
--- a/src/common/kiwix/searcher.cpp
+++ b/src/common/kiwix/searcher.cpp
@@ -36,7 +36,7 @@ namespace kiwix {
    /* Create the query term vector */
    /* I have the doublequote " because bug ID: 2939690 */
-    std::vector<std::string> queryTerms = split(removeAccents(search.c_str(), search.size()), " #@%$0/\\_-*()[]{},;:\"´`'");
+    std::vector<std::string> queryTerms = split(removeAccents(search), " #@%$0/\\_-*()[]{},;:\"´`'");
    /* Create query object */
    Xapian::Query query(Xapian::Query::OP_OR, queryTerms.begin(), queryTerms.end());
--- a/src/common/unaccent.cpp
+++ b/src/common/unaccent.cpp
@@ -57,23 +57,12 @@ void UnaccentTransliterator::handleTransliterate(Replaceable& text,
 /* Remove accents from a String */
 UnaccentTransliterator unaccent;
 char *unaccentedString = NULL;
 unsigned unaccentedStringSize=0;
 UnicodeString unicodeAccentedString;
-const char* removeAccents(const char *accentedString, const unsigned size) {
+std::string &removeAccents(std::string &text) {
-
+  unicodeAccentedString = UnicodeString(text.c_str());
- /* Realloc memory if necessary */
- if (size > unaccentedStringSize) {
-   unaccentedString = (char*)realloc(unaccentedString, size+1);
-   unaccentedStringSize = size+1;
- }
  /* Transcode the String */
- unicodeAccentedString = UnicodeString(accentedString);
  unaccent.transliterate(unicodeAccentedString);
-  
+  text.clear();
-  /* Extract and return the result */
+  unicodeAccentedString.toUTF8String(text);
-  unicodeAccentedString.extract(0, size, unaccentedString, size, "UTF-8");
+  return text;
- return unaccentedString;
 }
--- a/src/common/unaccent.h
+++ b/src/common/unaccent.h
@ -95,4 +95,4 @@ private:
    static const char fgClassID;
 };
-const char* removeAccents(const char *, const unsigned);
+std::string &removeAccents(std::string &text);