+ improved version of removeAccents()

This commit is contained in:
kelson42 2010-04-16 13:33:50 +00:00
parent 27ee27bbe9
commit 31dc311f2a
4 changed files with 10 additions and 21 deletions

View File

@ -123,19 +123,19 @@ namespace kiwix {
/* Index the title */ /* Index the title */
if (!this->htmlParser.title.empty()) { if (!this->htmlParser.title.empty()) {
indexer.index_text_without_positions(removeAccents(this->htmlParser.title.c_str(), this->htmlParser.title.size()), indexer.index_text_without_positions(removeAccents(this->htmlParser.title),
((this->htmlParser.dump.size() / 100) + 1) / ((this->htmlParser.dump.size() / 100) + 1) /
countWords(this->htmlParser.title) ); countWords(this->htmlParser.title) );
} }
/* Index the keywords */ /* Index the keywords */
if (!this->htmlParser.keywords.empty()) { if (!this->htmlParser.keywords.empty()) {
indexer.index_text_without_positions(removeAccents(this->htmlParser.keywords.c_str(), this->htmlParser.keywords.size()), 3); indexer.index_text_without_positions(removeAccents(this->htmlParser.keywords), 3);
} }
/* Index the content */ /* Index the content */
if (!this->htmlParser.dump.empty()) { if (!this->htmlParser.dump.empty()) {
indexer.index_text_without_positions(removeAccents(this->htmlParser.dump.c_str(), this->htmlParser.dump.size())); indexer.index_text_without_positions(removeAccents(this->htmlParser.dump));
} }
/* add to the database */ /* add to the database */

View File

@ -36,7 +36,7 @@ namespace kiwix {
/* Create the query term vector */ /* Create the query term vector */
/* I have the doublequote " because bug ID: 2939690 */ /* I have the doublequote " because bug ID: 2939690 */
std::vector<std::string> queryTerms = split(removeAccents(search.c_str(), search.size()), " #@%$0/\\_-*()[]{},;:\"´`'"); std::vector<std::string> queryTerms = split(removeAccents(search), " #@%$0/\\_-*()[]{},;:\"´`'");
/* Create query object */ /* Create query object */
Xapian::Query query(Xapian::Query::OP_OR, queryTerms.begin(), queryTerms.end()); Xapian::Query query(Xapian::Query::OP_OR, queryTerms.begin(), queryTerms.end());

View File

@ -57,23 +57,12 @@ void UnaccentTransliterator::handleTransliterate(Replaceable& text,
/* Remove accents from a String */ /* Remove accents from a String */
UnaccentTransliterator unaccent; UnaccentTransliterator unaccent;
char *unaccentedString = NULL;
unsigned unaccentedStringSize=0;
UnicodeString unicodeAccentedString; UnicodeString unicodeAccentedString;
const char* removeAccents(const char *accentedString, const unsigned size) { std::string &removeAccents(std::string &text) {
unicodeAccentedString = UnicodeString(text.c_str());
/* Realloc memory if necessary */
if (size > unaccentedStringSize) {
unaccentedString = (char*)realloc(unaccentedString, size+1);
unaccentedStringSize = size+1;
}
/* Transcode the String */
unicodeAccentedString = UnicodeString(accentedString);
unaccent.transliterate(unicodeAccentedString); unaccent.transliterate(unicodeAccentedString);
text.clear();
/* Extract and return the result */ unicodeAccentedString.toUTF8String(text);
unicodeAccentedString.extract(0, size, unaccentedString, size, "UTF-8"); return text;
return unaccentedString;
} }

View File

@ -95,4 +95,4 @@ private:
static const char fgClassID; static const char fgClassID;
}; };
const char* removeAccents(const char *, const unsigned); std::string &removeAccents(std::string &text);