+ improved version of removeAccents()

This commit is contained in:
kelson42 2010-04-16 13:33:50 +00:00
parent 27ee27bbe9
commit 31dc311f2a
4 changed files with 10 additions and 21 deletions

View File

@ -123,19 +123,19 @@ namespace kiwix {
/* Index the title */
if (!this->htmlParser.title.empty()) {
indexer.index_text_without_positions(removeAccents(this->htmlParser.title.c_str(), this->htmlParser.title.size()),
indexer.index_text_without_positions(removeAccents(this->htmlParser.title),
((this->htmlParser.dump.size() / 100) + 1) /
countWords(this->htmlParser.title) );
}
/* Index the keywords */
if (!this->htmlParser.keywords.empty()) {
indexer.index_text_without_positions(removeAccents(this->htmlParser.keywords.c_str(), this->htmlParser.keywords.size()), 3);
indexer.index_text_without_positions(removeAccents(this->htmlParser.keywords), 3);
}
/* Index the content */
if (!this->htmlParser.dump.empty()) {
indexer.index_text_without_positions(removeAccents(this->htmlParser.dump.c_str(), this->htmlParser.dump.size()));
indexer.index_text_without_positions(removeAccents(this->htmlParser.dump));
}
/* add to the database */

View File

@ -36,7 +36,7 @@ namespace kiwix {
/* Create the query term vector */
/* I have the doublequote " because bug ID: 2939690 */
std::vector<std::string> queryTerms = split(removeAccents(search.c_str(), search.size()), " #@%$0/\\_-*()[]{},;:\"´`'");
std::vector<std::string> queryTerms = split(removeAccents(search), " #@%$0/\\_-*()[]{},;:\"´`'");
/* Create query object */
Xapian::Query query(Xapian::Query::OP_OR, queryTerms.begin(), queryTerms.end());

View File

@ -57,23 +57,12 @@ void UnaccentTransliterator::handleTransliterate(Replaceable& text,
/* Remove accents from a String */
UnaccentTransliterator unaccent;
char *unaccentedString = NULL;
unsigned unaccentedStringSize=0;
UnicodeString unicodeAccentedString;
const char* removeAccents(const char *accentedString, const unsigned size) {
/* Realloc memory if necessary */
if (size > unaccentedStringSize) {
unaccentedString = (char*)realloc(unaccentedString, size+1);
unaccentedStringSize = size+1;
}
/* Transcode the String */
unicodeAccentedString = UnicodeString(accentedString);
std::string &removeAccents(std::string &text) {
unicodeAccentedString = UnicodeString(text.c_str());
unaccent.transliterate(unicodeAccentedString);
/* Extract and return the result */
unicodeAccentedString.extract(0, size, unaccentedString, size, "UTF-8");
return unaccentedString;
text.clear();
unicodeAccentedString.toUTF8String(text);
return text;
}

View File

@ -95,4 +95,4 @@ private:
static const char fgClassID;
};
const char* removeAccents(const char *, const unsigned);
std::string &removeAccents(std::string &text);