mirror of https://github.com/kiwix/libkiwix.git
+ improved version of removeAccents()
This commit is contained in:
parent
27ee27bbe9
commit
31dc311f2a
|
@ -123,19 +123,19 @@ namespace kiwix {
|
||||||
|
|
||||||
/* Index the title */
|
/* Index the title */
|
||||||
if (!this->htmlParser.title.empty()) {
|
if (!this->htmlParser.title.empty()) {
|
||||||
indexer.index_text_without_positions(removeAccents(this->htmlParser.title.c_str(), this->htmlParser.title.size()),
|
indexer.index_text_without_positions(removeAccents(this->htmlParser.title),
|
||||||
((this->htmlParser.dump.size() / 100) + 1) /
|
((this->htmlParser.dump.size() / 100) + 1) /
|
||||||
countWords(this->htmlParser.title) );
|
countWords(this->htmlParser.title) );
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Index the keywords */
|
/* Index the keywords */
|
||||||
if (!this->htmlParser.keywords.empty()) {
|
if (!this->htmlParser.keywords.empty()) {
|
||||||
indexer.index_text_without_positions(removeAccents(this->htmlParser.keywords.c_str(), this->htmlParser.keywords.size()), 3);
|
indexer.index_text_without_positions(removeAccents(this->htmlParser.keywords), 3);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Index the content */
|
/* Index the content */
|
||||||
if (!this->htmlParser.dump.empty()) {
|
if (!this->htmlParser.dump.empty()) {
|
||||||
indexer.index_text_without_positions(removeAccents(this->htmlParser.dump.c_str(), this->htmlParser.dump.size()));
|
indexer.index_text_without_positions(removeAccents(this->htmlParser.dump));
|
||||||
}
|
}
|
||||||
|
|
||||||
/* add to the database */
|
/* add to the database */
|
||||||
|
|
|
@ -36,7 +36,7 @@ namespace kiwix {
|
||||||
|
|
||||||
/* Create the query term vector */
|
/* Create the query term vector */
|
||||||
/* I have the doublequote " because bug ID: 2939690 */
|
/* I have the doublequote " because bug ID: 2939690 */
|
||||||
std::vector<std::string> queryTerms = split(removeAccents(search.c_str(), search.size()), " #@%$0/\\_-*()[]{},;:\"´`'");
|
std::vector<std::string> queryTerms = split(removeAccents(search), " #@%$0/\\_-*()[]{},;:\"´`'");
|
||||||
|
|
||||||
/* Create query object */
|
/* Create query object */
|
||||||
Xapian::Query query(Xapian::Query::OP_OR, queryTerms.begin(), queryTerms.end());
|
Xapian::Query query(Xapian::Query::OP_OR, queryTerms.begin(), queryTerms.end());
|
||||||
|
|
|
@ -57,23 +57,12 @@ void UnaccentTransliterator::handleTransliterate(Replaceable& text,
|
||||||
|
|
||||||
/* Remove accents from a String */
|
/* Remove accents from a String */
|
||||||
UnaccentTransliterator unaccent;
|
UnaccentTransliterator unaccent;
|
||||||
char *unaccentedString = NULL;
|
|
||||||
unsigned unaccentedStringSize=0;
|
|
||||||
UnicodeString unicodeAccentedString;
|
UnicodeString unicodeAccentedString;
|
||||||
|
|
||||||
const char* removeAccents(const char *accentedString, const unsigned size) {
|
std::string &removeAccents(std::string &text) {
|
||||||
|
unicodeAccentedString = UnicodeString(text.c_str());
|
||||||
/* Realloc memory if necessary */
|
|
||||||
if (size > unaccentedStringSize) {
|
|
||||||
unaccentedString = (char*)realloc(unaccentedString, size+1);
|
|
||||||
unaccentedStringSize = size+1;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Transcode the String */
|
|
||||||
unicodeAccentedString = UnicodeString(accentedString);
|
|
||||||
unaccent.transliterate(unicodeAccentedString);
|
unaccent.transliterate(unicodeAccentedString);
|
||||||
|
text.clear();
|
||||||
/* Extract and return the result */
|
unicodeAccentedString.toUTF8String(text);
|
||||||
unicodeAccentedString.extract(0, size, unaccentedString, size, "UTF-8");
|
return text;
|
||||||
return unaccentedString;
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -95,4 +95,4 @@ private:
|
||||||
static const char fgClassID;
|
static const char fgClassID;
|
||||||
};
|
};
|
||||||
|
|
||||||
const char* removeAccents(const char *, const unsigned);
|
std::string &removeAccents(std::string &text);
|
||||||
|
|
Loading…
Reference in New Issue