From 19ac4741e6e4f57883eea8f26a53677955f08cca Mon Sep 17 00:00:00 2001 From: kelson42 Date: Tue, 25 May 2010 17:11:05 +0000 Subject: [PATCH] + fix the problem with failing accented searches on windows --- src/common/kiwix/searcher.cpp | 5 +- src/common/unaccent.cpp | 71 +++----------------------- src/common/unaccent.h | 95 +++-------------------------------- 3 files changed, 16 insertions(+), 155 deletions(-) diff --git a/src/common/kiwix/searcher.cpp b/src/common/kiwix/searcher.cpp index 92a2579a1..5a2d0c5fe 100644 --- a/src/common/kiwix/searcher.cpp +++ b/src/common/kiwix/searcher.cpp @@ -31,13 +31,10 @@ namespace kiwix { /* Create the enquire object */ Xapian::Enquire enquire(this->readableDatabase); - /* lowercase the search pattern */ - std::transform(search.begin(), search.end(), search.begin(), ::tolower); - /* Create the query term vector */ /* I have the doublequote " because bug ID: 2939690 */ std::vector queryTerms = split(removeAccents(search), " #@%$0/\\_-*()[]{},;:\"ยด`'"); - + /* Create query object */ Xapian::Query query(Xapian::Query::OP_OR, queryTerms.begin(), queryTerms.end()); diff --git a/src/common/unaccent.cpp b/src/common/unaccent.cpp index b0474413a..f3a417e34 100644 --- a/src/common/unaccent.cpp +++ b/src/common/unaccent.cpp @@ -1,69 +1,14 @@ -/******************************************************************** - * COPYRIGHT: - * Copyright (c) 1999-2003, International Business Machines Corporation and - * others. All Rights Reserved. - ********************************************************************/ - #include "unaccent.h" -const char UnaccentTransliterator::fgClassID = 0; - -/** - * Constructor - */ -UnaccentTransliterator::UnaccentTransliterator() : - normalizer("", UNORM_NFD), - Transliterator("Unaccent", 0) { -} - -/** - * Destructor - */ -UnaccentTransliterator::~UnaccentTransliterator() { -} - -/** - * Remove accents from a character using Normalizer. - */ -UChar UnaccentTransliterator::unaccent(UChar c) const { - UnicodeString str(c); - UErrorCode status = U_ZERO_ERROR; - UnaccentTransliterator* t = (UnaccentTransliterator*)this; - - t->normalizer.setText(str, status); - if (U_FAILURE(status)) { - return c; - } - return (UChar) t->normalizer.next(); -} - -/** - * Implement Transliterator API - */ -void UnaccentTransliterator::handleTransliterate(Replaceable& text, - UTransPosition& index, - UBool incremental) const { - UnicodeString str("a"); - while (index.start < index.limit) { - UChar c = text.charAt(index.start); - UChar d = unaccent(c); - if (c != d) { - str.setCharAt(0, d); - text.handleReplaceBetween(index.start, index.start+1, str); - } - index.start++; - } -} - -/* Remove accents from a String */ -UnaccentTransliterator unaccent; -UnicodeString unicodeAccentedString; +UErrorCode status = U_ZERO_ERROR; +Transliterator *trans = Transliterator::createInstance("Lower; NFD; [:M:] remove; NFC", UTRANS_FORWARD, status); std::string &removeAccents(std::string &text) { - unicodeAccentedString = UnicodeString(text.c_str()); - unaccent.transliterate(unicodeAccentedString); + ucnv_setDefaultName("UTF-8"); + UnicodeString ustring = UnicodeString(text.c_str()); + trans->transliterate(ustring); text.clear(); - unicodeAccentedString.toUTF8String(text); + ustring.toUTF8String(text); return text; } @@ -72,7 +17,7 @@ void printStringInHexadecimal(UnicodeString s) { for (int i=0; i #include @@ -10,92 +7,14 @@ #include #include #include +#include +#include + #include #include -//#define U_CHARSET_IS_UTF8 1 - -class UnaccentTransliterator : public Transliterator { - - public: - - /** - * Constructor - */ - UnaccentTransliterator(); - - /** - * Destructor - */ - virtual ~UnaccentTransliterator(); - - protected: - - /** - * Implement Transliterator API - */ - virtual void handleTransliterate(Replaceable& text, - UTransPosition& index, - UBool incremental) const; - - private: - - /** - * Unaccent a single character using normalizer. - */ - UChar unaccent(UChar c) const; - - Normalizer normalizer; - -public: - - /** - * Return the class ID for this class. This is useful only for - * comparing to a return value from getDynamicClassID(). For example: - *
-     * .      Base* polymorphic_pointer = createPolymorphicObject();
-     * .      if (polymorphic_pointer->getDynamicClassID() ==
-     * .          Derived::getStaticClassID()) ...
-     * 
- * @return The class ID for all objects of this class. - * @stable ICU 2.0 - */ - static inline UClassID getStaticClassID(void) { return (UClassID)&fgClassID; }; - - /** - * Returns a unique class ID polymorphically. This method - * is to implement a simple version of RTTI, since not all C++ - * compilers support genuine RTTI. Polymorphic operator==() and - * clone() methods call this method. - * - *

Concrete subclasses of Transliterator that wish clients to - * be able to identify them should implement getDynamicClassID() - * and also a static method and data member: - * - *

-     * static UClassID getStaticClassID() { return (UClassID)&fgClassID; }
-     * static char fgClassID;
-     * 
- * - * Subclasses that do not implement this method will have a - * dynamic class ID of Transliterator::getStatisClassID(). - * - * @return The class ID for this object. All objects of a given - * class have the same class ID. Objects of other classes have - * different class IDs. - * @stable ICU 2.0 - */ - virtual UClassID getDynamicClassID(void) const { return getStaticClassID(); }; - -private: - - /** - * Class identifier for subclasses of Transliterator that do not - * define their class (anonymous subclasses). - */ - static const char fgClassID; -}; - std::string &removeAccents(std::string &text); void printStringInHexadecimal(const char *s); void printStringInHexadecimal(UnicodeString s); + +#endif