From e214efecd434cff696020d7dacff56119222e5f1 Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Wed, 10 Mar 2021 16:38:08 +0400 Subject: [PATCH] Language code conversion via ICU Language code is converted from ISO 639-3 to ISO 639 (which is understood by Xapian) via ICU. The previous approach via an explicit map had its advantages since Xapian has more than one stemmer implementation for some languages (selectable via Xapian-specific identifiers). This commit relies on the defaults associated with the ISO 639 language codes. --- src/library.cpp | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/src/library.cpp b/src/library.cpp index 52ca72670..cdfc9a246 100644 --- a/src/library.cpp +++ b/src/library.cpp @@ -30,6 +30,7 @@ #include #include #include +#include namespace kiwix { @@ -37,13 +38,8 @@ namespace kiwix namespace { -const std::map iso639_3ToXapian { - {"deu", "german" }, - {"eng", "english" }, - {"fra", "french" }, - {"hye", "armenian"}, - {"rus", "russian" }, - {"spa", "spanish" }, +std::string iso639_3ToXapian(const std::string& lang) { + return icu::Locale(lang.c_str()).getLanguage(); }; std::string normalizeText(const std::string& text, const std::string& language) @@ -260,7 +256,7 @@ void Library::updateBookDB(const Book& book) Xapian::TermGenerator indexer; const std::string lang = book.getLanguage(); try { - stemmer = Xapian::Stem(iso639_3ToXapian.at(lang)); + stemmer = Xapian::Stem(iso639_3ToXapian(lang)); indexer.set_stemmer(stemmer); indexer.set_stemming_strategy(Xapian::TermGenerator::STEM_SOME); } catch (...) 
{} @@ -301,7 +297,7 @@ Library::BookIdCollection Library::getBooksByTitleOrDescription(const Filter& fi : 0; // Language assumed for the query is not known for sure so stemming // is not applied - //queryParser.set_stemmer(Xapian::Stem(iso639_3ToXapian.at(???))); + //queryParser.set_stemmer(Xapian::Stem(iso639_3ToXapian(???))); //queryParser.set_stemming_strategy(Xapian::QueryParser::STEM_SOME); const auto flags = Xapian::QueryParser::FLAG_PHRASE | Xapian::QueryParser::FLAG_BOOLEAN