mirror of https://github.com/kiwix/libkiwix.git
Move getLanguageSelfName to tools.h
This is a general utility which other ports can get use of. Added tests
This commit is contained in:
parent
8726de494c
commit
385931f229
|
@ -244,5 +244,13 @@ FeedLanguages readLanguagesFromFeed(const std::string& content);
|
|||
* @return vector containing category strings.
|
||||
*/
|
||||
FeedCategories readCategoriesFromFeed(const std::string& content);
|
||||
|
||||
/**
|
||||
* Retrieve the full language name associated with a given ISO 639-3 language code.
|
||||
*
|
||||
* @param lang ISO 639-3 language code.
|
||||
* @return full language name.
|
||||
*/
|
||||
std::string getLanguageSelfName(const std::string& lang);
|
||||
}
|
||||
#endif // KIWIX_TOOLS_H
|
||||
|
|
|
@ -23,65 +23,6 @@ void LibraryDumper::setOpenSearchInfo(int totalResults, int startIndex, int coun
|
|||
m_count = count;
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
std::map<std::string, std::string> iso639_3 = {
|
||||
{"atj", "atikamekw"},
|
||||
{"azb", "آذربایجان دیلی"},
|
||||
{"bcl", "central bikol"},
|
||||
{"bgs", "tagabawa"},
|
||||
{"bxr", "буряад хэлэн"},
|
||||
{"cbk", "chavacano"},
|
||||
{"cdo", "閩東語"},
|
||||
{"dag", "Dagbani"},
|
||||
{"diq", "dimli"},
|
||||
{"dty", "डोटेली"},
|
||||
{"eml", "emiliân-rumagnōl"},
|
||||
{"fbs", "српскохрватски"},
|
||||
{"guw", "Gungbe"},
|
||||
{"hbs", "srpskohrvatski"},
|
||||
{"ido", "ido"},
|
||||
{"kbp", "kabɩyɛ"},
|
||||
{"kld", "Gamilaraay"},
|
||||
{"lbe", "лакку маз"},
|
||||
{"lbj", "ལ་དྭགས་སྐད་"},
|
||||
{"map", "Austronesian"},
|
||||
{"mhr", "марий йылме"},
|
||||
{"mnw", "ဘာသာမန်"},
|
||||
{"myn", "mayan"},
|
||||
{"nah", "nahuatl"},
|
||||
{"nai", "north American Indian"},
|
||||
{"nds", "plattdütsch"},
|
||||
{"nrm", "bhasa narom"},
|
||||
{"olo", "livvi"},
|
||||
{"pih", "Pitcairn-Norfolk"},
|
||||
{"pnb", "Western Panjabi"},
|
||||
{"rmr", "Caló"},
|
||||
{"rmy", "romani shib"},
|
||||
{"roa", "romance languages"},
|
||||
{"twi", "twi"},
|
||||
};
|
||||
|
||||
std::once_flag fillLanguagesFlag;
|
||||
|
||||
void fillLanguagesMap()
|
||||
{
|
||||
for (auto icuLangPtr = icu::Locale::getISOLanguages(); *icuLangPtr != NULL; ++icuLangPtr) {
|
||||
const ICULanguageInfo lang(*icuLangPtr);
|
||||
iso639_3.insert({lang.iso3Code(), lang.selfName()});
|
||||
}
|
||||
}
|
||||
|
||||
std::string getLanguageSelfName(const std::string& lang) {
|
||||
const auto itr = iso639_3.find(lang);
|
||||
if (itr != iso639_3.end()) {
|
||||
return itr->second;
|
||||
}
|
||||
return lang;
|
||||
};
|
||||
|
||||
} // unnamed namespace
|
||||
|
||||
kainjow::mustache::list LibraryDumper::getCategoryData() const
|
||||
{
|
||||
const auto now = gen_date_str();
|
||||
|
@ -102,7 +43,6 @@ kainjow::mustache::list LibraryDumper::getLanguageData() const
|
|||
{
|
||||
const auto now = gen_date_str();
|
||||
kainjow::mustache::list languageData;
|
||||
std::call_once(fillLanguagesFlag, fillLanguagesMap);
|
||||
for ( const auto& langAndBookCount : library->getBooksLanguagesWithCounts() ) {
|
||||
const std::string languageCode = langAndBookCount.first;
|
||||
const int bookCount = langAndBookCount.second;
|
||||
|
|
|
@ -18,6 +18,7 @@ kiwix_sources = [
|
|||
'tools/stringTools.cpp',
|
||||
'tools/networkTools.cpp',
|
||||
'tools/opdsParsingTools.cpp',
|
||||
'tools/languageTools.cpp',
|
||||
'tools/otherTools.cpp',
|
||||
'tools/archiveTools.cpp',
|
||||
'kiwixserve.cpp',
|
||||
|
|
|
@ -0,0 +1,74 @@
|
|||
#include "tools.h"
|
||||
#include "stringTools.h"
|
||||
#include <mutex>
|
||||
|
||||
namespace kiwix
|
||||
{
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
// These mappings are not provided by the ICU library, any such mappings can be manually added here
|
||||
std::map<std::string, std::string> iso639_3 = {
|
||||
{"atj", "atikamekw"},
|
||||
{"azb", "آذربایجان دیلی"},
|
||||
{"bcl", "central bikol"},
|
||||
{"bgs", "tagabawa"},
|
||||
{"bxr", "буряад хэлэн"},
|
||||
{"cbk", "chavacano"},
|
||||
{"cdo", "閩東語"},
|
||||
{"dag", "Dagbani"},
|
||||
{"diq", "dimli"},
|
||||
{"dty", "डोटेली"},
|
||||
{"eml", "emiliân-rumagnōl"},
|
||||
{"fbs", "српскохрватски"},
|
||||
{"guw", "Gungbe"},
|
||||
{"hbs", "srpskohrvatski"},
|
||||
{"ido", "ido"},
|
||||
{"kbp", "kabɩyɛ"},
|
||||
{"kld", "Gamilaraay"},
|
||||
{"lbe", "лакку маз"},
|
||||
{"lbj", "ལ་དྭགས་སྐད་"},
|
||||
{"map", "Austronesian"},
|
||||
{"mhr", "марий йылме"},
|
||||
{"mnw", "ဘာသာမန်"},
|
||||
{"myn", "mayan"},
|
||||
{"nah", "nahuatl"},
|
||||
{"nai", "north American Indian"},
|
||||
{"nds", "plattdütsch"},
|
||||
{"nrm", "bhasa narom"},
|
||||
{"olo", "livvi"},
|
||||
{"pih", "Pitcairn-Norfolk"},
|
||||
{"pnb", "Western Panjabi"},
|
||||
{"rmr", "Caló"},
|
||||
{"rmy", "romani shib"},
|
||||
{"roa", "romance languages"},
|
||||
{"twi", "twi"},
|
||||
// ICU for Ubuntu versions <= focal (20.04) returns "" for the language code ""
|
||||
// unlike the later versions - which returns "und". We map this value to "Undetermined" for a common ground.
|
||||
{"", "Undetermined"},
|
||||
};
|
||||
|
||||
std::once_flag fillLanguagesFlag;
|
||||
|
||||
void fillLanguagesMap()
|
||||
{
|
||||
for (auto icuLangPtr = icu::Locale::getISOLanguages(); *icuLangPtr != NULL; ++icuLangPtr) {
|
||||
const kiwix::ICULanguageInfo lang(*icuLangPtr);
|
||||
iso639_3.insert({lang.iso3Code(), lang.selfName()});
|
||||
}
|
||||
}
|
||||
|
||||
} // unnamed namespace
|
||||
|
||||
std::string getLanguageSelfName(const std::string& lang)
|
||||
{
|
||||
std::call_once(fillLanguagesFlag, fillLanguagesMap);
|
||||
const auto itr = iso639_3.find(lang);
|
||||
if (itr != iso639_3.end()) {
|
||||
return itr->second;
|
||||
}
|
||||
return lang;
|
||||
};
|
||||
|
||||
} // namespace kiwix
|
|
@ -0,0 +1,41 @@
|
|||
/*
|
||||
* Copyright (C) 2023 Nikhil Tanwar (2002nikhiltanwar@gmail.com)
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License as
|
||||
* published by the Free Software Foundation; either version 2 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
|
||||
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
|
||||
* NON-INFRINGEMENT. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*
|
||||
*/
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "../include/tools.h"
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
TEST(LanguageToolsTest, englishTest)
|
||||
{
|
||||
EXPECT_EQ(kiwix::getLanguageSelfName("eng"), "English");
|
||||
}
|
||||
|
||||
TEST(LanguageToolsTest, manualValuesTest)
|
||||
{
|
||||
EXPECT_EQ(kiwix::getLanguageSelfName("dty"), "डोटेली");
|
||||
}
|
||||
|
||||
TEST(LanguageToolsTest, emptyStringTest)
|
||||
{
|
||||
EXPECT_EQ(kiwix::getLanguageSelfName(""), "Undetermined");
|
||||
}
|
||||
|
||||
}
|
|
@ -6,6 +6,7 @@ tests = [
|
|||
'pathTools',
|
||||
'otherTools',
|
||||
'opdsParsingTools',
|
||||
'languageTools',
|
||||
'kiwixserve',
|
||||
'book',
|
||||
'manager',
|
||||
|
|
Loading…
Reference in New Issue