mirror of https://github.com/kiwix/libkiwix.git
Move getLanguageSelfName to tools.h
This is a general utility that other ports can make use of. Tests added.
This commit is contained in:
parent
8726de494c
commit
385931f229
|
@ -244,5 +244,13 @@ FeedLanguages readLanguagesFromFeed(const std::string& content);
|
||||||
* @return vector containing category strings.
|
* @return vector containing category strings.
|
||||||
*/
|
*/
|
||||||
FeedCategories readCategoriesFromFeed(const std::string& content);
|
FeedCategories readCategoriesFromFeed(const std::string& content);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retrieve the full language name associated with a given ISO 639-3 language code.
|
||||||
|
*
|
||||||
|
* @param lang ISO 639-3 language code.
|
||||||
|
* @return the language's self name, or `lang` itself if no name is known for that code.
|
||||||
|
*/
|
||||||
|
std::string getLanguageSelfName(const std::string& lang);
|
||||||
}
|
}
|
||||||
#endif // KIWIX_TOOLS_H
|
#endif // KIWIX_TOOLS_H
|
||||||
|
|
|
@ -23,65 +23,6 @@ void LibraryDumper::setOpenSearchInfo(int totalResults, int startIndex, int coun
|
||||||
m_count = count;
|
m_count = count;
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace {
|
|
||||||
|
|
||||||
std::map<std::string, std::string> iso639_3 = {
|
|
||||||
{"atj", "atikamekw"},
|
|
||||||
{"azb", "آذربایجان دیلی"},
|
|
||||||
{"bcl", "central bikol"},
|
|
||||||
{"bgs", "tagabawa"},
|
|
||||||
{"bxr", "буряад хэлэн"},
|
|
||||||
{"cbk", "chavacano"},
|
|
||||||
{"cdo", "閩東語"},
|
|
||||||
{"dag", "Dagbani"},
|
|
||||||
{"diq", "dimli"},
|
|
||||||
{"dty", "डोटेली"},
|
|
||||||
{"eml", "emiliân-rumagnōl"},
|
|
||||||
{"fbs", "српскохрватски"},
|
|
||||||
{"guw", "Gungbe"},
|
|
||||||
{"hbs", "srpskohrvatski"},
|
|
||||||
{"ido", "ido"},
|
|
||||||
{"kbp", "kabɩyɛ"},
|
|
||||||
{"kld", "Gamilaraay"},
|
|
||||||
{"lbe", "лакку маз"},
|
|
||||||
{"lbj", "ལ་དྭགས་སྐད་"},
|
|
||||||
{"map", "Austronesian"},
|
|
||||||
{"mhr", "марий йылме"},
|
|
||||||
{"mnw", "ဘာသာမန်"},
|
|
||||||
{"myn", "mayan"},
|
|
||||||
{"nah", "nahuatl"},
|
|
||||||
{"nai", "north American Indian"},
|
|
||||||
{"nds", "plattdütsch"},
|
|
||||||
{"nrm", "bhasa narom"},
|
|
||||||
{"olo", "livvi"},
|
|
||||||
{"pih", "Pitcairn-Norfolk"},
|
|
||||||
{"pnb", "Western Panjabi"},
|
|
||||||
{"rmr", "Caló"},
|
|
||||||
{"rmy", "romani shib"},
|
|
||||||
{"roa", "romance languages"},
|
|
||||||
{"twi", "twi"},
|
|
||||||
};
|
|
||||||
|
|
||||||
std::once_flag fillLanguagesFlag;
|
|
||||||
|
|
||||||
void fillLanguagesMap()
|
|
||||||
{
|
|
||||||
for (auto icuLangPtr = icu::Locale::getISOLanguages(); *icuLangPtr != NULL; ++icuLangPtr) {
|
|
||||||
const ICULanguageInfo lang(*icuLangPtr);
|
|
||||||
iso639_3.insert({lang.iso3Code(), lang.selfName()});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string getLanguageSelfName(const std::string& lang) {
|
|
||||||
const auto itr = iso639_3.find(lang);
|
|
||||||
if (itr != iso639_3.end()) {
|
|
||||||
return itr->second;
|
|
||||||
}
|
|
||||||
return lang;
|
|
||||||
};
|
|
||||||
|
|
||||||
} // unnamed namespace
|
|
||||||
|
|
||||||
kainjow::mustache::list LibraryDumper::getCategoryData() const
|
kainjow::mustache::list LibraryDumper::getCategoryData() const
|
||||||
{
|
{
|
||||||
const auto now = gen_date_str();
|
const auto now = gen_date_str();
|
||||||
|
@ -102,7 +43,6 @@ kainjow::mustache::list LibraryDumper::getLanguageData() const
|
||||||
{
|
{
|
||||||
const auto now = gen_date_str();
|
const auto now = gen_date_str();
|
||||||
kainjow::mustache::list languageData;
|
kainjow::mustache::list languageData;
|
||||||
std::call_once(fillLanguagesFlag, fillLanguagesMap);
|
|
||||||
for ( const auto& langAndBookCount : library->getBooksLanguagesWithCounts() ) {
|
for ( const auto& langAndBookCount : library->getBooksLanguagesWithCounts() ) {
|
||||||
const std::string languageCode = langAndBookCount.first;
|
const std::string languageCode = langAndBookCount.first;
|
||||||
const int bookCount = langAndBookCount.second;
|
const int bookCount = langAndBookCount.second;
|
||||||
|
|
|
@ -18,6 +18,7 @@ kiwix_sources = [
|
||||||
'tools/stringTools.cpp',
|
'tools/stringTools.cpp',
|
||||||
'tools/networkTools.cpp',
|
'tools/networkTools.cpp',
|
||||||
'tools/opdsParsingTools.cpp',
|
'tools/opdsParsingTools.cpp',
|
||||||
|
'tools/languageTools.cpp',
|
||||||
'tools/otherTools.cpp',
|
'tools/otherTools.cpp',
|
||||||
'tools/archiveTools.cpp',
|
'tools/archiveTools.cpp',
|
||||||
'kiwixserve.cpp',
|
'kiwixserve.cpp',
|
||||||
|
|
|
@ -0,0 +1,74 @@
|
||||||
|
#include "tools.h"
|
||||||
|
#include "stringTools.h"
|
||||||
|
#include <mutex>
|
||||||
|
|
||||||
|
namespace kiwix
|
||||||
|
{
|
||||||
|
|
||||||
|
namespace
|
||||||
|
{
|
||||||
|
|
||||||
|
// These mappings are not provided by the ICU library; any other missing mappings can be added manually here.
|
||||||
|
std::map<std::string, std::string> iso639_3 = {
|
||||||
|
{"atj", "atikamekw"},
|
||||||
|
{"azb", "آذربایجان دیلی"},
|
||||||
|
{"bcl", "central bikol"},
|
||||||
|
{"bgs", "tagabawa"},
|
||||||
|
{"bxr", "буряад хэлэн"},
|
||||||
|
{"cbk", "chavacano"},
|
||||||
|
{"cdo", "閩東語"},
|
||||||
|
{"dag", "Dagbani"},
|
||||||
|
{"diq", "dimli"},
|
||||||
|
{"dty", "डोटेली"},
|
||||||
|
{"eml", "emiliân-rumagnōl"},
|
||||||
|
{"fbs", "српскохрватски"},
|
||||||
|
{"guw", "Gungbe"},
|
||||||
|
{"hbs", "srpskohrvatski"},
|
||||||
|
{"ido", "ido"},
|
||||||
|
{"kbp", "kabɩyɛ"},
|
||||||
|
{"kld", "Gamilaraay"},
|
||||||
|
{"lbe", "лакку маз"},
|
||||||
|
{"lbj", "ལ་དྭགས་སྐད་"},
|
||||||
|
{"map", "Austronesian"},
|
||||||
|
{"mhr", "марий йылме"},
|
||||||
|
{"mnw", "ဘာသာမန်"},
|
||||||
|
{"myn", "mayan"},
|
||||||
|
{"nah", "nahuatl"},
|
||||||
|
{"nai", "north American Indian"},
|
||||||
|
{"nds", "plattdütsch"},
|
||||||
|
{"nrm", "bhasa narom"},
|
||||||
|
{"olo", "livvi"},
|
||||||
|
{"pih", "Pitcairn-Norfolk"},
|
||||||
|
{"pnb", "Western Panjabi"},
|
||||||
|
{"rmr", "Caló"},
|
||||||
|
{"rmy", "romani shib"},
|
||||||
|
{"roa", "romance languages"},
|
||||||
|
{"twi", "twi"},
|
||||||
|
// ICU for Ubuntu versions <= focal (20.04) returns "" for the language code ""
|
||||||
|
// unlike the later versions, which return "und". We map this value to "Undetermined" for a common ground.
|
||||||
|
{"", "Undetermined"},
|
||||||
|
};
|
||||||
|
|
||||||
|
std::once_flag fillLanguagesFlag;
|
||||||
|
|
||||||
|
void fillLanguagesMap()
|
||||||
|
{
|
||||||
|
for (auto icuLangPtr = icu::Locale::getISOLanguages(); *icuLangPtr != NULL; ++icuLangPtr) {
|
||||||
|
const kiwix::ICULanguageInfo lang(*icuLangPtr);
|
||||||
|
iso639_3.insert({lang.iso3Code(), lang.selfName()});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} // unnamed namespace
|
||||||
|
|
||||||
|
std::string getLanguageSelfName(const std::string& lang)
|
||||||
|
{
|
||||||
|
std::call_once(fillLanguagesFlag, fillLanguagesMap);
|
||||||
|
const auto itr = iso639_3.find(lang);
|
||||||
|
if (itr != iso639_3.end()) {
|
||||||
|
return itr->second;
|
||||||
|
}
|
||||||
|
return lang;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace kiwix
|
|
@ -0,0 +1,41 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2023 Nikhil Tanwar (2002nikhiltanwar@gmail.com)
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public License as
|
||||||
|
* published by the Free Software Foundation; either version 2 of the
|
||||||
|
* License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful, but
|
||||||
|
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
|
||||||
|
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
|
||||||
|
* NON-INFRINGEMENT. See the GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "gtest/gtest.h"
|
||||||
|
#include "../include/tools.h"
|
||||||
|
|
||||||
|
namespace
|
||||||
|
{
|
||||||
|
|
||||||
|
TEST(LanguageToolsTest, englishTest)
|
||||||
|
{
|
||||||
|
EXPECT_EQ(kiwix::getLanguageSelfName("eng"), "English");
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(LanguageToolsTest, manualValuesTest)
|
||||||
|
{
|
||||||
|
EXPECT_EQ(kiwix::getLanguageSelfName("dty"), "डोटेली");
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(LanguageToolsTest, emptyStringTest)
|
||||||
|
{
|
||||||
|
EXPECT_EQ(kiwix::getLanguageSelfName(""), "Undetermined");
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -6,6 +6,7 @@ tests = [
|
||||||
'pathTools',
|
'pathTools',
|
||||||
'otherTools',
|
'otherTools',
|
||||||
'opdsParsingTools',
|
'opdsParsingTools',
|
||||||
|
'languageTools',
|
||||||
'kiwixserve',
|
'kiwixserve',
|
||||||
'book',
|
'book',
|
||||||
'manager',
|
'manager',
|
||||||
|
|
Loading…
Reference in New Issue