i18n data is kept in and generated from JSON files

Introduced a new resource compiler script kiwix-compile-i18n that
processes i18n string data stored in JSON files and generates sorted C++
tables of string keys and values for all languages.
This commit is contained in:
Veloman Yunkan 2022-01-16 23:33:44 +04:00 committed by Matthieu Gautier
parent d029c2b8d5
commit 507e111f34
11 changed files with 230 additions and 14 deletions

View File

@ -1 +1,2 @@
usr/share/man/man1/kiwix-compile-resources.1*
usr/share/man/man1/kiwix-compile-i18n.1*

161
scripts/kiwix-compile-i18n Executable file
View File

@ -0,0 +1,161 @@
#!/usr/bin/env python3
'''
Copyright 2022 Veloman Yunkan <veloman.yunkan@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or any
later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.
'''
import argparse
import os.path
import re
import json
def to_identifier(name):
    """Convert an arbitrary string into a valid C/C++ identifier.

    Every character other than an ASCII letter or digit is replaced with
    an underscore. If the result is empty or starts with a digit, an
    underscore is prepended so that the result is always usable as an
    identifier.

    :param name: the string to convert (e.g. a language code such as
                 ``zh-hans``)
    :return: the identifier-safe string (e.g. ``zh_hans``)
    """
    ident = re.sub(r'[^0-9a-zA-Z]', '_', name)
    # An empty string has no first character to inspect; treat it like the
    # leading-digit case so that the function never raises IndexError.
    if not ident or ident[0].isdigit():
        return "_" + ident
    return ident
def lang_code(filename):
    """Derive the language code from the path of an i18n resource file.

    The directory part and the extension of *filename* are dropped and the
    remaining stem is passed through to_identifier(). The mapping is also
    reported on standard output.

    :param filename: path of the resource file (e.g. ``i18n/en.json``)
    :return: the identifier-safe language code (e.g. ``en``)
    """
    base_name = os.path.basename(filename)
    stem, _extension = os.path.splitext(base_name)
    code = to_identifier(stem)
    print(base_name, '->', code)
    return code
from string import Template
def expand_cxx_template(t, **kwargs):
    """Fill in the ``$NAME`` placeholders of the template text *t*.

    :param t: template text using string.Template placeholder syntax
    :param kwargs: the value for each placeholder, keyed by placeholder name
    :return: the expanded text
    :raises KeyError: if *t* contains a placeholder missing from *kwargs*
    """
    template = Template(t)
    expanded = template.substitute(**kwargs)
    return expanded
def cxx_string_literal(s):
    """Return a C++ ``u8"..."`` string literal representing *s*.

    The quoting and the escaping of ``"``, ``\\`` and control characters
    are delegated to json.dumps(), since JSON uses the same escape
    sequences as C++ for them. Non-ASCII characters are escaped here
    rather than by json.dumps(): JSON would write a character outside the
    Basic Multilingual Plane as a pair of UTF-16 surrogate escapes
    (e.g. ``\\ud83d\\ude00``), and a universal-character-name that
    designates a surrogate is ill-formed in C++. Such characters are
    written as ``\\UXXXXXXXX`` instead.

    :param s: the text to encode
    :return: the source text of an ASCII-only C++ UTF-8 string literal
    """
    # ensure_ascii=False keeps non-ASCII characters unescaped so that the
    # escape form can be chosen per code point below.
    quoted = json.dumps(s, ensure_ascii=False)

    def escape(ch):
        code = ord(ch)
        if code < 0x7f:
            # Printable ASCII, or part of an escape sequence already
            # produced by json.dumps().
            return ch
        if code <= 0xffff:
            return '\\u{:04x}'.format(code)
        return '\\U{:08x}'.format(code)

    return 'u8' + ''.join(escape(ch) for ch in quoted)
# C++ source template for the string table of a single language.
# $TABLE_NAME is the name of the generated array and $TABLE_ENTRIES is the
# comma-separated list of its initializers; both are supplied by
# Resource.get_string_table().
string_table_cxx_template = '''
const I18nString $TABLE_NAME[] = {
$TABLE_ENTRIES
};
'''
# C++ source template for one element of the table of languages: the
# language code, the number of strings in that language's string table and
# the string table itself. The placeholders are supplied by
# Resource.get_lang_table_entry().
lang_table_entry_cxx_template = '''
{
$LANG_STRING_LITERAL,
ARRAY_ELEMENT_COUNT($STRING_TABLE_NAME),
$STRING_TABLE_NAME
}'''
# Template of the whole generated C++ file. gen_c_file() supplies the
# placeholders:
#   $STRING_DATA - the per-language string tables (placed in an unnamed
#                  namespace)
#   $LANG_TABLE  - the initializers of the stringTables array
#   $LANG_COUNT  - the number of languages
cxxfile_template = '''// This file is automatically generated. Do not modify it.
#include "server/i18n.h"
namespace kiwix {
namespace i18n {
namespace
{
$STRING_DATA
} // unnamed namespace
#define ARRAY_ELEMENT_COUNT(a) (sizeof(a)/sizeof(a[0]))
extern const I18nStringTable stringTables[] = {
$LANG_TABLE
};
extern const size_t langCount = $LANG_COUNT;
} // namespace i18n
} // namespace kiwix
'''
class Resource:
    """The i18n strings of a single language, loaded from a JSON file.

    Attributes set by the constructor:
      filename  - the resource file name with surrounding whitespace removed
      lang_code - the language code derived from the file name
      data      - the raw (unparsed) text of the JSON file
    """

    def __init__(self, base_dirs, filename):
        """Read *filename* from the first directory of *base_dirs* that has it.

        :param base_dirs: directories to search, in order
        :param filename: resource file name relative to a base directory;
                         surrounding whitespace (such as the newline left
                         by readlines()) is stripped
        :raises Exception: if the file is not found in any base directory
        """
        filename = filename.strip()
        self.filename = filename
        self.lang_code = lang_code(filename)
        for base_dir in base_dirs:
            try:
                # Read explicitly as UTF-8 (the JSON interchange encoding)
                # so that the result does not depend on the locale of the
                # build machine.
                with open(os.path.join(base_dir, filename), 'r',
                          encoding='utf-8') as f:
                    self.data = f.read()
                break
            except FileNotFoundError:
                continue
        else:
            # The loop finished without reaching 'break'.
            raise Exception("Impossible to find {}".format(filename))

    def get_string_table_name(self):
        """Return the name of the C++ array holding this language's strings."""
        return "string_table_for_" + self.lang_code

    def get_string_table(self):
        """Return the C++ definition of this language's string table."""
        table_entries = ",\n ".join(self.get_string_table_entries())
        return expand_cxx_template(string_table_cxx_template,
                                   TABLE_NAME=self.get_string_table_name(),
                                   TABLE_ENTRIES=table_entries)

    def get_string_table_entries(self):
        """Yield one ``{ key, value }`` C++ initializer per translated string.

        Entries are produced in sorted key order, so the generated table is
        sorted by key. The "@metadata" entry of the JSON file is not a
        translation and is skipped.
        """
        d = json.loads(self.data)
        for k in sorted(d.keys()):
            if k != "@metadata":
                key_string = cxx_string_literal(k)
                value_string = cxx_string_literal(d[k])
                yield '{ ' + key_string + ', ' + value_string + ' }'

    def get_lang_table_entry(self):
        """Return the C++ initializer of this language's entry in the
        table of languages."""
        return expand_cxx_template(lang_table_entry_cxx_template,
                                   LANG_STRING_LITERAL=cxx_string_literal(self.lang_code),
                                   STRING_TABLE_NAME=self.get_string_table_name())
def gen_c_file(resources):
    """Generate the text of the C++ file defining all i18n string tables.

    :param resources: list of Resource objects, one per language
    :return: the complete C++ source text
    """
    # Query each resource for both of its fragments in turn, then split the
    # pairs into the two lists needed by the template.
    fragments = [(res.get_string_table(), res.get_lang_table_entry())
                 for res in resources]
    string_tables = [tables for tables, _ in fragments]
    lang_entries = [entry for _, entry in fragments]
    return expand_cxx_template(
        cxxfile_template,
        STRING_DATA="\n".join(string_tables),
        LANG_TABLE=",\n ".join(lang_entries),
        LANG_COUNT=len(resources))
# Command-line entry point: read the list of i18n resource files and write
# the generated C++ source file.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--cxxfile',
                        required=True,
                        help='The Cpp file name to generate')
    parser.add_argument('i18n_resource_file',
                        help='The list of resources to compile.')
    args = parser.parse_args()

    # Resource file names are resolved relative to the directory containing
    # the list file.
    base_dir = os.path.dirname(os.path.realpath(args.i18n_resource_file))
    with open(args.i18n_resource_file, 'r') as f:
        # Skip blank lines: an empty file name cannot designate a resource
        # and would otherwise fail with an obscure error.
        resources = [Resource([base_dir], filename)
                     for filename in f
                     if filename.strip()]

    with open(args.cxxfile, 'w') as f:
        f.write(gen_c_file(resources))

View File

@ -0,0 +1,18 @@
.TH KIWIX-COMPILE-I18N "1" "January 2022" "Kiwix" "User Commands"
.SH NAME
kiwix-compile-i18n \- helper to compile Kiwix i18n (internationalization) data
.SH SYNOPSIS
\fBkiwix\-compile\-i18n\fR [\-h] \-\-cxxfile CXXFILE i18n_resource_file
.SH DESCRIPTION
.TP
i18n_resource_file
The list of i18n resources to compile.
.TP
\fB\-h\fR, \fB\-\-help\fR
show a help message and exit
.TP
\fB\-\-cxxfile\fR CXXFILE
The Cpp file name to generate
.SH AUTHOR
Veloman Yunkan <veloman.yunkan@gmail.com>

View File

@ -102,7 +102,7 @@ class Resource:
master_c_template = """//This file is automaically generated. Do not modify it.
master_c_template = """//This file is automatically generated. Do not modify it.
#include <stdlib.h>
#include <fstream>

View File

@ -4,3 +4,9 @@ res_compiler = find_program('kiwix-compile-resources')
install_data(res_compiler.path(), install_dir:get_option('bindir'))
install_man('kiwix-compile-resources.1')
# Locate the i18n resource compiler script and install it, together with
# its man page, the same way as kiwix-compile-resources.
i18n_compiler = find_program('kiwix-compile-i18n')
install_data(i18n_compiler.path(), install_dir:get_option('bindir'))
install_man('kiwix-compile-i18n.1')

View File

@ -33,6 +33,7 @@ kiwix_sources = [
'version.cpp'
]
kiwix_sources += lib_resources
# C++ source generated from the i18n JSON files by the 'i18n_resources'
# custom target
kiwix_sources += i18n_resources
if host_machine.system() == 'windows'
kiwix_sources += 'subprocess_windows.cpp'

View File

@ -36,26 +36,22 @@ const char* I18nStringTable::get(const std::string& key) const
return (found == end || found->key != key) ? nullptr : found->value;
}
namespace i18n
{
// This data is generated by the i18n resource compiler
// (kiwix-compile-i18n) and defined in the C++ file that it produces:
// stringTables holds one I18nStringTable per language, and langCount is
// the number of elements in stringTables.
extern const I18nStringTable stringTables[];
extern const size_t langCount;
}
namespace
{
const I18nString enStrings[] = {
// must be sorted by key
{ "suggest-full-text-search", "containing '{{{SEARCH_TERMS}}}'..."}
};
#define ARRAY_ELEMENT_COUNT(a) (sizeof(a)/sizeof(a[0]))
const I18nStringTable i18nStringTables[] = {
{ "en", ARRAY_ELEMENT_COUNT(enStrings), enStrings }
};
class I18nStringDB
{
public: // functions
I18nStringDB() {
for ( size_t i = 0; i < ARRAY_ELEMENT_COUNT(i18nStringTables); ++i ) {
const auto& t = i18nStringTables[i];
for ( size_t i = 0; i < kiwix::i18n::langCount; ++i ) {
const auto& t = kiwix::i18n::stringTables[i];
lang2TableMap[t.lang] = &t;
}
enStrings = lang2TableMap.at("en");

8
static/i18n/en.json Normal file
View File

@ -0,0 +1,8 @@
{
"@metadata": {
"authors": [
]
},
"name":"English",
"suggest-full-text-search": "containing '{{{SEARCH_TERMS}}}'..."
}

9
static/i18n/qqq.json Normal file
View File

@ -0,0 +1,9 @@
{
"@metadata": {
"authors": [
"Veloman Yunkan"
]
},
"name": "Current language to which the string is being translated.",
"suggest-full-text-search": "Text appearing in the suggestion list that, when selected, runs a full text search instead of the title search"
}

View File

@ -0,0 +1 @@
i18n/en.json

View File

@ -14,3 +14,18 @@ lib_resources = custom_target('resources',
'@INPUT@'],
depend_files: resource_files
)
# Read i18n_resources_list.txt (one resource file name per line) with a
# Python one-liner and split its contents into a list of file names, which
# is passed as depend_files to the i18n_resources custom target.
i18n_resource_files = run_command(find_program('python3'),
'-c',
'import sys; f=open(sys.argv[1]); print(f.read())',
files('i18n_resources_list.txt')
).stdout().strip().split('\n')
# Generate libkiwix-i18n-resources.cpp by running the i18n compiler on the
# resource list. The listed JSON files are declared as depend_files because
# only the list file itself is an input of the command.
i18n_resources = custom_target('i18n_resources',
input: 'i18n_resources_list.txt',
output: ['libkiwix-i18n-resources.cpp'],
command:[i18n_compiler,
'--cxxfile', '@OUTPUT0@',
'@INPUT@'],
depend_files: i18n_resource_files
)