Files
libkiwix/scripts/kiwix-compile-resources
Veloman Yunkan b9f60ecfe9 Handling of cacheid when serving static resources
During static resource preprocessing and compilation their cacheid
values are embedded into libkiwix and can be accessed at runtime.

If a static resource is requested without specifying any cacheid
it is served as dynamic content (with short TTL and the library id
used for the ETag, though using the cacheid for the ETag would
be better).

If a cacheid is supplied in the request it must match the cacheid of the
resource (otherwise a 404 Not Found error is returned) whereupon the
resource is served as immutable content.

Known issues:

- One issue is caused by the fact that some static resources don't get a
  cacheid; this is resolved in the next commit.

- Interaction of this change with the support for dynamically customizing
  static resources (via KIWIX_SERVE_CUSTOMIZED_RESOURCES env var) was
  not addressed.
2022-10-19 19:26:04 +04:00

222 lines
6.8 KiB
Python
Executable File

#!/usr/bin/env python3
'''
Copyright 2016 Matthieu Gautier <mgautier@kymeria.fr>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or any
later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.
'''
import argparse
import os.path
import re
def full_identifier(filename):
    """Split a resource path into a list of identifier-safe components.

    The path is normalized with ``os.path.normpath``, split on ``os.sep``
    and each component is converted with ``to_identifier``.  The mapping is
    also printed to stdout.
    """
    components = os.path.normpath(filename).split(os.sep)
    parts = list(map(to_identifier, components))
    # Trace of the filename -> identifier mapping.
    print(filename, parts)
    return parts
def to_identifier(name):
    """Convert an arbitrary string into a usable identifier.

    Every character outside ``[0-9a-zA-Z]`` is replaced with ``_``.  An
    underscore is prepended when the result would otherwise start with a
    digit or be empty, so the returned value is never an empty string and
    never starts with a digit.
    """
    ident = re.sub(r'[^0-9a-zA-Z]', '_', name)
    # Check emptiness first: indexing ident[0] on '' would raise IndexError.
    if not ident or ident[0].isdigit():
        return "_" + ident
    return ident
# C++ definition of one resource: the raw bytes as a static array, plus a
# std::string inside namespace RESOURCE that is initialized by calling
# init_resource() with the environment-variable name, the array and its
# length.  Doubled braces are literal braces for str.format().
resource_impl_template = """
static const unsigned char {data_identifier}[] = {{
{resource_content}
}};
namespace RESOURCE {{
{namespaces_open}
const std::string {identifier} = init_resource("{env_identifier}", {data_identifier}, {resource_len});
{namespaces_close}
}}
"""
# One branch of the generated getResource_*() function: maps a resource
# name to its RESOURCE:: string.
resource_getter_template = """
if (name == "{common_name}")
return RESOURCE::{identifier};
"""
# One branch of the generated getResourceCacheId_*() function: maps a
# resource name to its cacheid string literal.
resource_cacheid_getter_template = """
if (name == "{common_name}")
return "{cacheid}";
"""
# Header-side declaration of one resource string, wrapped in its namespaces.
resource_decl_template = """{namespaces_open}
extern const std::string {identifier};
{namespaces_close}"""
class Resource:
    """A single static resource to be embedded in the generated C++ code.

    Attributes:
        filename: resource name as passed to the constructor; also the name
            matched by the generated getter functions.
        identifier: list of identifier-safe path components returned by
            ``full_identifier``; the last one names the C++ variable, the
            preceding ones become nested namespaces.
        cacheid: cacheid of the resource, or ``None`` when it has none.
        data: raw bytes read from the resource file.
    """

    def __init__(self, base_dirs, filename, cacheid=None):
        """Load ``filename`` from the first directory in ``base_dirs`` that has it.

        Raises:
            Exception: if the file is found in none of ``base_dirs``.
        """
        self.filename = filename
        self.identifier = full_identifier(filename)
        self.cacheid = cacheid
        for directory in base_dirs:
            candidate = os.path.join(directory, filename)
            try:
                with open(candidate, 'rb') as stream:
                    self.data = stream.read()
            except FileNotFoundError:
                # Not in this directory; try the next one.
                continue
            break
        else:
            # The loop ended without a successful read.
            raise Exception("Resource not found: {}".format(filename))

    def _namespace_wrappers(self):
        """Return the (opening, closing) namespace text for this resource."""
        scopes = self.identifier[:-1]
        opening = " ".join("namespace {} {{".format(scope) for scope in scopes)
        closing = " ".join("}" for _ in scopes)
        return opening, closing

    def dump_impl(self):
        """Return the C++ definition of this resource (byte array and string)."""
        opening, closing = self._namespace_wrappers()
        # Emit the bytes 16 per row; iterating a bytes object yields ints.
        rows = (self.data[start:start + 16]
                for start in range(0, len(self.data), 16))
        content = ",\n ".join(
            ", ".join("{:#04x}".format(byte) for byte in row) for row in rows
        )
        return resource_impl_template.format(
            data_identifier="_".join(["", *self.identifier]),
            resource_content=content,
            resource_len=len(self.data),
            namespaces_open=opening,
            namespaces_close=closing,
            identifier=self.identifier[-1],
            env_identifier="RES_{}_PATH".format("_".join(self.identifier))
        )

    def dump_getter(self):
        """Return the getter branch that maps the filename to its C++ string."""
        qualified_name = "::".join(self.identifier)
        return resource_getter_template.format(
            common_name=self.filename,
            identifier=qualified_name
        )

    def dump_cacheid_getter(self):
        """Return the getter branch that maps the filename to its cacheid."""
        return resource_cacheid_getter_template.format(
            common_name=self.filename,
            cacheid=self.cacheid
        )

    def dump_decl(self):
        """Return the header declaration of this resource's C++ string."""
        opening, closing = self._namespace_wrappers()
        return resource_decl_template.format(
            namespaces_open=opening,
            namespaces_close=closing,
            identifier=self.identifier[-1]
        )
# Skeleton of the generated C++ source file.  It defines init_resource(),
# which returns the embedded bytes unless the environment variable named
# by its first argument points to a readable file, in which case that
# file's content is returned instead.  The override file is opened in
# binary mode so that its bytes are returned unmodified, like the embedded
# data (which this script reads with mode 'rb').
# Doubled braces are literal braces for str.format().
master_c_template = """//This file is automatically generated. Do not modify it.
#include <stdlib.h>
#include <fstream>
#include "{include_file}"
static std::string init_resource(const char* name, const unsigned char* content, int len)
{{
char * resPath = getenv(name);
if (NULL == resPath)
return std::string(reinterpret_cast<const char*>(content), len);
std::ifstream ifs(resPath, std::ios::binary);
if (!ifs.good())
return std::string(reinterpret_cast<const char*>(content), len);
return std::string( (std::istreambuf_iterator<char>(ifs)),
(std::istreambuf_iterator<char>() ));
}}
const std::string& getResource_{basename}(const std::string& name) {{
{RESOURCES_GETTER}
throw ResourceNotFound("Resource not found: " + name);
}}
const char* getResourceCacheId_{basename}(const std::string& name) {{
{RESOURCE_CACHEID_GETTER}
return nullptr;
}}
{RESOURCES}
"""
def gen_c_file(resources, basename):
    """Render the content of the generated C++ source file.

    ``basename`` is used verbatim as the included header name and, after
    conversion with ``to_identifier``, as the suffix of the generated
    getter functions.
    """
    implementations = [res.dump_impl() for res in resources]
    getters = [res.dump_getter() for res in resources]
    # Resources without a cacheid get no branch in the cacheid getter.
    cacheid_getters = [
        res.dump_cacheid_getter() for res in resources
        if res.cacheid is not None
    ]
    return master_c_template.format(
        RESOURCES="\n\n".join(implementations),
        RESOURCES_GETTER="\n\n".join(getters),
        RESOURCE_CACHEID_GETTER="\n\n".join(cacheid_getters),
        include_file=basename,
        basename=to_identifier(basename)
    )
# Skeleton of the generated header file: include guard, declarations of the
# resource strings inside namespace RESOURCE, the ResourceNotFound
# exception, and the getResource()/getResourceCacheId() macros that forward
# to the basename-suffixed getter functions.
# Doubled braces are literal braces for str.format().
master_h_template = """//This file is automatically generated. Do not modify it.
#ifndef KIWIX_{BASENAME}
#define KIWIX_{BASENAME}
#include <string>
#include <stdexcept>
namespace RESOURCE {{
{RESOURCES}
}};
class ResourceNotFound : public std::runtime_error {{
public:
ResourceNotFound(const std::string& what_arg):
std::runtime_error(what_arg)
{{ }};
}};
const std::string& getResource_{basename}(const std::string& name);
const char* getResourceCacheId_{basename}(const std::string& name);
#define getResource(a) (getResource_{basename}(a))
#define getResourceCacheId(a) (getResourceCacheId_{basename}(a))
#endif // KIWIX_{BASENAME}
"""
def gen_h_file(resources, basename):
    """Render the content of the generated header file.

    ``basename`` is used as-is for the getter function suffix and,
    upper-cased, for the include guard.
    """
    declarations = "\n ".join(res.dump_decl() for res in resources)
    guard_name = basename.upper()
    return master_h_template.format(
        RESOURCES=declarations,
        BASENAME=guard_name,
        basename=basename,
    )
if __name__ == "__main__":
    # Command-line entry point: read the resource list, then write the
    # generated header and C++ source files.
    parser = argparse.ArgumentParser()
    # Both output paths are used unconditionally below, so require them up
    # front instead of failing later on a None value.
    parser.add_argument('--cxxfile',
                        required=True,
                        help='The Cpp file name to generate')
    parser.add_argument('--hfile',
                        required=True,
                        help='The h file name to generate')
    parser.add_argument('--source_dir',
                        help="Additional directory where to look for resources.",
                        action='append')
    parser.add_argument('resource_file',
                        help='The list of resources to compile.')
    args = parser.parse_args()

    # Resources are looked up next to the resource list first, then in the
    # additional source directories, in the order given.
    base_dir = os.path.dirname(os.path.realpath(args.resource_file))
    source_dir = args.source_dir or []
    with open(args.resource_file, 'r') as f:
        # Each line holds whitespace-separated fields that are passed
        # positionally to Resource after base_dirs: "<filename> [<cacheid>]".
        entries = [line.split() for line in f]
    # A blank line splits into an empty list; skip it rather than calling
    # Resource() without a filename.
    resources = [Resource([base_dir]+source_dir, *entry)
                 for entry in entries if entry]

    h_identifier = to_identifier(os.path.basename(args.hfile))
    with open(args.hfile, 'w') as f:
        f.write(gen_h_file(resources, h_identifier))
    with open(args.cxxfile, 'w') as f:
        f.write(gen_c_file(resources, os.path.basename(args.hfile)))