libkiwix/scripts/kiwix-resources

126 lines
4.4 KiB
Python
Executable File

#!/usr/bin/env python3
'''
Copyright 2022 Veloman Yunkan <veloman.yunkan@gmail.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or any
later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.
'''
import argparse
import hashlib
import os.path
import re
def read_resource_file(resource_file_path):
    """Return the entries of a resource listing file as a list of strings.

    Every line of the file yields one entry, stripped of its leading and
    trailing whitespace (including the line terminator). Blank lines are
    not filtered out; they appear in the result as empty strings.
    """
    with open(resource_file_path, 'r') as listing:
        return list(map(str.strip, listing))
def list_resources(resource_file_path):
    """Print each entry of the resource listing file on its own line."""
    entries = read_resource_file(resource_file_path)
    for entry in entries:
        print(entry)
def compute_resource_revision(resource_path):
    """Return a short content-based revision id for a resource.

    The resource is looked up relative to the global BASE_DIR and read in
    binary mode; the result is the first 8 hexadecimal digits of the SHA-1
    digest of its content.
    """
    full_path = os.path.join(BASE_DIR, resource_path)
    with open(full_path, 'rb') as resource:
        content = resource.read()
    digest = hashlib.sha1(content).hexdigest()
    return digest[:8]
# Revisions computed so far, keyed by resource path
resource_revisions = {}


def get_resource_revision(res):
    """Return the revision of resource `res`, computing it at most once.

    The first request for a given resource delegates to
    compute_resource_revision() and stores the result in the
    resource_revisions dictionary; later requests are served from there.
    """
    if res in resource_revisions:
        return resource_revisions[res]
    revision = compute_resource_revision(res)
    resource_revisions[res] = revision
    return revision
# Matches a reference to a resource under /skin/ whose query starts with the
# KIWIXCACHEID placeholder. Capture groups:
#   1 - all the matched text up to (excluding) the '?'
#   2 - the part of group 1 preceding '/skin/' (not used by set_cacheid)
#   3 - the resource path relative to skin/
#   4 - the remainder of the query following the placeholder (up to the
#       next double quote)
RESOURCE_WITH_CACHEID_URL_PATTERN = r'((.*)/skin/([^"?]+))\?KIWIXCACHEID([^"]*)'


def set_cacheid(resource_matchobj):
    """Build the replacement text for a RESOURCE_WITH_CACHEID_URL_PATTERN match.

    The KIWIXCACHEID placeholder is replaced with 'cacheid=<revision>', where
    the revision is that of the skin resource referenced by the match. The
    text before the '?' and the rest of the query are kept unchanged.
    """
    url, _, skin_relpath, query_tail = resource_matchobj.groups()
    revision = get_resource_revision('skin/' + skin_relpath)
    return url + '?' + 'cacheid=' + revision + query_tail
def preprocess_line(line):
    """Return `line` with KIWIXCACHEID placeholders replaced by cache ids.

    Lines that do not contain the placeholder are returned as is. Otherwise
    every match of RESOURCE_WITH_CACHEID_URL_PATTERN is rewritten via
    set_cacheid(), and an assertion checks that no placeholder is left
    (i.e. that every occurrence was part of a recognized resource URL).
    """
    placeholder = 'KIWIXCACHEID'
    if placeholder not in line:
        return line
    substituted = re.sub(RESOURCE_WITH_CACHEID_URL_PATTERN, set_cacheid, line)
    assert placeholder not in substituted
    return substituted
def get_preprocessed_resource(srcpath):
    """Preprocess the resource at `srcpath` line by line.

    Returns a tuple (content, modified_line_count) where content is the
    full preprocessed text and modified_line_count is the number of lines
    that preprocess_line() changed. If the file cannot be decoded as text
    (UnicodeDecodeError) it is considered a binary resource and (None, 0)
    is returned.

    NOTE(review): the file is opened without an explicit encoding, so the
    text/binary distinction depends on the platform's default encoding.
    """
    output_lines = []
    changed_count = 0
    try:
        with open(srcpath, 'r') as source:
            for original in source:
                processed = preprocess_line(original)
                if processed != original:
                    changed_count += 1
                output_lines.append(processed)
    except UnicodeDecodeError:
        # Not decodable as text, hence a binary resource
        return None, 0
    return "".join(output_lines), changed_count
def symlink_resource(src, resource_path):
    """Make `resource_path` a symbolic link pointing to `src`.

    If `resource_path` already is a symlink whose target is exactly `src`,
    nothing is done. Any other existing entry at `resource_path` (a regular
    file, a symlink to something else, or a dangling symlink) is removed
    and replaced with the new link.
    """
    # lexists() is used rather than exists(): exists() follows symlinks and
    # reports a dangling link as missing, in which case the stale link would
    # not be removed and os.symlink() would fail with FileExistsError.
    if os.path.lexists(resource_path):
        if os.path.islink(resource_path) and os.readlink(resource_path) == src:
            return
        os.remove(resource_path)
    os.symlink(src, resource_path)
def preprocess_resource(srcdir, resource_path, outdir):
    """Produce the preprocessed version of one resource under `outdir`.

    `resource_path` is the path of the resource relative to `srcdir`; the
    result is placed at the same relative path under `outdir` (missing
    subdirectories are created). Whatever was previously present at the
    output path is removed first. If preprocessing changes no line of the
    resource (which includes binary resources), the output is a symlink to
    the source file; otherwise the preprocessed text is written to a new
    file.
    """
    print('Preprocessing', resource_path, '...')
    resource_dir = os.path.dirname(resource_path)
    if resource_dir != '':
        os.makedirs(os.path.join(outdir, resource_dir), exist_ok=True)
    srcpath = os.path.join(srcdir, resource_path)
    outpath = os.path.join(outdir, resource_path)
    # lexists() rather than exists(): a dangling symlink left by an earlier
    # run must be removed too. Otherwise open(outpath, 'w') below would
    # follow it and create a file at the stale link target, and creating a
    # new symlink would fail because the name is still taken.
    if os.path.lexists(outpath):
        os.remove(outpath)
    preprocessed_content, modified_line_count = get_preprocessed_resource(srcpath)
    if modified_line_count == 0:
        symlink_resource(srcpath, outpath)
    else:
        with open(outpath, 'w') as target:
            target.write(preprocessed_content)
def copy_file(src_path, dst_path):
    """Copy the content of `src_path` to `dst_path` byte for byte.

    The destination is created, or truncated if it already exists. The
    whole source is read into memory before being written out.
    """
    with open(src_path, 'rb') as source, open(dst_path, 'wb') as destination:
        payload = source.read()
        destination.write(payload)
def preprocess_resources(resource_file_path, outdir):
    """Preprocess all resources listed in a resource file into `outdir`.

    Each listed resource is looked up relative to the global BASE_DIR and
    handled by preprocess_resource(). Afterwards the resource listing file
    itself is copied into `outdir` under its original file name.
    """
    for entry in read_resource_file(resource_file_path):
        preprocess_resource(BASE_DIR, entry, outdir)
    listing_copy = os.path.join(outdir, os.path.basename(resource_file_path))
    copy_file(resource_file_path, listing_copy)
if __name__ == "__main__":
    # Command line entry point. At most one of --list-all / --preprocess may
    # be given; when neither is given the script does nothing.
    parser = argparse.ArgumentParser()
    commands = parser.add_mutually_exclusive_group()
    commands.add_argument('--list-all', action='store_true',
                          help='print the resources listed in resource_file')
    commands.add_argument('--preprocess', action='store_true',
                          help='preprocess the listed resources into --outdir')
    parser.add_argument('--outdir',
                        help='output directory (required by --preprocess)')
    parser.add_argument('resource_file',
                        help='file listing the resources, one per line')
    args = parser.parse_args()

    # Without this check a missing --outdir only surfaces later as a
    # TypeError raised from os.path.join(None, ...).
    if args.preprocess and args.outdir is None:
        parser.error('--preprocess requires --outdir')

    # Resources are looked up relative to the directory of the (symlink
    # resolved) resource listing file.
    BASE_DIR = os.path.dirname(os.path.realpath(args.resource_file))

    if args.list_all:
        list_resources(args.resource_file)
    elif args.preprocess:
        preprocess_resources(args.resource_file, args.outdir)