From 091786c7d85ef9aaafcded8a65564fb4afd745c3 Mon Sep 17 00:00:00 2001
From: Veloman Yunkan
Date: Wed, 27 Apr 2022 13:38:13 +0400
Subject: [PATCH] A slight simplification of resource preprocessing

Now the whole content of a resource is preprocessed with a single
invocation of `re.sub()` rather than line-by-line.

Also, the function `get_preprocessed_resource()` returns a single value
rather than a (preprocessed_content, modification_count) pair; the
situation when the preprocessed resource is identical to the source
version is signalled by a return value of None.
---
 scripts/kiwix-resources | 34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/scripts/kiwix-resources b/scripts/kiwix-resources
index 0d7c1f85d..e7ff2a275 100755
--- a/scripts/kiwix-resources
+++ b/scripts/kiwix-resources
@@ -53,26 +53,26 @@ def set_cacheid(resource_matchobj):
     cacheid = 'cacheid=' + get_resource_revision(resource)
     return path + '?' + cacheid + extra_query
 
-def preprocess_line(line):
-    if 'KIWIXCACHEID' in line:
-        line = re.sub(RESOURCE_WITH_CACHEID_URL_PATTERN, set_cacheid, line)
-        assert not 'KIWIXCACHEID' in line
-    return line
+def preprocess_text(s):
+    if 'KIWIXCACHEID' in s:
+        s = re.sub(RESOURCE_WITH_CACHEID_URL_PATTERN, set_cacheid, s)
+        assert not 'KIWIXCACHEID' in s
+    return s
 
 def get_preprocessed_resource(srcpath):
-    modified_line_count = 0
-    preprocessed_lines = []
+    """Get the transformed content of a resource
+
+    If the resource at srcpath is modified by preprocessing then this function
+    returns the transformed content of the resource. Otherwise it returns None.
+    """
     try:
-        with open(srcpath, 'r') as source:
-            for line in source:
-                ppline = preprocess_line(line)
-                if ppline != line:
-                    modified_line_count += 1
-                preprocessed_lines.append(ppline)
-        return "".join(preprocessed_lines), modified_line_count
+        with open(srcpath, 'r') as resource_file:
+            content = resource_file.read()
+            preprocessed_content = preprocess_text(content)
+        return preprocessed_content if preprocessed_content != content else None
     except UnicodeDecodeError:
         # It was a binary resource
-        return None, 0
+        return None
 
 
 def symlink_resource(src, resource_path):
@@ -91,8 +91,8 @@ def preprocess_resource(resource_path):
     outpath = os.path.join(OUT_DIR, resource_path)
     if os.path.exists(outpath):
         os.remove(outpath)
-    preprocessed_content, modified_line_count = get_preprocessed_resource(srcpath)
-    if modified_line_count == 0:
+    preprocessed_content = get_preprocessed_resource(srcpath)
+    if preprocessed_content is None:
         symlink_resource(srcpath, outpath)
     else:
         with open(outpath, 'w') as target: