From 157f01e9511e35c6100993806db8e1e72b9c6c00 Mon Sep 17 00:00:00 2001
From: Veloman Yunkan <veloman.yunkan@gmail.com>
Date: Wed, 6 Apr 2022 14:54:42 +0400
Subject: [PATCH] Preparing to handle inter-resource dependency

The current implementation of resource preprocessing contains a bug
(with respect to the problem that it tries to solve): it doesn't take
into account the dependence of static resources on each other. If
resource A refers to B and B refers to C, then a change in C would
result in its cache id being updated in the preprocessed version of B.
However, the cache id of B itself won't change, since it is derived
from B's source rather than from its preprocessed output.

This commit is the first step towards addressing the described issue.

Now the cache id of a resource is computed on demand rather than
precomputed for all resources. The only thing remaining is to compute
the cache id from the preprocessed content.
---
 scripts/kiwix-resources | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/scripts/kiwix-resources b/scripts/kiwix-resources
index 840f8b080..9639fe59b 100755
--- a/scripts/kiwix-resources
+++ b/scripts/kiwix-resources
@@ -32,16 +32,16 @@ def list_resources(resource_file_path):
     for resource_path in read_resource_file(resource_file_path):
         print(resource_path)
 
-def get_resource_revision(base_dir, resource_path):
-    with open(os.path.join(base_dir, resource_path), 'rb') as f:
+def compute_resource_revision(resource_path):
+    with open(os.path.join(BASE_DIR, resource_path), 'rb') as f:
         return hashlib.sha1(f.read()).hexdigest()[:8]
 
 resource_revisions = {}
 
-def fill_resource_revisions(resource_file_path):
-    base_dir = os.path.dirname(os.path.realpath(resource_file_path))
-    for resource in read_resource_file(resource_file_path):
-        resource_revisions[resource] = get_resource_revision(base_dir, resource)
+def get_resource_revision(res):
+    if not res in resource_revisions:
+        resource_revisions[res] = compute_resource_revision(res)
+    return resource_revisions[res]
 
 RESOURCE_WITH_CACHEID_URL_PATTERN=r'((.*)/skin/([^"?]+))\?KIWIXCACHEID([^"]*)'
 
@@ -49,7 +49,7 @@ def set_cacheid(resource_matchobj):
     path = resource_matchobj.group(1)
     resource = 'skin/' + resource_matchobj.group(3)
     extra_query = resource_matchobj.group(4)
-    cacheid = 'cacheid=' + resource_revisions[resource]
+    cacheid = 'cacheid=' + get_resource_revision(resource)
     return path + '?' + cacheid + extra_query
 
 def preprocess_line(line):
@@ -104,10 +104,9 @@ def copy_file(src_path, dst_path):
             dst.write(src.read())
 
 def preprocess_resources(resource_file_path, outdir):
-    base_dir = os.path.dirname(os.path.realpath(resource_file_path))
     resource_filename = os.path.basename(resource_file_path)
     for resource in read_resource_file(resource_file_path):
-        preprocess_resource(base_dir, resource, outdir)
+        preprocess_resource(BASE_DIR, resource, outdir)
     copy_file(resource_file_path, os.path.join(outdir, resource_filename))
 
 if __name__ == "__main__":
@@ -118,9 +117,9 @@ if __name__ == "__main__":
     parser.add_argument('--outdir')
     parser.add_argument('resource_file')
     args = parser.parse_args()
+    BASE_DIR = os.path.dirname(os.path.realpath(args.resource_file))
 
     if args.list_all:
         list_resources(args.resource_file)
     elif args.preprocess:
-        fill_resource_revisions(args.resource_file)
         preprocess_resources(args.resource_file, args.outdir)