From acdc1dfb2745b529262eaccfe37b74e1a7f0e31c Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Sun, 3 Apr 2022 21:41:37 +0400 Subject: [PATCH 01/12] New unit-test ServerTest.CacheIdsOfStaticResources Introduced a new unit-test which will ensure that static resources of kiwix-serve have the cache ids applied to them in the links embedded into the HTML code. At this point there are no cache ids. The new unit-test will help to visualize how they come into existence. --- test/server.cpp | 79 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/test/server.cpp b/test/server.cpp index 9f4356b5a..d95f15fad 100644 --- a/test/server.cpp +++ b/test/server.cpp @@ -288,6 +288,85 @@ TEST_F(ServerTest, UncompressibleContentIsNotCompressed) } } + +// Selects from text only the lines containing the specified (fixed string) +// pattern +std::string fgrep(const std::string& pattern, const std::string& text) +{ + std::istringstream iss(text); + std::string line; + std::string result; + while ( getline(iss, line) ) { + if ( line.find(pattern) != std::string::npos ) { + result += line + "\n"; + } + } + return result; +} + +TEST_F(ServerTest, CacheIdsOfStaticResources) +{ + typedef std::pair UrlAndExpectedResult; + const std::vector testData{ + { + /* url */ "/ROOT/", +R"EXPECTEDRESULT( src="/ROOT/skin/jquery-ui/external/jquery/jquery.js" + src="/ROOT/skin/jquery-ui/jquery-ui.min.js" + href="/ROOT/skin/jquery-ui/jquery-ui.min.css" + href="/ROOT/skin/jquery-ui/jquery-ui.theme.min.css" + href="/ROOT/skin/index.css" + src: url("/ROOT/skin/fonts/Poppins.ttf") format("truetype"); + src: url("/ROOT/skin/fonts/Roboto.ttf") format("truetype"); + + + +)EXPECTEDRESULT" + }, + { + /* url */ "/ROOT/skin/index.js", +R"EXPECTEDRESULT( direct download + download hash + download magnet + download torrent +)EXPECTEDRESULT" + }, + { + /* url */ "/ROOT/zimfile/A/index", +R"EXPECTEDRESULT( + + + + + + +)EXPECTEDRESULT" + }, + { + // Searching in a ZIM file without a full-text index returns + // a page rendered from static/templates/no_search_result_html + /* url */ "/ROOT/search?content=poor&pattern=whatever", +R"EXPECTEDRESULT( + + + + + + + +)EXPECTEDRESULT" + }, + }; + + for ( const auto& urlAndExpectedResult : testData ) { + const std::string url = urlAndExpectedResult.first; + const std::string expectedResult = urlAndExpectedResult.second; + const TestContext ctx{ {"url", url} }; + const auto r = zfs1_->GET(url.c_str()); + EXPECT_EQ(r->body.find("KIWIXCACHEID"), std::string::npos) << ctx; + EXPECT_EQ(fgrep("/skin/", r->body), expectedResult) << ctx; + } +} + const char* urls400[] = { "/ROOT/search", "/ROOT/search?content=zimfile", From fc85215ea0fb1e5f6922491661c32b5545ca077f Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Wed, 6 Apr 2022 19:47:37 +0400 Subject: [PATCH 02/12] Preprocessing of template resources In template resources (found under static/templates), strings of the form "PATH/TO/STATIC/RESOURCE?KIWIXCACHEID" are expanded into "PATH/TO/STATIC/RESOURCE?cacheid=CACHEIDVAL" where CACHEIDVAL is a 8-digit hexadecimal hash digest of the file at static/PATH/TO/STATIC/RESOURCE. --- scripts/kiwix-resources | 116 +++++++++++++++++++++++++++++ scripts/meson.build | 1 + static/meson.build | 19 +++-- static/templates/head_taskbar.html | 12 +-- static/templates/index.html | 20 ++--- static/templates/taskbar_part.html | 2 +- test/server.cpp | 62 +++++++-------- 7 files changed, 179 insertions(+), 53 deletions(-) create mode 100755 scripts/kiwix-resources diff --git a/scripts/kiwix-resources b/scripts/kiwix-resources new file mode 100755 index 000000000..72e029a20 --- /dev/null +++ b/scripts/kiwix-resources @@ -0,0 +1,116 @@ +#!/usr/bin/env python3 + +''' +Copyright 2022 Veloman Yunkan + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3 of the License, or any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +02110-1301, USA. +''' + +import argparse +import hashlib +import os.path +import re + +def read_resource_file(resource_file_path): + with open(resource_file_path, 'r') as f: + return [line.strip() for line in f] + +def list_resources(resource_file_path): + for resource_path in read_resource_file(resource_file_path): + print(resource_path) + +def get_resource_revision(base_dir, resource_path): + with open(os.path.join(base_dir, resource_path), 'rb') as f: + return hashlib.sha1(f.read()).hexdigest()[:8] + +resource_revisions = {} + +def fill_resource_revisions(resource_file_path): + base_dir = os.path.dirname(os.path.realpath(resource_file_path)) + for resource in read_resource_file(resource_file_path): + resource_revisions[resource] = get_resource_revision(base_dir, resource) + +RESOURCE_WITH_CACHEID_URL_PATTERN=r'"([^"?]+)\?KIWIXCACHEID([^"]*)"' + +def set_cacheid(resource_matchobj): + path = resource_matchobj.group(1) + resource = path + root_prefix = '{{root}}/' + if resource.startswith(root_prefix): + resource = resource[len(root_prefix):] + extra_query = resource_matchobj.group(2) + cacheid = 'cacheid=' + resource_revisions[resource] + return f'"{path}?{cacheid}{extra_query}"' + +def preprocess_line(line): + if 'KIWIXCACHEID' in line: + line = re.sub(RESOURCE_WITH_CACHEID_URL_PATTERN, set_cacheid, line) + assert not 'KIWIXCACHEID' in line + return line + +def preprocess_template(srcpath, dstpath): + preprocessed_lines = [] + with open(srcpath, 'r') as source: + for line in source: + preprocessed_lines.append(preprocess_line(line)) + + with open(dstpath, 'w') as target: + print("".join(preprocessed_lines), end='', file=target) + +def symlink_resource(src, resource_path): + if os.path.exists(resource_path): + if os.path.islink(resource_path) and os.readlink(resource_path) == src: + return + os.remove(resource_path) + os.symlink(src, resource_path) + +def preprocess_resource(srcdir, resource_path, outdir): + resource_dir = os.path.dirname(resource_path) + if resource_dir != '': + os.makedirs(os.path.join(outdir, resource_dir), exist_ok=True) + srcpath = os.path.join(srcdir, resource_path) + outpath = os.path.join(outdir, resource_path) + if resource_path.startswith('templates/'): + preprocess_template(srcpath, outpath) + else: + symlink_resource(srcpath, outpath) + +def copy_file(src_path, dst_path): + with open(src_path, 'rb') as src: + with open(dst_path, 'wb') as dst: + dst.write(src.read()) + +def preprocess_resources(resource_file_path, outdir): + base_dir = os.path.dirname(os.path.realpath(resource_file_path)) + resource_filename = os.path.basename(resource_file_path) + for resource in read_resource_file(resource_file_path): + preprocess_resource(base_dir, resource, outdir) + copy_file(resource_file_path, os.path.join(outdir, resource_filename)) + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + commands = parser.add_mutually_exclusive_group() + commands.add_argument('--list-all', action='store_true') + commands.add_argument('--preprocess', action='store_true') + parser.add_argument('--outdir') + parser.add_argument('resource_file') + args = parser.parse_args() + + if args.list_all: + list_resources(args.resource_file) + elif args.preprocess: + fill_resource_revisions(args.resource_file) + preprocess_resources(args.resource_file, args.outdir) diff --git a/scripts/meson.build b/scripts/meson.build index c4ddec873..889a53248 100644 --- a/scripts/meson.build +++ b/scripts/meson.build @@ -1,4 +1,5 @@ +res_manager = find_program('kiwix-resources') res_compiler = find_program('kiwix-compile-resources') install_data(res_compiler.path(), install_dir:get_option('bindir')) diff --git a/static/meson.build b/static/meson.build index 6a8ce1773..965f2c11b 100644 --- a/static/meson.build +++ b/static/meson.build @@ -1,18 +1,27 @@ -resource_files = run_command(find_program('python3'), - '-c', - 'import sys; f=open(sys.argv[1]); print(f.read())', +resource_files = run_command(res_manager, + '--list-all', files('resources_list.txt') ).stdout().strip().split('\n') -lib_resources = custom_target('resources', +preprocessed_resources = custom_target('preprocessed_resource_files', input: 'resources_list.txt', + output: ['resources_list.txt'], + command:[res_manager, + '--preprocess', + '--outdir', '@OUTDIR@', + '@INPUT@'], + depend_files: resource_files +) + +lib_resources = custom_target('resources', + input: preprocessed_resources, output: ['kiwixlib-resources.cpp', 'kiwixlib-resources.h'], command:[res_compiler, '--cxxfile', '@OUTPUT0@', '--hfile', '@OUTPUT1@', '--source_dir', '@OUTDIR@', '@INPUT@'], - depend_files: resource_files + depends: preprocessed_resources ) i18n_resource_files = run_command(find_program('python3'), diff --git a/static/templates/head_taskbar.html b/static/templates/head_taskbar.html index 1ed843638..e595ac138 100644 --- a/static/templates/head_taskbar.html +++ b/static/templates/head_taskbar.html @@ -1,6 +1,6 @@ - - - - - - + + + + + + diff --git a/static/templates/index.html b/static/templates/index.html index b28f58085..39d5afc9e 100644 --- a/static/templates/index.html +++ b/static/templates/index.html @@ -6,41 +6,41 @@ Welcome to Kiwix Server - - - + + +
diff --git a/static/templates/taskbar_part.html b/static/templates/taskbar_part.html index 42c76ed4b..c056d48d0 100644 --- a/static/templates/taskbar_part.html +++ b/static/templates/taskbar_part.html @@ -9,7 +9,7 @@
- +
{{#withlibrarybutton}} diff --git a/test/server.cpp b/test/server.cpp index d95f15fad..422cc2620 100644 --- a/test/server.cpp +++ b/test/server.cpp @@ -310,16 +310,16 @@ TEST_F(ServerTest, CacheIdsOfStaticResources) const std::vector testData{ { /* url */ "/ROOT/", -R"EXPECTEDRESULT( src="/ROOT/skin/jquery-ui/external/jquery/jquery.js" - src="/ROOT/skin/jquery-ui/jquery-ui.min.js" - href="/ROOT/skin/jquery-ui/jquery-ui.min.css" - href="/ROOT/skin/jquery-ui/jquery-ui.theme.min.css" - href="/ROOT/skin/index.css" - src: url("/ROOT/skin/fonts/Poppins.ttf") format("truetype"); - src: url("/ROOT/skin/fonts/Roboto.ttf") format("truetype"); - - - +R"EXPECTEDRESULT( src="/ROOT/skin/jquery-ui/external/jquery/jquery.js?cacheid=1d85f0f3" + src="/ROOT/skin/jquery-ui/jquery-ui.min.js?cacheid=d927c2ff" + href="/ROOT/skin/jquery-ui/jquery-ui.min.css?cacheid=e1de77b3" + href="/ROOT/skin/jquery-ui/jquery-ui.theme.min.css?cacheid=2a5841f9" + href="/ROOT/skin/index.css?cacheid=1aca980a" + src: url("/ROOT/skin/fonts/Poppins.ttf?cacheid=af705837") format("truetype"); + src: url("/ROOT/skin/fonts/Roboto.ttf?cacheid=84d10248") format("truetype"); + + + )EXPECTEDRESULT" }, { @@ -332,13 +332,13 @@ R"EXPECTEDRESULT( - - - - - - +R"EXPECTEDRESULT( + + + + + + )EXPECTEDRESULT" }, { @@ -346,13 +346,13 @@ R"EXPECTEDRESULT( - - - - - - - + + + + + + + )EXPECTEDRESULT" }, }; @@ -512,12 +512,12 @@ std::string TestContentIn404HtmlResponse::expectedResponse() const )FRAG", R"FRAG( - - - - - - + + + + + + @@ -533,7 +533,7 @@ std::string TestContentIn404HtmlResponse::expectedResponse() const R"FRAG(
- +
Date: Thu, 14 Apr 2022 22:37:11 +0400 Subject: [PATCH 03/12] Applied cache-id to search_results.css The story of search_results.css static/skin/search_results.css was extracted from static/templates/no_search_result.html before the latter was dropped. static/templates/no_search_result.html in turn seems to be a copied and edited version of static/templates/search_result.html. In the context of exploratory work on the internationalization of kiwix-serve (PR #679) I noticed duplication of inline CSS across those two templates and intended to eliminated it. That goal was not fully accomplished (static/templates/search_result.html remained untouched) because by that time PR #679 grew too big and the efforts were diverted into splitting it into smaller ones. Thus search_results.css slipped into one of those small PRs, without making much sense because nothing really justifies preserving custom CSS in the "Fulltext search unavailable" error page. At the same time, it served as the only case where a link to a cacheable resource is generated in C++ code (rather than found in a template). This poses certain problems to the handling of cache-ids. A workaround is to expel the URL into a template so that it is processed by `kiwix-resources`. This commit merely demonstrates that solution. But whether it should be preserved (or rather the "Fulltext search unavailable" page should be deprived of CSS) is questionable. --- scripts/kiwix-resources | 4 ++-- src/server/internalServer.cpp | 13 ++++++++++++- static/resources_list.txt | 1 + static/templates/url_of_search_results_css | 1 + test/server.cpp | 4 ++-- 5 files changed, 18 insertions(+), 5 deletions(-) create mode 100644 static/templates/url_of_search_results_css diff --git a/scripts/kiwix-resources b/scripts/kiwix-resources index 72e029a20..9c607dede 100755 --- a/scripts/kiwix-resources +++ b/scripts/kiwix-resources @@ -43,7 +43,7 @@ def fill_resource_revisions(resource_file_path): for resource in read_resource_file(resource_file_path): resource_revisions[resource] = get_resource_revision(base_dir, resource) -RESOURCE_WITH_CACHEID_URL_PATTERN=r'"([^"?]+)\?KIWIXCACHEID([^"]*)"' +RESOURCE_WITH_CACHEID_URL_PATTERN=r'([^"?]+)\?KIWIXCACHEID([^"]*)' def set_cacheid(resource_matchobj): path = resource_matchobj.group(1) @@ -53,7 +53,7 @@ def set_cacheid(resource_matchobj): resource = resource[len(root_prefix):] extra_query = resource_matchobj.group(2) cacheid = 'cacheid=' + resource_revisions[resource] - return f'"{path}?{cacheid}{extra_query}"' + return f'{path}?{cacheid}{extra_query}' def preprocess_line(line): if 'KIWIXCACHEID' in line: diff --git a/src/server/internalServer.cpp b/src/server/internalServer.cpp index 748765615..95f999f99 100644 --- a/src/server/internalServer.cpp +++ b/src/server/internalServer.cpp @@ -442,6 +442,16 @@ SuggestionsList_t getSuggestions(SuggestionSearcherCache& cache, const zim::Arch namespace { +std::string renderUrl(const std::string& root, const std::string& urlTemplate) +{ + MustacheData data; + data.set("root", root); + auto url = kainjow::mustache::mustache(urlTemplate).render(data); + if ( url.back() == '\n' ) + url.pop_back(); + return url; +} + std::string makeFulltextSearchSuggestion(const std::string& lang, const std::string& queryString) { return i18n::expandParameterizedString(lang, "suggest-full-text-search", @@ -622,10 +632,11 @@ std::unique_ptr InternalServer::handle_search(const RequestContext& re } catch(std::runtime_error& e) { // Searcher->search will throw a runtime error if there is no valid xapian database to do the search. // (in case of zim file not containing a index) + const auto cssUrl = renderUrl(m_root, RESOURCE::templates::url_of_search_results_css); return HTTPErrorHtmlResponse(*this, request, MHD_HTTP_NOT_FOUND, "fulltext-search-unavailable", "404-page-heading", - m_root + "/skin/search_results.css") + cssUrl) + nonParameterizedMessage("no-search-results") + TaskbarInfo(searchInfo.bookName, archive.get()); } diff --git a/static/resources_list.txt b/static/resources_list.txt index 15275ad19..bc1eb1af3 100644 --- a/static/resources_list.txt +++ b/static/resources_list.txt @@ -47,5 +47,6 @@ templates/catalog_v2_entries.xml templates/catalog_v2_entry.xml templates/catalog_v2_categories.xml templates/catalog_v2_languages.xml +templates/url_of_search_results_css opensearchdescription.xml catalog_v2_searchdescription.xml diff --git a/static/templates/url_of_search_results_css b/static/templates/url_of_search_results_css new file mode 100644 index 000000000..f0ecb87d8 --- /dev/null +++ b/static/templates/url_of_search_results_css @@ -0,0 +1 @@ +{{root}}/skin/search_results.css?KIWIXCACHEID diff --git a/test/server.cpp b/test/server.cpp index 422cc2620..7a6ddf307 100644 --- a/test/server.cpp +++ b/test/server.cpp @@ -345,7 +345,7 @@ R"EXPECTEDRESULT( +R"EXPECTEDRESULT( @@ -847,7 +847,7 @@ TEST_F(ServerTest, 404WithBodyTesting) { /* url */ "/ROOT/search?content=poor&pattern=whatever", expected_page_title=="Fulltext search unavailable" && - expected_css_url=="/ROOT/skin/search_results.css" && + expected_css_url=="/ROOT/skin/search_results.css?cacheid=76d39c84" && book_name=="poor" && book_title=="poor" && expected_body==R"( From 150851b33d6da47cc9294607ec0a193b5b86826e Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Sun, 27 Feb 2022 20:14:19 +0400 Subject: [PATCH 04/12] kiwix-resources preprocesses all resources kiwix-resources preprocesses all resources rather than only templates. At this point this doesn't change anything since only (some) template resources contain KIWIXCACHEID placeholders. But this enhancement opens the door to the preprocessing of static/skin/index.js (after preprocessing is able to handle relative links, which comes in the next commit). --- scripts/kiwix-resources | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/scripts/kiwix-resources b/scripts/kiwix-resources index 9c607dede..af30fa577 100755 --- a/scripts/kiwix-resources +++ b/scripts/kiwix-resources @@ -61,14 +61,21 @@ def preprocess_line(line): assert not 'KIWIXCACHEID' in line return line -def preprocess_template(srcpath, dstpath): +def get_preprocessed_resource(srcpath): + modified_line_count = 0 preprocessed_lines = [] - with open(srcpath, 'r') as source: - for line in source: - preprocessed_lines.append(preprocess_line(line)) + try: + with open(srcpath, 'r') as source: + for line in source: + ppline = preprocess_line(line) + if ppline != line: + modified_line_count += 1 + preprocessed_lines.append(ppline) + return "".join(preprocessed_lines), modified_line_count + except UnicodeDecodeError: + # It was a binary resource + return None, 0 - with open(dstpath, 'w') as target: - print("".join(preprocessed_lines), end='', file=target) def symlink_resource(src, resource_path): if os.path.exists(resource_path): @@ -78,15 +85,19 @@ def symlink_resource(src, resource_path): os.symlink(src, resource_path) def preprocess_resource(srcdir, resource_path, outdir): + print(f'Preprocessing {resource_path}...') resource_dir = os.path.dirname(resource_path) if resource_dir != '': os.makedirs(os.path.join(outdir, resource_dir), exist_ok=True) srcpath = os.path.join(srcdir, resource_path) outpath = os.path.join(outdir, resource_path) - if resource_path.startswith('templates/'): - preprocess_template(srcpath, outpath) - else: + preprocessed_content, modified_line_count = get_preprocessed_resource(srcpath) + if modified_line_count == 0: symlink_resource(srcpath, outpath) + else: + with open(outpath, 'w') as target: + print(preprocessed_content, end='', file=target) + def copy_file(src_path, dst_path): with open(src_path, 'rb') as src: From c016dfd2ce201b834c1097248e390f993aa4bf30 Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Sun, 27 Feb 2022 20:32:57 +0400 Subject: [PATCH 05/12] Resource preprocessing handles relative links ... but only if they contain "/skin/" as a substring. --- scripts/kiwix-resources | 9 +++------ static/skin/index.js | 8 ++++---- test/server.cpp | 10 +++++----- 3 files changed, 12 insertions(+), 15 deletions(-) diff --git a/scripts/kiwix-resources b/scripts/kiwix-resources index af30fa577..08295c384 100755 --- a/scripts/kiwix-resources +++ b/scripts/kiwix-resources @@ -43,15 +43,12 @@ def fill_resource_revisions(resource_file_path): for resource in read_resource_file(resource_file_path): resource_revisions[resource] = get_resource_revision(base_dir, resource) -RESOURCE_WITH_CACHEID_URL_PATTERN=r'([^"?]+)\?KIWIXCACHEID([^"]*)' +RESOURCE_WITH_CACHEID_URL_PATTERN=r'((.*)/skin/([^"?]+))\?KIWIXCACHEID([^"]*)' def set_cacheid(resource_matchobj): path = resource_matchobj.group(1) - resource = path - root_prefix = '{{root}}/' - if resource.startswith(root_prefix): - resource = resource[len(root_prefix):] - extra_query = resource_matchobj.group(2) + resource = 'skin/' + resource_matchobj.group(3) + extra_query = resource_matchobj.group(4) cacheid = 'cacheid=' + resource_revisions[resource] return f'{path}?{cacheid}{extra_query}' diff --git a/static/skin/index.js b/static/skin/index.js index 267da4098..5c435ac6b 100644 --- a/static/skin/index.js +++ b/static/skin/index.js @@ -171,25 +171,25 @@