diff --git a/src/warc2zim/content_rewriting/html.py b/src/warc2zim/content_rewriting/html.py index 2758d28..07add06 100644 --- a/src/warc2zim/content_rewriting/html.py +++ b/src/warc2zim/content_rewriting/html.py @@ -88,9 +88,14 @@ def handle_starttag(self, tag: str, attrs: AttrsList, *, auto_close: bool = Fals self.html_rewrite_context = tag # default value if not overriden later on if tag == "script": script_type = self.extract_attr(attrs, "type") - self.html_rewrite_context = {"json": "json", "module": "js-module"}.get( - script_type or "", "js-classic" - ) + self.html_rewrite_context = { + "application/json": "json", + "json": "json", + "module": "js-module", + "application/javascript": "js-classic", + "text/javascript": "js-classic", + "": "js-classic", + }.get(script_type or "", "unknown") elif tag == "link": link_rel = self.extract_attr(attrs, "rel") if link_rel == "modulepreload": diff --git a/tests/test_html_rewriting.py b/tests/test_html_rewriting.py index 6a46d02..2c4be17 100644 --- a/tests/test_html_rewriting.py +++ b/tests/test_html_rewriting.py @@ -40,6 +40,14 @@ ContentForTests( '' ), + ContentForTests( + '" + ), + ContentForTests( + '' + ), ] ) def no_rewrite_content(request): @@ -126,6 +134,57 @@ def test_escaped_content(escaped_content, no_js_notify): "}" ), ), + ContentForTests( + '', + ( + """" + ), + ), + ContentForTests( + '', + ( + """" + ), + ), ] ) def js_rewrites(request):