Skip to content

Commit

Permalink
Add/fix support for various script types in HTML documents
Browse files Browse the repository at this point in the history
- Add support for application/javascript and text/javascript
- Fix support for application/json
- Add support for all unknown script types (their content is left as-is instead of being rewritten as classic JavaScript)
  • Loading branch information
benoit74 committed May 31, 2024
1 parent 58e94db commit 246b5dd
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 3 deletions.
11 changes: 8 additions & 3 deletions src/warc2zim/content_rewriting/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,9 +88,14 @@ def handle_starttag(self, tag: str, attrs: AttrsList, *, auto_close: bool = Fals
self.html_rewrite_context = tag # default value if not overridden later on
if tag == "script":
script_type = self.extract_attr(attrs, "type")
self.html_rewrite_context = {"json": "json", "module": "js-module"}.get(
script_type or "", "js-classic"
)
self.html_rewrite_context = {
"application/json": "json",
"json": "json",
"module": "js-module",
"application/javascript": "js-classic",
"text/javascript": "js-classic",
"": "js-classic",
}.get(script_type or "", "unknown")
elif tag == "link":
link_rel = self.extract_attr(attrs, "rel")
if link_rel == "modulepreload":
Expand Down
59 changes: 59 additions & 0 deletions tests/test_html_rewriting.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,14 @@
ContentForTests(
'<script type="json">{"window": "https://kiwix.org/path"}</script>'
),
ContentForTests(
'<script type="application/json">{"window": "https://kiwix.org/path"}'
"</script>"
),
ContentForTests(
'<script type="application/i_dont_know_you">'
'{"window": "https://kiwix.org/path"}</script>'
),
]
)
def no_rewrite_content(request):
Expand Down Expand Up @@ -126,6 +134,57 @@ def test_escaped_content(escaped_content, no_js_notify):
"}</script>"
),
),
ContentForTests(
'<script type="application/javascript">document.title="HELLO";</script>',
(
"""<script type="application/javascript">"""
"var _____WB$wombat$assign$function_____ = function(name) "
""
"{return (self._wb_wombat && self._wb_wombat.local_init && "
"self._wb_wombat.local_init(name)) || self[name]; };\n"
"if (!self.__WB_pmw) { self.__WB_pmw = function(obj) "
"{ this.__WB_source = obj; return this; } }\n"
"{\n"
"""let window = _____WB$wombat$assign$function_____("window");\n"""
"let globalThis = _____WB$wombat$assign$function_____"
"""("globalThis");\n"""
"""let self = _____WB$wombat$assign$function_____("self");\n"""
"""let document = _____WB$wombat$assign$function_____("document");\n"""
"""let location = _____WB$wombat$assign$function_____("location");\n"""
"""let top = _____WB$wombat$assign$function_____("top");\n"""
"""let parent = _____WB$wombat$assign$function_____("parent");\n"""
"""let frames = _____WB$wombat$assign$function_____("frames");\n"""
"""let opener = _____WB$wombat$assign$function_____("opener");\n"""
"let arguments;\n\n"
"""document.title="HELLO";\n"""
"}</script>"
),
),
ContentForTests(
'<script type="text/javascript">document.title="HELLO";</script>',
(
"""<script type="text/javascript">"""
"var _____WB$wombat$assign$function_____ = function(name) "
"{return (self._wb_wombat && self._wb_wombat.local_init && "
"self._wb_wombat.local_init(name)) || self[name]; };\n"
"if (!self.__WB_pmw) { self.__WB_pmw = function(obj) "
"{ this.__WB_source = obj; return this; } }\n"
"{\n"
"""let window = _____WB$wombat$assign$function_____("window");\n"""
"let globalThis = _____WB$wombat$assign$function_____"
"""("globalThis");\n"""
"""let self = _____WB$wombat$assign$function_____("self");\n"""
"""let document = _____WB$wombat$assign$function_____("document");\n"""
"""let location = _____WB$wombat$assign$function_____("location");\n"""
"""let top = _____WB$wombat$assign$function_____("top");\n"""
"""let parent = _____WB$wombat$assign$function_____("parent");\n"""
"""let frames = _____WB$wombat$assign$function_____("frames");\n"""
"""let opener = _____WB$wombat$assign$function_____("opener");\n"""
"let arguments;\n\n"
"""document.title="HELLO";\n"""
"}</script>"
),
),
]
)
def js_rewrites(request):
Expand Down

0 comments on commit 246b5dd

Please sign in to comment.