Skip to content

Commit

Permalink
Properly parse the content disposition filename (#46)
Browse files Browse the repository at this point in the history
If the filename in the `Content-Disposition` header is quoted, `pyodide-build`
keeps the quotes as part of the literal filename, which, for example, prevents
`shutil.unpack_archive` from determining the archive type.

This PR includes a simple fix: the header is parsed with the
`email.message.Message` parser, which strips the quotes and properly extracts
the filename (see https://stackoverflow.com/a/78073510).
  • Loading branch information
juntyr authored Nov 22, 2024
1 parent bfc74a5 commit eb09824
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 9 deletions.
25 changes: 16 additions & 9 deletions pyodide_build/buildpkg.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@
import fnmatch
import http.client
import os
import re
import shutil
import subprocess
import sys
import warnings
from collections.abc import Iterator
from datetime import datetime
from email.message import Message
from pathlib import Path
from typing import Any, cast

Expand Down Expand Up @@ -59,6 +59,20 @@ def _make_whlfile(
)


def _extract_tarballname(url: str, headers: dict) -> str:
tarballname = url.split("/")[-1]

if "Content-Disposition" in headers:
msg = Message()
msg["Content-Disposition"] = headers["Content-Disposition"]

filename = msg.get_filename()
if filename is not None:
tarballname = filename

return tarballname


class RecipeBuilder:
"""
A class to build a Pyodide meta.yaml recipe.
Expand Down Expand Up @@ -323,14 +337,7 @@ def _download_and_extract(self) -> None:

self.build_dir.mkdir(parents=True, exist_ok=True)

tarballname = url.split("/")[-1]
if "Content-Disposition" in response.headers:
filenames = re.findall(
"filename=(.+)", response.headers["Content-Disposition"]
)
if filenames:
tarballname = filenames[0]

tarballname = _extract_tarballname(url, response.headers)
tarballpath = self.build_dir / tarballname
tarballpath.write_bytes(response.content)

Expand Down
21 changes: 21 additions & 0 deletions pyodide_build/tests/test_buildpkg.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,3 +260,24 @@ def test_copy_sharedlib(tmp_path):
deps = ("sharedlib-test.so", "sharedlib-test-dep.so", "sharedlib-test-dep2.so")
for dep in deps:
assert dep in dep_map


def test_extract_tarballname():
    """Check the tarball name chosen for various Content-Disposition headers."""
    url = "https://www.test.com/ball.tar.gz"

    # Each case pairs the response headers with the expected tarball name.
    cases = [
        # No header, or a header without a filename: fall back to the URL.
        ({}, "ball.tar.gz"),
        ({"Content-Disposition": "inline"}, "ball.tar.gz"),
        ({"Content-Disposition": "attachment"}, "ball.tar.gz"),
        # Quoted filename: the quotes must not end up in the name.
        (
            {"Content-Disposition": 'attachment; filename="ball 2.tar.gz"'},
            "ball 2.tar.gz",
        ),
        # RFC 2231 encoded filename: percent-escapes are decoded.
        (
            {"Content-Disposition": "attachment; filename*=UTF-8''ball%203.tar.gz"},
            "ball 3.tar.gz",
        ),
    ]

    for response_headers, expected in cases:
        assert buildpkg._extract_tarballname(url, response_headers) == expected

0 comments on commit eb09824

Please sign in to comment.