Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master' into enh/docker_alpine
Browse files Browse the repository at this point in the history
  • Loading branch information
bpinsard committed Oct 30, 2024
2 parents 0807931 + 2eb5291 commit bb7c286
Show file tree
Hide file tree
Showing 9 changed files with 260 additions and 41 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ jobs:
- '3.9'
- '3.10'
- '3.11'
- '3.12'
# Python 3.13 seems to need work in traits first: https://github.com/nipy/heudiconv/pull/799#issuecomment-2447298795
# - '3.13'
steps:
- name: Check out repository
uses: actions/checkout@v4
Expand Down
12 changes: 12 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
# v1.3.1 (Fri Oct 25 2024)

#### 🐛 Bug Fix

- Fix assignment of sensitive git-annex metadata via glob patterns (regression introduced by #739) [#793](https://github.com/nipy/heudiconv/pull/793) ([@bpinsard](https://github.com/bpinsard))

#### Authors: 1

- Basile ([@bpinsard](https://github.com/bpinsard))

---

# v1.3.0 (Wed Oct 02 2024)

#### 🚀 Enhancement
Expand Down
189 changes: 157 additions & 32 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,34 +1,159 @@
# ---- Build stage -----------------------------------------------------------
# Compiles dcm2niix and installs the Python packages that are later copied
# into the runtime stage. Nothing from this stage reaches the final image
# except what the COPY --from=builder lines below pick up.
FROM python:3.10-alpine AS builder

# Build toolchain and helpers. --no-cache fetches the package index on the fly
# and leaves nothing behind in /var/cache/apk.
RUN apk add --no-cache \
        bash \
        cmake \
        curl \
        g++ \
        gcc \
        git \
        libc-dev \
        make \
        util-linux-dev

# Installed here, where a compiler toolchain is available; they reach the
# runtime stage through the site-packages COPY below.
RUN pip install --no-cache-dir pylibjpeg-libjpeg traits==6.3.2

# dcm2niix tag to build; override with --build-arg DCM2NIIX_VERSION=...
ARG DCM2NIIX_VERSION=v1.0.20240202

# Clone, check out the requested tag, build and install under /usr, then
# remove the source tree in the same layer.
RUN git clone https://github.com/rordenlab/dcm2niix /tmp/dcm2niix \
    && cd /tmp/dcm2niix \
    && git fetch --tags \
    && git checkout "$DCM2NIIX_VERSION" \
    && mkdir /tmp/dcm2niix/build \
    && cd /tmp/dcm2niix/build \
    && cmake -DZLIB_IMPLEMENTATION=Cloudflare -DUSE_JPEGLS=ON -DUSE_OPENJPEG=ON -DCMAKE_INSTALL_PREFIX:PATH=/usr/ .. \
    && make -j1 \
    && make install \
    && rm -rf /tmp/dcm2niix

# ---- Runtime stage ---------------------------------------------------------
FROM python:3.10-alpine

# Python packages and the dcm2niix binary produced by the build stage.
COPY --from=builder /usr/local/lib/python3.10/site-packages /usr/local/lib/python3.10/site-packages
COPY --from=builder /usr/bin/dcm2niix /usr/bin/dcm2niix

# Runtime tools. A separate `apk update` is unnecessary with --no-cache and
# would only persist the downloaded index inside the layer.
# NOTE(review): gcompat is presumably needed for glibc-linked binaries or
# wheels -- confirm which component requires it.
RUN apk add --no-cache \
        gcompat \
        git \
        git-annex \
        pigz

# Installs heudiconv from the package index, not from the build context.
# NOTE(review): the version is unpinned, so rebuilds may pick up a newer release.
RUN pip install --no-cache-dir heudiconv
# Generated by Neurodocker and Reproenv.

# Base image: NeuroDebian on Debian bookworm.
FROM neurodebian:bookworm
# Put the dcm2niix install prefix (populated by the RUN below) on PATH.
ENV PATH="/opt/dcm2niix-v1.0.20240202/bin:$PATH"
# Build dcm2niix v1.0.20240202 from source: install the toolchain, clone the
# repository, check out the tag, configure with CMake, build, install into
# /opt/dcm2niix-v1.0.20240202 and delete the source tree in the same layer.
# NOTE(review): the build packages (cmake, g++, gcc, make, zlib1g-dev) are not
# removed afterwards, so they stay in the image.
RUN apt-get update -qq \
&& apt-get install -y -q --no-install-recommends \
ca-certificates \
cmake \
g++ \
gcc \
git \
make \
pigz \
zlib1g-dev \
&& rm -rf /var/lib/apt/lists/* \
&& git clone https://github.com/rordenlab/dcm2niix /tmp/dcm2niix \
&& cd /tmp/dcm2niix \
&& git fetch --tags \
&& git checkout v1.0.20240202 \
&& mkdir /tmp/dcm2niix/build \
&& cd /tmp/dcm2niix/build \
&& cmake -DZLIB_IMPLEMENTATION=Cloudflare -DUSE_JPEGLS=ON -DUSE_OPENJPEG=ON -DCMAKE_INSTALL_PREFIX:PATH=/opt/dcm2niix-v1.0.20240202 .. \
&& make -j1 \
&& make install \
&& rm -rf /tmp/dcm2niix
# Additional system packages (git, git-annex-standalone, compiler/headers,
# netbase, pigz); the apt lists are removed in the same layer.
RUN apt-get update -qq \
&& apt-get install -y -q --no-install-recommends \
gcc \
git \
git-annex-standalone \
libc-dev \
liblzma-dev \
netbase \
pigz \
&& rm -rf /var/lib/apt/lists/*
# Copy the whole build context into /src/heudiconv; it is installed in
# editable mode by the conda RUN below.
# NOTE(review): because this COPY precedes the conda setup, any change in the
# build context presumably invalidates the cache for the Miniconda layer too.
COPY [".", \
"/src/heudiconv"]
# Location of the Miniconda installation and its bin directory on PATH.
ENV CONDA_DIR="/opt/miniconda-py39_4.12.0" \
PATH="/opt/miniconda-py39_4.12.0/bin:$PATH"
# Install Miniconda (py39_4.12.0), configure channels, install the scientific
# Python stack into the base environment, then pip-install heudiconv from
# /src/heudiconv in editable mode with the "all" extra.
# NOTE(review): the installer URL hard-codes Linux-x86_64, and the
# `export PATH=...` repeats the value already set by ENV above.
# NOTE(review): the `bash -c "..."` command contains nested double quotes
# around /src/heudiconv[all]; the shell appears to concatenate the pieces
# into one argument -- confirm before editing.
RUN apt-get update -qq \
&& apt-get install -y -q --no-install-recommends \
bzip2 \
ca-certificates \
curl \
&& rm -rf /var/lib/apt/lists/* \
# Install dependencies.
&& export PATH="/opt/miniconda-py39_4.12.0/bin:$PATH" \
&& echo "Downloading Miniconda installer ..." \
&& conda_installer="/tmp/miniconda.sh" \
&& curl -fsSL -o "$conda_installer" https://repo.continuum.io/miniconda/Miniconda3-py39_4.12.0-Linux-x86_64.sh \
&& bash "$conda_installer" -b -p /opt/miniconda-py39_4.12.0 \
&& rm -f "$conda_installer" \
# Prefer packages in conda-forge
&& conda config --system --prepend channels conda-forge \
# Packages in lower-priority channels not considered if a package with the same
# name exists in a higher priority channel. Can dramatically speed up installations.
# Conda recommends this as a default
# https://docs.conda.io/projects/conda/en/latest/user-guide/tasks/manage-channels.html
&& conda config --set channel_priority strict \
&& conda config --system --set auto_update_conda false \
&& conda config --system --set show_channel_urls true \
# Enable `conda activate`
&& conda init bash \
&& conda install -y --name base \
"python=3.9" \
"traits>=4.6.0" \
"scipy" \
"numpy" \
"nomkl" \
"pandas" \
"gdcm" \
&& bash -c "source activate base \
&& python -m pip install --no-cache-dir --editable \
"/src/heudiconv[all]"" \
# Clean up
&& sync && conda clean --all --yes && sync \
&& rm -rf ~/.cache/pip/*
# Run heudiconv as the container's entry point (exec form); arguments given
# to `docker run` are passed to it.
ENTRYPOINT ["heudiconv"]

# Save specification to JSON.
RUN printf '{ \
"pkg_manager": "apt", \
"existing_users": [ \
"root" \
], \
"instructions": [ \
{ \
"name": "from_", \
"kwds": { \
"base_image": "neurodebian:bookworm" \
} \
}, \
{ \
"name": "env", \
"kwds": { \
"PATH": "/opt/dcm2niix-v1.0.20240202/bin:$PATH" \
} \
}, \
{ \
"name": "run", \
"kwds": { \
"command": "apt-get update -qq\\napt-get install -y -q --no-install-recommends \\\\\\n ca-certificates \\\\\\n cmake \\\\\\n g++ \\\\\\n gcc \\\\\\n git \\\\\\n make \\\\\\n pigz \\\\\\n zlib1g-dev\\nrm -rf /var/lib/apt/lists/*\\ngit clone https://github.com/rordenlab/dcm2niix /tmp/dcm2niix\\ncd /tmp/dcm2niix\\ngit fetch --tags\\ngit checkout v1.0.20240202\\nmkdir /tmp/dcm2niix/build\\ncd /tmp/dcm2niix/build\\ncmake -DZLIB_IMPLEMENTATION=Cloudflare -DUSE_JPEGLS=ON -DUSE_OPENJPEG=ON -DCMAKE_INSTALL_PREFIX:PATH=/opt/dcm2niix-v1.0.20240202 ..\\nmake -j1\\nmake install\\nrm -rf /tmp/dcm2niix" \
} \
}, \
{ \
"name": "install", \
"kwds": { \
"pkgs": [ \
"git", \
"gcc", \
"pigz", \
"liblzma-dev", \
"libc-dev", \
"git-annex-standalone", \
"netbase" \
], \
"opts": null \
} \
}, \
{ \
"name": "run", \
"kwds": { \
"command": "apt-get update -qq \\\\\\n && apt-get install -y -q --no-install-recommends \\\\\\n gcc \\\\\\n git \\\\\\n git-annex-standalone \\\\\\n libc-dev \\\\\\n liblzma-dev \\\\\\n netbase \\\\\\n pigz \\\\\\n && rm -rf /var/lib/apt/lists/*" \
} \
}, \
{ \
"name": "copy", \
"kwds": { \
"source": [ \
".", \
"/src/heudiconv" \
], \
"destination": "/src/heudiconv" \
} \
}, \
{ \
"name": "env", \
"kwds": { \
"CONDA_DIR": "/opt/miniconda-py39_4.12.0", \
"PATH": "/opt/miniconda-py39_4.12.0/bin:$PATH" \
} \
}, \
{ \
"name": "run", \
"kwds": { \
"command": "apt-get update -qq\\napt-get install -y -q --no-install-recommends \\\\\\n bzip2 \\\\\\n ca-certificates \\\\\\n curl\\nrm -rf /var/lib/apt/lists/*\\n# Install dependencies.\\nexport PATH=\\"/opt/miniconda-py39_4.12.0/bin:$PATH\\"\\necho \\"Downloading Miniconda installer ...\\"\\nconda_installer=\\"/tmp/miniconda.sh\\"\\ncurl -fsSL -o \\"$conda_installer\\" https://repo.continuum.io/miniconda/Miniconda3-py39_4.12.0-Linux-x86_64.sh\\nbash \\"$conda_installer\\" -b -p /opt/miniconda-py39_4.12.0\\nrm -f \\"$conda_installer\\"\\n# Prefer packages in conda-forge\\nconda config --system --prepend channels conda-forge\\n# Packages in lower-priority channels not considered if a package with the same\\n# name exists in a higher priority channel. Can dramatically speed up installations.\\n# Conda recommends this as a default\\n# https://docs.conda.io/projects/conda/en/latest/user-guide/tasks/manage-channels.html\\nconda config --set channel_priority strict\\nconda config --system --set auto_update_conda false\\nconda config --system --set show_channel_urls true\\n# Enable `conda activate`\\nconda init bash\\nconda install -y --name base \\\\\\n \\"python=3.9\\" \\\\\\n \\"traits>=4.6.0\\" \\\\\\n \\"scipy\\" \\\\\\n \\"numpy\\" \\\\\\n \\"nomkl\\" \\\\\\n \\"pandas\\" \\\\\\n \\"gdcm\\"\\nbash -c \\"source activate base\\n python -m pip install --no-cache-dir --editable \\\\\\n \\"/src/heudiconv[all]\\"\\"\\n# Clean up\\nsync && conda clean --all --yes && sync\\nrm -rf ~/.cache/pip/*" \
} \
}, \
{ \
"name": "entrypoint", \
"kwds": { \
"args": [ \
"heudiconv" \
] \
} \
} \
] \
}' > /.reproenv.json
# End of saving the specification to JSON.
34 changes: 34 additions & 0 deletions Dockerfile.alpine
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# ---- Build stage -----------------------------------------------------------
# Compiles dcm2niix and installs the Python packages that are later copied
# into the runtime stage. Nothing from this stage reaches the final image
# except what the COPY --from=builder lines below pick up.
FROM python:3.10-alpine AS builder

# Build toolchain and helpers. --no-cache fetches the package index on the fly
# and leaves nothing behind in /var/cache/apk.
RUN apk add --no-cache \
        bash \
        cmake \
        curl \
        g++ \
        gcc \
        git \
        libc-dev \
        make \
        util-linux-dev

# Installed here, where a compiler toolchain is available; they reach the
# runtime stage through the site-packages COPY below.
RUN pip install --no-cache-dir pylibjpeg-libjpeg traits==6.3.2

# dcm2niix tag to build; override with --build-arg DCM2NIIX_VERSION=...
ARG DCM2NIIX_VERSION=v1.0.20240202

# Clone, check out the requested tag, build and install under /usr, then
# remove the source tree in the same layer.
RUN git clone https://github.com/rordenlab/dcm2niix /tmp/dcm2niix \
    && cd /tmp/dcm2niix \
    && git fetch --tags \
    && git checkout "$DCM2NIIX_VERSION" \
    && mkdir /tmp/dcm2niix/build \
    && cd /tmp/dcm2niix/build \
    && cmake -DZLIB_IMPLEMENTATION=Cloudflare -DUSE_JPEGLS=ON -DUSE_OPENJPEG=ON -DCMAKE_INSTALL_PREFIX:PATH=/usr/ .. \
    && make -j1 \
    && make install \
    && rm -rf /tmp/dcm2niix

# ---- Runtime stage ---------------------------------------------------------
FROM python:3.10-alpine

# Python packages and the dcm2niix binary produced by the build stage.
COPY --from=builder /usr/local/lib/python3.10/site-packages /usr/local/lib/python3.10/site-packages
COPY --from=builder /usr/bin/dcm2niix /usr/bin/dcm2niix

# Runtime tools. A separate `apk update` is unnecessary with --no-cache and
# would only persist the downloaded index inside the layer.
# NOTE(review): gcompat is presumably needed for glibc-linked binaries or
# wheels -- confirm which component requires it.
RUN apk add --no-cache \
        gcompat \
        git \
        git-annex \
        pigz

# Installs heudiconv from the package index, not from the build context.
# NOTE(review): the version is unpinned, so rebuilds may pick up a newer release.
RUN pip install --no-cache-dir heudiconv

# Exec form: heudiconv is the container's main process and receives the
# arguments passed to `docker run`.
ENTRYPOINT ["heudiconv"]
4 changes: 4 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@
:target: https://repology.org/project/python:heudiconv/versions
:alt: PyPI

.. image:: https://img.shields.io/badge/RRID-SCR__017427-blue
:target: https://identifiers.org/RRID:SCR_017427
:alt: RRID

About
-----

Expand Down
6 changes: 3 additions & 3 deletions heudiconv/external/dlad.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,12 +156,12 @@ def add_to_datalad(

# Provide metadata for sensitive information
sensitive_patterns = [
"sourcedata",
"sourcedata/**",
"*_scans.tsv", # top level
"*/*_scans.tsv", # within subj
"*/*/*_scans.tsv", # within sess/subj
"*/anat", # within subj
"*/*/anat", # within ses/subj
"*/anat/*", # within subj
"*/*/anat/*", # within ses/subj
]
for sp in sensitive_patterns:
mark_sensitive(ds, sp, annexed_files)
Expand Down
25 changes: 22 additions & 3 deletions heudiconv/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import os.path as op
import re
import shutil
import sys
from types import ModuleType
from typing import Optional

Expand All @@ -22,7 +23,18 @@

_VCS_REGEX = r"%s\.(?:git|gitattributes|svn|bzr|hg)(?:%s|$)" % (op.sep, op.sep)

_UNPACK_FORMATS = tuple(sum((x[1] for x in shutil.get_unpack_formats()), []))

def _get_unpack_formats() -> dict[str, bool]:
"""For each extension return if it is a tar"""
out = {}
for _, exts, d in shutil.get_unpack_formats():
for e in exts:
out[e] = bool(re.search(r"\btar\b", d.lower()))
return out


_UNPACK_FORMATS = _get_unpack_formats()
_TAR_UNPACK_FORMATS = tuple(k for k, is_tar in _UNPACK_FORMATS.items() if is_tar)


@docstring_parameter(_VCS_REGEX)
Expand Down Expand Up @@ -114,7 +126,7 @@ def get_extracted_dicoms(fl: Iterable[str]) -> ItemsView[Optional[str], list[str

# needs sorting to keep the generated "session" label deterministic
for _, t in enumerate(sorted(fl)):
if not t.endswith(_UNPACK_FORMATS):
if not t.endswith(tuple(_UNPACK_FORMATS)):
sessions[None].append(t)
continue

Expand All @@ -127,7 +139,14 @@ def get_extracted_dicoms(fl: Iterable[str]) -> ItemsView[Optional[str], list[str

# check content and sanitize permission bits before extraction
os.chmod(tmpdir, mode=0o700)
shutil.unpack_archive(t, extract_dir=tmpdir)
# For tar archives only: starting with Python 3.12 an extraction `filter`
# should be passed explicitly (3.12 warns when it is omitted and 3.14 changes
# the default) to control how archive member names are safeguarded.
kws: dict[str, str] = {}
if sys.version_info >= (3, 12) and t.endswith(_TAR_UNPACK_FORMATS):
# Allow users to override the filter via an environment variable if desired,
# see e.g. https://docs.python.org/3.12/library/tarfile.html#extraction-filters
kws["filter"] = os.environ.get("HEUDICONV_TAR_FILTER", "tar")
shutil.unpack_archive(t, extract_dir=tmpdir, **kws) # type: ignore[arg-type]

archive_content = list(find_files(regex=".*", topdir=tmpdir))

Expand Down
13 changes: 13 additions & 0 deletions heudiconv/tests/test_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ def test_conversion(
heuristic,
anon_cmd,
template="sourcedata/sub-{subject}/*/*/*.tgz",
xargs=["--datalad"],
)
runner(args) # run conversion

Expand Down Expand Up @@ -96,6 +97,18 @@ def test_conversion(
for key in keys:
assert orig[key] == conv[key]

# validate sensitive marking
from datalad.api import Dataset

ds = Dataset(outdir)
all_meta = dict(ds.repo.get_metadata("."))
target_rec = {"distribution-restrictions": ["sensitive"]}
for pth, meta in all_meta.items():
if "anat" in pth or "scans.tsv" in pth:
assert meta == target_rec
else:
assert meta == {}


@pytest.mark.skipif(not have_datalad, reason="no datalad")
def test_multiecho(
Expand Down
15 changes: 12 additions & 3 deletions utils/gen-docker-image.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,20 @@ VER=$(grep -Po '(?<=^__version__ = ).*' $thisd/../heudiconv/info.py | sed 's/"//

image="kaczmarj/neurodocker:0.9.1"

docker run --rm $image generate docker \
--base-image neurodebian:bullseye \
if hash podman; then
OCI_BINARY=podman
elif hash docker; then
OCI_BINARY=docker
else
echo "ERROR: no podman or docker found" >&2
exit 1
fi

${OCI_BINARY:-docker} run --rm $image generate docker \
--base-image neurodebian:bookworm \
--pkg-manager apt \
--dcm2niix \
version=v1.0.20220720 \
version=v1.0.20240202 \
method=source \
cmake_opts="-DZLIB_IMPLEMENTATION=Cloudflare -DUSE_JPEGLS=ON -DUSE_OPENJPEG=ON" \
--install \
Expand Down

0 comments on commit bb7c286

Please sign in to comment.