diff --git a/.all-contributorsrc b/.all-contributorsrc index 54a6a36e65..20c0996d58 100644 --- a/.all-contributorsrc +++ b/.all-contributorsrc @@ -609,6 +609,15 @@ "bug", "doc" ] + }, + { + "login": "mattwestby", + "name": "mattwestby", + "avatar_url": "https://avatars.githubusercontent.com/u/91054185?v=4", + "profile": "https://github.com/mattwestby", + "contributions": [ + "bug" + ] } ], "contributorsSortAlphabetically": true, diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index b2ac959fe6..52eb8aa1f3 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -6,8 +6,7 @@ RUN apt-get update \ && export DEBIAN_FRONTEND=noninteractive \ && apt-get -y install --no-install-recommends \ git \ - libssl-dev \ - python3-sphinx + libssl-dev # Install Azure-CLI RUN apt-get update \ @@ -22,7 +21,7 @@ RUN mkdir -p /etc/apt/keyrings \ && chmod go+r /etc/apt/keyrings/microsoft.gpg # Set package versions -ARG AZURE_CLI_VERSION="2.59.0" +ARG AZURE_CLI_VERSION="2.64.0" ARG DISTRIBUTION # Add the Azure CLI repository @@ -52,10 +51,6 @@ RUN groupadd --gid $USER_GID $USERNAME \ # Set the default user USER $USERNAME -# Install Sphinx dependencies -COPY ./docs/requirements.txt /build/requirements.txt -RUN pip3 install -r /build/requirements.txt - # Set PATH for pulumi - pulumi installed as feature to work round installing as root ENV PATH=$PATH:/home/${USERNAME}/.pulumi/bin diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index f8e49feed9..6055634d9f 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -10,7 +10,6 @@ }, "extensions": [ "github.vscode-pull-request-github", - "ms-vscode.azure-account", "ms-python.python", "christian-kohler.path-intellisense" ], diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 732ed0c6b7..6afa43a77b 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -7,3 +7,15 @@ updates: directory: "/" # The exact logic is unclear, but it recursively searches at least .github/workflows/ schedule: interval: "weekly" + # Python package update PRs + - package-ecosystem: pip # This will update 'pyproject.toml' + directory: "/" + ignore: + - dependency-name: "psycopg" # 3.1.19 is the latest version to support on older MacOS versions + groups: + production-dependencies: + dependency-type: "production" + development-dependencies: + dependency-type: "development" + schedule: + interval: weekly diff --git a/.github/scripts/update_python_dependencies.sh b/.github/scripts/update_python_dependencies.sh deleted file mode 100755 index b7134ea54d..0000000000 --- a/.github/scripts/update_python_dependencies.sh +++ /dev/null @@ -1,23 +0,0 @@ -#! /usr/bin/env sh -set -e - -# Check for required arguments -if [ "$#" -ne 2 ]; then - echo "Usage: update_python_dependencies [environment_name] [target]" - exit 1 -fi -ENV_NAME=$1 -TARGET=$2 - -# Check for pip-compile -if ! 
command -v pip-compile > /dev/null; then - echo "pip-compile could not be found" - exit 1 -fi - -# Run pip-compile -if [ "$ENV_NAME" = "default" ]; then - pip-compile -U pyproject.toml -c requirements-constraints.txt -o "$TARGET" -else - hatch env show --json | jq -r ".${ENV_NAME}.dependencies | .[]" | pip-compile - -U -c requirements-constraints.txt -o "$TARGET" -fi diff --git a/.github/workflows/build_documentation.yaml b/.github/workflows/build_documentation.yaml index 8243cfdc97..5350075540 100644 --- a/.github/workflows/build_documentation.yaml +++ b/.github/workflows/build_documentation.yaml @@ -46,7 +46,7 @@ jobs: run: hatch run docs:build - name: Link Checker - uses: lycheeverse/lychee-action@v1.10.0 + uses: lycheeverse/lychee-action@v2.0.2 with: args: --config='./.lychee.toml' --no-progress './docs/build/html/**/*.html' fail: true # fail on broken links diff --git a/.github/workflows/dependabot_amend.yaml b/.github/workflows/dependabot_amend.yaml new file mode 100644 index 0000000000..4064ab08ba --- /dev/null +++ b/.github/workflows/dependabot_amend.yaml @@ -0,0 +1,46 @@ +--- +name: Amend Dependabot PRs + +on: # yamllint disable-line rule:truthy + push: + branches: + - dependabot/pip/** + pull_request: + branches: + - dependabot/pip/** + workflow_dispatch: # allow this workflow to be manually triggered + +# checkout needs 'contents:read' +# pull request needs 'pull-requests:write' and 'contents:write' +permissions: + contents: write + pull-requests: write + +jobs: + amend_dependabot_prs: + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Install hatch + run: pip install hatch + + - name: Update hatch requirements + run: | + rm .hatch/requirements*.txt + hatch run true + hatch -e docs run true + hatch -e lint run true + hatch -e test run true + + - name: Commit changes + uses: stefanzweifel/git-auto-commit-action@v5.0.1 + with: + commit_message: "[dependabot skip] :wrench: Update Python requirements files" + branch: ${{ github.head_ref }} diff --git a/.github/workflows/test_code.yaml b/.github/workflows/test_code.yaml index 52bce85ef6..0119816136 100644 --- a/.github/workflows/test_code.yaml +++ b/.github/workflows/test_code.yaml @@ -55,7 +55,7 @@ jobs: shell: bash run: npm install -g markdown-link-check - name: Link Checker - uses: lycheeverse/lychee-action@v1.10.0 + uses: lycheeverse/lychee-action@v2.0.2 with: args: --config='./.lychee.toml' --no-progress --offline '**/*.md' --exclude-path './docs' fail: true # fail on broken links diff --git a/.github/workflows/update_docker_versions.yaml b/.github/workflows/update_docker_versions.yaml index 6c26183892..7dcdb9d155 100644 --- a/.github/workflows/update_docker_versions.yaml +++ b/.github/workflows/update_docker_versions.yaml @@ -40,7 +40,7 @@ jobs: - name: Create pull request if: ${{ ! 
env.ACT }} id: pull-request - uses: peter-evans/create-pull-request@v6.1.0 + uses: peter-evans/create-pull-request@v7.0.5 with: author: ${{ github.actor }} <${{ github.actor }}@users.noreply.github.com> base: develop diff --git a/.github/workflows/update_python_dependencies.yaml b/.github/workflows/update_python_dependencies.yaml deleted file mode 100644 index 7342b6a309..0000000000 --- a/.github/workflows/update_python_dependencies.yaml +++ /dev/null @@ -1,67 +0,0 @@ ---- -name: Update Python dependencies - -# Run workflow on pushes to matching branches -on: # yamllint disable-line rule:truthy - schedule: - - cron: "0 3 * * 1" # run at 3:00 every Monday - workflow_dispatch: # allow this workflow to be manually triggered - - -jobs: - update_python_dependencies: - runs-on: ubuntu-latest - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: 3.12 - - - name: Install dependencies - run: pip install hatch pip-tools - - - name: Update 'default' dependencies - run: .github/scripts/update_python_dependencies.sh default requirements.txt - - - name: Update 'docs' dependencies - run: .github/scripts/update_python_dependencies.sh docs docs/requirements.txt - - - name: Check for changes - shell: bash - run: git --no-pager diff -- . - - - name: Get current date - id: date - run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT - - - name: Create pull request - if: ${{ ! env.ACT }} - id: pull-request - uses: peter-evans/create-pull-request@v6.1.0 - with: - author: ${{ github.actor }} <${{ github.actor }}@users.noreply.github.com> - base: develop - body: | - :warning: In order for CI to run on this PR it needs to be manually closed and re-opened :warning: - - ### :arrow_heading_up: Summary - - Update Python dependencies from ${{ github.sha }} on ${{ steps.date.outputs.date }} - - ### :closed_umbrella: Related issues - None - - ### :microscope: Tests - Package versions only - branch: python-dependencies - commit-message: ":arrow_up: Update Python dependencies" - committer: GitHub Actions - delete-branch: true - draft: false - labels: | - affected: developers - severity: minor - type: enhancement - title: ":arrow_up: Update Python dependencies" diff --git a/.gitignore b/.gitignore index 15607a9be1..889c80a9b4 100644 --- a/.gitignore +++ b/.gitignore @@ -15,6 +15,7 @@ environment_configs/package_lists/dependency-cache.json # Python build caches __pycache__/ .venv/ +dist/ # Development tools .vscode @@ -50,3 +51,5 @@ expanded.yaml # ruff cache .ruff_cache + +**/venv diff --git a/docs/requirements.txt b/.hatch/requirements-docs.txt similarity index 69% rename from docs/requirements.txt rename to .hatch/requirements-docs.txt index 0db8257f18..3f9d070931 100644 --- a/docs/requirements.txt +++ b/.hatch/requirements-docs.txt @@ -1,9 +1,13 @@ # -# This file is autogenerated by pip-compile with Python 3.12 -# by the following command: +# This file is autogenerated by hatch-pip-compile with Python 3.12 # -# pip-compile --constraint=requirements-constraints.txt --output-file=docs/requirements.txt - +# - emoji==2.14.0 +# - myst-parser==4.0.0 +# - pydata-sphinx-theme==0.15.4 +# - sphinx-togglebutton==0.3.2 +# - sphinx==8.1.3 # + accessible-pygments==0.0.5 # via pydata-sphinx-theme alabaster==1.0.0 @@ -14,9 +18,9 @@ babel==2.16.0 # sphinx beautifulsoup4==4.12.3 # via pydata-sphinx-theme -certifi==2024.7.4 +certifi==2024.8.30 # via requests -charset-normalizer==3.3.2 +charset-normalizer==3.4.0 # via requests docutils==0.21.2 # via 
@@ -24,12 +28,10 @@ docutils==0.21.2 # pydata-sphinx-theme # sphinx # sphinx-togglebutton -emoji==2.12.1 - # via -r - -idna==3.7 - # via - # -c requirements-constraints.txt - # requests +emoji==2.14.0 + # via hatch.envs.docs +idna==3.10 + # via requests imagesize==1.4.1 # via sphinx jinja2==3.1.4 @@ -40,20 +42,20 @@ markdown-it-py==3.0.0 # via # mdit-py-plugins # myst-parser -markupsafe==2.1.5 +markupsafe==3.0.2 # via jinja2 -mdit-py-plugins==0.4.1 +mdit-py-plugins==0.4.2 # via myst-parser mdurl==0.1.2 # via markdown-it-py myst-parser==4.0.0 - # via -r - + # via hatch.envs.docs packaging==24.1 # via # pydata-sphinx-theme # sphinx pydata-sphinx-theme==0.15.4 - # via -r - + # via hatch.envs.docs pygments==2.18.0 # via # accessible-pygments @@ -62,21 +64,19 @@ pygments==2.18.0 pyyaml==6.0.2 # via myst-parser requests==2.32.3 - # via - # -c requirements-constraints.txt - # sphinx + # via sphinx snowballstemmer==2.2.0 # via sphinx -soupsieve==2.5 +soupsieve==2.6 # via beautifulsoup4 -sphinx==8.0.2 +sphinx==8.1.3 # via - # -r - + # hatch.envs.docs # myst-parser # pydata-sphinx-theme # sphinx-togglebutton sphinx-togglebutton==0.3.2 - # via -r - + # via hatch.envs.docs sphinxcontrib-applehelp==2.0.0 # via sphinx sphinxcontrib-devhelp==2.0.0 @@ -90,13 +90,9 @@ sphinxcontrib-qthelp==2.0.0 sphinxcontrib-serializinghtml==2.0.0 # via sphinx typing-extensions==4.12.2 - # via - # emoji - # pydata-sphinx-theme -urllib3==2.2.2 - # via - # -c requirements-constraints.txt - # requests + # via pydata-sphinx-theme +urllib3==2.2.3 + # via requests wheel==0.44.0 # via sphinx-togglebutton diff --git a/.hatch/requirements-lint.txt b/.hatch/requirements-lint.txt new file mode 100644 index 0000000000..1e636503c1 --- /dev/null +++ b/.hatch/requirements-lint.txt @@ -0,0 +1,282 @@ +# +# This file is autogenerated by hatch-pip-compile with Python 3.12 +# +# - ansible-dev-tools==24.9.0 +# - ansible==10.5.0 +# - black==24.10.0 +# - mypy==1.11.2 +# - pandas-stubs==2.2.3.241009 +# - pydantic==2.9.2 +# - ruff==0.6.9 +# - types-appdirs==1.4.3.5 +# - types-chevron==0.14.2.20240310 +# - types-pytz==2024.2.0.20241003 +# - types-pyyaml==6.0.12.20240917 +# - types-requests==2.32.0.20240914 +# + +annotated-types==0.7.0 + # via pydantic +ansible==10.5.0 + # via hatch.envs.lint +ansible-builder==3.1.0 + # via + # ansible-dev-environment + # ansible-dev-tools + # ansible-navigator +ansible-compat==24.9.1 + # via + # ansible-lint + # molecule + # pytest-ansible +ansible-core==2.17.5 + # via + # ansible + # ansible-compat + # ansible-lint + # molecule + # pytest-ansible +ansible-creator==24.10.1 + # via ansible-dev-tools +ansible-dev-environment==24.9.0 + # via ansible-dev-tools +ansible-dev-tools==24.9.0 + # via hatch.envs.lint +ansible-lint==24.9.2 + # via + # ansible-dev-tools + # ansible-navigator +ansible-navigator==24.9.0 + # via ansible-dev-tools +ansible-runner==2.4.0 + # via ansible-navigator +ansible-sign==0.1.1 + # via ansible-dev-tools +attrs==24.2.0 + # via + # jsonschema + # referencing +bindep==2.11.0 + # via ansible-builder +black==24.10.0 + # via + # hatch.envs.lint + # ansible-lint +bracex==2.5.post1 + # via wcmatch +cachetools==5.5.0 + # via tox +cffi==1.17.1 + # via + # cryptography + # onigurumacffi +chardet==5.2.0 + # via tox +click==8.1.7 + # via + # black + # click-help-colors + # molecule +click-help-colors==0.9.4 + # via molecule +colorama==0.4.6 + # via tox +cryptography==43.0.3 + # via ansible-core +distlib==0.3.9 + # via + # ansible-sign + # virtualenv +distro==1.9.0 + # via bindep +docutils==0.21.2 + # via 
python-daemon +enrich==1.2.7 + # via molecule +execnet==2.1.1 + # via pytest-xdist +filelock==3.16.1 + # via + # ansible-lint + # tox + # virtualenv +importlib-metadata==8.5.0 + # via ansible-lint +iniconfig==2.0.0 + # via pytest +jinja2==3.1.4 + # via + # ansible-core + # ansible-creator + # ansible-navigator + # molecule +jsonschema==4.23.0 + # via + # ansible-builder + # ansible-compat + # ansible-lint + # ansible-navigator + # molecule +jsonschema-specifications==2024.10.1 + # via jsonschema +lockfile==0.12.2 + # via python-daemon +markdown-it-py==3.0.0 + # via rich +markupsafe==3.0.2 + # via jinja2 +mdurl==0.1.2 + # via markdown-it-py +molecule==24.9.0 + # via ansible-dev-tools +mypy==1.11.2 + # via hatch.envs.lint +mypy-extensions==1.0.0 + # via + # black + # mypy +numpy==2.1.2 + # via pandas-stubs +onigurumacffi==1.3.0 + # via ansible-navigator +packaging==24.1 + # via + # ansible-builder + # ansible-compat + # ansible-core + # ansible-lint + # ansible-runner + # bindep + # black + # molecule + # pyproject-api + # pytest + # pytest-ansible + # tox +pandas-stubs==2.2.3.241009 + # via hatch.envs.lint +parsley==1.3 + # via bindep +pathspec==0.12.1 + # via + # ansible-lint + # black + # yamllint +pbr==6.1.0 + # via bindep +pexpect==4.9.0 + # via ansible-runner +platformdirs==4.3.6 + # via + # black + # tox + # virtualenv +pluggy==1.5.0 + # via + # molecule + # pytest + # tox +ptyprocess==0.7.0 + # via pexpect +pycparser==2.22 + # via cffi +pydantic==2.9.2 + # via hatch.envs.lint +pydantic-core==2.23.4 + # via pydantic +pygments==2.18.0 + # via rich +pyproject-api==1.8.0 + # via tox +pytest==8.3.3 + # via + # pytest-ansible + # pytest-xdist + # tox-ansible +pytest-ansible==24.9.0 + # via + # ansible-dev-tools + # tox-ansible +pytest-xdist==3.6.1 + # via tox-ansible +python-daemon==3.0.1 + # via ansible-runner +python-gnupg==0.5.3 + # via ansible-sign +pyyaml==6.0.2 + # via + # ansible-builder + # ansible-compat + # ansible-core + # ansible-creator + # ansible-dev-environment + # ansible-lint + # ansible-navigator + # ansible-runner + # molecule + # tox-ansible + # yamllint +referencing==0.35.1 + # via + # jsonschema + # jsonschema-specifications +resolvelib==1.0.1 + # via ansible-core +rich==13.9.2 + # via + # ansible-lint + # enrich + # molecule +rpds-py==0.20.0 + # via + # jsonschema + # referencing +ruamel-yaml==0.18.6 + # via ansible-lint +ruamel-yaml-clib==0.2.12 + # via ruamel-yaml +ruff==0.6.9 + # via hatch.envs.lint +subprocess-tee==0.4.2 + # via + # ansible-compat + # ansible-dev-environment + # ansible-lint +tox==4.23.0 + # via tox-ansible +tox-ansible==24.9.0 + # via ansible-dev-tools +types-appdirs==1.4.3.5 + # via hatch.envs.lint +types-chevron==0.14.2.20240310 + # via hatch.envs.lint +types-pytz==2024.2.0.20241003 + # via + # hatch.envs.lint + # pandas-stubs +types-pyyaml==6.0.12.20240917 + # via hatch.envs.lint +types-requests==2.32.0.20240914 + # via hatch.envs.lint +typing-extensions==4.12.2 + # via + # mypy + # pydantic + # pydantic-core +tzdata==2024.2 + # via ansible-navigator +urllib3==2.2.3 + # via types-requests +virtualenv==20.27.0 + # via tox +wcmatch==10.0 + # via + # ansible-lint + # molecule +yamllint==1.35.1 + # via ansible-lint +zipp==3.20.2 + # via importlib-metadata + +# The following packages are considered to be unsafe in a requirements file: +# setuptools diff --git a/.hatch/requirements-test.txt b/.hatch/requirements-test.txt new file mode 100644 index 0000000000..a14759b25b --- /dev/null +++ b/.hatch/requirements-test.txt @@ -0,0 +1,442 @@ +# +# This 
file is autogenerated by hatch-pip-compile with Python 3.12 +# +# [constraints] .hatch/requirements.txt (SHA256: 12cb2eff6268d97a3d9d63d3ec5d670c6f13a571befda8d279cb7ce6ab9f5bb5) +# +# - appdirs==1.4.4 +# - azure-core==1.31.0 +# - azure-identity==1.19.0 +# - azure-keyvault-certificates==4.8.0 +# - azure-keyvault-keys==4.9.0 +# - azure-keyvault-secrets==4.8.0 +# - azure-mgmt-compute==33.0.0 +# - azure-mgmt-containerinstance==10.1.0 +# - azure-mgmt-dns==8.1.0 +# - azure-mgmt-keyvault==10.3.1 +# - azure-mgmt-msi==7.0.0 +# - azure-mgmt-rdbms==10.1.0 +# - azure-mgmt-resource==23.1.1 +# - azure-mgmt-storage==21.2.1 +# - azure-storage-blob==12.23.1 +# - azure-storage-file-datalake==12.17.0 +# - azure-storage-file-share==12.19.0 +# - chevron==0.14.0 +# - cryptography==43.0.1 +# - fqdn==1.5.1 +# - psycopg[binary]==3.1.19 +# - pulumi-azure-native==2.66.0 +# - pulumi-azuread==6.0.0 +# - pulumi-random==4.16.6 +# - pulumi==3.136.1 +# - pydantic==2.9.2 +# - pyjwt[crypto]==2.9.0 +# - pytz==2024.2 +# - pyyaml==6.0.2 +# - rich==13.9.2 +# - simple-acme-dns==3.1.0 +# - typer==0.12.5 +# - websocket-client==1.8.0 +# - coverage==7.6.3 +# - freezegun==1.5.1 +# - pytest-mock==3.14.0 +# - pytest==8.3.3 +# - requests-mock==1.12.1 +# + +acme==2.10.0 + # via + # -c .hatch/requirements.txt + # simple-acme-dns +annotated-types==0.7.0 + # via + # -c .hatch/requirements.txt + # pydantic +appdirs==1.4.4 + # via + # -c .hatch/requirements.txt + # hatch.envs.test +arpeggio==2.0.2 + # via + # -c .hatch/requirements.txt + # parver +attrs==24.2.0 + # via + # -c .hatch/requirements.txt + # parver +azure-common==1.1.28 + # via + # -c .hatch/requirements.txt + # azure-mgmt-compute + # azure-mgmt-containerinstance + # azure-mgmt-dns + # azure-mgmt-keyvault + # azure-mgmt-msi + # azure-mgmt-rdbms + # azure-mgmt-resource + # azure-mgmt-storage +azure-core==1.31.0 + # via + # -c .hatch/requirements.txt + # hatch.envs.test + # azure-identity + # azure-keyvault-certificates + # azure-keyvault-keys + # azure-keyvault-secrets + # azure-mgmt-core + # azure-storage-blob + # azure-storage-file-datalake + # azure-storage-file-share + # msrest +azure-identity==1.19.0 + # via + # -c .hatch/requirements.txt + # hatch.envs.test +azure-keyvault-certificates==4.8.0 + # via + # -c .hatch/requirements.txt + # hatch.envs.test +azure-keyvault-keys==4.9.0 + # via + # -c .hatch/requirements.txt + # hatch.envs.test +azure-keyvault-secrets==4.8.0 + # via + # -c .hatch/requirements.txt + # hatch.envs.test +azure-mgmt-compute==33.0.0 + # via + # -c .hatch/requirements.txt + # hatch.envs.test +azure-mgmt-containerinstance==10.1.0 + # via + # -c .hatch/requirements.txt + # hatch.envs.test +azure-mgmt-core==1.4.0 + # via + # -c .hatch/requirements.txt + # azure-mgmt-compute + # azure-mgmt-containerinstance + # azure-mgmt-dns + # azure-mgmt-keyvault + # azure-mgmt-msi + # azure-mgmt-rdbms + # azure-mgmt-resource + # azure-mgmt-storage +azure-mgmt-dns==8.1.0 + # via + # -c .hatch/requirements.txt + # hatch.envs.test +azure-mgmt-keyvault==10.3.1 + # via + # -c .hatch/requirements.txt + # hatch.envs.test +azure-mgmt-msi==7.0.0 + # via + # -c .hatch/requirements.txt + # hatch.envs.test +azure-mgmt-rdbms==10.1.0 + # via + # -c .hatch/requirements.txt + # hatch.envs.test +azure-mgmt-resource==23.1.1 + # via + # -c .hatch/requirements.txt + # hatch.envs.test +azure-mgmt-storage==21.2.1 + # via + # -c .hatch/requirements.txt + # hatch.envs.test +azure-storage-blob==12.23.1 + # via + # -c .hatch/requirements.txt + # hatch.envs.test + # azure-storage-file-datalake 
+azure-storage-file-datalake==12.17.0 + # via + # -c .hatch/requirements.txt + # hatch.envs.test +azure-storage-file-share==12.19.0 + # via + # -c .hatch/requirements.txt + # hatch.envs.test +certifi==2024.8.30 + # via + # -c .hatch/requirements.txt + # msrest + # requests +cffi==1.17.1 + # via + # -c .hatch/requirements.txt + # cryptography +charset-normalizer==3.4.0 + # via + # -c .hatch/requirements.txt + # requests +chevron==0.14.0 + # via + # -c .hatch/requirements.txt + # hatch.envs.test +click==8.1.7 + # via + # -c .hatch/requirements.txt + # typer +coverage==7.6.3 + # via hatch.envs.test +cryptography==43.0.1 + # via + # -c .hatch/requirements.txt + # hatch.envs.test + # acme + # azure-identity + # azure-keyvault-keys + # azure-storage-blob + # azure-storage-file-share + # josepy + # msal + # pyjwt + # pyopenssl +debugpy==1.8.7 + # via + # -c .hatch/requirements.txt + # pulumi +dill==0.3.9 + # via + # -c .hatch/requirements.txt + # pulumi +dnspython==2.6.1 + # via + # -c .hatch/requirements.txt + # simple-acme-dns +fqdn==1.5.1 + # via + # -c .hatch/requirements.txt + # hatch.envs.test +freezegun==1.5.1 + # via hatch.envs.test +grpcio==1.66.2 + # via + # -c .hatch/requirements.txt + # pulumi +idna==3.10 + # via + # -c .hatch/requirements.txt + # requests +iniconfig==2.0.0 + # via pytest +isodate==0.7.2 + # via + # -c .hatch/requirements.txt + # azure-keyvault-certificates + # azure-keyvault-keys + # azure-keyvault-secrets + # azure-mgmt-compute + # azure-mgmt-containerinstance + # azure-mgmt-dns + # azure-mgmt-keyvault + # azure-mgmt-resource + # azure-mgmt-storage + # azure-storage-blob + # azure-storage-file-datalake + # azure-storage-file-share + # msrest +josepy==1.14.0 + # via + # -c .hatch/requirements.txt + # acme +markdown-it-py==3.0.0 + # via + # -c .hatch/requirements.txt + # rich +mdurl==0.1.2 + # via + # -c .hatch/requirements.txt + # markdown-it-py +msal==1.31.0 + # via + # -c .hatch/requirements.txt + # azure-identity + # msal-extensions +msal-extensions==1.2.0 + # via + # -c .hatch/requirements.txt + # azure-identity +msrest==0.7.1 + # via + # -c .hatch/requirements.txt + # azure-mgmt-msi + # azure-mgmt-rdbms +oauthlib==3.2.2 + # via + # -c .hatch/requirements.txt + # requests-oauthlib +packaging==24.1 + # via pytest +parver==0.5 + # via + # -c .hatch/requirements.txt + # pulumi-azure-native + # pulumi-azuread + # pulumi-random +pluggy==1.5.0 + # via pytest +portalocker==2.10.1 + # via + # -c .hatch/requirements.txt + # msal-extensions +protobuf==4.25.5 + # via + # -c .hatch/requirements.txt + # pulumi +psycopg==3.1.19 + # via + # -c .hatch/requirements.txt + # hatch.envs.test +psycopg-binary==3.1.19 + # via + # -c .hatch/requirements.txt + # psycopg +pulumi==3.136.1 + # via + # -c .hatch/requirements.txt + # hatch.envs.test + # pulumi-azure-native + # pulumi-azuread + # pulumi-random +pulumi-azure-native==2.66.0 + # via + # -c .hatch/requirements.txt + # hatch.envs.test +pulumi-azuread==6.0.0 + # via + # -c .hatch/requirements.txt + # hatch.envs.test +pulumi-random==4.16.6 + # via + # -c .hatch/requirements.txt + # hatch.envs.test +pycparser==2.22 + # via + # -c .hatch/requirements.txt + # cffi +pydantic==2.9.2 + # via + # -c .hatch/requirements.txt + # hatch.envs.test +pydantic-core==2.23.4 + # via + # -c .hatch/requirements.txt + # pydantic +pygments==2.18.0 + # via + # -c .hatch/requirements.txt + # rich +pyjwt==2.9.0 + # via + # -c .hatch/requirements.txt + # hatch.envs.test + # msal +pyopenssl==24.2.1 + # via + # -c .hatch/requirements.txt + # acme + # josepy 
+pyrfc3339==1.1 + # via + # -c .hatch/requirements.txt + # acme +pytest==8.3.3 + # via + # hatch.envs.test + # pytest-mock +pytest-mock==3.14.0 + # via hatch.envs.test +python-dateutil==2.9.0.post0 + # via freezegun +pytz==2024.2 + # via + # -c .hatch/requirements.txt + # hatch.envs.test + # acme + # pyrfc3339 +pyyaml==6.0.2 + # via + # -c .hatch/requirements.txt + # hatch.envs.test + # pulumi +requests==2.32.3 + # via + # -c .hatch/requirements.txt + # acme + # azure-core + # msal + # msrest + # requests-mock + # requests-oauthlib +requests-mock==1.12.1 + # via hatch.envs.test +requests-oauthlib==2.0.0 + # via + # -c .hatch/requirements.txt + # msrest +rich==13.9.2 + # via + # -c .hatch/requirements.txt + # hatch.envs.test + # typer +semver==2.13.0 + # via + # -c .hatch/requirements.txt + # pulumi + # pulumi-azure-native + # pulumi-azuread + # pulumi-random +shellingham==1.5.4 + # via + # -c .hatch/requirements.txt + # typer +simple-acme-dns==3.1.0 + # via + # -c .hatch/requirements.txt + # hatch.envs.test +six==1.16.0 + # via + # -c .hatch/requirements.txt + # azure-core + # pulumi + # python-dateutil +typer==0.12.5 + # via + # -c .hatch/requirements.txt + # hatch.envs.test +typing-extensions==4.12.2 + # via + # -c .hatch/requirements.txt + # azure-core + # azure-identity + # azure-keyvault-certificates + # azure-keyvault-keys + # azure-keyvault-secrets + # azure-mgmt-compute + # azure-mgmt-keyvault + # azure-storage-blob + # azure-storage-file-datalake + # azure-storage-file-share + # psycopg + # pydantic + # pydantic-core + # typer +urllib3==2.2.3 + # via + # -c .hatch/requirements.txt + # requests +validators==0.28.3 + # via + # -c .hatch/requirements.txt + # simple-acme-dns +websocket-client==1.8.0 + # via + # -c .hatch/requirements.txt + # hatch.envs.test + +# The following packages are considered to be unsafe in a requirements file: +# setuptools diff --git a/requirements.txt b/.hatch/requirements.txt similarity index 56% rename from requirements.txt rename to .hatch/requirements.txt index 16dd58c348..eb2f71c0e8 100644 --- a/requirements.txt +++ b/.hatch/requirements.txt @@ -1,33 +1,64 @@ # -# This file is autogenerated by pip-compile with Python 3.12 -# by the following command: +# This file is autogenerated by hatch-pip-compile with Python 3.12 # -# pip-compile --constraint=requirements-constraints.txt --output-file=requirements.txt pyproject.toml +# - appdirs==1.4.4 +# - azure-core==1.31.0 +# - azure-identity==1.19.0 +# - azure-keyvault-certificates==4.8.0 +# - azure-keyvault-keys==4.9.0 +# - azure-keyvault-secrets==4.8.0 +# - azure-mgmt-compute==33.0.0 +# - azure-mgmt-containerinstance==10.1.0 +# - azure-mgmt-dns==8.1.0 +# - azure-mgmt-keyvault==10.3.1 +# - azure-mgmt-msi==7.0.0 +# - azure-mgmt-rdbms==10.1.0 +# - azure-mgmt-resource==23.1.1 +# - azure-mgmt-storage==21.2.1 +# - azure-storage-blob==12.23.1 +# - azure-storage-file-datalake==12.17.0 +# - azure-storage-file-share==12.19.0 +# - chevron==0.14.0 +# - cryptography==43.0.1 +# - fqdn==1.5.1 +# - psycopg[binary]==3.1.19 +# - pulumi-azure-native==2.66.0 +# - pulumi-azuread==6.0.0 +# - pulumi-random==4.16.6 +# - pulumi==3.136.1 +# - pydantic==2.9.2 +# - pyjwt[crypto]==2.9.0 +# - pytz==2024.2 +# - pyyaml==6.0.2 +# - rich==13.9.2 +# - simple-acme-dns==3.1.0 +# - typer==0.12.5 +# - websocket-client==1.8.0 # + acme==2.10.0 # via simple-acme-dns annotated-types==0.7.0 # via pydantic appdirs==1.4.4 - # via data-safe-haven (pyproject.toml) + # via hatch.envs.default arpeggio==2.0.2 # via parver attrs==24.2.0 # via parver 
azure-common==1.1.28 # via - # azure-mgmt-automation # azure-mgmt-compute # azure-mgmt-containerinstance # azure-mgmt-dns # azure-mgmt-keyvault # azure-mgmt-msi - # azure-mgmt-network # azure-mgmt-rdbms # azure-mgmt-resource # azure-mgmt-storage -azure-core==1.30.2 +azure-core==1.31.0 # via + # hatch.envs.default # azure-identity # azure-keyvault-certificates # azure-keyvault-keys @@ -36,98 +67,86 @@ azure-core==1.30.2 # azure-storage-blob # azure-storage-file-datalake # azure-storage-file-share - # data-safe-haven (pyproject.toml) # msrest -azure-identity==1.17.1 - # via - # -c requirements-constraints.txt - # data-safe-haven (pyproject.toml) +azure-identity==1.19.0 + # via hatch.envs.default azure-keyvault-certificates==4.8.0 - # via data-safe-haven (pyproject.toml) + # via hatch.envs.default azure-keyvault-keys==4.9.0 - # via data-safe-haven (pyproject.toml) + # via hatch.envs.default azure-keyvault-secrets==4.8.0 - # via data-safe-haven (pyproject.toml) -azure-mgmt-automation==1.0.0 - # via data-safe-haven (pyproject.toml) -azure-mgmt-compute==32.0.0 - # via data-safe-haven (pyproject.toml) + # via hatch.envs.default +azure-mgmt-compute==33.0.0 + # via hatch.envs.default azure-mgmt-containerinstance==10.1.0 - # via data-safe-haven (pyproject.toml) + # via hatch.envs.default azure-mgmt-core==1.4.0 # via - # azure-mgmt-automation # azure-mgmt-compute # azure-mgmt-containerinstance # azure-mgmt-dns # azure-mgmt-keyvault # azure-mgmt-msi - # azure-mgmt-network # azure-mgmt-rdbms # azure-mgmt-resource # azure-mgmt-storage azure-mgmt-dns==8.1.0 - # via data-safe-haven (pyproject.toml) + # via hatch.envs.default azure-mgmt-keyvault==10.3.1 - # via data-safe-haven (pyproject.toml) + # via hatch.envs.default azure-mgmt-msi==7.0.0 - # via data-safe-haven (pyproject.toml) -azure-mgmt-network==26.0.0 - # via data-safe-haven (pyproject.toml) + # via hatch.envs.default azure-mgmt-rdbms==10.1.0 - # via data-safe-haven (pyproject.toml) + # via hatch.envs.default azure-mgmt-resource==23.1.1 - # via data-safe-haven (pyproject.toml) + # via hatch.envs.default azure-mgmt-storage==21.2.1 - # via data-safe-haven (pyproject.toml) -azure-storage-blob==12.22.0 + # via hatch.envs.default +azure-storage-blob==12.23.1 # via + # hatch.envs.default # azure-storage-file-datalake - # data-safe-haven (pyproject.toml) -azure-storage-file-datalake==12.16.0 - # via data-safe-haven (pyproject.toml) -azure-storage-file-share==12.17.0 - # via data-safe-haven (pyproject.toml) -certifi==2024.7.4 +azure-storage-file-datalake==12.17.0 + # via hatch.envs.default +azure-storage-file-share==12.19.0 + # via hatch.envs.default +certifi==2024.8.30 # via # msrest # requests -cffi==1.17.0 +cffi==1.17.1 # via cryptography -charset-normalizer==3.3.2 +charset-normalizer==3.4.0 # via requests chevron==0.14.0 - # via data-safe-haven (pyproject.toml) + # via hatch.envs.default click==8.1.7 # via typer -cryptography==43.0.0 +cryptography==43.0.1 # via - # -c requirements-constraints.txt + # hatch.envs.default # acme # azure-identity # azure-keyvault-keys # azure-storage-blob # azure-storage-file-share - # data-safe-haven (pyproject.toml) # josepy # msal # pyjwt # pyopenssl -dill==0.3.8 +debugpy==1.8.7 + # via pulumi +dill==0.3.9 # via pulumi dnspython==2.6.1 - # via - # -c requirements-constraints.txt - # simple-acme-dns + # via simple-acme-dns fqdn==1.5.1 - # via data-safe-haven (pyproject.toml) -grpcio==1.60.2 + # via hatch.envs.default +grpcio==1.66.2 # via pulumi -idna==3.7 - # via - # -c requirements-constraints.txt - # requests 
-isodate==0.6.1 +idna==3.10 + # via requests +isodate==0.7.2 # via # azure-keyvault-certificates # azure-keyvault-keys @@ -136,7 +155,6 @@ isodate==0.6.1 # azure-mgmt-containerinstance # azure-mgmt-dns # azure-mgmt-keyvault - # azure-mgmt-network # azure-mgmt-resource # azure-mgmt-storage # azure-storage-blob @@ -149,16 +167,14 @@ markdown-it-py==3.0.0 # via rich mdurl==0.1.2 # via markdown-it-py -msal==1.30.0 +msal==1.31.0 # via # azure-identity - # data-safe-haven (pyproject.toml) # msal-extensions msal-extensions==1.2.0 # via azure-identity msrest==0.7.1 # via - # azure-mgmt-automation # azure-mgmt-msi # azure-mgmt-rdbms oauthlib==3.2.2 @@ -166,37 +182,39 @@ oauthlib==3.2.2 parver==0.5 # via # pulumi-azure-native + # pulumi-azuread # pulumi-random - # pulumi-tls portalocker==2.10.1 # via msal-extensions -protobuf==4.25.4 +protobuf==4.25.5 # via pulumi -psycopg==3.2.1 - # via data-safe-haven (pyproject.toml) -pulumi==3.128.0 +psycopg==3.1.19 + # via hatch.envs.default +psycopg-binary==3.1.19 + # via psycopg +pulumi==3.136.1 # via - # data-safe-haven (pyproject.toml) + # hatch.envs.default # pulumi-azure-native + # pulumi-azuread # pulumi-random - # pulumi-tls -pulumi-azure-native==2.55.0 - # via data-safe-haven (pyproject.toml) -pulumi-random==4.16.3 - # via data-safe-haven (pyproject.toml) -pulumi-tls==5.0.4 - # via data-safe-haven (pyproject.toml) +pulumi-azure-native==2.66.0 + # via hatch.envs.default +pulumi-azuread==6.0.0 + # via hatch.envs.default +pulumi-random==4.16.6 + # via hatch.envs.default pycparser==2.22 # via cffi -pydantic==2.8.2 - # via data-safe-haven (pyproject.toml) -pydantic-core==2.20.1 +pydantic==2.9.2 + # via hatch.envs.default +pydantic-core==2.23.4 # via pydantic pygments==2.18.0 # via rich -pyjwt[crypto]==2.9.0 +pyjwt==2.9.0 # via - # data-safe-haven (pyproject.toml) + # hatch.envs.default # msal pyopenssl==24.2.1 # via @@ -204,18 +222,17 @@ pyopenssl==24.2.1 # josepy pyrfc3339==1.1 # via acme -pytz==2024.1 +pytz==2024.2 # via + # hatch.envs.default # acme - # data-safe-haven (pyproject.toml) # pyrfc3339 pyyaml==6.0.2 # via - # data-safe-haven (pyproject.toml) + # hatch.envs.default # pulumi requests==2.32.3 # via - # -c requirements-constraints.txt # acme # azure-core # msal @@ -223,27 +240,26 @@ requests==2.32.3 # requests-oauthlib requests-oauthlib==2.0.0 # via msrest -rich==13.7.1 +rich==13.9.2 # via - # data-safe-haven (pyproject.toml) + # hatch.envs.default # typer semver==2.13.0 # via # pulumi # pulumi-azure-native + # pulumi-azuread # pulumi-random - # pulumi-tls shellingham==1.5.4 # via typer simple-acme-dns==3.1.0 - # via data-safe-haven (pyproject.toml) + # via hatch.envs.default six==1.16.0 # via # azure-core - # isodate # pulumi -typer==0.12.3 - # via data-safe-haven (pyproject.toml) +typer==0.12.5 + # via hatch.envs.default typing-extensions==4.12.2 # via # azure-core @@ -260,14 +276,12 @@ typing-extensions==4.12.2 # pydantic # pydantic-core # typer -urllib3==2.2.2 - # via - # -c requirements-constraints.txt - # requests +urllib3==2.2.3 + # via requests validators==0.28.3 # via simple-acme-dns websocket-client==1.8.0 - # via data-safe-haven (pyproject.toml) + # via hatch.envs.default # The following packages are considered to be unsafe in a requirements file: # setuptools diff --git a/.lychee.toml b/.lychee.toml index 7902f32ad0..2265ab5386 100644 --- a/.lychee.toml +++ b/.lychee.toml @@ -90,6 +90,7 @@ glob_ignore_case = false # Exclude URLs and mail addresses from checking (supports regex). 
# exclude = [ '.*\.github.com\.*' ] exclude = [ + 'code\.visualstudio\.com', # 403 'doi\.org', # 403 'entra.microsoft\.com', # Requires authentication (403) 'example\.org', # domain used for examples only diff --git a/.readthedocs.yaml b/.readthedocs.yaml index cc347e9fa2..145d005f83 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -7,10 +7,7 @@ version: 2 build: os: ubuntu-22.04 tools: - python: "3.11" - -sphinx: - configuration: docs/source/conf.py + python: "3.12" formats: - htmlzip @@ -18,4 +15,7 @@ formats: python: install: - - requirements: docs/requirements.txt + - requirements: .hatch/requirements-docs.txt + +sphinx: + configuration: docs/source/conf.py diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index de58af3841..14c1ab607c 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -8,27 +8,6 @@ It can truly only succeed with a interdisciplinary team working together. The point of these contributing guidelines are to help you participate as easily as possible. If you have any questions that aren't discussed below, please let us know by [opening an issue](#project-management-through-issues). -## Contents - -Been here before? -Already know what you're looking for in this guide? -Jump to the following sections: - -- [A DevOps development philosophy](#a-devops-development-philosophy) - - [Project workflow](#project-workflow) - - [Project meetings](#project-meetings) - - [Communications within the team and asking for help](#communications-within-the-team-and-asking-for-help) -- [Contributing through GitHub](#contributing-through-github) - - [Discussions vs Issues](#discussions-vs-issues) - - [Writing in markdown](#writing-in-markdown) - - [Project management through issues](#project-management-through-issues) - - [Issues as conversations](#issues-as-conversations) - - [Working in a private repository](#working-in-a-private-repository) - - [Who's involved in the project](#whos-involved-in-the-project) - - [Make a change with a pull request](#making-a-change-with-a-pull-request) - - [Make a change to the documentation](#making-a-change-to-the-documentation) - - [Adding new contributors](#adding-new-contributors) - ## A DevOps development philosophy For the Data Safe Haven project, we follow a DevOps development philosophy. @@ -55,7 +34,7 @@ The most pertinent features of the DevOps methodology for this project are: - **automation**: maximal automation is the primary goal - **quality**: full integration testing each time features are added -### Project workflow +## Project workflow Although we are not following an Agile workflow, we still think that the following features are important: @@ -75,16 +54,12 @@ Discussions around particular tasks should be conducted **when the work is being ### Communications within the team and asking for help -As this team is distributed, not working full-time on this project and often working asynchronously, we do not have any form of daily meeting or stand-up +As this team is distributed, not working full-time on this project and often working asynchronously, we do not have any form of daily meeting or stand-up. The best way to work around this absence is to **commit to sharing updates as regularly as possible**. Please see the section on [project management through issues](#project-management-through-issues) below on how to do this via GitHub. ## Contributing through GitHub -[git](https://git-scm.com) is a really useful tool for version control. [GitHub](https://github.com) sits on top of git and supports collaborative and distributed working. 
-We know that it can be daunting to start using `git` and `GitHub` if you haven't worked with them in the past, but the team are happy to help you figure out any of the jargon or confusing instructions you encounter! :heart: -In order to contribute via GitHub you'll need to set up a free account and sign in. Here are some [instructions](https://docs.github.com/en/get-started/signing-up-for-github/signing-up-for-a-new-github-account) to help you get going. - We use the [Gitflow Workflow](https://www.atlassian.com/git/tutorials/comparing-workflows/gitflow-workflow).

@@ -95,11 +70,19 @@ This means that: - checking out the `latest` branch, will give you the latest tagged release - the `develop` branch, which is the default branch of the repository, contains the latest cutting-edge code that has not yet made it into a release -- releases are made by branching from `develop` into a branch called `release-` - - deployment is tested from this release and any necessary integration changes are made on this branch - - the branch is then merged into `latest` (which is tagged) as the next release **and** into `develop` so that any fixes are included there - we prefer to use [merge commits](https://docs.github.com/en/repositories/configuring-branches-and-merges-in-your-repository/configuring-pull-request-merges/about-merge-methods-on-github) in order to avoid rewriting the git history +### Issues as conversations + +If you have an idea for a piece of work to complete, please **open an issue**. + +The name `issue` comes from a concept of catching errors (bugs :bug:) in software, but for this project they are simply our **tasks**. +If an issue is growing to encompass more than one task, consider breaking it into multiple issues. + +You can think of the issues as **conversations** about a particular topic. +`GitHub`'s tagline is **social coding** and the issues are inspired by social media conversations. +Alternatively (and this is encouraged) you can use the issue to keep track of where you're up to with the task and add information about next steps and barriers. + ### Discussions vs Issues **Discussions** are the best place for informal talk about the project @@ -122,114 +105,66 @@ Good examples of issues are When opening an issue, pick a suitable template (if possible) to make the process easier. -### Writing in Markdown - -GitHub has a helpful page on [getting started with writing and formatting on GitHub](https://docs.github.com/en/get-started/writing-on-github/getting-started-with-writing-and-formatting-on-github). - -Most of the writing that you'll do will be in [Markdown](https://docs.github.com/en/get-started/writing-on-github/getting-started-with-writing-and-formatting-on-github/basic-writing-and-formatting-syntax). -You can think of Markdown as a few little symbols around your text that will allow GitHub to render the text with a little bit of formatting. -For example you could write words as bold ( `**bold**` ), or in italics ( `*italics*` ), or as a [link](https://youtu.be/dQw4w9WgXcQ) ( `[link](https://youtu.be/dQw4w9WgXcQ)` ) to another webpage. - -`GitHub` issues render markdown really nicely. -The goal is to allow you to focus on the content rather than worry too much about how things are laid out! - ### Project management through issues -Please regularly check out the agreed upon tasks at the [issues list][https://github.com/alan-turing-institute/data-safe-haven/issues]. -Every issue should have labels assigned to it from the following scheme. -At least one label from each category ( `type` , `affected` and `severity` ) should be assigned to each issue - don't worry if you need to change these over time, they should reflect the current status of the issue. 
- -| Category | Labels | -| :------: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | -| type | | -| affected | | -| severity | | +Please regularly check out the agreed upon tasks at the [issues list](https://github.com/alan-turing-institute/data-safe-haven/issues). +Issues should be tagged with an appropriate [label](https://github.com/alan-turing-institute/data-safe-haven/issues/labels) by a member of the development team. +Each issue should be assigned to an appropriate [milestone](https://github.com/alan-turing-institute/data-safe-haven/milestones). -Other labels which may or may not be relevant are meta labels (for collecting related issues) and the "good first issue" label for signalling issues that new contributors might like to tackle. -If an issue is closed without being completed, one of the `closed` labels should be attached to it to explain why. - -| Category | Labels | -| :------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | -| meta | | -| other | | -| closed | | - -If you have an idea for a piece of work to complete, please **open an issue**. If you have been assigned an issue, please be ready to explain in the [project meeting](#project-meetings) what your progress has been. In a perfect world you'll have completed the task, documented everything you need to and we'll be able to **close** the issue (to mark it as complete). -### Issues as conversations - -The name `issue` comes from a concept of catching errors (bugs :bug:) in software, but for this project they are simply our **tasks**. -They should be concrete enough to be done in a week or so. -If an issue is growing to encompass more than one task, consider breaking it into multiple issues. - -You can think of the issues as **conversations** about a particular topic. -`GitHub`'s tagline is **social coding** and the issues are inspired by social media conversations. - -You can [mention a user](https://docs.github.com/en/get-started/writing-on-github/getting-started-with-writing-and-formatting-on-github/basic-writing-and-formatting-syntax#mentioning-people-and-teams) by putting `@` infront of their github id. -For example, `@KirstieJane` will send a notification to `Kirstie Whitaker` so she knows to visit the issue and (for example) reply to your question. - -Alternatively (and this is encouraged) you can use the issue to keep track of where you're up to with the task and add information about next steps and barriers. - -

- -

- -### Working in a private repository - -As one of the goals of this project is to build a secure infrastructure for data storage and analysis, our project will very likely include some code with security vulnerabilities! -Therefore we're keeping the repository private until we're confident that our work is secure. - -Please note that the plan is to make the contents of this repository openly available. -Please be considerate of the content you add and use professional and inclusive language at all times. - -As we're working in a private repository you may not be able to see the repository if you aren't signed in. -So if you see a 404 page and you're confident you have the correct url, go back to [github.com](https://github.com) to make sure that you're signed into your account. +## Contributing your changes ### Making a change with a pull request -To contribute to the codebase you'll need to submit a **pull request**. - -If you're updating the code or other documents in the repository, the following steps are a guide to help you contribute in a way that will be easy for everyone to review and accept with ease :sunglasses:. +To contribute to the codebase you'll need to: -#### 1. Make sure there is an issue for this that is clear about what work you're going to do +- [fork the repository](https://docs.github.com/en/get-started/quickstart/fork-a-repo) to your own GitHub profile +- make your changes [on a branch](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-and-deleting-branches-within-your-repository) +- submit a [pull request](https://docs.github.com/en/get-started/quickstart/github-flow) -This allows other members of the Data Safe Haven project team to confirm that you aren't overlapping with work that's currently underway and that everyone is on the same page with the goal of the work you're going to carry out. - -[This blog](https://www.igvita.com/2011/12/19/dont-push-your-pull-requests) is a nice explanation of why putting this work in up front is so useful to everyone involved. - -#### 2. Fork Data Safe Haven repository to your profile +### Making a change to the documentation -Follow [the instructions here](https://docs.github.com/en/get-started/quickstart/fork-a-repo) to fork the [Data Safe Haven repository](https://github.com/alan-turing-institute/data-safe-haven). +The docs, including for older releases, are available [here](https://data-safe-haven.readthedocs.io). +You should follow the same instructions as above to [make a change with a pull request](#making-a-change-with-a-pull-request) when editing the documentation. -This is now your own unique copy of the Data Safe Haven repository. Changes here won't affect anyone else's work, so it's a safe space to explore edits to the code or documentation! -Make sure to [keep your fork up to date](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/syncing-a-fork) with the upstream repository, otherwise you can end up with lots of dreaded [merge conflicts](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/addressing-merge-conflicts/about-merge-conflicts). +The documentation is built from Markdown files using [Sphinx](https://www.sphinx-doc.org/) and [MyST parser](https://myst-parser.readthedocs.io/). +To preview your changes, you can build the docs locally with `hatch`: -#### 3. Make the changes you've discussed +```console +> hatch run docs:build +``` -Try to keep the changes focused. 
If you submit a large amount of work in all in one go it will be much more work for whomever is reviewing your pull request. [Help them help you](https://media.giphy.com/media/uRb2p09vY8lEs/giphy.gif) :wink: -If you feel tempted to "branch out" then please make a [new branch](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-and-deleting-branches-within-your-repository) and a [new issue](https://github.com/alan-turing-institute/data-safe-haven/issues) to go with it. +- The generated documents will be placed under `build/html/`. +- To view the documents open `build/html/index.html` in your browser, for example: -#### 4. Submit a pull request +```console +> firefox build/html/index.html +``` -Once you submit a [pull request](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request), a member of the Safe Haven project team will review your changes to confirm that they can be merged into the codebase. +## Preparing a new release -A [review](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/reviewing-changes-in-pull-requests/about-pull-request-reviews) will probably consist of a few questions to help clarify the work you've done. Keep an eye on your github notifications and be prepared to join in that conversation. +Releases are made by branching from `develop` into a branch called `release-` -You can update your [fork](https://docs.github.com/en/get-started/quickstart/fork-a-repo) of the data safe haven [repository](https://github.com/alan-turing-institute/data-safe-haven) and the pull request will automatically update with those changes. **You don't need to submit a new pull request when you make a change in response to a review.** +- deployment is tested from this release and any necessary integration changes are made on this branch +- the branch is then merged into `latest` (which is tagged) as the next release **and** into `develop` so that any fixes are included there -GitHub has a [nice introduction](https://docs.github.com/en/get-started/quickstart/github-flow) to the pull request workflow, but please [get in touch](#get-in-touch) if you have any questions :balloon:. +The release can then be published to PyPI: -### Making a change to the documentation +- Build the tarball and wheel -The docs, including for older releases, are available [here](https://data-safe-haven.readthedocs.io). +```console +> hatch run build +``` -You should follow the same instructions as above to [make a change with a pull request](#making-a-change-with-a-pull-request) when editing the documentation. +- Upload to PyPI, providing your API token at the prompt -To preview your changes, you can build the docs locally. See [docs/README.md](docs/README.md). +```console +> hatch run publish --user __token__ +``` -### Who's involved in the project +## Who's involved in the project Take a look at the full list of contributors on our [README](README.md). @@ -245,7 +180,7 @@ To add new contributor to the README table, see the [all-contributors CLI docume You can get in touch with the development team at safehavendevs@turing.ac.uk. -## Thank you! +**Thank you!** You're awesome! 
:wave::smiley: diff --git a/README.md b/README.md index ea48d1bb19..622f76facd 100644 --- a/README.md +++ b/README.md @@ -1,31 +1,33 @@ ![Data Safe Haven cartoon by Scriberia for The Alan Turing Institute](docs/source/_static/scriberia_diagram.jpg) -# :eyes: What is the Turing Data Safe Haven? +# 👀 What is the Turing Data Safe Haven? The **Turing Data Safe Haven** is an open-source framework for creating secure environments to analyse sensitive data. It provides a set of scripts and templates that will allow you to deploy, administer and use your own secure environment. It was developed as part of the Alan Turing Institute's [Data Safe Havens in the Cloud](https://www.turing.ac.uk/research/research-projects/data-safe-havens-cloud) project. +[![PyPI - Version](https://img.shields.io/pypi/v/data-safe-haven)](https://pypi.org/project/data-safe-haven/) +[![PyPI - Downloads](https://img.shields.io/pypi/dm/data-safe-haven)](https://pypi.org/project/data-safe-haven/) +[![Latest version](https://img.shields.io/github/v/release/alan-turing-institute/data-safe-haven?style=flat&label=Latest&color=%234B78E6)](https://github.com/alan-turing-institute/data-safe-haven/releases) [![Documentation](https://readthedocs.org/projects/data-safe-haven/badge/?version=latest)](https://data-safe-haven.readthedocs.io/en/latest/?badge=latest) [![Lint code](https://github.com/alan-turing-institute/data-safe-haven/actions/workflows/lint_code.yaml/badge.svg)](https://github.com/alan-turing-institute/data-safe-haven/actions/workflows/lint_code.yaml) [![Test code](https://github.com/alan-turing-institute/data-safe-haven/actions/workflows/test_code.yaml/badge.svg)](https://github.com/alan-turing-institute/data-safe-haven/actions/workflows/test_code.yaml) -[![Latest version](https://img.shields.io/github/v/release/alan-turing-institute/data-safe-haven?style=flat&label=Latest&color=%234B78E6)](https://github.com/alan-turing-institute/data-safe-haven/releases) [![Slack](https://img.shields.io/badge/Join%20us!-yellow?style=flat&logo=slack&logoColor=white&labelColor=4A154B&label=Slack)](https://join.slack.com/t/turingdatasafehaven/signup) ![Licence](https://img.shields.io/github/license/alan-turing-institute/data-safe-haven) [![Citation](https://img.shields.io/badge/citation-cite%20this%20project-informational)](https://github.com/alan-turing-institute/data-safe-haven/blob/develop/CITATION.cff) -[![All Contributors](https://img.shields.io/badge/all_contributors-49-orange.svg?style=flat-square)](#contributors-) +[![All Contributors](https://img.shields.io/badge/all_contributors-50-orange.svg?style=flat-square)](#contributors-) -## :family: Community & support +## 🧑‍🧑‍🧒 Community & support - Visit the [Data Safe Haven website](https://data-safe-haven.readthedocs.io) for full documentation and useful links. -- Join our [Slack server](https://join.slack.com/t/turingdatasafehaven/shared_invite/zt-104oyd8wn-DyOufeaAQFiJDlG5dDGk~w) to ask questions, discuss features, and for general API chat. +- Join our [Slack workspace](https://join.slack.com/t/turingdatasafehaven/shared_invite/zt-104oyd8wn-DyOufeaAQFiJDlG5dDGk~w) to ask questions, discuss features, and for general API chat. - Open a [discussion on GitHub](https://github.com/alan-turing-institute/data-safe-haven/discussions) for general questions, feature suggestions, and help with our deployment scripts.
- Look through our [issues on GitHub](https://github.com/alan-turing-institute/data-safe-haven/issues) to see what we're working on and progress towards specific fixes. -- Subscribe to the [Data Safe Haven newsletter](https://tinyletter.com/turingdatasafehaven) for release announcements. +- Send us an [email](mailto:safehavendevs@turing.ac.uk). -## :open_hands: Contributing +## 👐 Contributing We are keen to transition our implementation from being a [Turing](https://www.turing.ac.uk/) project to being a community owned platform. We have worked together with the community to develop the policy, processes and design decisions for the Data Safe Haven. @@ -99,10 +101,13 @@ See our [Code of Conduct](CODE_OF_CONDUCT.md) and our [Contributor Guide](CONTRI harisood
harisood

📖 🐛 🤔 🔍 📋 📆 📣 💬 📢 🛡️ 📓 kevinxufs
kevinxufs

📖 🤔 🛡️ + mattwestby
mattwestby

๐Ÿ› miguelmorin
miguelmorin

💻 📖 🤔 ⚠️ oforrest
oforrest

📖 🤔 📆 📣 🖋 rwinstanley1
rwinstanley1

📖 🤔 📆 🛡️ vollmersj
vollmersj

📖 🐛 🤔 🖋 + + warwick26
warwick26

💻 🤔 @@ -113,7 +118,7 @@ See our [Code of Conduct](CODE_OF_CONDUCT.md) and our [Contributor Guide](CONTRI -## :cake: Releases +## 🍰 Releases If you're new to the project, why not check out our [latest release](https://github.com/alan-turing-institute/data-safe-haven/releases/latest)? @@ -124,12 +129,12 @@ Read our [versioning scheme](VERSIONING.md) for how we number and label releases When making a new release, open an issue on GitHub and choose the `Release checklist` template, which can be used to track the completion of security checks for the release. -## :mailbox_with_mail: Vulnerability disclosure +## 📬 Vulnerability disclosure We value those who take the time and effort to report security vulnerabilities. If you believe you have found a security vulnerability, please report it as outlined in our [Security and vulnerability disclosure policy](SECURITY.md). -## :bow: Acknowledgements +## 🙇 Acknowledgements We are grateful for the following support for this project: @@ -137,7 +142,7 @@ We are grateful for the following support for this project: - The UKRI Strategic Priorities Fund - AI for Science, Engineering, Health and Government programme ([EP/T001569/1](https://gow.epsrc.ukri.org/NGBOViewGrant.aspx?GrantRef=EP/T001569/1)), particularly the "Tools, Practices and Systems" theme within that grant. - Microsoft's generous [donation of Azure credits](https://www.microsoft.com/en-us/research/blog/microsoft-accelerates-data-science-at-the-alan-turing-institute-with-5m-in-cloud-computing-credits/) to the Alan Turing Institute. -## :warning: Disclaimer +## ⚠️ Disclaimer The Alan Turing Institute and its group companies ("we", "us", the "Turing") make no representations, warranties, or guarantees, express or implied, regarding the information contained in this repository, including but not limited to information about the use or deployment of the Data Safe Haven and/or related materials. We expressly exclude any implied warranties or representations whatsoever including without limitation regarding the use of the Data Safe Haven and related materials for any particular purpose.
diff --git a/SECURITY.md b/SECURITY.md index c045852320..db056c976b 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -7,8 +7,8 @@ All organisations using an earlier version in production should update to the la | Version | Supported | | --------------------------------------------------------------------------------------- | ------------------ | -| [5.0.0](https://github.com/alan-turing-institute/data-safe-haven/releases/tag/v5.0.0) | :white_check_mark: | -| < 5.0.0 | :x: | +| [5.0.1](https://github.com/alan-turing-institute/data-safe-haven/releases/tag/v5.0.1) | :white_check_mark: | +| < 5.0.1 | :x: | ## Reporting a Vulnerability diff --git a/data_safe_haven/commands/config.py b/data_safe_haven/commands/config.py index 218c735aa9..a774868516 100644 --- a/data_safe_haven/commands/config.py +++ b/data_safe_haven/commands/config.py @@ -1,21 +1,29 @@ """Command group and entrypoints for managing DSH configuration""" +from logging import Logger from pathlib import Path from typing import Annotated, Optional import typer from data_safe_haven import console -from data_safe_haven.config import ContextManager, DSHPulumiConfig, SHMConfig, SREConfig +from data_safe_haven.config import ( + ContextManager, + DSHPulumiConfig, + SHMConfig, + SREConfig, + sre_config_name, +) from data_safe_haven.exceptions import ( + DataSafeHavenAzureError, DataSafeHavenAzureStorageError, DataSafeHavenConfigError, DataSafeHavenError, - DataSafeHavenPulumiError, + DataSafeHavenTypeError, ) from data_safe_haven.external.api.azure_sdk import AzureSdk -from data_safe_haven.infrastructure import SREProjectManager from data_safe_haven.logging import get_logger +from data_safe_haven.serialisers import ContextBase config_command_group = typer.Typer() @@ -38,6 +46,7 @@ def show_shm( "or `dsh context switch` to select one." ) raise typer.Exit(1) from exc + try: config = SHMConfig.from_remote(context) except DataSafeHavenError as exc: @@ -45,7 +54,9 @@ def show_shm( "SHM must be deployed before its configuration can be displayed." ) raise typer.Exit(1) from exc + config_yaml = config.to_yaml() + if file: with open(file, "w") as outfile: outfile.write(config_yaml) @@ -66,7 +77,9 @@ def available() -> None: "or `dsh context switch` to select one." ) raise typer.Exit(1) from exc + azure_sdk = AzureSdk(context.subscription_name) + try: blobs = azure_sdk.list_blobs( container_name=context.storage_container_name, @@ -77,32 +90,17 @@ def available() -> None: except DataSafeHavenAzureStorageError as exc: logger.critical("Ensure SHM is deployed before attempting to use SRE configs.") raise typer.Exit(1) from exc + if not blobs: logger.info(f"No configurations found for context '{context.name}'.") raise typer.Exit(0) + + config_names = [blob.removeprefix("sre-").removesuffix(".yaml") for blob in blobs] pulumi_config = DSHPulumiConfig.from_remote(context) - sre_status = {} - for blob in blobs: - sre_config = SREConfig.from_remote_by_name( - context, blob.removeprefix("sre-").removesuffix(".yaml") - ) - stack = SREProjectManager( - context=context, - config=sre_config, - pulumi_config=pulumi_config, - create_project=True, - ) - try: - sre_status[sre_config.name] = ( - "No output values" not in stack.run_pulumi_command("stack output") - ) - except DataSafeHavenPulumiError as exc: - logger.error( - f"Failed to run Pulumi command querying stack outputs for SRE '{sre_config.name}'." 
- ) - raise typer.Exit(1) from exc + deployed = pulumi_config.project_names + headers = ["SRE Name", "Deployed"] - rows = [[name, "x" if deployed else ""] for name, deployed in sre_status.items()] + rows = [[name, "x" if name in deployed else ""] for name in config_names] console.print(f"Available SRE configurations for context '{context.name}':") console.tabulate(headers, rows) @@ -117,6 +115,7 @@ def show( ) -> None: """Print the SRE configuration for the selected SRE and Data Safe Haven context""" logger = get_logger() + try: context = ContextManager.from_file().assert_context() except DataSafeHavenConfigError as exc: @@ -125,17 +124,23 @@ def show( "or `dsh context switch` to select one." ) raise typer.Exit(1) from exc + try: sre_config = SREConfig.from_remote_by_name(context, name) except DataSafeHavenAzureStorageError as exc: logger.critical("Ensure SHM is deployed before attempting to use SRE configs.") raise typer.Exit(1) from exc - except DataSafeHavenError as exc: + except DataSafeHavenAzureError as exc: logger.critical( f"No configuration exists for an SRE named '{name}' for the selected context." ) raise typer.Exit(1) from exc + except DataSafeHavenTypeError as exc: + dump_remote_config(context, name, logger) + raise typer.Exit(1) from exc + config_yaml = sre_config.to_yaml() + if file: with open(file, "w") as outfile: outfile.write(config_yaml) @@ -160,6 +165,7 @@ def template( # Serialisation warnings are therefore suppressed to avoid misleading the users into # thinking there is a problem and contaminating the output. config_yaml = sre_config.to_yaml(warnings=False) + if file: with open(file, "w") as outfile: outfile.write(config_yaml) @@ -169,7 +175,13 @@ def template( @config_command_group.command() def upload( - file: Annotated[Path, typer.Argument(help="Path to configuration file")], + file: Annotated[Path, typer.Argument(help="Path to configuration file.")], + force: Annotated[ # noqa: FBT002 + bool, + typer.Option( + help="Skip validation and difference calculation of remote configuration." + ), + ] = False, ) -> None: """Upload an SRE configuration to the Data Safe Haven context""" context = ContextManager.from_file().assert_context() @@ -182,27 +194,52 @@ def upload( else: logger.critical(f"Configuration file '{file}' not found.") raise typer.Exit(1) - config = SREConfig.from_yaml(config_yaml) + try: + config = SREConfig.from_yaml(config_yaml) + except DataSafeHavenTypeError as exc: + logger.error("Check for missing or incorrect fields in the configuration.") + raise typer.Exit(1) from exc # Present diff to user - if SREConfig.remote_exists(context, filename=config.filename): - if diff := config.remote_yaml_diff(context, filename=config.filename): - for line in "".join(diff).splitlines(): - logger.info(line) - if not console.confirm( - ( - "Configuration has changed, " - "do you want to overwrite the remote configuration?" - ), - default_to_yes=False, - ): + if (not force) and SREConfig.remote_exists(context, filename=config.filename): + try: + if diff := config.remote_yaml_diff(context, filename=config.filename): + for line in "".join(diff).splitlines(): + logger.info(line) + if not console.confirm( + ( + "Configuration has changed, " + "do you want to overwrite the remote configuration?" 
+ ), + default_to_yes=False, + ): + raise typer.Exit() + else: + console.print("No changes, won't upload configuration.") raise typer.Exit() - else: - console.print("No changes, won't upload configuration.") - raise typer.Exit() + except DataSafeHavenTypeError as exc: + dump_remote_config(context, config.name, logger) + console.print( + "To overwrite the remote config, use `dsh config upload --force`" + ) + raise typer.Exit(1) from exc try: config.upload(context, filename=config.filename) except DataSafeHavenError as exc: logger.critical("No infrastructure found for the selected context.") raise typer.Exit(1) from exc + + +def dump_remote_config(context: ContextBase, name: str, logger: Logger) -> None: + logger.warning( + f"Remote configuration for SRE '{name}' is not valid. Dumping remote file." + ) + azure_sdk = AzureSdk(subscription_name=context.subscription_name) + config_yaml = azure_sdk.download_blob( + sre_config_name(name), + context.resource_group_name, + context.storage_account_name, + context.storage_container_name, + ) + console.print(config_yaml) diff --git a/data_safe_haven/commands/shm.py b/data_safe_haven/commands/shm.py index b6694c7daa..522609b8ff 100644 --- a/data_safe_haven/commands/shm.py +++ b/data_safe_haven/commands/shm.py @@ -41,7 +41,7 @@ def deploy( ), ] = None, ) -> None: - """Deploy a Safe Haven Management environment.""" + """Deploy a Safe Haven Management environment using the current context.""" logger = get_logger() # Load selected context @@ -126,7 +126,7 @@ def deploy( @shm_command_group.command() def teardown() -> None: - """Tear down a deployed a Safe Haven Management environment.""" + """Tear down a deployed Safe Haven Management environment.""" logger = get_logger() try: context = ContextManager.from_file().assert_context() diff --git a/data_safe_haven/commands/sre.py b/data_safe_haven/commands/sre.py index de0d96f6b2..f03f0cc53e 100644 --- a/data_safe_haven/commands/sre.py +++ b/data_safe_haven/commands/sre.py @@ -6,7 +6,7 @@ from data_safe_haven.config import ContextManager, DSHPulumiConfig, SHMConfig, SREConfig from data_safe_haven.exceptions import DataSafeHavenConfigError, DataSafeHavenError -from data_safe_haven.external import GraphApi +from data_safe_haven.external import AzureSdk, GraphApi from data_safe_haven.functions import current_ip_address, ip_address_in_list from data_safe_haven.infrastructure import SREProjectManager from data_safe_haven.logging import get_logger @@ -80,6 +80,35 @@ def deploy( stack.add_option( "azure-native:tenantId", sre_config.azure.tenant_id, replace=False ) + # Get SRE subscription name + azure_sdk = AzureSdk(subscription_name=context.subscription_name) + sre_subscription_name = azure_sdk.get_subscription_name( + sre_config.azure.subscription_id + ) + stack.add_option( + "sre-subscription-name", + sre_subscription_name, + replace=True, + ) + logger.info( + f"SRE will be deployed to subscription '[green]{sre_subscription_name}[/]'" + f" ({sre_config.azure.subscription_id})" + ) + # Set Entra options + application = graph_api.get_application_by_name(context.entra_application_name) + if not application: + msg = f"No Entra application '{context.entra_application_name}' was found. Please redeploy your SHM." + raise DataSafeHavenConfigError(msg) + stack.add_option("azuread:clientId", application.get("appId", ""), replace=True) + if not context.entra_application_secret: + msg = f"No Entra application secret '{context.entra_application_secret_name}' was found. Please redeploy your SHM." 
+ raise DataSafeHavenConfigError(msg) + stack.add_secret( + "azuread:clientSecret", context.entra_application_secret, replace=True + ) + stack.add_option( + "azuread:tenantId", shm_config.shm.entra_tenant_id, replace=True + ) # Load SHM outputs stack.add_option( "shm-admin-group-id", @@ -96,6 +125,24 @@ def deploy( shm_config.shm.fqdn, replace=True, ) + stack.add_option( + "shm-location", + shm_config.azure.location, + replace=True, + ) + stack.add_option( + "shm-subscription-id", + shm_config.azure.subscription_id, + replace=True, + ) + logger.info(f"SRE will be registered in SHM '[green]{shm_config.shm.fqdn}[/]'") + shm_subscription_name = azure_sdk.get_subscription_name( + shm_config.azure.subscription_id + ) + logger.info( + f"SHM is deployed to subscription '[green]{shm_subscription_name}[/]'" + f" ({shm_config.azure.subscription_id})" + ) # Deploy Azure infrastructure with Pulumi try: @@ -110,7 +157,7 @@ def deploy( location=sre_config.azure.location, sre_name=sre_config.name, sre_stack=stack, - subscription_name=context.subscription_name, + subscription_name=sre_subscription_name, timezone=sre_config.sre.timezone, ) manager.run() diff --git a/data_safe_haven/commands/users.py b/data_safe_haven/commands/users.py index e250bc9fd5..fe413fa781 100644 --- a/data_safe_haven/commands/users.py +++ b/data_safe_haven/commands/users.py @@ -83,6 +83,10 @@ def list_users( # Load Pulumi config pulumi_config = DSHPulumiConfig.from_remote(context) + if sre not in pulumi_config.project_names: + msg = f"Could not load Pulumi settings for '{sre}'. Is the SRE deployed?" + logger.error(msg) + raise typer.Exit(1) # List users from all sources users = UserHandler(context, graph_api) users.list(sre, pulumi_config) diff --git a/data_safe_haven/config/__init__.py b/data_safe_haven/config/__init__.py index b78acd6ab1..76d3bbfe67 100644 --- a/data_safe_haven/config/__init__.py +++ b/data_safe_haven/config/__init__.py @@ -3,7 +3,7 @@ from .dsh_pulumi_config import DSHPulumiConfig from .dsh_pulumi_project import DSHPulumiProject from .shm_config import SHMConfig -from .sre_config import SREConfig +from .sre_config import SREConfig, sre_config_name __all__ = [ "Context", @@ -12,4 +12,5 @@ "DSHPulumiProject", "SHMConfig", "SREConfig", + "sre_config_name", ] diff --git a/data_safe_haven/config/config_sections.py b/data_safe_haven/config/config_sections.py index 252c94e7d0..35b9570a7e 100644 --- a/data_safe_haven/config/config_sections.py +++ b/data_safe_haven/config/config_sections.py @@ -10,6 +10,7 @@ from data_safe_haven.types import ( AzureLocation, AzurePremiumFileShareSize, + AzureServiceTag, AzureVmSku, DatabaseSystem, EmailAddress, @@ -58,7 +59,7 @@ class ConfigSectionSRE(BaseModel, validate_assignment=True): databases: UniqueList[DatabaseSystem] = [] data_provider_ip_addresses: list[IpAddress] = [] remote_desktop: ConfigSubsectionRemoteDesktopOpts - research_user_ip_addresses: list[IpAddress] = [] + research_user_ip_addresses: list[IpAddress] | AzureServiceTag = [] storage_quota_gb: ConfigSubsectionStorageQuotaGB software_packages: SoftwarePackageCategory = SoftwarePackageCategory.NONE timezone: TimeZone = "Etc/UTC" @@ -67,7 +68,7 @@ class ConfigSectionSRE(BaseModel, validate_assignment=True): @field_validator( "admin_ip_addresses", "data_provider_ip_addresses", - "research_user_ip_addresses", + # "research_user_ip_addresses", mode="after", ) @classmethod @@ -78,3 +79,16 @@ def ensure_non_overlapping(cls, v: list[IpAddress]) -> list[IpAddress]: msg = "IP addresses must not overlap." 
raise ValueError(msg) return v + + @field_validator( + "research_user_ip_addresses", + mode="after", + ) + @classmethod + def ensure_non_overlapping_or_tag( + cls, v: list[IpAddress] | AzureServiceTag + ) -> list[IpAddress] | AzureServiceTag: + if isinstance(v, list): + return cls.ensure_non_overlapping(v) + else: + return v diff --git a/data_safe_haven/config/context.py b/data_safe_haven/config/context.py index 426795bf93..9d648c6bc8 100644 --- a/data_safe_haven/config/context.py +++ b/data_safe_haven/config/context.py @@ -9,6 +9,7 @@ from data_safe_haven import __version__ from data_safe_haven.directories import config_dir +from data_safe_haven.exceptions import DataSafeHavenAzureError from data_safe_haven.external import AzureSdk from data_safe_haven.functions import alphanumeric from data_safe_haven.serialisers import ContextBase @@ -16,39 +17,48 @@ class Context(ContextBase, BaseModel, validate_assignment=True): + """Context for a Data Safe Haven deployment.""" + + entra_application_kvsecret_name: ClassVar[str] = "pulumi-deployment-secret" + entra_application_secret_name: ClassVar[str] = "Pulumi Deployment Secret" + pulumi_encryption_key_name: ClassVar[str] = "pulumi-encryption-key" + pulumi_storage_container_name: ClassVar[str] = "pulumi" + storage_container_name: ClassVar[str] = "config" + admin_group_name: EntraGroupName description: str name: SafeString subscription_name: AzureSubscriptionName - storage_container_name: ClassVar[str] = "config" - pulumi_storage_container_name: ClassVar[str] = "pulumi" - pulumi_encryption_key_name: ClassVar[str] = "pulumi-encryption-key" _pulumi_encryption_key = None + _entra_application_secret = None @property - def tags(self) -> dict[str, str]: - return { - "description": self.description, - "project": "Data Safe Haven", - "shm_name": self.name, - "version": __version__, - } + def entra_application_name(self) -> str: + return f"Data Safe Haven ({self.name}) Pulumi Service Principal" @property - def work_directory(self) -> Path: - return config_dir() / self.name - - @property - def resource_group_name(self) -> str: - return f"shm-{self.name}-rg" - - @property - def storage_account_name(self) -> str: - # https://learn.microsoft.com/en-us/azure/storage/common/storage-account-overview#storage-account-name - # Storage account names must be between 3 and 24 characters in length and may - # contain numbers and lowercase letters only. 
- return f"shm{alphanumeric(self.name)[:21]}" + def entra_application_secret(self) -> str: + if not self._entra_application_secret: + azure_sdk = AzureSdk(subscription_name=self.subscription_name) + try: + application_secret = azure_sdk.get_keyvault_secret( + secret_name=self.entra_application_kvsecret_name, + key_vault_name=self.key_vault_name, + ) + self._entra_application_secret = application_secret + except DataSafeHavenAzureError: + return "" + return self._entra_application_secret + + @entra_application_secret.setter + def entra_application_secret(self, application_secret: str) -> None: + azure_sdk = AzureSdk(subscription_name=self.subscription_name) + azure_sdk.set_keyvault_secret( + secret_name=self.entra_application_kvsecret_name, + secret_value=application_secret, + key_vault_name=self.key_vault_name, + ) @property def key_vault_name(self) -> str: @@ -83,5 +93,29 @@ def pulumi_encryption_key_version(self) -> str: def pulumi_secrets_provider_url(self) -> str: return f"azurekeyvault://{self.key_vault_name}.vault.azure.net/keys/{self.pulumi_encryption_key_name}/{self.pulumi_encryption_key_version}" + @property + def resource_group_name(self) -> str: + return f"shm-{self.name}-rg" + + @property + def storage_account_name(self) -> str: + # https://learn.microsoft.com/en-us/azure/storage/common/storage-account-overview#storage-account-name + # Storage account names must be between 3 and 24 characters in length and may + # contain numbers and lowercase letters only. + return f"shm{alphanumeric(self.name)[:21]}" + + @property + def tags(self) -> dict[str, str]: + return { + "description": self.description, + "project": "Data Safe Haven", + "shm_name": self.name, + "version": __version__, + } + + @property + def work_directory(self) -> Path: + return config_dir() / self.name + def to_yaml(self) -> str: return yaml.dump(self.model_dump(), indent=2) diff --git a/data_safe_haven/config/dsh_pulumi_config.py b/data_safe_haven/config/dsh_pulumi_config.py index 63581d31dd..1aae87996c 100644 --- a/data_safe_haven/config/dsh_pulumi_config.py +++ b/data_safe_haven/config/dsh_pulumi_config.py @@ -12,6 +12,7 @@ class DSHPulumiConfig(AzureSerialisableModel): config_type: ClassVar[str] = "Pulumi" default_filename: ClassVar[str] = "pulumi.yaml" + encrypted_key: str | None projects: dict[str, DSHPulumiProject] diff --git a/data_safe_haven/config/shm_config.py b/data_safe_haven/config/shm_config.py index 2f5ef85f01..a32d06ed57 100644 --- a/data_safe_haven/config/shm_config.py +++ b/data_safe_haven/config/shm_config.py @@ -4,6 +4,7 @@ from typing import ClassVar, Self +from data_safe_haven.exceptions import DataSafeHavenMicrosoftGraphError from data_safe_haven.external import AzureSdk from data_safe_haven.serialisers import AzureSerialisableModel, ContextBase @@ -11,8 +12,11 @@ class SHMConfig(AzureSerialisableModel): + """Serialisable config for a Data Safe Haven management component.""" + config_type: ClassVar[str] = "SHMConfig" default_filename: ClassVar[str] = "shm.yaml" + azure: ConfigSectionAzure shm: ConfigSectionSHM @@ -27,10 +31,13 @@ def from_args( ) -> SHMConfig: """Construct an SHMConfig from arguments.""" azure_sdk = AzureSdk(subscription_name=context.subscription_name) - admin_group_id = ( - azure_sdk.entra_directory.get_id_from_groupname(context.admin_group_name) - or "admin-group-id-not-found" - ) + try: + admin_group_id = azure_sdk.entra_directory.validate_entra_group( + context.admin_group_name + ) + except DataSafeHavenMicrosoftGraphError as exc: + msg = f"Admin group 
'{context.admin_group_name}' not found. Check the group name." + raise DataSafeHavenMicrosoftGraphError(msg) from exc return SHMConfig.model_construct( azure=ConfigSectionAzure.model_construct( location=location, diff --git a/data_safe_haven/config/sre_config.py b/data_safe_haven/config/sre_config.py index 5a5d6367e1..9fba89e12f 100644 --- a/data_safe_haven/config/sre_config.py +++ b/data_safe_haven/config/sre_config.py @@ -23,8 +23,11 @@ def sre_config_name(sre_name: str) -> str: class SREConfig(AzureSerialisableModel): + """Serialisable config for a secure research environment component.""" + config_type: ClassVar[str] = "SREConfig" default_filename: ClassVar[str] = "sre.yaml" + azure: ConfigSectionAzure description: str dockerhub: ConfigSectionDockerHub @@ -95,7 +98,10 @@ def template(cls: type[Self], tier: int | None = None) -> SREConfig: allow_copy=remote_desktop_allow_copy, allow_paste=remote_desktop_allow_paste, ), - research_user_ip_addresses=["List of IP addresses belonging to users"], + research_user_ip_addresses=[ + "List of IP addresses belonging to users", + "You can also use the tag 'Internet' instead of a list", + ], software_packages=software_packages, storage_quota_gb=ConfigSubsectionStorageQuotaGB.model_construct( home="Total size in GiB across all home directories [minimum: 100].", # type: ignore diff --git a/data_safe_haven/external/api/azure_sdk.py b/data_safe_haven/external/api/azure_sdk.py index 1dce416320..1792988348 100644 --- a/data_safe_haven/external/api/azure_sdk.py +++ b/data_safe_haven/external/api/azure_sdk.py @@ -14,7 +14,7 @@ ) from azure.keyvault.certificates import CertificateClient, KeyVaultCertificate from azure.keyvault.keys import KeyClient, KeyVaultKey -from azure.keyvault.secrets import SecretClient +from azure.keyvault.secrets import KeyVaultSecret, SecretClient from azure.mgmt.compute.v2021_07_01 import ComputeManagementClient from azure.mgmt.compute.v2021_07_01.models import ( ResourceSkuCapabilities, @@ -437,6 +437,10 @@ def ensure_keyvault( return key_vaults[0] except AzureError as exc: msg = f"Failed to create key vault {key_vault_name}." + if "MissingSubscriptionRegistration" in exc.message: + msg += " Subscription is not registered to use the key vault resource provider. See https://learn.microsoft.com/en-us/azure/azure-resource-manager/troubleshooting/error-register-resource-provider" + else: + msg += " Check if a key vault with the same name already exists in a deleted state." raise DataSafeHavenAzureError(msg) from exc def ensure_keyvault_key( @@ -447,7 +451,7 @@ def ensure_keyvault_key( """Ensure that a key exists in the KeyVault Returns: - str: The key ID + KeyVaultKey: The key Raises: DataSafeHavenAzureError if the existence of the key could not be verified @@ -472,7 +476,7 @@ def ensure_keyvault_key( ) return key except AzureError as exc: - msg = f"Failed to create key {key_name}." + msg = f"Failed to create key '{key_name}' in KeyVault '{key_vault_name}'." 
raise DataSafeHavenAzureError(msg) from exc def ensure_managed_identity( @@ -689,7 +693,7 @@ def get_keyvault_secret(self, key_vault_name: str, secret_name: str) -> str: credential=self.credential(AzureSdkCredentialScope.KEY_VAULT), vault_url=f"https://{key_vault_name}.vault.azure.net", ) - # Ensure that secret exists + # Get secret if it exists try: secret = secret_client.get_secret(secret_name) if secret.value: @@ -774,6 +778,21 @@ def get_subscription(self, subscription_name: str) -> Subscription: msg = f"Could not find subscription '{subscription_name}'" raise DataSafeHavenValueError(msg) + def get_subscription_name(self, subscription_id: str) -> str: + """Get an Azure subscription name by id.""" + try: + subscription_client = SubscriptionClient(self.credential()) + subscription = subscription_client.subscriptions.get(subscription_id) + except ClientAuthenticationError as exc: + msg = "Failed to authenticate with Azure API." + raise DataSafeHavenAzureAPIAuthenticationError(msg) from exc + except AzureError as exc: + msg = f"Failed to get name of subscription {subscription_id}." + raise DataSafeHavenAzureError(msg) from exc + + subscription_name: str = subscription.display_name + return subscription_name + def import_keyvault_certificate( self, certificate_name: str, @@ -1283,6 +1302,38 @@ def set_blob_container_acl( msg = f"Failed to set ACL '{desired_acl}' on container '{container_name}'." raise DataSafeHavenAzureError(msg) from exc + def set_keyvault_secret( + self, + secret_name: str, + secret_value: str, + key_vault_name: str, + ) -> KeyVaultSecret: + """Ensure that a secret exists in the KeyVault + + Returns: + KeyVaultSecret: The secret + + Raises: + DataSafeHavenAzureError if the secret could not be set + """ + try: + # Connect to Azure clients + secret_client = SecretClient( + credential=self.credential(AzureSdkCredentialScope.KEY_VAULT), + vault_url=f"https://{key_vault_name}.vault.azure.net", + ) + + # Set secret to given value + self.logger.debug(f"Setting secret [green]{secret_name}[/]...") + secret = secret_client.set_secret(secret_name, secret_value) + self.logger.info(f"Set secret [green]{secret_name}[/].") + return secret + except AzureError as exc: + msg = ( + f"Failed to set secret '{secret_name}' in KeyVault '{key_vault_name}'." + ) + raise DataSafeHavenAzureError(msg) from exc + def storage_exists( self, storage_account_name: str, diff --git a/data_safe_haven/external/api/graph_api.py b/data_safe_haven/external/api/graph_api.py index ce87648fa9..66cf139a65 100644 --- a/data_safe_haven/external/api/graph_api.py +++ b/data_safe_haven/external/api/graph_api.py @@ -16,7 +16,6 @@ DataSafeHavenMicrosoftGraphError, DataSafeHavenValueError, ) -from data_safe_haven.functions import alphanumeric from data_safe_haven.logging import get_logger, get_null_logger from .credentials import DeferredCredential, GraphApiCredential @@ -140,7 +139,7 @@ def add_user_to_group( """ try: user_id = self.get_id_from_username(username) - group_id = self.get_id_from_groupname(group_name) + group_id = self.validate_entra_group(group_name) json_response = self.http_get( f"{self.base_endpoint}/groups/{group_id}/members", ).json() @@ -314,40 +313,6 @@ def create_application_secret( msg = f"Could not create application secret '{application_secret_name}'." 
raise DataSafeHavenMicrosoftGraphError(msg) from exc - def create_group(self, group_name: str) -> None: - """Create an Entra group if it does not already exist - - Raises: - DataSafeHavenMicrosoftGraphError if the group could not be created - """ - try: - if self.get_id_from_groupname(group_name): - self.logger.info( - f"Found existing Entra group '[green]{group_name}[/]'.", - ) - return - self.logger.debug( - f"Creating Entra group '[green]{group_name}[/]'...", - ) - request_json = { - "description": group_name, - "displayName": group_name, - "groupTypes": [], - "mailEnabled": False, - "mailNickname": alphanumeric(group_name).lower(), - "securityEnabled": True, - } - self.http_post( - f"{self.base_endpoint}/groups", - json=request_json, - ).json() - self.logger.info( - f"Created Entra group '[green]{group_name}[/]'.", - ) - except Exception as exc: - msg = f"Could not create Entra group '{group_name}'." - raise DataSafeHavenMicrosoftGraphError(msg) from exc - def ensure_application_service_principal( self, application_name: str ) -> dict[str, Any]: @@ -515,6 +480,19 @@ def get_service_principal_by_name( except (DataSafeHavenMicrosoftGraphError, StopIteration): return None + def validate_entra_group(self, group_name: str) -> str: + """ + Ensure that an Entra group exists and return its ID + + Raises: + DataSafeHavenMicrosoftGraphError if the group does not exist + """ + if group_id := self.get_id_from_groupname(group_name): + return group_id + else: + msg = f"Group '{group_name}' not found." + raise DataSafeHavenMicrosoftGraphError(msg) + def get_id_from_groupname(self, group_name: str) -> str | None: try: return str( @@ -1015,7 +993,7 @@ def remove_user_from_group( """ try: user_id = self.get_id_from_username(username) - group_id = self.get_id_from_groupname(group_name) + group_id = self.validate_entra_group(group_name) # Check whether user is in group json_response = self.http_get( f"{self.base_endpoint}/groups/{group_id}/members", @@ -1047,17 +1025,17 @@ def verify_custom_domain( DataSafeHavenMicrosoftGraphError if domain could not be verified """ try: - # Create the Entra custom domain if it does not already exist + # Check whether the domain has been added to Entra ID domains = self.read_domains() if not any(d["id"] == domain_name for d in domains): msg = f"Domain {domain_name} has not been added to Entra ID." raise DataSafeHavenMicrosoftGraphError(msg) - # Wait until domain delegation is complete + # Loop until domain delegation is complete while True: # Check whether all expected nameservers are active with suppress(resolver.NXDOMAIN): self.logger.debug( - f"Checking [green]{domain_name}[/] domain verification status ..." + f"Checking [green]{domain_name}[/] DNS delegation." ) active_nameservers = [ str(ns) for ns in iter(resolver.resolve(domain_name, "NS")) @@ -1067,28 +1045,38 @@ def verify_custom_domain( for nameserver in expected_nameservers ): self.logger.info( - f"Verified that domain [green]{domain_name}[/] is delegated to Azure." + f"[green]{domain_name}[/] DNS has been delegated correctly." ) break self.logger.warning( - f"Domain [green]{domain_name}[/] is not currently delegated to Azure." + f"[green]{domain_name}[/] DNS is not delegated correctly." 
) # Prompt user to set domain delegation manually - docs_link = "https://learn.microsoft.com/en-us/azure/dns/dns-delegate-domain-azure-dns#delegate-the-domain" self.logger.info( - f"To proceed you will need to delegate [green]{domain_name}[/] to Azure ({docs_link})" + f"To proceed you will need to delegate [green]{domain_name}[/] to specific Azure nameservers" ) - ns_list = ", ".join([f"[green]{n}[/]" for n in expected_nameservers]) + domain_parent = ".".join(domain_name.split(".")[1:]) self.logger.info( - f"You will need to create an NS record pointing to: {ns_list}" + f"Create {len(expected_nameservers)} [green]NS[/] records for [green]{domain_name}[/] (for example in the zone of {domain_parent})" + ) + console.tabulate( + header=["domain", "record type", "value"], + rows=[ + [domain_name, "NS", nameserver] + for nameserver in expected_nameservers + ], + ) + docs_link = ( + "https://www.cloudflare.com/learning/dns/dns-records/dns-ns-record/" + ) + self.logger.info( + f"You can learn more about NS records here: {docs_link}" ) if not console.confirm( - f"Are you ready to check whether [green]{domain_name}[/] has been delegated to Azure?", + f"Are you ready to check whether [green]{domain_name}[/] has been delegated to the correct Azure nameservers?", default_to_yes=True, ): - self.logger.error( - "Please use `az login` to connect to the correct Azure CLI account" - ) + self.logger.error("User terminated check for domain delegation.") raise typer.Exit(1) # Send verification request if needed if not any((d["id"] == domain_name and d["isVerified"]) for d in domains): diff --git a/data_safe_haven/external/interface/pulumi_account.py b/data_safe_haven/external/interface/pulumi_account.py index 4db66c52c8..0a240749f4 100644 --- a/data_safe_haven/external/interface/pulumi_account.py +++ b/data_safe_haven/external/interface/pulumi_account.py @@ -1,5 +1,6 @@ """Manage Pulumi accounts""" +import sys from shutil import which from typing import Any @@ -39,5 +40,6 @@ def env(self) -> dict[str, Any]: "AZURE_STORAGE_ACCOUNT": self.storage_account_name, "AZURE_STORAGE_KEY": str(storage_account_keys[0].value), "AZURE_KEYVAULT_AUTH_VIA_CLI": "true", + "PULUMI_PYTHON_CMD": sys.executable, } return self._env diff --git a/data_safe_haven/infrastructure/common/ip_ranges.py b/data_safe_haven/infrastructure/common/ip_ranges.py index f0613e577a..aa201f3878 100644 --- a/data_safe_haven/infrastructure/common/ip_ranges.py +++ b/data_safe_haven/infrastructure/common/ip_ranges.py @@ -14,8 +14,8 @@ class SREIpRanges: apt_proxy_server = vnet.next_subnet(8) clamav_mirror = vnet.next_subnet(8) data_configuration = vnet.next_subnet(8) - data_desired_state = vnet.next_subnet(8) data_private = vnet.next_subnet(8) + desired_state = vnet.next_subnet(8) firewall = vnet.next_subnet(64) # 64 address minimum firewall_management = vnet.next_subnet(64) # 64 address minimum guacamole_containers = vnet.next_subnet(8) diff --git a/data_safe_haven/infrastructure/components/__init__.py b/data_safe_haven/infrastructure/components/__init__.py index cc6bcb15a4..7531491bf8 100644 --- a/data_safe_haven/infrastructure/components/__init__.py +++ b/data_safe_haven/infrastructure/components/__init__.py @@ -4,6 +4,8 @@ LocalDnsRecordProps, MicrosoftSQLDatabaseComponent, MicrosoftSQLDatabaseProps, + NFSV3BlobContainerComponent, + NFSV3BlobContainerProps, PostgresqlDatabaseComponent, PostgresqlDatabaseProps, VMComponent, @@ -20,6 +22,7 @@ ) from .wrapped import ( WrappedLogAnalyticsWorkspace, + WrappedNFSV3StorageAccount, ) __all__ = [ @@ -34,6 +37,9 
@@ "LocalDnsRecordProps", "MicrosoftSQLDatabaseComponent", "MicrosoftSQLDatabaseProps", + "NFSV3BlobContainerComponent", + "NFSV3BlobContainerProps", + "WrappedNFSV3StorageAccount", "PostgresqlDatabaseComponent", "PostgresqlDatabaseProps", "SSLCertificate", diff --git a/data_safe_haven/infrastructure/components/composite/__init__.py b/data_safe_haven/infrastructure/components/composite/__init__.py index f111bab028..e4254a50ed 100644 --- a/data_safe_haven/infrastructure/components/composite/__init__.py +++ b/data_safe_haven/infrastructure/components/composite/__init__.py @@ -3,6 +3,7 @@ MicrosoftSQLDatabaseComponent, MicrosoftSQLDatabaseProps, ) +from .nfsv3_blob_container import NFSV3BlobContainerComponent, NFSV3BlobContainerProps from .postgresql_database import PostgresqlDatabaseComponent, PostgresqlDatabaseProps from .virtual_machine import LinuxVMComponentProps, VMComponent @@ -12,6 +13,8 @@ "LocalDnsRecordProps", "MicrosoftSQLDatabaseComponent", "MicrosoftSQLDatabaseProps", + "NFSV3BlobContainerComponent", + "NFSV3BlobContainerProps", "PostgresqlDatabaseComponent", "PostgresqlDatabaseProps", "VMComponent", diff --git a/data_safe_haven/infrastructure/components/composite/nfsv3_blob_container.py b/data_safe_haven/infrastructure/components/composite/nfsv3_blob_container.py new file mode 100644 index 0000000000..98564918a0 --- /dev/null +++ b/data_safe_haven/infrastructure/components/composite/nfsv3_blob_container.py @@ -0,0 +1,75 @@ +from pulumi import ComponentResource, Input, ResourceOptions +from pulumi_azure_native import storage + +from data_safe_haven.infrastructure.components.dynamic.blob_container_acl import ( + BlobContainerAcl, + BlobContainerAclProps, +) + + +class NFSV3BlobContainerProps: + def __init__( + self, + acl_user: Input[str], + acl_group: Input[str], + acl_other: Input[str], + apply_default_permissions: Input[bool], + container_name: Input[str], + resource_group_name: Input[str], + storage_account: Input[storage.StorageAccount], + subscription_name: Input[str], + ): + self.acl_user = acl_user + self.acl_group = acl_group + self.acl_other = acl_other + self.apply_default_permissions = apply_default_permissions + self.container_name = container_name + self.resource_group_name = resource_group_name + self.storage_account = storage_account + self.subscription_name = subscription_name + + +class NFSV3BlobContainerComponent(ComponentResource): + def __init__( + self, + name: str, + props: NFSV3BlobContainerProps, + opts: ResourceOptions | None = None, + ): + super().__init__("dsh:common:NFSV3BlobContainerComponent", name, {}, opts) + child_opts = ResourceOptions.merge(opts, ResourceOptions(parent=self)) + + storage_container = storage.BlobContainer( + f"{self._name}_blob_container_{props.container_name}", + account_name=props.storage_account.name, + container_name=props.container_name, + default_encryption_scope="$account-encryption-key", + deny_encryption_scope_override=False, + public_access=storage.PublicAccess.NONE, + resource_group_name=props.resource_group_name, + opts=ResourceOptions.merge( + child_opts, + ResourceOptions(parent=props.storage_account), + ), + ) + BlobContainerAcl( + f"{storage_container._name}_acl", + BlobContainerAclProps( + acl_user=props.acl_user, + acl_group=props.acl_group, + acl_other=props.acl_other, + apply_default_permissions=props.apply_default_permissions, + container_name=storage_container.name, + resource_group_name=props.resource_group_name, + storage_account_name=props.storage_account.name, + 
subscription_name=props.subscription_name, + ), + opts=ResourceOptions.merge( + child_opts, + ResourceOptions(parent=props.storage_account), + ), + ) + + self.name = storage_container.name + + self.register_outputs({}) diff --git a/data_safe_haven/infrastructure/components/wrapped/__init__.py b/data_safe_haven/infrastructure/components/wrapped/__init__.py index fc5f8c8f61..ef6e7374d2 100644 --- a/data_safe_haven/infrastructure/components/wrapped/__init__.py +++ b/data_safe_haven/infrastructure/components/wrapped/__init__.py @@ -1,5 +1,7 @@ from .log_analytics_workspace import WrappedLogAnalyticsWorkspace +from .nfsv3_storage_account import WrappedNFSV3StorageAccount __all__ = [ + "WrappedNFSV3StorageAccount", "WrappedLogAnalyticsWorkspace", ] diff --git a/data_safe_haven/infrastructure/components/wrapped/nfsv3_storage_account.py b/data_safe_haven/infrastructure/components/wrapped/nfsv3_storage_account.py new file mode 100644 index 0000000000..181839e71d --- /dev/null +++ b/data_safe_haven/infrastructure/components/wrapped/nfsv3_storage_account.py @@ -0,0 +1,68 @@ +from collections.abc import Mapping, Sequence + +from pulumi import Input, Output, ResourceOptions +from pulumi_azure_native import storage + +from data_safe_haven.external import AzureIPv4Range + + +class WrappedNFSV3StorageAccount(storage.StorageAccount): + encryption_args = storage.EncryptionArgs( + key_source=storage.KeySource.MICROSOFT_STORAGE, + services=storage.EncryptionServicesArgs( + blob=storage.EncryptionServiceArgs( + enabled=True, key_type=storage.KeyType.ACCOUNT + ), + file=storage.EncryptionServiceArgs( + enabled=True, key_type=storage.KeyType.ACCOUNT + ), + ), + ) + + def __init__( + self, + resource_name: str, + *, + account_name: Input[str], + allowed_ip_addresses: Input[Sequence[str]], + location: Input[str], + resource_group_name: Input[str], + subnet_id: Input[str], + opts: ResourceOptions, + tags: Input[Mapping[str, Input[str]]], + ): + self.resource_group_name_ = Output.from_input(resource_group_name) + super().__init__( + resource_name, + account_name=account_name, + enable_https_traffic_only=True, + enable_nfs_v3=True, + encryption=self.encryption_args, + is_hns_enabled=True, + kind=storage.Kind.BLOCK_BLOB_STORAGE, + location=location, + minimum_tls_version=storage.MinimumTlsVersion.TLS1_2, + network_rule_set=storage.NetworkRuleSetArgs( + bypass=storage.Bypass.AZURE_SERVICES, + default_action=storage.DefaultAction.DENY, + ip_rules=Output.from_input(allowed_ip_addresses).apply( + lambda ip_ranges: [ + storage.IPRuleArgs( + action=storage.Action.ALLOW, + i_p_address_or_range=str(ip_address), + ) + for ip_range in sorted(ip_ranges) + for ip_address in AzureIPv4Range.from_cidr(ip_range).all_ips() + ] + ), + virtual_network_rules=[ + storage.VirtualNetworkRuleArgs( + virtual_network_resource_id=subnet_id, + ) + ], + ), + resource_group_name=resource_group_name, + sku=storage.SkuArgs(name=storage.SkuName.PREMIUM_ZRS), + opts=opts, + tags=tags, + ) diff --git a/data_safe_haven/infrastructure/programs/declarative_sre.py b/data_safe_haven/infrastructure/programs/declarative_sre.py index 1db4436c1f..ce678dbb4a 100644 --- a/data_safe_haven/infrastructure/programs/declarative_sre.py +++ b/data_safe_haven/infrastructure/programs/declarative_sre.py @@ -1,6 +1,7 @@ """Pulumi declarative program""" import pulumi +from pulumi import ResourceOptions from pulumi_azure_native import resources from data_safe_haven.config import Context, SREConfig @@ -15,7 +16,9 @@ from .sre.backup import SREBackupComponent, SREBackupProps 
from .sre.clamav_mirror import SREClamAVMirrorComponent, SREClamAVMirrorProps from .sre.data import SREDataComponent, SREDataProps +from .sre.desired_state import SREDesiredStateComponent, SREDesiredStateProps from .sre.dns_server import SREDnsServerComponent, SREDnsServerProps +from .sre.entra import SREEntraComponent, SREEntraProps from .sre.firewall import SREFirewallComponent, SREFirewallProps from .sre.identity import SREIdentityComponent, SREIdentityProps from .sre.monitoring import SREMonitoringComponent, SREMonitoringProps @@ -51,6 +54,9 @@ def __call__(self) -> None: shm_admin_group_id = self.pulumi_opts.require("shm-admin-group-id") shm_entra_tenant_id = self.pulumi_opts.require("shm-entra-tenant-id") shm_fqdn = self.pulumi_opts.require("shm-fqdn") + shm_location = self.pulumi_opts.require("shm-location") + shm_subscription_id = self.pulumi_opts.require("shm-subscription-id") + sre_subscription_name = self.pulumi_opts.require("sre-subscription-name") # Construct DockerHubCredentials dockerhub_credentials = DockerHubCredentials( @@ -106,6 +112,14 @@ def __call__(self) -> None: ] ) + # Deploy Entra resources + SREEntraComponent( + "sre_entra", + SREEntraProps( + group_names=ldap_group_names, + ), + ) + # Deploy resource group resource_group = resources.ResourceGroup( "sre_resource_group", @@ -138,7 +152,9 @@ def __call__(self) -> None: location=self.config.azure.location, resource_group_name=resource_group.name, shm_fqdn=shm_fqdn, + shm_location=shm_location, shm_resource_group_name=self.context.resource_group_name, + shm_subscription_id=shm_subscription_id, shm_zone_name=shm_fqdn, sre_name=self.config.name, user_public_ip_ranges=self.config.sre.research_user_ip_addresses, @@ -184,10 +200,9 @@ def __call__(self) -> None: storage_quota_gb_home=self.config.sre.storage_quota_gb.home, storage_quota_gb_shared=self.config.sre.storage_quota_gb.shared, subnet_data_configuration=networking.subnet_data_configuration, - subnet_data_desired_state=networking.subnet_data_desired_state, subnet_data_private=networking.subnet_data_private, subscription_id=self.config.azure.subscription_id, - subscription_name=self.context.subscription_name, + subscription_name=sre_subscription_name, tenant_id=self.config.azure.tenant_id, ), tags=self.tags, @@ -338,17 +353,15 @@ def __call__(self) -> None: tags=self.tags, ) - # Deploy workspaces - workspaces = SREWorkspacesComponent( - "sre_workspaces", + # Deploy desired state + desired_state = SREDesiredStateComponent( + "sre_desired_state", self.stack_name, - SREWorkspacesProps( - admin_password=data.password_workspace_admin, - apt_proxy_server_hostname=apt_proxy_server.hostname, + SREDesiredStateProps( + admin_ip_addresses=self.config.sre.admin_ip_addresses, clamav_mirror_hostname=clamav_mirror.hostname, - data_collection_rule_id=monitoring.data_collection_rule_vms.id, - data_collection_endpoint_id=monitoring.data_collection_endpoint.id, database_service_admin_password=data.password_database_service_admin, + dns_private_zones=dns.private_zones, gitea_hostname=user_services.gitea_server.hostname, hedgedoc_hostname=user_services.hedgedoc_server.hostname, ldap_group_filter=ldap_group_filter, @@ -358,18 +371,35 @@ def __call__(self) -> None: ldap_user_filter=ldap_user_filter, ldap_user_search_base=ldap_user_search_base, location=self.config.azure.location, + resource_group=resource_group, + software_repository_hostname=user_services.software_repositories.hostname, + subnet_desired_state=networking.subnet_desired_state, + subscription_name=sre_subscription_name, + ), 
+ ) + + # Deploy workspaces + workspaces = SREWorkspacesComponent( + "sre_workspaces", + self.stack_name, + SREWorkspacesProps( + admin_password=data.password_workspace_admin, + apt_proxy_server_hostname=apt_proxy_server.hostname, + data_collection_rule_id=monitoring.data_collection_rule_vms.id, + data_collection_endpoint_id=monitoring.data_collection_endpoint.id, + location=self.config.azure.location, maintenance_configuration_id=monitoring.maintenance_configuration.id, resource_group_name=resource_group.name, - software_repository_hostname=user_services.software_repositories.hostname, sre_name=self.config.name, - storage_account_data_desired_state_name=data.storage_account_data_desired_state_name, + storage_account_desired_state_name=desired_state.storage_account_name, storage_account_data_private_user_name=data.storage_account_data_private_user_name, storage_account_data_private_sensitive_name=data.storage_account_data_private_sensitive_name, subnet_workspaces=networking.subnet_workspaces, - subscription_name=self.context.subscription_name, + subscription_name=sre_subscription_name, virtual_network=networking.virtual_network, vm_details=list(enumerate(self.config.sre.workspace_skus)), ), + opts=ResourceOptions(depends_on=[desired_state]), tags=self.tags, ) diff --git a/data_safe_haven/infrastructure/programs/imperative_shm.py b/data_safe_haven/infrastructure/programs/imperative_shm.py index b13ec1680e..9b748bbdd1 100644 --- a/data_safe_haven/infrastructure/programs/imperative_shm.py +++ b/data_safe_haven/infrastructure/programs/imperative_shm.py @@ -114,17 +114,18 @@ def deploy(self) -> None: msg = "Failed to create SHM resources." raise DataSafeHavenAzureError(msg) from exc + # Connect to GraphAPI + graph_api = GraphApi.from_scopes( + scopes=[ + "Application.ReadWrite.All", + "Domain.ReadWrite.All", + "Group.ReadWrite.All", + ], + tenant_id=self.config.shm.entra_tenant_id, + ) # Add the SHM domain to the Entra ID via interactive GraphAPI try: # Generate the verification record - graph_api = GraphApi.from_scopes( - scopes=[ - "Application.ReadWrite.All", - "Domain.ReadWrite.All", - "Group.ReadWrite.All", - ], - tenant_id=self.config.shm.entra_tenant_id, - ) verification_record = graph_api.add_custom_domain(self.config.shm.fqdn) # Add the record to DNS self.azure_sdk.ensure_dns_txt_record( @@ -142,6 +143,27 @@ def deploy(self) -> None: except (DataSafeHavenMicrosoftGraphError, DataSafeHavenAzureError) as exc: msg = f"Failed to add custom domain '{self.config.shm.fqdn}' to Entra ID." raise DataSafeHavenAzureError(msg) from exc + # Create an application for use by the pulumi-azuread module + try: + graph_api.create_application( + self.context.entra_application_name, + application_scopes=["Group.ReadWrite.All"], + delegated_scopes=[], + request_json={ + "displayName": self.context.entra_application_name, + "signInAudience": "AzureADMyOrg", + }, + ) + # Always recreate the application secret. + # Otherwise the one in the key vault will be used which might be out of date + # An SRE deployment will read from the keyvault, and get the latest version + self.context.entra_application_secret = graph_api.create_application_secret( + self.context.entra_application_name, + self.context.entra_application_secret_name, + ) + except DataSafeHavenMicrosoftGraphError as exc: + msg = "Failed to create deployment application in Entra ID." 
+ raise DataSafeHavenAzureError(msg) from exc def teardown(self) -> None: """Destroy all created resources diff --git a/data_safe_haven/infrastructure/programs/sre/data.py b/data_safe_haven/infrastructure/programs/sre/data.py index 21e8f4c29b..9e18666277 100644 --- a/data_safe_haven/infrastructure/programs/sre/data.py +++ b/data_safe_haven/infrastructure/programs/sre/data.py @@ -4,7 +4,7 @@ from typing import ClassVar import pulumi_random -from pulumi import ComponentResource, FileAsset, Input, Output, ResourceOptions +from pulumi import ComponentResource, Input, Output, ResourceOptions from pulumi_azure_native import ( authorization, keyvault, @@ -29,12 +29,12 @@ get_name_from_rg, ) from data_safe_haven.infrastructure.components import ( - BlobContainerAcl, - BlobContainerAclProps, + NFSV3BlobContainerComponent, + NFSV3BlobContainerProps, SSLCertificate, SSLCertificateProps, + WrappedNFSV3StorageAccount, ) -from data_safe_haven.resources import resources_path from data_safe_haven.types import AzureDnsZoneNames @@ -56,7 +56,6 @@ def __init__( storage_quota_gb_home: Input[int], storage_quota_gb_shared: Input[int], subnet_data_configuration: Input[network.GetSubnetResult], - subnet_data_desired_state: Input[network.GetSubnetResult], subnet_data_private: Input[network.GetSubnetResult], subscription_id: Input[str], subscription_name: Input[str], @@ -86,9 +85,6 @@ def __init__( self.subnet_data_configuration_id = Output.from_input( subnet_data_configuration ).apply(get_id_from_subnet) - self.subnet_data_desired_state_id = Output.from_input( - subnet_data_desired_state - ).apply(get_id_from_subnet) self.subnet_data_private_id = Output.from_input(subnet_data_private).apply( get_id_from_subnet ) @@ -461,271 +457,52 @@ def __init__( child_opts, ResourceOptions(parent=storage_account_data_configuration) ), ) - - # Deploy desired state storage account - # - This holds the /desired_state container that is mounted by workspaces - # - Azure blobs have worse NFS support but can be accessed with Azure Storage Explorer - storage_account_data_desired_state = storage.StorageAccount( - f"{self._name}_storage_account_data_desired_state", - # Storage account names have a maximum of 24 characters - account_name=alphanumeric( - f"{''.join(truncate_tokens(stack_name.split('-'), 11))}desiredstate{sha256hash(self._name)}" - )[:24], - enable_https_traffic_only=True, - enable_nfs_v3=True, - encryption=storage.EncryptionArgs( - key_source=storage.KeySource.MICROSOFT_STORAGE, - services=storage.EncryptionServicesArgs( - blob=storage.EncryptionServiceArgs( - enabled=True, key_type=storage.KeyType.ACCOUNT - ), - file=storage.EncryptionServiceArgs( - enabled=True, key_type=storage.KeyType.ACCOUNT - ), - ), - ), - kind=storage.Kind.BLOCK_BLOB_STORAGE, - is_hns_enabled=True, - location=props.location, - minimum_tls_version=storage.MinimumTlsVersion.TLS1_2, - network_rule_set=storage.NetworkRuleSetArgs( - bypass=storage.Bypass.AZURE_SERVICES, - default_action=storage.DefaultAction.DENY, - ip_rules=Output.from_input(props.data_configuration_ip_addresses).apply( - lambda ip_ranges: [ - storage.IPRuleArgs( - action=storage.Action.ALLOW, - i_p_address_or_range=str(ip_address), - ) - for ip_range in sorted(ip_ranges) - for ip_address in AzureIPv4Range.from_cidr(ip_range).all_ips() - ] - ), - virtual_network_rules=[ - storage.VirtualNetworkRuleArgs( - virtual_network_resource_id=props.subnet_data_desired_state_id, - ) - ], - ), - resource_group_name=props.resource_group_name, - sku=storage.SkuArgs(name=storage.SkuName.PREMIUM_ZRS), 
- opts=child_opts, - tags=child_tags, - ) - # Deploy desired state share - container_desired_state = storage.BlobContainer( - f"{self._name}_blob_desired_state", - account_name=storage_account_data_desired_state.name, - container_name="desiredstate", - default_encryption_scope="$account-encryption-key", - deny_encryption_scope_override=False, - public_access=storage.PublicAccess.NONE, - resource_group_name=props.resource_group_name, - opts=ResourceOptions.merge( - child_opts, - ResourceOptions(parent=storage_account_data_desired_state), - ), - ) - # Set storage container ACLs - BlobContainerAcl( - f"{container_desired_state._name}_acl", - BlobContainerAclProps( - acl_user="r-x", - acl_group="r-x", - acl_other="r-x", - # ensure that the above permissions are also set on any newly created - # files (eg. with Azure Storage Explorer) - apply_default_permissions=True, - container_name=container_desired_state.name, - resource_group_name=props.resource_group_name, - storage_account_name=storage_account_data_desired_state.name, - subscription_name=props.subscription_name, - ), - opts=ResourceOptions.merge( - child_opts, ResourceOptions(parent=container_desired_state) - ), - ) - # Create file assets to upload - desired_state_directory = (resources_path / "workspace" / "ansible").absolute() - files_desired_state = [ - ( - FileAsset(str(file_path)), - file_path.name, - str(file_path.relative_to(desired_state_directory)), - ) - for file_path in sorted(desired_state_directory.rglob("*")) - if file_path.is_file() and not file_path.name.startswith(".") - ] - # Upload file assets to desired state container - for file_asset, file_name, file_path in files_desired_state: - storage.Blob( - f"{container_desired_state._name}_blob_{file_name}", - account_name=storage_account_data_desired_state.name, - blob_name=file_path, - container_name=container_desired_state.name, - resource_group_name=props.resource_group_name, - source=file_asset, - ) - # Set up a private endpoint for the desired state storage account - storage_account_data_desired_state_endpoint = network.PrivateEndpoint( - f"{storage_account_data_desired_state._name}_private_endpoint", - location=props.location, - private_endpoint_name=f"{stack_name}-pep-storage-account-data-desired-state", - private_link_service_connections=[ - network.PrivateLinkServiceConnectionArgs( - group_ids=["blob"], - name=f"{stack_name}-cnxn-pep-storage-account-data-private-sensitive", - private_link_service_id=storage_account_data_desired_state.id, - ) - ], - resource_group_name=props.resource_group_name, - subnet=network.SubnetArgs(id=props.subnet_data_desired_state_id), - opts=ResourceOptions.merge( - child_opts, - ResourceOptions( - ignore_changes=["custom_dns_configs"], - parent=storage_account_data_desired_state, - ), - ), - tags=child_tags, - ) - # Add a private DNS record for each desired state endpoint custom DNS config - network.PrivateDnsZoneGroup( - f"{storage_account_data_desired_state._name}_private_dns_zone_group", - private_dns_zone_configs=[ - network.PrivateDnsZoneConfigArgs( - name=replace_separators( - f"{stack_name}-storage-account-data-desired-state-to-{dns_zone_name}", - "-", - ), - private_dns_zone_id=props.dns_private_zones[dns_zone_name].id, - ) - for dns_zone_name in AzureDnsZoneNames.STORAGE_ACCOUNT - ], - private_dns_zone_group_name=f"{stack_name}-dzg-storage-account-data-desired-state", - private_endpoint_name=storage_account_data_desired_state_endpoint.name, - resource_group_name=props.resource_group_name, - opts=ResourceOptions.merge( - 
child_opts, - ResourceOptions(parent=storage_account_data_desired_state), - ), - ) - # Deploy sensitive data blob storage account - # - This holds the /data and /output containers that are mounted by workspaces + # - This holds the /mnt/input and /mnt/output containers that are mounted by workspaces # - Azure blobs have worse NFS support but can be accessed with Azure Storage Explorer - storage_account_data_private_sensitive = storage.StorageAccount( + storage_account_data_private_sensitive = WrappedNFSV3StorageAccount( f"{self._name}_storage_account_data_private_sensitive", # Storage account names have a maximum of 24 characters account_name=alphanumeric( f"{''.join(truncate_tokens(stack_name.split('-'), 11))}sensitivedata{sha256hash(self._name)}" )[:24], - enable_https_traffic_only=True, - enable_nfs_v3=True, - encryption=storage.EncryptionArgs( - key_source=storage.KeySource.MICROSOFT_STORAGE, - services=storage.EncryptionServicesArgs( - blob=storage.EncryptionServiceArgs( - enabled=True, key_type=storage.KeyType.ACCOUNT - ), - file=storage.EncryptionServiceArgs( - enabled=True, key_type=storage.KeyType.ACCOUNT - ), - ), - ), - kind=storage.Kind.BLOCK_BLOB_STORAGE, - is_hns_enabled=True, + allowed_ip_addresses=props.data_private_sensitive_ip_addresses, location=props.location, - minimum_tls_version=storage.MinimumTlsVersion.TLS1_2, - network_rule_set=storage.NetworkRuleSetArgs( - bypass=storage.Bypass.AZURE_SERVICES, - default_action=storage.DefaultAction.DENY, - ip_rules=Output.from_input( - props.data_private_sensitive_ip_addresses - ).apply( - lambda ip_ranges: [ - storage.IPRuleArgs( - action=storage.Action.ALLOW, - i_p_address_or_range=str(ip_address), - ) - for ip_range in sorted(ip_ranges) - for ip_address in AzureIPv4Range.from_cidr(ip_range).all_ips() - ] - ), - virtual_network_rules=[ - storage.VirtualNetworkRuleArgs( - virtual_network_resource_id=props.subnet_data_private_id, - ) - ], - ), + subnet_id=props.subnet_data_private_id, resource_group_name=props.resource_group_name, - sku=storage.SkuArgs(name=storage.SkuName.PREMIUM_ZRS), opts=child_opts, tags=child_tags, ) # Deploy storage containers - storage_container_egress = storage.BlobContainer( + NFSV3BlobContainerComponent( f"{self._name}_blob_egress", - account_name=storage_account_data_private_sensitive.name, - container_name="egress", - default_encryption_scope="$account-encryption-key", - deny_encryption_scope_override=False, - public_access=storage.PublicAccess.NONE, - resource_group_name=props.resource_group_name, - opts=ResourceOptions.merge( - child_opts, - ResourceOptions(parent=storage_account_data_private_sensitive), - ), - ) - storage_container_ingress = storage.BlobContainer( - f"{self._name}_blob_ingress", - account_name=storage_account_data_private_sensitive.name, - container_name="ingress", - default_encryption_scope="$account-encryption-key", - deny_encryption_scope_override=False, - public_access=storage.PublicAccess.NONE, - resource_group_name=props.resource_group_name, - opts=ResourceOptions.merge( - child_opts, - ResourceOptions(parent=storage_account_data_private_sensitive), - ), - ) - # Set storage container ACLs - BlobContainerAcl( - f"{storage_container_egress._name}_acl", - BlobContainerAclProps( + NFSV3BlobContainerProps( acl_user="rwx", acl_group="rwx", acl_other="rwx", # due to an Azure bug `apply_default_permissions=True` also gives user # 65533 ownership of the fileshare (preventing use inside the SRE) apply_default_permissions=False, - container_name=storage_container_egress.name, + 
container_name="egress", resource_group_name=props.resource_group_name, - storage_account_name=storage_account_data_private_sensitive.name, + storage_account=storage_account_data_private_sensitive, subscription_name=props.subscription_name, ), - opts=ResourceOptions.merge( - child_opts, ResourceOptions(parent=storage_container_egress) - ), ) - BlobContainerAcl( - f"{storage_container_ingress._name}_acl", - BlobContainerAclProps( + NFSV3BlobContainerComponent( + f"{self._name}_blob_ingress", + NFSV3BlobContainerProps( acl_user="rwx", acl_group="r-x", acl_other="r-x", # ensure that the above permissions are also set on any newly created # files (eg. with Azure Storage Explorer) apply_default_permissions=True, - container_name=storage_container_ingress.name, + container_name="ingress", resource_group_name=props.resource_group_name, - storage_account_name=storage_account_data_private_sensitive.name, + storage_account=storage_account_data_private_sensitive, subscription_name=props.subscription_name, ), - opts=ResourceOptions.merge( - child_opts, ResourceOptions(parent=storage_container_ingress) - ), ) # Set up a private endpoint for the sensitive data storage account storage_account_data_private_sensitive_endpoint = network.PrivateEndpoint( @@ -792,7 +569,7 @@ def __init__( ) # Deploy data_private_user files storage account - # - This holds the /home and /shared containers that are mounted by workspaces + # - This holds the /home and /mnt/shared containers that are mounted by workspaces # - Azure Files has better NFS support but cannot be accessed with Azure Storage Explorer # - Allows root-squashing to be configured # From https://learn.microsoft.com/en-us/azure/storage/files/files-nfs-protocol @@ -922,9 +699,6 @@ def __init__( self.storage_account_data_configuration_name = ( storage_account_data_configuration.name ) - self.storage_account_data_desired_state_name = ( - storage_account_data_desired_state.name - ) self.managed_identity = identity_key_vault_reader self.password_nexus_admin = Output.secret(password_nexus_admin.result) self.password_database_service_admin = Output.secret( diff --git a/data_safe_haven/infrastructure/programs/sre/desired_state.py b/data_safe_haven/infrastructure/programs/sre/desired_state.py new file mode 100644 index 0000000000..73466d6c5b --- /dev/null +++ b/data_safe_haven/infrastructure/programs/sre/desired_state.py @@ -0,0 +1,224 @@ +"""Pulumi component for SRE desired state""" + +from collections.abc import Mapping, Sequence + +import yaml +from pulumi import ( + ComponentResource, + FileAsset, + Input, + Output, + ResourceOptions, + StringAsset, +) +from pulumi_azure_native import ( + network, + resources, + storage, +) + +from data_safe_haven.functions import ( + alphanumeric, + replace_separators, + sha256hash, + truncate_tokens, +) +from data_safe_haven.infrastructure.common import ( + get_id_from_rg, + get_id_from_subnet, + get_name_from_rg, +) +from data_safe_haven.infrastructure.components import ( + NFSV3BlobContainerComponent, + NFSV3BlobContainerProps, + WrappedNFSV3StorageAccount, +) +from data_safe_haven.resources import resources_path +from data_safe_haven.types import AzureDnsZoneNames + + +class SREDesiredStateProps: + """Properties for SREDesiredStateComponent""" + + def __init__( + self, + admin_ip_addresses: Input[Sequence[str]], + clamav_mirror_hostname: Input[str], + database_service_admin_password: Input[str], + dns_private_zones: Input[dict[str, network.PrivateZone]], + gitea_hostname: Input[str], + hedgedoc_hostname: Input[str], + 
ldap_group_filter: Input[str], + ldap_group_search_base: Input[str], + ldap_server_hostname: Input[str], + ldap_server_port: Input[int], + ldap_user_filter: Input[str], + ldap_user_search_base: Input[str], + location: Input[str], + resource_group: Input[resources.ResourceGroup], + software_repository_hostname: Input[str], + subscription_name: Input[str], + subnet_desired_state: Input[network.GetSubnetResult], + ) -> None: + self.admin_ip_addresses = admin_ip_addresses + self.clamav_mirror_hostname = clamav_mirror_hostname + self.database_service_admin_password = database_service_admin_password + self.dns_private_zones = dns_private_zones + self.gitea_hostname = gitea_hostname + self.hedgedoc_hostname = hedgedoc_hostname + self.ldap_group_filter = ldap_group_filter + self.ldap_group_search_base = ldap_group_search_base + self.ldap_server_hostname = ldap_server_hostname + self.ldap_server_port = Output.from_input(ldap_server_port).apply(str) + self.ldap_user_filter = ldap_user_filter + self.ldap_user_search_base = ldap_user_search_base + self.location = location + self.resource_group_id = Output.from_input(resource_group).apply(get_id_from_rg) + self.resource_group_name = Output.from_input(resource_group).apply( + get_name_from_rg + ) + self.software_repository_hostname = software_repository_hostname + self.subnet_desired_state_id = Output.from_input(subnet_desired_state).apply( + get_id_from_subnet + ) + self.subscription_name = subscription_name + + +class SREDesiredStateComponent(ComponentResource): + """Deploy SRE desired state with Pulumi""" + + def __init__( + self, + name: str, + stack_name: str, + props: SREDesiredStateProps, + opts: ResourceOptions | None = None, + tags: Input[Mapping[str, Input[str]]] | None = None, + ) -> None: + super().__init__("dsh:sre:DesiredStateComponent", name, {}, opts) + child_opts = ResourceOptions.merge(opts, ResourceOptions(parent=self)) + child_tags = {"component": "data"} | (tags if tags else {}) + + # Deploy desired state storage account + # - This holds the /var/local/ansible container that is mounted by workspaces + # - Azure blobs have worse NFS support but can be accessed with Azure Storage Explorer + storage_account = WrappedNFSV3StorageAccount( + f"{self._name}_storage_account", + account_name=alphanumeric( + f"{''.join(truncate_tokens(stack_name.split('-'), 11))}desiredstate{sha256hash(self._name)}" + )[:24], + allowed_ip_addresses=props.admin_ip_addresses, + location=props.location, + resource_group_name=props.resource_group_name, + subnet_id=props.subnet_desired_state_id, + opts=child_opts, + tags=child_tags, + ) + # Deploy desired state share + container_desired_state = NFSV3BlobContainerComponent( + f"{self._name}_blob_desired_state", + NFSV3BlobContainerProps( + acl_user="r-x", + acl_group="r-x", + acl_other="r-x", + # ensure that the above permissions are also set on any newly created + # files (eg. 
with Azure Storage Explorer) + apply_default_permissions=True, + container_name="desiredstate", + resource_group_name=props.resource_group_name, + storage_account=storage_account, + subscription_name=props.subscription_name, + ), + ) + # Create file assets to upload + desired_state_directory = (resources_path / "workspace" / "ansible").absolute() + files_desired_state = [ + ( + FileAsset(str(file_path)), + file_path.name, + str(file_path.relative_to(desired_state_directory)), + ) + for file_path in sorted(desired_state_directory.rglob("*")) + if file_path.is_file() and not file_path.name.startswith(".") + ] + # Upload file assets to desired state container + for file_asset, file_name, file_path in files_desired_state: + storage.Blob( + f"{container_desired_state._name}_blob_{file_name}", + account_name=storage_account.name, + blob_name=file_path, + container_name=container_desired_state.name, + resource_group_name=props.resource_group_name, + source=file_asset, + ) + # Upload ansible vars file + storage.Blob( + f"{container_desired_state._name}_blob_pulumi_vars", + account_name=storage_account.name, + blob_name="vars/pulumi_vars.yaml", + container_name=container_desired_state.name, + resource_group_name=props.resource_group_name, + source=Output.all( + clamav_mirror_hostname=props.clamav_mirror_hostname, + database_service_admin_password=props.database_service_admin_password, + gitea_hostname=props.gitea_hostname, + hedgedoc_hostname=props.hedgedoc_hostname, + ldap_group_filter=props.ldap_group_filter, + ldap_group_search_base=props.ldap_group_search_base, + ldap_server_hostname=props.ldap_server_hostname, + ldap_server_port=props.ldap_server_port, + ldap_user_filter=props.ldap_user_filter, + ldap_user_search_base=props.ldap_user_search_base, + software_repository_hostname=props.software_repository_hostname, + ).apply(lambda kwargs: StringAsset(self.ansible_vars_file(**kwargs))), + ) + # Set up a private endpoint for the desired state storage account + storage_account_endpoint = network.PrivateEndpoint( + f"{storage_account._name}_private_endpoint", + location=props.location, + private_endpoint_name=f"{stack_name}-pep-storage-account-desired-state", + private_link_service_connections=[ + network.PrivateLinkServiceConnectionArgs( + group_ids=["blob"], + name=f"{stack_name}-cnxn-pep-storage-account-desired-state", + private_link_service_id=storage_account.id, + ) + ], + resource_group_name=props.resource_group_name, + subnet=network.SubnetArgs(id=props.subnet_desired_state_id), + opts=ResourceOptions.merge( + child_opts, + ResourceOptions( + ignore_changes=["custom_dns_configs"], + parent=storage_account, + ), + ), + tags=child_tags, + ) + # Add a private DNS record for each desired state endpoint custom DNS config + network.PrivateDnsZoneGroup( + f"{storage_account._name}_private_dns_zone_group", + private_dns_zone_configs=[ + network.PrivateDnsZoneConfigArgs( + name=replace_separators( + f"{stack_name}-storage-account-desired-state-to-{dns_zone_name}", + "-", + ), + private_dns_zone_id=props.dns_private_zones[dns_zone_name].id, + ) + for dns_zone_name in AzureDnsZoneNames.STORAGE_ACCOUNT + ], + private_dns_zone_group_name=f"{stack_name}-dzg-storage-account-desired-state", + private_endpoint_name=storage_account_endpoint.name, + resource_group_name=props.resource_group_name, + opts=ResourceOptions.merge( + child_opts, + ResourceOptions(parent=storage_account), + ), + ) + + self.storage_account_name = storage_account.name + + @staticmethod + def ansible_vars_file(**kwargs: str) -> str: + 
return yaml.safe_dump(kwargs, explicit_start=True, indent=2) diff --git a/data_safe_haven/infrastructure/programs/sre/entra.py b/data_safe_haven/infrastructure/programs/sre/entra.py new file mode 100644 index 0000000000..1f44995f9f --- /dev/null +++ b/data_safe_haven/infrastructure/programs/sre/entra.py @@ -0,0 +1,40 @@ +"""Pulumi component for SRE Entra resources""" + +from collections.abc import Mapping + +from pulumi import ComponentResource, ResourceOptions +from pulumi_azuread import Group + +from data_safe_haven.functions import replace_separators + + +class SREEntraProps: + """Properties for SREEntraComponent""" + + def __init__( + self, + group_names: Mapping[str, str], + ) -> None: + self.group_names = group_names + + +class SREEntraComponent(ComponentResource): + """Deploy SRE Entra resources with Pulumi""" + + def __init__( + self, + name: str, + props: SREEntraProps, + opts: ResourceOptions | None = None, + ) -> None: + super().__init__("dsh:sre:EntraComponent", name, {}, opts) + + for group_id, group_description in props.group_names.items(): + Group( + replace_separators(f"{self._name}_group_{group_id}", "_"), + description=group_description, + display_name=group_description, + mail_enabled=False, + prevent_duplicate_names=True, + security_enabled=True, + ) diff --git a/data_safe_haven/infrastructure/programs/sre/networking.py b/data_safe_haven/infrastructure/programs/sre/networking.py index 5578d791a0..e6c308f587 100644 --- a/data_safe_haven/infrastructure/programs/sre/networking.py +++ b/data_safe_haven/infrastructure/programs/sre/networking.py @@ -2,8 +2,8 @@ from collections.abc import Mapping -from pulumi import ComponentResource, Input, Output, ResourceOptions -from pulumi_azure_native import network +from pulumi import ComponentResource, Input, InvokeOptions, Output, ResourceOptions +from pulumi_azure_native import network, provider from data_safe_haven.functions import alphanumeric, replace_separators from data_safe_haven.infrastructure.common import ( @@ -12,7 +12,7 @@ get_id_from_vnet, get_name_from_vnet, ) -from data_safe_haven.types import NetworkingPriorities, Ports +from data_safe_haven.types import AzureServiceTag, NetworkingPriorities, Ports class SRENetworkingProps: @@ -26,10 +26,12 @@ def __init__( location: Input[str], resource_group_name: Input[str], shm_fqdn: Input[str], + shm_location: Input[str], shm_resource_group_name: Input[str], + shm_subscription_id: Input[str], shm_zone_name: Input[str], sre_name: Input[str], - user_public_ip_ranges: Input[list[str]], + user_public_ip_ranges: Input[list[str]] | AzureServiceTag, ) -> None: # Other variables self.dns_private_zones = dns_private_zones @@ -43,7 +45,9 @@ def __init__( self.location = location self.resource_group_name = resource_group_name self.shm_fqdn = shm_fqdn + self.shm_location = shm_location self.shm_resource_group_name = shm_resource_group_name + self.shm_subscription_id = shm_subscription_id self.shm_zone_name = shm_zone_name self.sre_name = sre_name self.user_public_ip_ranges = user_public_ip_ranges @@ -64,6 +68,13 @@ def __init__( child_opts = ResourceOptions.merge(opts, ResourceOptions(parent=self)) child_tags = {"component": "networking"} | (tags if tags else {}) + if isinstance(props.user_public_ip_ranges, list): + user_public_ip_ranges = props.user_public_ip_ranges + user_service_tag = None + else: + user_public_ip_ranges = None + user_service_tag = props.user_public_ip_ranges + # Define route table route_table = network.RouteTable( f"{self._name}_route_table", @@ -121,7 +132,8 @@ def 
__init__( name="AllowUsersInternetInbound", priority=NetworkingPriorities.AUTHORISED_EXTERNAL_USER_IPS, protocol=network.SecurityRuleProtocol.TCP, - source_address_prefixes=props.user_public_ip_ranges, + source_address_prefix=user_service_tag, + source_address_prefixes=user_public_ip_ranges, source_port_range="*", ), network.SecurityRuleArgs( @@ -483,10 +495,10 @@ def __init__( opts=child_opts, tags=child_tags, ) - nsg_data_desired_state = network.NetworkSecurityGroup( - f"{self._name}_nsg_data_desired_state", + nsg_desired_state = network.NetworkSecurityGroup( + f"{self._name}_nsg_desired_state", location=props.location, - network_security_group_name=f"{stack_name}-nsg-data-desired-state", + network_security_group_name=f"{stack_name}-nsg-desired-state", resource_group_name=props.resource_group_name, security_rules=[ # Inbound @@ -1454,7 +1466,7 @@ def __init__( network.SecurityRuleArgs( access=network.SecurityRuleAccess.ALLOW, description="Allow outbound connections to desired state data endpoints.", - destination_address_prefix=SREIpRanges.data_desired_state.prefix, + destination_address_prefix=SREIpRanges.desired_state.prefix, destination_port_range="*", direction=network.SecurityRuleDirection.OUTBOUND, name="AllowDataDesiredStateEndpointsOutbound", @@ -1558,7 +1570,7 @@ def __init__( subnet_apt_proxy_server_name = "AptProxyServerSubnet" subnet_clamav_mirror_name = "ClamAVMirrorSubnet" subnet_data_configuration_name = "DataConfigurationSubnet" - subnet_data_desired_state_name = "DataDesiredStateSubnet" + subnet_desired_state_name = "DataDesiredStateSubnet" subnet_data_private_name = "DataPrivateSubnet" subnet_firewall_name = "AzureFirewallSubnet" subnet_firewall_management_name = "AzureFirewallManagementSubnet" @@ -1643,10 +1655,10 @@ def __init__( ), # Desired state data subnet network.SubnetArgs( - address_prefix=SREIpRanges.data_desired_state.prefix, - name=subnet_data_desired_state_name, + address_prefix=SREIpRanges.desired_state.prefix, + name=subnet_desired_state_name, network_security_group=network.NetworkSecurityGroupArgs( - id=nsg_data_desired_state.id + id=nsg_desired_state.id ), route_table=network.RouteTableArgs(id=route_table.id), service_endpoints=[ @@ -1834,6 +1846,13 @@ def __init__( ) # Define SRE DNS zone + shm_provider = provider.Provider( + "shm_provider", + provider.ProviderArgs( + location=props.shm_location, + subscription_id=props.shm_subscription_id, + ), + ) shm_dns_zone = Output.all( resource_group_name=props.shm_resource_group_name, zone_name=props.shm_zone_name, @@ -1841,6 +1860,9 @@ def __init__( lambda kwargs: network.get_zone( resource_group_name=kwargs["resource_group_name"], zone_name=kwargs["zone_name"], + opts=InvokeOptions( + provider=shm_provider, + ), ) ) sre_subdomain = Output.from_input(props.sre_name).apply( @@ -1867,7 +1889,11 @@ def __init__( ttl=3600, zone_name=shm_dns_zone.name, opts=ResourceOptions.merge( - child_opts, ResourceOptions(parent=sre_dns_zone) + child_opts, + ResourceOptions( + parent=sre_dns_zone, + provider=shm_provider, + ), ), ) network.RecordSet( @@ -1966,13 +1992,13 @@ def __init__( resource_group_name=props.resource_group_name, virtual_network_name=sre_virtual_network.name, ) - self.subnet_data_desired_state = network.get_subnet_output( - subnet_name=subnet_data_desired_state_name, + self.subnet_desired_state = network.get_subnet_output( + subnet_name=subnet_desired_state_name, resource_group_name=props.resource_group_name, virtual_network_name=sre_virtual_network.name, ) - self.subnet_data_desired_state = 
network.get_subnet_output( - subnet_name=subnet_data_desired_state_name, + self.subnet_desired_state = network.get_subnet_output( + subnet_name=subnet_desired_state_name, resource_group_name=props.resource_group_name, virtual_network_name=sre_virtual_network.name, ) diff --git a/data_safe_haven/infrastructure/programs/sre/remote_desktop.py b/data_safe_haven/infrastructure/programs/sre/remote_desktop.py index 89ef40d7f5..3be1207c77 100644 --- a/data_safe_haven/infrastructure/programs/sre/remote_desktop.py +++ b/data_safe_haven/infrastructure/programs/sre/remote_desktop.py @@ -297,7 +297,7 @@ def __init__( ), ), containerinstance.ContainerArgs( - image="ghcr.io/alan-turing-institute/guacamole-user-sync:v0.5.0", + image="ghcr.io/alan-turing-institute/guacamole-user-sync:v0.6.0", name="guacamole-user-sync"[:63], environment_variables=[ containerinstance.EnvironmentVariableArgs( diff --git a/data_safe_haven/infrastructure/programs/sre/workspaces.py b/data_safe_haven/infrastructure/programs/sre/workspaces.py index b48de97668..d6c22bce53 100644 --- a/data_safe_haven/infrastructure/programs/sre/workspaces.py +++ b/data_safe_haven/infrastructure/programs/sre/workspaces.py @@ -24,24 +24,13 @@ def __init__( self, admin_password: Input[str], apt_proxy_server_hostname: Input[str], - clamav_mirror_hostname: Input[str], data_collection_endpoint_id: Input[str], data_collection_rule_id: Input[str], - database_service_admin_password: Input[str], - gitea_hostname: Input[str], - hedgedoc_hostname: Input[str], - ldap_group_filter: Input[str], - ldap_group_search_base: Input[str], - ldap_server_hostname: Input[str], - ldap_server_port: Input[int], - ldap_user_filter: Input[str], - ldap_user_search_base: Input[str], location: Input[str], maintenance_configuration_id: Input[str], resource_group_name: Input[str], - software_repository_hostname: Input[str], sre_name: Input[str], - storage_account_data_desired_state_name: Input[str], + storage_account_desired_state_name: Input[str], storage_account_data_private_sensitive_name: Input[str], storage_account_data_private_user_name: Input[str], subnet_workspaces: Input[network.GetSubnetResult], @@ -52,26 +41,13 @@ def __init__( self.admin_password = Output.secret(admin_password) self.admin_username = "dshadmin" self.apt_proxy_server_hostname = apt_proxy_server_hostname - self.clamav_mirror_hostname = clamav_mirror_hostname self.data_collection_rule_id = data_collection_rule_id self.data_collection_endpoint_id = data_collection_endpoint_id - self.database_service_admin_password = database_service_admin_password - self.gitea_hostname = gitea_hostname - self.hedgedoc_hostname = hedgedoc_hostname - self.ldap_group_filter = ldap_group_filter - self.ldap_group_search_base = ldap_group_search_base - self.ldap_server_hostname = ldap_server_hostname - self.ldap_server_port = Output.from_input(ldap_server_port).apply(str) - self.ldap_user_filter = ldap_user_filter - self.ldap_user_search_base = ldap_user_search_base self.location = location self.maintenance_configuration_id = maintenance_configuration_id self.resource_group_name = resource_group_name - self.software_repository_hostname = software_repository_hostname self.sre_name = sre_name - self.storage_account_data_desired_state_name = ( - storage_account_data_desired_state_name - ) + self.storage_account_desired_state_name = storage_account_desired_state_name self.storage_account_data_private_user_name = ( storage_account_data_private_user_name ) @@ -118,18 +94,7 @@ def __init__( # Load cloud-init file cloudinit = Output.all( 
apt_proxy_server_hostname=props.apt_proxy_server_hostname, - clamav_mirror_hostname=props.clamav_mirror_hostname, - database_service_admin_password=props.database_service_admin_password, - gitea_hostname=props.gitea_hostname, - hedgedoc_hostname=props.hedgedoc_hostname, - ldap_group_filter=props.ldap_group_filter, - ldap_group_search_base=props.ldap_group_search_base, - ldap_server_hostname=props.ldap_server_hostname, - ldap_server_port=props.ldap_server_port, - ldap_user_filter=props.ldap_user_filter, - ldap_user_search_base=props.ldap_user_search_base, - software_repository_hostname=props.software_repository_hostname, - storage_account_data_desired_state_name=props.storage_account_data_desired_state_name, + storage_account_desired_state_name=props.storage_account_desired_state_name, storage_account_data_private_user_name=props.storage_account_data_private_user_name, storage_account_data_private_sensitive_name=props.storage_account_data_private_sensitive_name, ).apply(lambda kwargs: self.template_cloudinit(**kwargs)) diff --git a/data_safe_haven/infrastructure/project_manager.py b/data_safe_haven/infrastructure/project_manager.py index f9706ec096..a6d5af805b 100644 --- a/data_safe_haven/infrastructure/project_manager.py +++ b/data_safe_haven/infrastructure/project_manager.py @@ -146,6 +146,10 @@ def add_option(self, name: str, value: str, *, replace: bool) -> None: """Add a public configuration option""" self._options[name] = (value, False, replace) + def add_secret(self, name: str, value: str, *, replace: bool) -> None: + """Add a secret configuration option""" + self._options[name] = (value, True, replace) + def apply_config_options(self) -> None: """Set Pulumi config options""" try: @@ -156,8 +160,8 @@ def apply_config_options(self) -> None: else: self.ensure_config(name, value, secret=is_secret) self._options = {} - except Exception as exc: - msg = "Applying Pulumi configuration options failed.." + except DataSafeHavenError as exc: + msg = "Applying Pulumi configuration options failed." raise DataSafeHavenPulumiError(msg) from exc def cancel(self) -> None: @@ -282,10 +286,26 @@ def destroy(self) -> None: raise DataSafeHavenPulumiError(msg) from exc def ensure_config(self, name: str, value: str, *, secret: bool) -> None: - """Ensure that config values have been set, setting them if they do not exist""" + """ + Ensure that config values have been set. + + Values will be set if they do not exist. + + If the value is already set and does not match the `value` argument, + `DataSafeHavenPulumiError` will be raised. + """ try: - self.stack.get_config(name) + # Check whether a value is already set for this parameter + existing_value = self.stack.get_config(name).value + # ... if it is, ensure it is consistent with the incoming value + if existing_value != value: + msg = ( + f"Unchangeable configuration option '{name}' not consistent, " + f"your configuration: '{value}', Pulumi workspace: '{existing_value}'." 
+ ) + raise DataSafeHavenPulumiError(msg) except automation.CommandError: + # Set value if it does not already exist self.set_config(name, value, secret=secret) def evaluate(self, result: str) -> None: diff --git a/data_safe_haven/logging/plain_file_handler.py b/data_safe_haven/logging/plain_file_handler.py index c41d0e5ffc..ffce4a551c 100644 --- a/data_safe_haven/logging/plain_file_handler.py +++ b/data_safe_haven/logging/plain_file_handler.py @@ -16,16 +16,23 @@ def __init__(self, *args: Any, **kwargs: Any): super().__init__(*args, **kwargs) @staticmethod - def strip_formatting(input_string: str) -> str: + def strip_rich_formatting(input_string: str) -> str: """Strip console markup formatting from a string""" text = Text.from_markup(input_string) text.spans = [] return str(text) + @staticmethod + def strip_ansi_escapes(input_string: str) -> str: + """Strip ANSI escape sequences from a string""" + text = Text.from_ansi(input_string) + text.spans = [] + return str(text) + def emit(self, record: logging.LogRecord) -> None: """Emit a record without formatting""" if isinstance(record.msg, Text): # Convert rich.text.Text objects to strings record.msg = str(record.msg) - record.msg = self.strip_formatting(record.msg) + record.msg = self.strip_ansi_escapes(self.strip_rich_formatting(record.msg)) super().emit(record) diff --git a/data_safe_haven/provisioning/sre_provisioning_manager.py b/data_safe_haven/provisioning/sre_provisioning_manager.py index 1111bc573f..7c39046b86 100644 --- a/data_safe_haven/provisioning/sre_provisioning_manager.py +++ b/data_safe_haven/provisioning/sre_provisioning_manager.py @@ -71,11 +71,6 @@ def available_vm_skus(self) -> dict[str, dict[str, Any]]: self._available_vm_skus = azure_sdk.list_available_vm_skus(self.location) return self._available_vm_skus - def create_security_groups(self) -> None: - """Create groups in Entra ID""" - for group_name in self.security_group_params.values(): - self.graph_api.create_group(group_name) - def restart_remote_desktop_containers(self) -> None: """Restart the Guacamole container group""" guacamole_provisioner = AzureContainerInstance( @@ -137,6 +132,5 @@ def update_remote_desktop_connections(self) -> None: def run(self) -> None: """Apply SRE configuration""" - self.create_security_groups() self.update_remote_desktop_connections() self.restart_remote_desktop_containers() diff --git a/data_safe_haven/resources/workspace/ansible/desired_state.yaml b/data_safe_haven/resources/workspace/ansible/desired_state.yaml index 4f47ccbe11..9deaf9d93d 100644 --- a/data_safe_haven/resources/workspace/ansible/desired_state.yaml +++ b/data_safe_haven/resources/workspace/ansible/desired_state.yaml @@ -2,342 +2,53 @@ - name: Desired state configuration hosts: localhost become: true + vars_files: + - vars/pulumi_vars.yaml tasks: - - name: Update package cache - tags: apt - ansible.builtin.apt: - update_cache: true - cache_valid_time: 600 + - name: Install packages + ansible.builtin.import_tasks: tasks/packages.yaml + tags: packages - - name: List apt packages to install - tags: apt - ansible.builtin.debug: - msg: "{{ apt_packages.common | union(apt_packages[ansible_facts.distribution_release]) }}" + - name: Disable Ubuntu Pro services + ansible.builtin.import_tasks: tasks/ubuntu_pro.yaml + tags: ubuntu_pro - - name: Install apt packages - tags: apt - ansible.builtin.apt: - name: "{{ apt_packages.common | union(apt_packages[ansible_facts.distribution_release]) }}" - state: present - async: 3600 - poll: 30 - - - name: Install deb packages - tags: apt - 
ansible.builtin.script: - executable: /bin/bash - cmd: "/desired_state/install_deb.sh {{ item.source }} {{ item.filename }} {{ item.sha256 }}" - creates: "{{ item.creates }}" - loop: "{{ deb_packages[ansible_facts.distribution_release] }}" - - - name: Install snap packages - community.general.snap: - name: "{{ item.name }}" - classic: "{{ item.classic }}" - state: present - loop: "{{ snap_packages }}" - - # https://ubuntu.com/server/docs/nvidia-drivers-installation#installing-the-drivers-on-servers-andor-for-computing-purposes - - name: Use ubuntu-drivers to install Nvidia drivers # noqa: no-handler - tags: nvidia - ansible.builtin.command: - cmd: ubuntu-drivers install --gpgpu - creates: /usr/bin/nvidia-smi - - - name: Disable and stop Ubuntu Pro services - ansible.builtin.systemd: - name: "{{ item }}" - state: stopped - enabled: false - loop: - - apt-news - - esm-cache - - - name: Enable bash autocompletion globally - ansible.builtin.blockinfile: - path: /etc/bash.bashrc - block: | - # enable bash completion in interactive shells - if [ ! $(shopt -oq posix) ]; then - if [ -f /usr/share/bash-completion/bash_completion ]; then - . /usr/share/bash-completion/bash_completion - elif [ -f /etc/bash_completion ]; then - . /etc/bash_completion - fi - fi - - # This will create directories if src or dest ends in '/' - - name: Copy bashrc skeleton - ansible.builtin.copy: - src: etc/skel/bashrc - dest: /etc/skel/.bashrc - mode: '0755' - - # This will create directories if src or dest ends in '/' - - name: Copy xsession skeleton - ansible.builtin.copy: - src: etc/skel/xsession - dest: /etc/skel/.xsession - mode: '0444' - - - name: Add ldap to /etc/nsswitch.conf - ansible.builtin.replace: - path: /etc/nsswitch.conf - regexp: '^(passwd|group|shadow)(:.*)(? /etc/audit/rules.d/50-privileged.rules - creates: /etc/audit/rules.d/50-privileged.rules - notify: Restart auditd - - # This will create directories if src or dest ends in '/' - - name: Copy ClamAV daemon configuration - ansible.builtin.copy: - src: etc/clamav/clamd.conf - dest: /etc/clamav/clamd.conf - mode: '0444' - owner: clamav - group: adm - register: clamd - - - name: Enable and start ClamAV daemon - ansible.builtin.systemd: - name: clamav-daemon - enabled: true - state: started - - name: Restart ClamAV daemon # noqa: no-handler - ansible.builtin.systemd: - name: clamav-daemon - state: restarted - when: clamd.changed - - - name: Set freshclam private mirror - ansible.builtin.lineinfile: - path: /etc/clamav/freshclam.conf - line: "{{ lookup('file', '/etc/clamav/freshclam-mirror.conf') }}" - state: present + - name: Configure sshd + ansible.builtin.import_tasks: tasks/sshd.yaml + tags: sshd - # This is required to fetch definitions for the clamav daemon to run - - name: Initial freshclam run # noqa: command-instead-of-module - ansible.builtin.shell: - cmd: | - systemctl stop clamav-freshclam && freshclam && systemctl start clamav-freshclam - creates: '/var/lib/clamav/main.{c[vl]d,inc}' + - name: Configure ClamAV + ansible.builtin.import_tasks: tasks/clamav.yaml + tags: clamav - # This will create directories if src or dest ends in '/' - - name: Copy ClamAV services directory - ansible.builtin.copy: - src: "etc/systemd/system/" - dest: /etc/systemd/system/ - mode: '0644' - notify: Systemd daemon reload + - name: Globally configure default user settings + ansible.builtin.import_tasks: tasks/user_config.yaml + tags: user_conf - - name: Enable and start freshclam - ansible.builtin.systemd: - name: clamav-freshclam - state: started - enabled: true + - 
name: Configure LDAP + ansible.builtin.import_tasks: tasks/ldap.yaml + tags: ldap - - name: Enable and start ClamAV on access scan - ansible.builtin.systemd: - name: clamav-clamonacc - enabled: true - state: started + - name: Configure Xrdp + ansible.builtin.import_tasks: tasks/xrdp.yaml + tags: xrdp - - name: Enable and start ClamAV timer - ansible.builtin.systemd: - name: clamav-clamdscan.timer - enabled: true - state: started + - name: Configure Xfce + ansible.builtin.import_tasks: tasks/xfce.yaml + tags: xfce - # This will create directories if src or dest ends in '/' - - name: Copy smoke test files directory - ansible.builtin.copy: - src: "usr/local/smoke_tests/" - dest: /usr/local/smoke_tests/ - mode: '0755' + - name: Configure package proxies + ansible.builtin.import_tasks: tasks/package_proxy.yaml + tags: package_proxies + - name: Provision smoke tests + ansible.builtin.import_tasks: tasks/smoke_tests.yaml + tags: smoke_tests handlers: - name: Restart auditd diff --git a/data_safe_haven/resources/workspace/ansible/files/etc/skel/Desktop/input.desktop b/data_safe_haven/resources/workspace/ansible/files/etc/skel/Desktop/input.desktop index 97e64b5b95..9d94e38319 100644 --- a/data_safe_haven/resources/workspace/ansible/files/etc/skel/Desktop/input.desktop +++ b/data_safe_haven/resources/workspace/ansible/files/etc/skel/Desktop/input.desktop @@ -4,4 +4,4 @@ Type=Link Name=input Comment= Icon=drive-removable-media -URL=/data +URL=/mnt/input diff --git a/data_safe_haven/resources/workspace/ansible/files/etc/skel/Desktop/output.desktop b/data_safe_haven/resources/workspace/ansible/files/etc/skel/Desktop/output.desktop index 4dc474784a..e34950528e 100644 --- a/data_safe_haven/resources/workspace/ansible/files/etc/skel/Desktop/output.desktop +++ b/data_safe_haven/resources/workspace/ansible/files/etc/skel/Desktop/output.desktop @@ -4,4 +4,4 @@ Type=Link Name=output Comment= Icon=drive-removable-media -URL=/output +URL=/mnt/output diff --git a/data_safe_haven/resources/workspace/ansible/files/etc/skel/Desktop/shared.desktop b/data_safe_haven/resources/workspace/ansible/files/etc/skel/Desktop/shared.desktop index 3e4e97fde7..27552b271d 100644 --- a/data_safe_haven/resources/workspace/ansible/files/etc/skel/Desktop/shared.desktop +++ b/data_safe_haven/resources/workspace/ansible/files/etc/skel/Desktop/shared.desktop @@ -4,4 +4,4 @@ Type=Link Name=shared Comment= Icon=drive-removable-media -URL=/shared +URL=/mnt/shared diff --git a/data_safe_haven/resources/workspace/ansible/files/usr/local/smoke_tests/run_all_tests.bats b/data_safe_haven/resources/workspace/ansible/files/usr/local/smoke_tests/run_all_tests.bats index bc73d824f7..5ce9692c67 100644 --- a/data_safe_haven/resources/workspace/ansible/files/usr/local/smoke_tests/run_all_tests.bats +++ b/data_safe_haven/resources/workspace/ansible/files/usr/local/smoke_tests/run_all_tests.bats @@ -42,20 +42,24 @@ check_db_credentials() { # Mounted drives # -------------- -@test "Mounted drives (/data)" { - run bash test_mounted_drives.sh -d data +@test "Mounted drives (/mnt/input)" { + run bash test_mounted_drives.sh -d mnt/input [ "$status" -eq 0 ] } @test "Mounted drives (/home)" { run bash test_mounted_drives.sh -d home [ "$status" -eq 0 ] } -@test "Mounted drives (/output)" { - run bash test_mounted_drives.sh -d output +@test "Mounted drives (/mnt/output)" { + run bash test_mounted_drives.sh -d mnt/output [ "$status" -eq 0 ] } -@test "Mounted drives (/shared)" { - run bash test_mounted_drives.sh -d shared +@test "Mounted drives (/mnt/shared)" { + 
run bash test_mounted_drives.sh -d mnt/shared + [ "$status" -eq 0 ] +} +@test "Mounted drives (/var/local/ansible)" { + run bash test_mounted_drives.sh -d var/local/ansible [ "$status" -eq 0 ] } diff --git a/data_safe_haven/resources/workspace/ansible/files/usr/local/smoke_tests/test_mounted_drives.sh b/data_safe_haven/resources/workspace/ansible/files/usr/local/smoke_tests/test_mounted_drives.sh index a1812934b9..c74a7b4b48 100644 --- a/data_safe_haven/resources/workspace/ansible/files/usr/local/smoke_tests/test_mounted_drives.sh +++ b/data_safe_haven/resources/workspace/ansible/files/usr/local/smoke_tests/test_mounted_drives.sh @@ -26,7 +26,7 @@ CAN_DELETE="$([[ "$(touch "${directory_path}/${testfile}" 2>&1 1>/dev/null && rm # Check that permissions are as expected for each directory case "$directory" in - data) + mnt/input) if [ "$CAN_CREATE" = 1 ]; then echo "Able to create files in ${directory_path}!"; nfailed=$((nfailed + 1)); fi if [ "$CAN_WRITE" = 1 ]; then echo "Able to write files in ${directory_path}!"; nfailed=$((nfailed + 1)); fi if [ "$CAN_DELETE" = 1 ]; then echo "Able to delete files in ${directory_path}!"; nfailed=$((nfailed + 1)); fi @@ -38,18 +38,24 @@ case "$directory" in if [ "$CAN_DELETE" = 0 ]; then echo "Unable to delete files in ${directory_path}!"; nfailed=$((nfailed + 1)); fi ;; - output) + mnt/output) if [ "$CAN_CREATE" = 0 ]; then echo "Unable to create files in ${directory_path}!"; nfailed=$((nfailed + 1)); fi if [ "$CAN_WRITE" = 0 ]; then echo "Unable to write files in ${directory_path}!"; nfailed=$((nfailed + 1)); fi if [ "$CAN_DELETE" = 0 ]; then echo "Unable to delete files in ${directory_path}!"; nfailed=$((nfailed + 1)); fi ;; - shared) + mnt/shared) if [ "$CAN_CREATE" = 0 ]; then echo "Unable to create files in ${directory_path}!"; nfailed=$((nfailed + 1)); fi if [ "$CAN_WRITE" = 0 ]; then echo "Unable to write files in ${directory_path}!"; nfailed=$((nfailed + 1)); fi if [ "$CAN_DELETE" = 0 ]; then echo "Unable to delete files in ${directory_path}!"; nfailed=$((nfailed + 1)); fi ;; + var/local/ansible) + if [ "$CAN_CREATE" = 1 ]; then echo "Able to create files in ${directory_path}!"; nfailed=$((nfailed + 1)); fi + if [ "$CAN_WRITE" = 1 ]; then echo "Able to write files in ${directory_path}!"; nfailed=$((nfailed + 1)); fi + if [ "$CAN_DELETE" = 1 ]; then echo "Able to delete files in ${directory_path}!"; nfailed=$((nfailed + 1)); fi + ;; + *) echo "Usage: $0 -d [directory]" exit 1 diff --git a/data_safe_haven/resources/workspace/ansible/host_vars/localhost.yaml b/data_safe_haven/resources/workspace/ansible/host_vars/localhost.yaml index 1baab38e7f..55ee5e6340 100644 --- a/data_safe_haven/resources/workspace/ansible/host_vars/localhost.yaml +++ b/data_safe_haven/resources/workspace/ansible/host_vars/localhost.yaml @@ -137,17 +137,14 @@ deb_packages: - source: https://download1.rstudio.org/electron/jammy/amd64 filename: rstudio-2024.04.2-764-amd64.deb sha256: 1d0bd2f54215f514a8a78a4d035c7804218bb8fafa417aa5083d341e174e6452 - creates: /usr/bin/rstudio noble: - source: https://download1.rstudio.org/electron/jammy/amd64 filename: rstudio-2024.04.2-764-amd64.deb sha256: 1d0bd2f54215f514a8a78a4d035c7804218bb8fafa417aa5083d341e174e6452 - creates: /usr/bin/rstudio snap_packages: - name: codium classic: true - - name: dbeaver-ce - classic: false + - name: beekeeper-studio - name: pycharm-community classic: true diff --git a/data_safe_haven/resources/workspace/ansible/install_deb.sh b/data_safe_haven/resources/workspace/ansible/install_deb.sh deleted file mode 
100644 index c3d4fb9919..0000000000 --- a/data_safe_haven/resources/workspace/ansible/install_deb.sh +++ /dev/null @@ -1,33 +0,0 @@ -#! /bin/bash - -# Require three arguments: remote name, debfile name and sha256 hash -if [ $# -ne 3 ]; then - echo "FATAL: Incorrect number of arguments" - exit 1 -fi -PACKAGE_REMOTE=$1 -PACKAGE_DEBFILE=$2 -PACKAGE_HASH=$3 - -# Download and verify the .deb file -echo "Downloading and verifying deb file ${PACKAGE_DEBFILE}" -mkdir -p /tmp/build/ -wget -nv "${PACKAGE_REMOTE}/${PACKAGE_DEBFILE}" -P /tmp/build/ -ls -alh "/tmp/build/${PACKAGE_DEBFILE}" -echo "$PACKAGE_HASH /tmp/build/${PACKAGE_DEBFILE}" > "/tmp/${PACKAGE_DEBFILE}_sha256.hash" -if [ "$(sha256sum -c "/tmp/${PACKAGE_DEBFILE}_sha256.hash" | grep FAILED)" != "" ]; then - echo "FATAL: Checksum did not match expected for $PACKAGE_DEBFILE" - exit 1 -fi - -# Wait until the package repository is not in use -while ! apt-get check >/dev/null 2>&1; do - echo "Waiting for another installation process to finish..." - sleep 1 -done - -# Install and cleanup -echo "Installing deb file: ${PACKAGE_DEBFILE}" -apt install -y "/tmp/build/${PACKAGE_DEBFILE}" -echo "Cleaning up" -rm "/tmp/build/${PACKAGE_DEBFILE}" diff --git a/data_safe_haven/resources/workspace/ansible/tasks/auditd.yaml b/data_safe_haven/resources/workspace/ansible/tasks/auditd.yaml new file mode 100644 index 0000000000..212c63aa1e --- /dev/null +++ b/data_safe_haven/resources/workspace/ansible/tasks/auditd.yaml @@ -0,0 +1,32 @@ +--- + +- name: Enable and start auditd service + ansible.builtin.systemd: + name: auditd + enabled: true + state: started + +- name: Get minimum uid # noqa: inline-env-var + ansible.builtin.command: + cmd: awk '/^\s*UID_MIN/{print $2}' /etc/login.defs + register: uid_min + changed_when: false + +- name: Template auditd rules + ansible.builtin.template: + src: etc/audit/rules.d/audit.rules.j2 + dest: /etc/audit/rules.d/audit.rules + mode: '0640' + notify: Restart auditd + +- name: Copy auditd privileged executable rules script + ansible.builtin.copy: + src: usr/local/bin/privileged-rules + dest: /usr/local/bin/privileged-rules + mode: '0500' + +- name: Generate auditd privileged executable rules + ansible.builtin.shell: + cmd: /usr/local/bin/privileged-rules > /etc/audit/rules.d/50-privileged.rules + creates: /etc/audit/rules.d/50-privileged.rules + notify: Restart auditd diff --git a/data_safe_haven/resources/workspace/ansible/tasks/clamav.yaml b/data_safe_haven/resources/workspace/ansible/tasks/clamav.yaml new file mode 100644 index 0000000000..4bb887f571 --- /dev/null +++ b/data_safe_haven/resources/workspace/ansible/tasks/clamav.yaml @@ -0,0 +1,62 @@ +--- + +- name: Copy ClamAV daemon configuration + ansible.builtin.copy: + src: etc/clamav/clamd.conf + dest: /etc/clamav/clamd.conf + mode: '0444' + owner: clamav + group: adm + register: clamd + +- name: Enable and start ClamAV daemon + ansible.builtin.systemd: + name: clamav-daemon + enabled: true + state: started + +- name: Restart ClamAV daemon # noqa: no-handler + ansible.builtin.systemd: + name: clamav-daemon + state: restarted + when: clamd.changed + +- name: Set freshclam private mirror + ansible.builtin.lineinfile: + path: /etc/clamav/freshclam.conf + line: "PrivateMirror {{ clamav_mirror_hostname }}" + state: present + +# This is required to fetch definitions for the clamav daemon to run +- name: Initial freshclam run # noqa: command-instead-of-module + ansible.builtin.shell: + cmd: | + systemctl stop clamav-freshclam && freshclam && systemctl start clamav-freshclam + 
creates: '/var/lib/clamav/main.{c[vl]d,inc}' + +- name: Copy ClamAV services + ansible.builtin.copy: + src: "{{ item }}" + dest: /etc/systemd/system/ + mode: '0644' + with_fileglob: + - "etc/systemd/system/clamav-*" + notify: Systemd daemon reload + +- name: Enable and start freshclam + ansible.builtin.systemd: + name: clamav-freshclam + state: started + enabled: true + +- name: Enable and start ClamAV on access scan + ansible.builtin.systemd: + name: clamav-clamonacc + enabled: true + state: started + +- name: Enable and start ClamAV timer + ansible.builtin.systemd: + name: clamav-clamdscan.timer + enabled: true + state: started diff --git a/data_safe_haven/resources/workspace/ansible/tasks/install_deb.yaml b/data_safe_haven/resources/workspace/ansible/tasks/install_deb.yaml new file mode 100644 index 0000000000..f79da81b88 --- /dev/null +++ b/data_safe_haven/resources/workspace/ansible/tasks/install_deb.yaml @@ -0,0 +1,14 @@ +--- + +- name: Fetch deb + ansible.builtin.get_url: + url: "{{ item.source }}/{{ item.filename }}" + dest: "/tmp/{{ item.filename }}" + checksum: "sha256:{{ item.sha256 }}" + mode: '0400' + register: debfile + +- name: Install deb + ansible.builtin.apt: + deb: "{{ debfile.dest }}" + state: present diff --git a/data_safe_haven/resources/workspace/ansible/tasks/ldap.yaml b/data_safe_haven/resources/workspace/ansible/tasks/ldap.yaml new file mode 100644 index 0000000000..7698b1fd2b --- /dev/null +++ b/data_safe_haven/resources/workspace/ansible/tasks/ldap.yaml @@ -0,0 +1,34 @@ +--- + +- name: Add ldap to /etc/nsswitch.conf + ansible.builtin.replace: + path: /etc/nsswitch.conf + regexp: '^(passwd|group|shadow)(:.*)(?=== Mounting all external volumes... ===<" - grep -v -e '^[[:space:]]*$' /etc/fstab | sed 's|^| /etc/fstab |' - mount -fav - - while (! mountpoint -q /data); do sleep 5; mount /data; done - - while (! mountpoint -q /desired_state); do sleep 5; mount /desired_state; done + - while (! mountpoint -q /mnt/input); do sleep 5; mount /mnt/input; done + - while (! mountpoint -q /var/local/ansible); do sleep 5; mount /var/local/ansible; done - while (! mountpoint -q /home); do sleep 5; mount /home; done - - while (! mountpoint -q /output); do sleep 5; mount /output; done - - while (! mountpoint -q /shared); do sleep 5; mount /shared; done + - while (! mountpoint -q /mnt/output); do sleep 5; mount /mnt/output; done + - while (! mountpoint -q /mnt/shared); do sleep 5; mount /mnt/shared; done - findmnt # Enable and start desired state timer @@ -160,5 +92,10 @@ runcmd: # Run desired state service # ------------------------- + - echo ">=== Waiting for Pulumi vars file... ===<" + - while (! test -f /var/local/ansible/vars/pulumi_vars.yaml) do sleep 5; done - echo ">=== Running initial desired state configuration... ===<" - systemctl start desired-state + + # Restart services + - systemctl restart nslcd diff --git a/data_safe_haven/serialisers/azure_serialisable_model.py b/data_safe_haven/serialisers/azure_serialisable_model.py index 389522376f..1a9f206485 100644 --- a/data_safe_haven/serialisers/azure_serialisable_model.py +++ b/data_safe_haven/serialisers/azure_serialisable_model.py @@ -5,7 +5,7 @@ from data_safe_haven.exceptions import ( DataSafeHavenAzureError, DataSafeHavenAzureStorageError, - DataSafeHavenError, + DataSafeHavenTypeError, ) from data_safe_haven.external import AzureSdk @@ -44,9 +44,12 @@ def from_remote( except DataSafeHavenAzureStorageError as exc: msg = f"Storage account '{context.storage_account_name}' does not exist." 
raise DataSafeHavenAzureStorageError(msg) from exc - except DataSafeHavenError as exc: + except DataSafeHavenAzureError as exc: msg = f"Could not load file '{filename or cls.default_filename}' from Azure storage." raise DataSafeHavenAzureError(msg) from exc + except DataSafeHavenTypeError as exc: + msg = f"'{filename or cls.default_filename}' does not contain a valid {cls.config_type} configuration." + raise DataSafeHavenTypeError(msg) from exc @classmethod def from_remote_or_create( diff --git a/data_safe_haven/serialisers/yaml_serialisable_model.py b/data_safe_haven/serialisers/yaml_serialisable_model.py index 6541defbae..1ab5a031f8 100644 --- a/data_safe_haven/serialisers/yaml_serialisable_model.py +++ b/data_safe_haven/serialisers/yaml_serialisable_model.py @@ -54,9 +54,9 @@ def from_yaml(cls: type[T], settings_yaml: str) -> T: ) for error in exc.errors(): logger.error( - f"[red]{'.'.join(map(str, error.get('loc', [])))}: {error.get('input', '')}[/] - {error.get('msg', '')}" + f"{error.get('msg', '')}: [red]{'.'.join(map(str, error.get('loc', [])))}.[/] Original input: [red]{error.get('input', '')}[/]" ) - msg = f"Could not load {cls.config_type} configuration." + msg = f"{cls.config_type} configuration is invalid." raise DataSafeHavenTypeError(msg) from exc def to_filepath(self, config_file_path: PathType) -> None: diff --git a/data_safe_haven/types/__init__.py b/data_safe_haven/types/__init__.py index 471fb56656..4f2f89b3be 100644 --- a/data_safe_haven/types/__init__.py +++ b/data_safe_haven/types/__init__.py @@ -15,6 +15,7 @@ from .enums import ( AzureDnsZoneNames, AzureSdkCredentialScope, + AzureServiceTag, DatabaseSystem, FirewallPriorities, ForbiddenDomains, @@ -29,6 +30,7 @@ "AzureDnsZoneNames", "AzureLocation", "AzurePremiumFileShareSize", + "AzureServiceTag", "AzureSdkCredentialScope", "AzureSubscriptionName", "AzureVmSku", diff --git a/data_safe_haven/types/enums.py b/data_safe_haven/types/enums.py index 170cbba4a0..35465f260e 100644 --- a/data_safe_haven/types/enums.py +++ b/data_safe_haven/types/enums.py @@ -26,6 +26,11 @@ class AzureSdkCredentialScope(str, Enum): KEY_VAULT = "https://vault.azure.net" +@verify(UNIQUE) +class AzureServiceTag(str, Enum): + INTERNET = "Internet" + + @verify(UNIQUE) class DatabaseSystem(str, Enum): MICROSOFT_SQL_SERVER = "mssql" diff --git a/data_safe_haven/validators/validators.py b/data_safe_haven/validators/validators.py index dd4458ec57..27507d26b4 100644 --- a/data_safe_haven/validators/validators.py +++ b/data_safe_haven/validators/validators.py @@ -124,7 +124,7 @@ def ip_address(ip_address: str) -> str: try: return str(ipaddress.ip_network(ip_address)) except Exception as exc: - msg = "Expected valid IPv4 address, for example '1.1.1.1'." + msg = "Expected valid IPv4 address, for example '1.1.1.1', or 'Internet'." raise ValueError(msg) from exc diff --git a/data_safe_haven/version.py b/data_safe_haven/version.py index bfb9e4b4b8..0513a64c8f 100644 --- a/data_safe_haven/version.py +++ b/data_safe_haven/version.py @@ -1,2 +1,2 @@ -__version__ = "5.0.0" +__version__ = "5.0.1" __version_info__ = tuple(__version__.split(".")) diff --git a/docs/Makefile b/docs/Makefile deleted file mode 100644 index a2cf542beb..0000000000 --- a/docs/Makefile +++ /dev/null @@ -1,20 +0,0 @@ -# Minimal makefile for Sphinx documentation -# - -# You can set these variables from the command line, and also -# from the environment for the first two. 
-SPHINXOPTS ?= -SPHINXBUILD ?= sphinx-build -SOURCEDIR = source -BUILDDIR = build - -# Put it first so that "make" without argument is like "make help". -help: - @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) - -.PHONY: help Makefile - -# Catch-all target: route all unknown targets to Sphinx using the new -# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). -%: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/README.md b/docs/README.md deleted file mode 100644 index 488a2ab2b8..0000000000 --- a/docs/README.md +++ /dev/null @@ -1,57 +0,0 @@ -# Documentation - -The documentation is built from Markdown files using [Sphinx](https://www.sphinx-doc.org/) and [MyST parser](https://myst-parser.readthedocs.io/). - -## Building the Documentation - -Create a virtual environment - -```console -python3 -m venv ./venv -source ./venv/bin/activate -``` - -Install the python dependencies (specified in [`requirements.txt`](./requirements.txt)) - -```console -pip install -r requirements.txt -``` - -Use the [`Makefile`](./Makefile) to build the document site - -```console -make html -``` - -The generated documents will be placed under `build/html/`. -To view the documents open `build/html/index.html` in your browser. -For example - -```console -firefox build/html/index.html -``` - -## Reproducible Builds - -To improve the reproducibly of build at each commit, [`requirements.txt`](./requirements.txt) contains a complete list of dependencies and specific versions. - -The projects _direct_ dependencies are listed in [`requirements.in`](./requirements.in). -The full list is then generated using [`pip-compile`](https://pip-tools.readthedocs.io/en/latest/#requirements-from-requirements-in) - -```console -pip-compile requirements.in -``` - -### Updating Requirements - -All requirements can be updated with - -```console -pip-compile --upgrade requirements.in -``` - -Your virtual environment can be updated with - -```console -pip-sync -``` diff --git a/docs/source/contributing/index.md b/docs/source/contributing/index.md index 3d51d07e46..37f5e26f9d 100644 --- a/docs/source/contributing/index.md +++ b/docs/source/contributing/index.md @@ -64,10 +64,13 @@ harisood
harisood

๐Ÿ“– ๐Ÿ› ๐Ÿค” ๐Ÿ” ๐Ÿ“‹ ๐Ÿ“† ๐Ÿ“ฃ ๐Ÿ’ฌ ๐Ÿ“ข ๐Ÿ›ก๏ธ ๐Ÿ““ kevinxufs
kevinxufs

📖 🤔 🛡️
+ mattwestby
mattwestby

๐Ÿ› miguelmorin
miguelmorin

💻 📖 🤔 ⚠️ oforrest
oforrest

📖 🤔 📆 📣 🖋 rwinstanley1
rwinstanley1

📖 🤔 📆 🛡️ vollmersj
vollmersj

📖 🐛 🤔 🖋
+
+
warwick26
warwick26

๐Ÿ’ป ๐Ÿค” diff --git a/docs/source/deployment/configure_entra_id.md b/docs/source/deployment/configure_entra_id.md index 4be5339895..ea1cca6b7e 100644 --- a/docs/source/deployment/configure_entra_id.md +++ b/docs/source/deployment/configure_entra_id.md @@ -36,17 +36,18 @@ Follow the instructions [here](https://learn.microsoft.com/en-us/entra/fundament Use the following settings: - **Basics** tab: - - **User principal name:** entra.admin._FIRST_NAME_._LAST_NAME_ + - **User principal name:** entra.admin._first_name_._last_name_ - If you have a choice of domains use _YOUR_ORGANISATION_.onmicrosoft.com, which will create a clearer separation between administrators and users - - **Display name:** Entra Admin - _FIRST_NAME_ _LAST_NAME_ + - **Display name:** Entra Admin - _first_name_ _last_name_ - **Other fields:** leave them with their default values - **Properties** tab: - **Usage location:** set to the country being used for this deployment -- **Assigments** tab: +- **Assignments** tab: - Click the **{guilabel}`+ Add role`** button - Search for **Global Administrator**, and check the box - Click the **{guilabel}`Select`** button +Click the **{guilabel}`Review + Create`** button ::: ## Register allowed authentication methods diff --git a/docs/source/deployment/deploy_shm.md b/docs/source/deployment/deploy_shm.md index c1cf238b09..b26d451bfb 100644 --- a/docs/source/deployment/deploy_shm.md +++ b/docs/source/deployment/deploy_shm.md @@ -17,29 +17,16 @@ However, you may choose to use multiple SHMs if, for example, you want to separa ## Requirements - A [Microsoft Entra](https://learn.microsoft.com/en-us/entra/fundamentals/) tenant for managing your users - - An account with [Global Administrator](https://learn.microsoft.com/en-us/entra/global-secure-access/reference-role-based-permissions#global-administrator) privileges on this tenant + - An account with [Global Administrator](https://learn.microsoft.com/en-us/entra/global-secure-access/reference-role-based-permissions#global-administrator) privileges on the tenant that you set up in the {ref}`configure_entra_id` step. - An Azure subscription where you will deploy your infrastructure - An account with at least [Contributor](https://learn.microsoft.com/en-us/azure/role-based-access-control/built-in-roles/general#contributor) permissions on this subscription ## Deployment -::::{admonition} Ensure you are using a hatch shell -:class: dropdown important - -You must use a `hatch` shell to run any `dsh` commands. -From the project base directory run: - -:::{code} shell -$ hatch shell -::: - -This ensures that you are using the intended version of Data Safe Haven with the correct set of dependencies. -:::: - Before deploying the Safe Haven Management (SHM) infrastructure you need to decide on a few parameters: **entra_tenant_id** -: Tenant ID for the Entra ID used to manage TRE users +: Tenant ID for the Entra tenant you will be using to manage the TRE users :::{admonition} How to find your Microsoft Entra Tenant ID :class: dropdown hint diff --git a/docs/source/deployment/deploy_sre.md b/docs/source/deployment/deploy_sre.md index fb236e7963..2a1e5511a7 100644 --- a/docs/source/deployment/deploy_sre.md +++ b/docs/source/deployment/deploy_sre.md @@ -4,19 +4,6 @@ These instructions will deploy a new Secure Research Environment (SRE). -::::{admonition} Ensure you are using a hatch shell -:class: dropdown important - -You must use a `hatch` shell to run any `dsh` commands. 
-From the project base directory run: - -:::{code} shell -$ hatch shell -::: - -This ensures that you are using the intended version of Data Safe Haven with the correct set of dependencies. -:::: - ::::{note} As the Basic Application Gateway is still in preview, you will need to run the following commands once per subscription: @@ -82,11 +69,98 @@ sre: :::: +:::{admonition} Supported Azure regions +:class: dropdown important + +Some of the SRE resources are not available in all Azure regions. + +- Workspace virtual machines use zone redundant storage managed disks which have [limited regional availability](https://learn.microsoft.com/en-us/azure/virtual-machines/disks-redundancy). +- Some shares mounted on workspace virtual machines require premium file shares which have [limited regional availability](https://learn.microsoft.com/en-us/azure/storage/files/redundancy-premium-file-shares). + +The regions which satisfy all requirements are, + +- Australia East +- Brazil South +- Canada Central +- Central India +- China North 3 +- East Asia +- East US +- East US 2 +- France Central +- Germany West Central +- Israel Central +- Italy North +- Japan East +- Korea Central +- North Europe +- Norway East +- Poland Central +- Qatar Central +- South Africa North +- South Central US +- Southeast Asia +- Sweden Central +- Switzerland North +- UAE North +- UK South +- US Gov Virginia +- West Europe +- West US 2 +- West US 3 + +::: + +:::{hint} +See [here](https://learn.microsoft.com/en-us/azure/virtual-machines/sizes/) for a full list of valid Azure VM SKUs. +::: + :::{important} -All VM SKUs you want to deploy must support premium SSDs. +All VM SKUs you deploy must support premium SSDs. +- SKUs that support premium SSDs have a lower case 's' in their name. +- See [here](https://learn.microsoft.com/en-us/azure/virtual-machines/vm-naming-conventions) for a full naming convention explanation. - See [here](https://learn.microsoft.com/en-us/azure/virtual-machines/disks-types#premium-ssds) for more details on premium SSD support. -- See [here](https://learn.microsoft.com/en-us/azure/virtual-machines/sizes/) for a full list of valid SKUs + +::: + +:::{important} +All VM SKUs you deploy must have CPUs with the `x86_64` architecture. + +- SKUs with a lower case 'p' in their name have the ARM architecture and should not be used. +- See [here](https://learn.microsoft.com/en-us/azure/virtual-machines/vm-naming-conventions) for a full naming convention explanation. + +::: + +:::{important} +The antivirus process running on each workspace consumes around 1.3 GiB at idle. +This usage will roughly double for a short period each day while its database is updated. + +You should take this into account when choosing a VM size and pick an SKU with enough memory overhead for your workload and the antivirus service. +::: + +:::{important} +Only GPUs supported by CUDA and the Nvidia GPU drivers can be used. +['N' series](https://learn.microsoft.com/en-us/azure/virtual-machines/sizes/overview#gpu-accelerated) SKUs feature GPUs. +The NC and ND families are recommended as they feature GPUs designed for general purpose computation rather than graphics processing. + +There is no key to distinguish SKUs with Nvidia GPUs, however newer SKUs contain the name of the accelerator. +::: + +:::{hint} +Picking a good VM size depends on a lot of variables. +You should think about your expected use case and what kind of resources you need. 
+ +As some general recommendations, + +- For general purpose use, the D family gives decent performance and a good balance of CPU and memory. + The [Dsv6 series](https://learn.microsoft.com/en-us/azure/virtual-machines/sizes/general-purpose/dsv6-series#sizes-in-series) is a good starting point and can be scaled from 2 CPUs and 8 GB RAM to 128 CPUs and 512 GB RAM. + - `Standard_D8s_v6` should give reasonable performance for a single concurrent user. +- For GPU accelerated work, the NC family provides Nvidia GPUs and a good balance of CPU and memory. + In order of increasing throughput, the `NCv3` series features Nvidia V100 GPUs, the `NC_A100_v4` series features Nvidia A100 GPUs, and the `NCads_H100_v5` series features Nvidia H100 GPUs. + - `Standard_NC6s_v3` should give reasonable performance for a single concurrent user with AI/ML workloads. + Scaling up in the same series (for example `Standard_NC12s_v3`) gives more accelerators of the same type. + Alternatively a series with more recent GPUs should give better performance. ::: diff --git a/docs/source/deployment/index.md b/docs/source/deployment/index.md index 14082e3163..435d32ceb5 100644 --- a/docs/source/deployment/index.md +++ b/docs/source/deployment/index.md @@ -21,8 +21,9 @@ Deploying an instance of the Data Safe Haven involves the following steps: Install the following requirements before starting +- [Python 3.12](https://wiki.python.org/moin/BeginnersGuide/Download) - [Azure CLI](https://learn.microsoft.com/en-us/cli/azure/install-azure-cli) -- [Hatch](https://hatch.pypa.io/1.9/install/) +- [pipx](https://pipx.pypa.io/stable/installation/) - [Pulumi](https://www.pulumi.com/docs/get-started/install/) ### Docker Hub @@ -38,9 +39,33 @@ See [the instructions here](https://docs.docker.com/security/for-developers/acce ## Install the project -- Download or checkout the [latest supported version](https://github.com/alan-turing-institute/data-safe-haven/blob/develop/SECURITY.md) of this code from [GitHub](https://github.com/alan-turing-institute/data-safe-haven). -- Enter the base directory and install Python dependencies with `hatch` by doing the following: +- Look up the [latest supported version](https://github.com/alan-turing-institute/data-safe-haven/blob/develop/SECURITY.md) of this code from [GitHub](https://github.com/alan-turing-institute/data-safe-haven). +- Install the executable with `pipx` by running: :::{code} shell -$ hatch run true +$ pipx install data-safe-haven +::: + +- Or install a specific version with + +:::{code} shell +$ pipx install data-safe-haven==5.0.0 +::: + +::::{admonition} [Advanced] install into a virtual environment +:class: dropdown caution + +If you prefer, you can install this package into a virtual environment: + +:::{code} shell +$ python -m venv /path/to/new/virtual/environment +$ source /path/to/new/virtual/environment/bin/activate +$ pip install data-safe-haven +::: +:::: + +- Test that this has worked by checking the version + +:::{code} shell +$ dsh --version ::: diff --git a/docs/source/deployment/security_checklist.md b/docs/source/deployment/security_checklist.md index 451a4fd070..2737b1cb5c 100644 --- a/docs/source/deployment/security_checklist.md +++ b/docs/source/deployment/security_checklist.md @@ -454,12 +454,12 @@ To minimise the risk of unauthorised access to the dataset while the ingress vol ### Turing configuration setting: -- Research users can write to the `/output` volume.
-- A {ref}`role_system_manager` can view and download data in the `/output` volume via `Azure Storage Explorer`. +- Research users can write to the `/mnt/output` volume. +- A {ref}`role_system_manager` can view and download data in the `/mnt/output` volume via `Azure Storage Explorer`. ### Implication: -- SREs contain an `/output` volume, in which SRE users can store data designated for egress. +- SREs contain an `/mnt/output` volume, in which SRE users can store data designated for egress. ### Verify by: @@ -469,7 +469,7 @@ To minimise the risk of unauthorised access to the dataset while the ingress vol - Open up a file explorer and search for the various storage volumes ```{attention} -{{white_check_mark}} Verify that: the `/output` volume exists and can be read and written to. +{{white_check_mark}} Verify that: the `/mnt/output` volume exists and can be read and written to. ``` ```{attention} @@ -481,7 +481,7 @@ To minimise the risk of unauthorised access to the dataset while the ingress vol - As the {ref}`role_system_manager`, follow the instructions in the [project manager documentation](../roles/project_manager/data_egress.md#data-egress-process) on how to access files set for egress with `Azure Storage Explorer`. ```{attention} -{{white_check_mark}} Verify that: you can see the files written to the `/output` storage volume. +{{white_check_mark}} Verify that: you can see the files written to the `/mnt/output` storage volume. ``` ```{attention} diff --git a/docs/source/design/security/reference_configuration.md b/docs/source/design/security/reference_configuration.md index 5d508cdebd..fa848ec730 100644 --- a/docs/source/design/security/reference_configuration.md +++ b/docs/source/design/security/reference_configuration.md @@ -102,12 +102,12 @@ To minimise the risk of unauthorised access to the dataset while the ingress vol ### Turing configuration setting:: -- Users can write to the `/output` volume. -- A {ref}`role_system_manager` can view and download data in the `/output` volume via **Azure Storage Explorer**. +- Users can write to the `/mnt/output` volume. +- A {ref}`role_system_manager` can view and download data in the `/mnt/output` volume via **Azure Storage Explorer**. ### Implication: -- SRE users can mark data as ready for egress approval by placing it in the `/output` volume. +- SRE users can mark data as ready for egress approval by placing it in the `/mnt/output` volume. ## 9. Software ingress diff --git a/docs/source/index.md b/docs/source/index.md index 2383953e7e..764b06ec38 100644 --- a/docs/source/index.md +++ b/docs/source/index.md @@ -27,7 +27,7 @@ We have developed: - A proposed default set of technical security measures for each tier. - A set of infrastructure-as-code tools which will allow anyone to deploy their own isolated research environment. -If this sounds interesting to you, take a look at our GitHub releases: [![Data Safe Haven releases](https://img.shields.io/static/v1?label=Data%20Safe%20Haven&message=Releases&style=flat&logo=github)](https://github.com/alan-turing-institute/data-safe-haven/releases). +If this sounds interesting to you, take a look at our GitHub releases: [![Data Safe Haven releases](https://img.shields.io/static/v1?label=Data%20Safe%20Haven&message=Releases&style=flat&logo=github)](https://github.com/alan-turing-institute/data-safe-haven/releases).
## Documentation structure diff --git a/docs/source/management/egress_token_read_only.png b/docs/source/management/egress_token_read_only.png new file mode 100644 index 0000000000..a28ca2e99d Binary files /dev/null and b/docs/source/management/egress_token_read_only.png differ diff --git a/docs/source/management/index.md b/docs/source/management/index.md index 931d5e003a..e9f49a5733 100644 --- a/docs/source/management/index.md +++ b/docs/source/management/index.md @@ -1,6 +1,8 @@ # Management -## Add users to the Data Safe Haven +## Managing users + +### Add users to the Data Safe Haven :::{important} You will need a full name, phone number, email address and country for each user. @@ -27,7 +29,7 @@ Grace;Hopper;+18005550100;grace@nasa.gov;US $ dsh users add PATH_TO_MY_CSV_FILE ``` -## Listing available users +### List available users - You can do this from the [Microsoft Entra admin centre](https://entra.microsoft.com/) @@ -54,7 +56,7 @@ $ dsh users add PATH_TO_MY_CSV_FILE └──────────────────────────────┴──────────┴───────────────────┘ ``` -## Assign existing users to an SRE +### Assign existing users to an SRE 1. You can do this directly in your Entra tenant by adding them to the **Data Safe Haven SRE _YOUR\_SRE\_NAME_ Users** group, following the instructions [here](https://learn.microsoft.com/en-us/entra/fundamentals/groups-view-azure-portal#add-a-group-member). @@ -70,7 +72,7 @@ $ dsh users add PATH_TO_MY_CSV_FILE Do not include the Entra ID domain part of the username, just the part before the @. ::: -## Manually register users for self-service password reset +### Manually register users for self-service password reset :::{tip} Users created via the `dsh users` command line tool will be automatically registered for SSPR. @@ -87,7 +89,30 @@ If you have manually created a user and want to enable SSPR, do the following - **Email:** enter the user's email address here - Click the **{guilabel}`Save`** icon in the top panel -## Removing a deployed Data Safe Haven +## Managing SREs + +### List available SRE configurations and deployment status + +- Run the following if you want to check what SRE configurations are available in the current context, and whether those SREs are deployed + +```{code} shell +$ dsh config available +``` + +which will give output like the following + +```{code} shell +Available SRE configurations for context 'green': +┏━━━━━━━━━━━━━━┳━━━━━━━━━━┓ +┃ SRE Name ┃ Deployed ┃ +┡━━━━━━━━━━━━━━╇━━━━━━━━━━┩ +│ emerald │ x │ +│ jade │ │ +│ olive │ │ +└──────────────┴──────────┘ +``` + +### Remove a deployed Data Safe Haven - Run the following if you want to teardown a deployed SRE: @@ -95,8 +120,96 @@ If you have manually created a user and want to enable SSPR, do the following $ dsh sre teardown YOUR_SRE_NAME ``` +::::{admonition} Tearing down an SRE is destructive and irreversible +:class: danger +Running `dsh sre teardown` will destroy **all** resources deployed within the SRE. +Ensure that any desired outputs have been extracted before deleting the SRE. +**All** data remaining on the SRE will be deleted. +The user groups for the SRE on Microsoft Entra ID will also be deleted.
+:::: + - Run the following if you want to teardown the deployed SHM: ```{code} shell $ dsh shm teardown ``` + +::::{admonition} Tearing down an SHM +:class: warning +Tearing down the SHM permanently deletes **all** remotely stored configuration and state data. +Tearing down the SHM also renders the SREs inaccessible to users and prevents them from being fully managed using the CLI. +All SREs associated with the SHM should be torn down before the SHM is torn down. +:::: + +## Managing data ingress and egress + +### Data Ingress + +It is the {ref}`role_data_provider_representative`'s responsibility to upload the data required by the safe haven. + +The following steps show how to generate a temporary, write-only upload token that can be securely sent to the {ref}`role_data_provider_representative`, enabling them to upload the data: + +- In the Azure portal select **Subscriptions** then navigate to the subscription containing the relevant SHM +- Search for the resource group: `shm--sre--rg`, then click through to the storage account ending with `sensitivedata` +- Browse to **{menuselection}`Settings --> Networking`** and ensure that the data provider's IP address is one of those allowed under the **Firewall** header + - If it is not listed, modify and reupload the SRE configuration and redeploy the SRE using the `dsh` CLI, as per {ref}`deploy_sre` +- Browse to **{menuselection}`Data storage --> Containers`** from the menu on the left hand side +- Click **ingress** +- Browse to **{menuselection}`Settings --> Shared access tokens`** and do the following: + - Under **Signing method**, select **User delegation key** + - Under **Permissions**, check these boxes: + - **Write** + - **List** + - Set a 24 hour time window in the **Start and expiry date/time** (or an appropriate length of time) + - Leave everything else as default and click **{guilabel}`Generate SAS token and URL`** + - Copy the **Blob SAS URL** + + ```{image} ingress_token_write_only.png + :alt: write-only SAS token + :align: center + ``` + +- Send the **Blob SAS URL** to the data provider through a secure channel +- The data provider should now be able to upload data +- Validate successful data ingress + - Browse to **{menuselection}`Data storage --> Containers`** (in the middle of the page) + - Select the **ingress** container and ensure that the uploaded files are present + +### Data egress + +```{important} +Assessment of output must be completed **before** an egress link is created. +Outputs are potentially sensitive, and so an appropriate process must be applied to ensure that they are suitable for egress. +``` + +The {ref}`role_system_manager` creates a time-limited and IP restricted link to remove data from the environment. 
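+ +:::{hint} +The same kind of token can also be generated from the command line instead of through the portal. The following sketch is illustrative only: the storage account name, recipient IP address and expiry time are placeholders that should be replaced with the values for your deployment. + +```{code} shell +$ az storage container generate-sas --account-name shmexamplesensitivedata --name egress --permissions rl --expiry 2024-01-01T00:00Z --ip 203.0.113.1 --auth-mode login --as-user +``` + +The token that is printed can be appended to the container URL (for example `https://shmexamplesensitivedata.blob.core.windows.net/egress?`) to form the **Blob SAS URL**. +:::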
+ +- In the Azure portal select **Subscriptions** then navigate to the subscription containing the relevant SHM +- Search for the resource group: `shm--sre--rg`, then click through to the storage account ending with `sensitivedata` +- Browse to **{menuselection}`Settings --> Networking`** and check the list of pre-approved IP addresses allowed under the **Firewall** header + - Ensure that the IP address of the person to receive the outputs is listed + - If it is not listed, modify and reupload the SRE configuration and redeploy the SRE using the `dsh` CLI, as per {ref}`deploy_sre` +- Browse to **{menuselection}`Data storage --> Containers`** +- Select the **egress** container +- Browse to **{menuselection}`Settings --> Shared access tokens`** and do the following: + - Under **Signing method**, select **User delegation key** + - Under **Permissions**, check these boxes: + - **Read** + - **List** + - Set a time window in the **Start and expiry date/time** that gives enough time for the person who will perform the secure egress download to do so + - Leave everything else as default and press **{guilabel}`Generate SAS token and URL`** + - Copy the **Blob SAS URL** + + ```{image} egress_token_read_only.png + :alt: Read-only SAS token + :align: center + ``` + +- Send the **Blob SAS URL** to the relevant person through a secure channel +- The appropriate person should now be able to download data + +### The output volume + +Once you have set up the egress connection in Azure Storage Explorer, you should be able to view data from the **output volume**, a read-write area intended for the extraction of results, such as figures for publication. +On the workspaces, this volume is `/mnt/output` and is shared between all workspaces in an SRE. +For more information on shared SRE storage volumes, consult the {ref}`Safe Haven User Guide `. diff --git a/docs/source/management/ingress_token_write_only.png b/docs/source/management/ingress_token_write_only.png new file mode 100644 index 0000000000..34829ee4fa Binary files /dev/null and b/docs/source/management/ingress_token_write_only.png differ diff --git a/docs/source/roles/data_provider_representative/data_ingress.md b/docs/source/roles/data_provider_representative/data_ingress.md index e0f80c8ce2..cb4e12faa4 100644 --- a/docs/source/roles/data_provider_representative/data_ingress.md +++ b/docs/source/roles/data_provider_representative/data_ingress.md @@ -5,9 +5,22 @@ The **Dataset Provider Representative** plays an important role in data ingress. As well as being involved in agreeing an appropriate security tier for a project, they may also prepare the data to be uploaded. -## Preparing data +## Bringing data into the environment + +Talk to your {ref}`role_system_manager` to discuss possible methods of bringing data into the environments. +It may be convenient to use [Azure Storage Explorer](https://azure.microsoft.com/en-us/products/storage/storage-explorer/). +In this case you will not need log-in credentials, as your {ref}`role_system_manager` can provide a short-lived secure access token which will let you upload data. + +```{tip} +You may want to keep the following considerations in mind when transferring data in order to reduce the chance of a data breach. + +- Use of short-lived access tokens limits the time within which an attacker can operate. +- Letting your {ref}`role_system_manager` know a fixed IP address you will be connecting from (_e.g._ a corporate VPN) limits the places an attacker can operate from. 
+- Communicating with your {ref}`role_system_manager` through a secure out-of-band channel (_e.g._ encrypted email) reduces the chances that an attacker can intercept or alter your messages in transit. + +``` -This section has some recommendations for preparing input data for the Data Safe Haven. +## Preparing input data for the Data Safe Haven ### Avoid archives @@ -61,10 +74,10 @@ They are common algorithms built into many operating systems, and included in th You can generate a checksum file, which can be used to verify the integrity of files. If you upload this file then researchers will be able to independently verify data integrity within the environment. -Here are instructions to generate a checksum file using the `md5sum` algorithm for a data set stored in a directory called `data`. +Here are instructions to generate a checksum file using the `md5sum` algorithm for a data set stored in the `input` directory. ```console -find ./data/ -type fl -exec md5sum {} + > hashes.txt +find ./mnt/input/ -type fl -exec md5sum {} + > hashes.txt ``` `find` searches the `data` directory for files and symbolic links (`-type fl`). @@ -86,16 +99,3 @@ md5sum -c hashes.txt | grep FAILED ``` To use the `sha256` algorithm, replace `md5sum` with `sha256` in the above commands. - -## Bringing data into the environment - -Talk to your {ref}`role_system_manager` to discuss possible methods of bringing data into the environments. -It may be convenient to use [Azure Storage Explorer](https://azure.microsoft.com/en-us/products/storage/storage-explorer/). -In this case you will not need log-in credentials, as your {ref}`role_system_manager` can provide a short-lived secure access token which will let you upload data. - -```{tip} -You may want to keep the following considerations in mind when transferring data in order to reduce the chance of a data breach -- use of short-lived access tokens limits the time within which an attacker can operate -- letting your {ref}`role_system_manager` know a fixed IP address you will be connecting from (eg. a corporate VPN) limits the places an attacker can operate from -- communicating with your {ref}`role_system_manager` through a secure out-of-band channel (eg. encrypted email) reduces the chances that an attacker can intercept or alter your messages in transit -``` diff --git a/docs/source/roles/researcher/accessing_the_sre.md b/docs/source/roles/researcher/accessing_the_sre.md index eac7fe804f..3f6600df72 100644 --- a/docs/source/roles/researcher/accessing_the_sre.md +++ b/docs/source/roles/researcher/accessing_the_sre.md @@ -175,5 +175,5 @@ When you are connected to a workspace, you may switch to another by bringing up ::: :::{tip} -Any files in the **/output/**, **/home/** or **/shared** folders on other workspaces will be available in this workspace too. +Any files in the **/mnt/output/**, **/home/** or **/mnt/shared** folders on other workspaces will be available in this workspace too. 
::: diff --git a/docs/source/roles/researcher/images/db_beekeeper_studio_mssql.png b/docs/source/roles/researcher/images/db_beekeeper_studio_mssql.png new file mode 100644 index 0000000000..fede151a4a Binary files /dev/null and b/docs/source/roles/researcher/images/db_beekeeper_studio_mssql.png differ diff --git a/docs/source/roles/researcher/images/db_beekeeper_studio_postgres.png b/docs/source/roles/researcher/images/db_beekeeper_studio_postgres.png new file mode 100644 index 0000000000..473f40cda2 Binary files /dev/null and b/docs/source/roles/researcher/images/db_beekeeper_studio_postgres.png differ diff --git a/docs/source/roles/researcher/images/db_dbeaver_connect_mssql.png b/docs/source/roles/researcher/images/db_dbeaver_connect_mssql.png deleted file mode 100644 index 273d53a993..0000000000 Binary files a/docs/source/roles/researcher/images/db_dbeaver_connect_mssql.png and /dev/null differ diff --git a/docs/source/roles/researcher/images/db_dbeaver_connect_postgresql.png b/docs/source/roles/researcher/images/db_dbeaver_connect_postgresql.png deleted file mode 100644 index ac1e79cf76..0000000000 Binary files a/docs/source/roles/researcher/images/db_dbeaver_connect_postgresql.png and /dev/null differ diff --git a/docs/source/roles/researcher/images/db_dbeaver_driver_download.png b/docs/source/roles/researcher/images/db_dbeaver_driver_download.png deleted file mode 100644 index a2225657b1..0000000000 Binary files a/docs/source/roles/researcher/images/db_dbeaver_driver_download.png and /dev/null differ diff --git a/docs/source/roles/researcher/images/db_dbeaver_select_mssql.png b/docs/source/roles/researcher/images/db_dbeaver_select_mssql.png deleted file mode 100644 index ea5b7e9e41..0000000000 Binary files a/docs/source/roles/researcher/images/db_dbeaver_select_mssql.png and /dev/null differ diff --git a/docs/source/roles/researcher/images/db_dbeaver_select_postgresql.png b/docs/source/roles/researcher/images/db_dbeaver_select_postgresql.png deleted file mode 100644 index 75f0d019d3..0000000000 Binary files a/docs/source/roles/researcher/images/db_dbeaver_select_postgresql.png and /dev/null differ diff --git a/docs/source/roles/researcher/snippets/software_database.partial.md b/docs/source/roles/researcher/snippets/software_database.partial.md index e47f443c17..d76766476e 100644 --- a/docs/source/roles/researcher/snippets/software_database.partial.md +++ b/docs/source/roles/researcher/snippets/software_database.partial.md @@ -1,3 +1,3 @@ -- `DBeaver` desktop database management software +- `Beekeeper Studio` desktop database management software - `psql` a command line PostgreSQL client - `unixodbc-dev` driver for interacting with Microsoft SQL databases diff --git a/docs/source/roles/researcher/using_the_sre.md b/docs/source/roles/researcher/using_the_sre.md index 8c1aecf614..51a7b60abf 100644 --- a/docs/source/roles/researcher/using_the_sre.md +++ b/docs/source/roles/researcher/using_the_sre.md @@ -53,8 +53,8 @@ For instance, describing in detail what a dataset contains and how it will be us SREs are designed to be ephemeral and only deployed for as long as necessary. It is likely that the infrastructure, and data, will be permanently deleted when work has concluded. -The `/output/` directory is designed for storing output to be kept after a project concludes. -You should move such data to the `/output/` directory and contact your designated contact about data egress. +The `/mnt/output/` directory is designed for storing output to be kept after a project concludes. 
+You should move such data to the `/mnt/output/` directory and contact your designated contact about data egress. :::{important} You are responsible for deciding what is worth archiving. @@ -63,8 +63,8 @@ You are responsible for deciding what is worth archiving. While working on the project: - store all your code in a **Gitea** repository. -- store all resources that might be useful to the rest of the project in the **/shared/** folder. -- store anything that might form an output from the project (_e.g._ images, documents or output datasets) in the **/output/** folder. +- store all resources that might be useful to the rest of the project in the **/mnt/shared/** folder. +- store anything that might form an output from the project (_e.g._ images, documents or output datasets) in the **/mnt/output/** folder. See {ref}`the section on sharing files ` to find out more about where to store your files. @@ -244,45 +244,45 @@ Type `yes` to install the packages. There are several shared folder on each workspace that all collaborators within a research project team can see and access: -- [input data](#input-data): in the **/data/** folder -- [shared space](#shared-space): in the **/shared/** folder -- [output resources](#output-resources): in the **/output/** folder +- [input data](#input-data): in the **/mnt/input/** folder +- [shared space](#shared-space): in the **/mnt/shared/** folder +- [output resources](#output-resources): in the **/mnt/output/** folder ### Input data -Data that has been approved and brought into the secure research environment can be found in the **/data/** folder. +Data that has been approved and brought into the secure research environment can be found in the **/mnt/input/** folder. -- The contents of **/data/** will be identical on all workspaces in your SRE. +- The contents of **/mnt/input/** will be identical on all workspaces in your SRE. - Everyone working on your project will be able to access it. - Everyone has **read-only access** to the files stored here. -If you are using the Data Safe Haven as part of an organised event, you might find additional resources in the **/data/** folder, such as example slides or document templates. +If you are using the Data Safe Haven as part of an organised event, you might find additional resources in the **/mnt/input/** folder, such as example slides or document templates. :::{important} -You will not be able to change any of the files in **/data/**. -If you want to make derived datasets, for example cleaned and reformatted data, please add those to the **/shared/** or **/output/** folders. +You will not be able to change any of the files in **/mnt/input/**. +If you want to make derived datasets, for example cleaned and reformatted data, please add those to the **/mnt/shared/** or **/mnt/output/** folders. ::: ### Shared space -The **/shared/** folder should be used for any work that you want to share with your group. +The **/mnt/shared/** folder should be used for any work that you want to share with your group. -- The contents of **/shared/** will be identical on all workspaces in your SRE. +- The contents of **/mnt/shared/** will be identical on all workspaces in your SRE. - Everyone working on your project will be able to access it - Everyone has **read-and-write access** to the files stored here. ### Output resources -Any outputs that you want to extract from the secure environment should be placed in the **/output/** folder on the workspace. 
+Any outputs that you want to extract from the secure environment should be placed in the **/mnt/output/** folder on the workspace. -- The contents of **/output/** will be identical on all workspaces in your SRE. +- The contents of **/mnt/output/** will be identical on all workspaces in your SRE. - Everyone working on your project will be able to access it - Everyone has **read-and-write access** to the files stored here. Anything placed in here will be considered for data egress - removal from the secure research environment - by the project's principal investigator together with the data provider. :::{tip} -You may want to consider having subfolders of **/output/** to make the review of this directory easier. +You may want to consider having subfolders of **/mnt/output/** to make the review of this directory easier. ::: ## {{pill}} Version control using Gitea @@ -529,7 +529,7 @@ Official tutorials for [MSSQL](https://learn.microsoft.com/en-us/sql/sql-server/ :class: dropdown note - **Server name** : mssql._SRE\_URL_ (e.g. mssql.sandbox.projects.example.org) -- **Username**: databaseadmin +- **Username**: databaseadmin@shm-_SHM\_NAME_-sre-_SRE\_NAME_-db-server-mssql - **Password**: provided by your {ref}`System Manager ` - **Database name**: provided by your {ref}`System Manager ` - **Port**: 1433 @@ -547,111 +547,57 @@ Official tutorials for [MSSQL](https://learn.microsoft.com/en-us/sql/sql-server/ ::: -Examples are given below for connecting using **DBeaver**, **Python** and **R**. +Examples are given below for connecting using **Beekeeper Studio**, **Python** and **R**. The instructions for using other graphical interfaces or programming languages will be similar. -### {{bear}} Connecting using DBeaver - -#### Microsoft SQL +### {{bee}} Connecting using Beekeeper Studio -::::{admonition} 1. Create new Microsoft SQL server connection +::::{admonition} Microsoft SQL :class: dropdown note -Click on the **{guilabel}`New database connection`** button (which looks a bit like an electrical plug with a plus sign next to it) - +- Click on the **{guilabel}`+ New Connection`** button - Select **SQL Server** as the database type - - :::{image} images/db_dbeaver_select_mssql.png - :alt: DBeaver select Microsoft SQL +- Enter the connection details + - **Authentication**: Username/Password + - **Host**: as above + - **Port**: as above + - **Enable SSL**: false + - **User**: as above + - **Password**: as above + - **Domain**: empty + - **Trust Server Certificate**: true + - **Default Database**: master +- Click on **{guilabel}`Test`** to test the connection settings +- Click on **{guilabel}`Connect`** to connect to the database or enter a name and click **{guilabel}`Save`** to save the connection settings for future use + + :::{image} images/db_beekeeper_studio_mssql.png + :alt: Beekeeper Studio MS SQL connection configuration :align: center :width: 90% ::: - -:::: - -::::{admonition} 2. Provide connection details -:class: dropdown note - -- **Host**: as above -- **Database**: as above -- **Authentication**: SQL Server Authentication -- **Username**: as above -- **Password**: as above -- Tick **Show All Schemas** -- Tick **Trust server certificate** - - :::{image} images/db_dbeaver_connect_mssql.png - :alt: DBeaver connect with Microsoft SQL - :align: center - :width: 90% - ::: - -:::: - -::::{admonition} 3. Download drivers if needed -:class: dropdown note - -- After clicking finish, you may be prompted to download driver files even though they should be pre-installed. 
-- Click on the **{guilabel}`Download`** button if this happens. - - :::{image} images/db_dbeaver_driver_download.png - :alt: DBeaver driver download for Microsoft SQL - :align: center - :width: 90% - ::: - -- If drivers are not available contact your {ref}`System Manager ` - :::: -#### PostgreSQL - -::::{admonition} 1. Create new PostgreSQL server connection -:class: dropdown note - -Click on the **{guilabel}`New database connection`** button (which looks a bit like an electrical plug with a plus sign next to it) - -- Select **PostgreSQL** as the database type - - :::{image} images/db_dbeaver_select_postgresql.png - :alt: DBeaver select PostgreSQL - :align: center - :width: 90% - ::: - -:::: - -::::{admonition} 2. Provide connection details -:class: dropdown note - -- **Host**: as above -- **Database**: as above -- **Authentication**: Database Native -- **Username**: as above -- **Password**: as above - - :::{image} images/db_dbeaver_connect_postgresql.png - :alt: DBeaver connect with PostgreSQL - :align: center - :width: 90% - ::: - -:::: - -::::{admonition} 3. Download drivers if needed +::::{admonition} PostgreSQL :class: dropdown note -- After clicking finish, you may be prompted to download driver files even though they should be pre-installed. -- Click on the **{guilabel}`Download`** button if this happens. - - :::{image} images/db_dbeaver_driver_download.png - :alt: DBeaver driver download for PostgreSQL +- Click on the **{guilabel}`+ New Connection`** button +- Select **Postgres** as the database type +- Enter the connection details + - **Connection Mode**: Host and Port + - **Host**: as above + - **Port**: as above + - **Enable SSL**: false + - **User**: as above + - **Password**: as above + - **Default Database**: postgres +- Click on **{guilabel}`Test`** to test the connection settings +- Click on **{guilabel}`Connect`** to connect to the database or enter a name and click **{guilabel}`Save`** to save the connection settings for future use + + :::{image} images/db_beekeeper_studio_postgres.png + :alt: Beekeeper Studio PostgreSQL connection configuration :align: center :width: 90% ::: - -- If drivers are not available contact your {ref}`System Manager ` - :::: ### {{snake}} Connecting using Python diff --git a/pyproject.toml b/pyproject.toml index bf4fcec5b6..d444befc3d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,45 +9,89 @@ description = "An open-source framework for creating secure environments to anal authors = [ { name = "Data Safe Haven development team", email = "safehavendevs@turing.ac.uk" }, ] +readme = "README.md" requires-python = "==3.12.*" +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Environment :: Console", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: BSD License", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.12", + "Topic :: Security", + "Topic :: System :: Systems Administration", +] license = { text = "BSD-3-Clause" } dependencies = [ - "appdirs>=1.4", - "azure-core>=1.26", - "azure-identity>=1.16.1", - "azure-keyvault-certificates>=4.6", - "azure-keyvault-keys>=4.6", - "azure-keyvault-secrets>=4.6", - "azure-mgmt-automation>=1.0", - "azure-mgmt-compute>=30.3", - "azure-mgmt-containerinstance>=10.1", - "azure-mgmt-dns>=8.0", - "azure-mgmt-keyvault>=10.3", - "azure-mgmt-msi>=7.0", - "azure-mgmt-network>=25.0", - "azure-mgmt-rdbms>=10.1", - "azure-mgmt-resource>=23.0", - "azure-mgmt-storage>=21.1", - "azure-storage-blob>=12.15", 
- "azure-storage-file-datalake>=12.10", - "azure-storage-file-share>=12.10", - "chevron>=0.14", - "cryptography>=42.0", - "fqdn>=1.5", - "msal>=1.21", - "psycopg>=3.1", - "pulumi>=3.80", - "pulumi-azure-native>=2.49.1", - "pulumi-random>=4.14", - "pulumi-tls>=5.0.3", - "pydantic>=2.4", - "pyjwt>=2.8", - "pytz>=2023.3", - "PyYAML>=6.0", - "rich>=13.4", - "simple-acme-dns>=3.0", - "typer>=0.9", - "websocket-client>=1.5", + "appdirs==1.4.4", + "azure-core==1.31.0", + "azure-identity==1.19.0", + "azure-keyvault-certificates==4.8.0", + "azure-keyvault-keys==4.9.0", + "azure-keyvault-secrets==4.8.0", + "azure-mgmt-compute==33.0.0", + "azure-mgmt-containerinstance==10.1.0", + "azure-mgmt-dns==8.1.0", + "azure-mgmt-keyvault==10.3.1", + "azure-mgmt-msi==7.0.0", + "azure-mgmt-rdbms==10.1.0", + "azure-mgmt-resource==23.1.1", + "azure-mgmt-storage==21.2.1", + "azure-storage-blob==12.23.1", + "azure-storage-file-datalake==12.17.0", + "azure-storage-file-share==12.19.0", + "chevron==0.14.0", + "cryptography==43.0.1", + "fqdn==1.5.1", + "psycopg[binary]==3.1.19", # needed for installation on older MacOS versions + "pulumi-azure-native==2.66.0", + "pulumi-azuread==6.0.0", + "pulumi-random==4.16.6", + "pulumi==3.136.1", + "pydantic==2.9.2", + "pyjwt[crypto]==2.9.0", + "pytz==2024.2", + "pyyaml==6.0.2", + "rich==13.9.2", + "simple-acme-dns==3.1.0", + "typer==0.12.5", + "websocket-client==1.8.0", +] + +[project.urls] +Documentation = "https://data-safe-haven.readthedocs.io" +Issues = "https://github.com/alan-turing-institute/data-safe-haven/issues" +Source = "https://github.com/alan-turing-institute/data-safe-haven" + +[project.optional-dependencies] +docs = [ + "emoji==2.14.0", + "myst-parser==4.0.0", + "pydata-sphinx-theme==0.15.4", + "sphinx-togglebutton==0.3.2", + "sphinx==8.1.3", +] +lint = [ + "ansible-dev-tools==24.9.0", + "ansible==10.5.0", + "black==24.10.0", + "mypy==1.11.2", + "pandas-stubs==2.2.3.241009", + "pydantic==2.9.2", + "ruff==0.6.9", + "types-appdirs==1.4.3.5", + "types-chevron==0.14.2.20240310", + "types-pytz==2024.2.0.20241003", + "types-pyyaml==6.0.12.20240917", + "types-requests==2.32.0.20240914", +] +test = [ + "coverage==7.6.3", + "freezegun==1.5.1", + "pytest-mock==3.14.0", + "pytest==8.3.3", + "requests-mock==1.12.1", ] [project.scripts] @@ -66,41 +110,28 @@ omit= [ "data_safe_haven/resources/*", ] +[tool.hatch.env] +requires = ["hatch-pip-compile"] + [tool.hatch.envs.default] -pre-install-commands = ["pip install -r requirements.txt"] +type = "pip-compile" +lock-filename = ".hatch/requirements.txt" [tool.hatch.envs.docs] +type = "pip-compile" +lock-filename = ".hatch/requirements-docs.txt" detached = true -dependencies = [ - "emoji>=2.10.0", - "myst-parser>=2.0.0", - "pydata-sphinx-theme>=0.15.0", - "Sphinx>=7.3.0", - "sphinx-togglebutton>0.3.0", -] -pre-install-commands = ["pip install -r docs/requirements.txt"] +features = ["docs"] [tool.hatch.envs.docs.scripts] -build = [ - # Treat warnings as errors - "make -C docs html SPHINXOPTS='-W'" -] +build = "sphinx-build -M html docs/source/ docs/build/ --fail-on-warning" +lint = "mdl --style .mdlstyle.rb ./docs/source" [tool.hatch.envs.lint] +type = "pip-compile" +lock-filename = ".hatch/requirements-lint.txt" detached = true -dependencies = [ - "ansible>=10.2.0", - "ansible-dev-tools>=24.7.1", - "black>=24.1.0", - "mypy>=1.0.0", - "pydantic>=2.4", - "ruff>=0.3.4", - "types-appdirs>=1.4.3.5", - "types-chevron>=0.14.2.5", - "types-pytz>=2023.3.0.0", - "types-PyYAML>=6.0.12.11", - "types-requests>=2.31.0.2", -] +features = ["lint"] 
[tool.hatch.envs.lint.scripts] all = [ @@ -121,14 +152,10 @@ style = [ typing = "mypy {args:data_safe_haven}" [tool.hatch.envs.test] -dependencies = [ - "coverage>=7.5.1", - "freezegun>=1.5", - "pytest>=8.1", - "pytest-mock>=3.14", - "requests-mock>=1.12", -] -pre-install-commands = ["pip install -r requirements.txt"] +type = "pip-compile" +lock-filename = ".hatch/requirements-test.txt" +pip-compile-constraint = "default" +features = ["test"] [tool.hatch.envs.test.scripts] test = "coverage run -m pytest {args: tests}" @@ -142,7 +169,7 @@ path = "data_safe_haven/version.py" disallow_subclassing_any = false # allow subclassing of types from third-party libraries files = "data_safe_haven" # run mypy over this directory mypy_path = "typings" # use this directory for stubs -plugins = "pydantic.mypy" # enable the pydantic plugin +plugins = ["pydantic.mypy"] # enable the pydantic plugin strict = true # enable all optional error checking flags [[tool.mypy.overrides]] @@ -153,19 +180,15 @@ module = [ "azure.keyvault.*", "azure.mgmt.*", "azure.storage.*", - "cryptography.*", "dns.*", "jwt.*", - "msal.*", "numpy.*", - "pandas.*", "psycopg.*", - "pulumi.*", "pulumi_azure_native.*", + "pulumi_azuread.*", "pulumi_random.*", - "pulumi_tls.*", + "pulumi.*", "pymssql.*", - "rich.*", "simple_acme_dns.*", "sklearn.*", "websocket.*", diff --git a/requirements-constraints.txt b/requirements-constraints.txt deleted file mode 100644 index ebcbb8ff2e..0000000000 --- a/requirements-constraints.txt +++ /dev/null @@ -1,29 +0,0 @@ -# Use this file to specify constraints on packages that we do not directly depend on -# It will be used by pip-compile when solving the environment, but only if the package is required otherwise - -# Exclude azure-identity < 1.16.1 (from https://github.com/alan-turing-institute/data-safe-haven/security/dependabot/17) -azure-identity >=1.16.1 - -# Exclude cryptography < 42.0.0 (from https://github.com/alan-turing-institute/data-safe-haven/security/dependabot/8) -# Exclude cryptography < 42.0.2 (from https://github.com/alan-turing-institute/data-safe-haven/security/dependabot/9) -# Exclude cryptography >= 38.0.0, < 42.0.4 (from https://github.com/alan-turing-institute/data-safe-haven/security/dependabot/10) -cryptography >=42.0.4 - -# Exclude dnspython < 2.6.1 (from https://github.com/alan-turing-institute/data-safe-haven/security/dependabot/13) -dnspython >=2.6.1 - -# Exclude idna < 3.7 (from https://github.com/alan-turing-institute/data-safe-haven/security/dependabot/11) -idna >=3.7 - -# Exclude jinja < 3.1.3 (from https://github.com/alan-turing-institute/data-safe-haven/security/dependabot/7) -# Exclude jinja < 3.1.4 (from https://github.com/alan-turing-institute/data-safe-haven/security/dependabot/14) -jinja >=3.1.4 - - -# Exclude requests >= 2.3.0, < 2.31.0 (from https://github.com/alan-turing-institute/data-safe-haven/security/dependabot/3) -# Exclude requests < 2.32.0 (from https://github.com/alan-turing-institute/data-safe-haven/security/dependabot/15) -requests >=2.32.0 - -# Exclude urllib3 >= 2.0.0, < 2.0.6 (from https://github.com/alan-turing-institute/data-safe-haven/security/dependabot/5) -# Exclude urllib3 >= 2.0.0, < 2.2.2 (from https://github.com/alan-turing-institute/data-safe-haven/security/dependabot/18) -urllib3 !=2.0.*,!=2.1.*,!=2.2.0,!=2.2.1 diff --git a/tests/commands/conftest.py b/tests/commands/conftest.py index 6459c84d6c..dab10adb7b 100644 --- a/tests/commands/conftest.py +++ b/tests/commands/conftest.py @@ -26,6 +26,12 @@ def 
mock_azure_sdk_blob_exists_false(mocker): mocker.patch.object(AzureSdk, "blob_exists", return_value=False) +@fixture +def mock_contextmanager_assert_context(mocker, context) -> Context: + context._entra_application_secret = "dummy-secret" # noqa: S105 + mocker.patch.object(ContextManager, "assert_context", return_value=context) + + @fixture def mock_graph_api_add_custom_domain(mocker): mocker.patch.object( @@ -33,6 +39,15 @@ def mock_graph_api_add_custom_domain(mocker): ) +@fixture +def mock_graph_api_get_application_by_name(mocker, request): + mocker.patch.object( + GraphApi, + "get_application_by_name", + return_value={"appId": request.config.guid_application}, + ) + + @fixture def mock_graph_api_token(mocker): mocker.patch.object(GraphApi, "token", return_value="dummy-token") @@ -174,7 +189,7 @@ def teardown_then_exit(*args, **kwargs): # noqa: ARG001 @fixture -def runner(tmp_contexts): +def runner(tmp_contexts) -> CliRunner: runner = CliRunner( env={ "DSH_CONFIG_DIRECTORY": str(tmp_contexts), diff --git a/tests/commands/test_config_sre.py b/tests/commands/test_config_sre.py index ddd5529903..7460a908eb 100644 --- a/tests/commands/test_config_sre.py +++ b/tests/commands/test_config_sre.py @@ -4,9 +4,10 @@ from data_safe_haven.config import ContextManager, SREConfig from data_safe_haven.config.sre_config import sre_config_name from data_safe_haven.exceptions import ( + DataSafeHavenAzureError, DataSafeHavenAzureStorageError, DataSafeHavenConfigError, - DataSafeHavenError, + DataSafeHavenTypeError, ) from data_safe_haven.external import AzureSdk @@ -41,6 +42,25 @@ def test_show_file(self, mocker, runner, sre_config_yaml, tmp_path): template_text = f.read() assert sre_config_yaml in template_text + def test_show_invalid_config(self, mocker, runner, context, sre_config_yaml): + mocker.patch.object( + SREConfig, "from_remote_by_name", side_effect=DataSafeHavenTypeError(" ") + ) + mock_method = mocker.patch.object( + AzureSdk, "download_blob", return_value=sre_config_yaml + ) + sre_name = "sandbox" + result = runner.invoke(config_command_group, ["show", sre_name]) + + assert result.exit_code == 1 + assert sre_config_yaml in result.stdout + mock_method.assert_called_once_with( + sre_config_name(sre_name), + context.resource_group_name, + context.storage_account_name, + context.storage_container_name, + ) + def test_no_context(self, mocker, runner): sre_name = "sandbox" mocker.patch.object( @@ -73,7 +93,7 @@ def test_no_storage_account(self, mocker, runner): def test_incorrect_sre_name(self, mocker, runner): sre_name = "sandbox" mocker.patch.object( - SREConfig, "from_remote_by_name", side_effect=DataSafeHavenError(" ") + SREConfig, "from_remote_by_name", side_effect=DataSafeHavenAzureError(" ") ) result = runner.invoke(config_command_group, ["show", sre_name]) assert "No configuration exists for an SRE" in result.stdout @@ -261,5 +281,66 @@ def test_upload_no_file(self, mocker, runner): def test_upload_file_does_not_exist(self, mocker, runner): mocker.patch.object(Path, "is_file", return_value=False) result = runner.invoke(config_command_group, ["upload", "fake_config.yaml"]) + assert result.exit_code == 1 assert "Configuration file 'fake_config.yaml' not found." 
in result.stdout + + def test_upload_invalid_config( + self, mocker, runner, context, sre_config_file, sre_config_yaml + ): + sre_name = "SandBox" + sre_filename = sre_config_name(sre_name) + + mock_exists = mocker.patch.object(SREConfig, "remote_exists", return_value=True) + mocker.patch.object( + SREConfig, "remote_yaml_diff", side_effect=DataSafeHavenTypeError(" ") + ) + mocker.patch.object(AzureSdk, "download_blob", return_value=sre_config_yaml) + + result = runner.invoke(config_command_group, ["upload", str(sre_config_file)]) + + assert result.exit_code == 1 + + mock_exists.assert_called_once_with(context, filename=sre_filename) + assert sre_config_yaml in result.stdout + assert ( + "To overwrite the remote config, use `dsh config upload --force`" + in result.stdout + ) + + def test_upload_invalid_config_force( + self, mocker, runner, context, sre_config_file, sre_config_yaml + ): + sre_name = "SandBox" + sre_filename = sre_config_name(sre_name) + + mocker.patch.object( + SREConfig, "remote_yaml_diff", side_effect=DataSafeHavenTypeError(" ") + ) + mock_upload = mocker.patch.object(AzureSdk, "upload_blob", return_value=None) + + result = runner.invoke( + config_command_group, ["upload", "--force", str(sre_config_file)] + ) + + assert result.exit_code == 0 + + mock_upload.assert_called_once_with( + sre_config_yaml, + sre_filename, + context.resource_group_name, + context.storage_account_name, + context.storage_container_name, + ) + + def test_upload_missing_field( + self, runner, tmp_path, sre_config_yaml_missing_field + ): + config_file_path = tmp_path / "config.yaml" + with open(config_file_path, "w") as f: + f.write(sre_config_yaml_missing_field) + + result = runner.invoke(config_command_group, ["upload", str(config_file_path)]) + assert result.exit_code == 1 + assert "validation errors" in result.stdout + assert "Check for missing" in result.stdout diff --git a/tests/commands/test_sre.py b/tests/commands/test_sre.py index dd6a9fba05..6c4a13f545 100644 --- a/tests/commands/test_sre.py +++ b/tests/commands/test_sre.py @@ -1,33 +1,88 @@ +from pytest import CaptureFixture, LogCaptureFixture +from pytest_mock import MockerFixture +from typer.testing import CliRunner + from data_safe_haven.commands.sre import sre_command_group +from data_safe_haven.config import Context, ContextManager +from data_safe_haven.exceptions import DataSafeHavenAzureError +from data_safe_haven.external import AzureSdk class TestDeploySRE: def test_deploy( self, - runner, + runner: CliRunner, + mock_azuresdk_get_subscription_name, # noqa: ARG002 mock_graph_api_token, # noqa: ARG002 + mock_contextmanager_assert_context, # noqa: ARG002 mock_ip_1_2_3_4, # noqa: ARG002 mock_pulumi_config_from_remote_or_create, # noqa: ARG002 mock_pulumi_config_upload, # noqa: ARG002 mock_shm_config_from_remote, # noqa: ARG002 mock_sre_config_from_remote, # noqa: ARG002 + mock_graph_api_get_application_by_name, # noqa: ARG002 mock_sre_project_manager_deploy_then_exit, # noqa: ARG002 - ): + ) -> None: result = runner.invoke(sre_command_group, ["deploy", "sandbox"]) assert result.exit_code == 1 assert "mock deploy" in result.stdout assert "mock deploy error" in result.stdout - def test_no_context_file(self, runner_no_context_file): + def test_no_application( + self, + caplog: LogCaptureFixture, + runner: CliRunner, + mock_azuresdk_get_subscription_name, # noqa: ARG002 + mock_contextmanager_assert_context, # noqa: ARG002 + mock_graph_api_token, # noqa: ARG002 + mock_ip_1_2_3_4, # noqa: ARG002 + mock_pulumi_config_from_remote_or_create, # 
noqa: ARG002 + mock_shm_config_from_remote, # noqa: ARG002 + mock_sre_config_from_remote, # noqa: ARG002 + ) -> None: + result = runner.invoke(sre_command_group, ["deploy", "sandbox"]) + assert result.exit_code == 1 + assert ( + "No Entra application 'Data Safe Haven (acmedeployment) Pulumi Service Principal' was found." + in caplog.text + ) + assert "Please redeploy your SHM." in caplog.text + + def test_no_application_secret( + self, + caplog: LogCaptureFixture, + runner: CliRunner, + context: Context, + mocker: MockerFixture, + mock_azuresdk_get_subscription_name, # noqa: ARG002 + mock_graph_api_get_application_by_name, # noqa: ARG002 + mock_graph_api_token, # noqa: ARG002 + mock_ip_1_2_3_4, # noqa: ARG002 + mock_pulumi_config_from_remote_or_create, # noqa: ARG002 + mock_shm_config_from_remote, # noqa: ARG002 + mock_sre_config_from_remote, # noqa: ARG002 + ) -> None: + mocker.patch.object( + AzureSdk, "get_keyvault_secret", side_effect=DataSafeHavenAzureError("") + ) + mocker.patch.object(ContextManager, "assert_context", return_value=context) + result = runner.invoke(sre_command_group, ["deploy", "sandbox"]) + assert result.exit_code == 1 + assert ( + "No Entra application secret 'Pulumi Deployment Secret' was found. Please redeploy your SHM." + in caplog.text + ) + + def test_no_context_file(self, runner_no_context_file) -> None: result = runner_no_context_file.invoke(sre_command_group, ["deploy", "sandbox"]) assert result.exit_code == 1 assert "Could not find file" in result.stdout def test_auth_failure( self, - runner, + runner: CliRunner, mock_azuresdk_get_credential_failure, # noqa: ARG002 - ): + ) -> None: result = runner.invoke(sre_command_group, ["deploy", "sandbox"]) assert result.exit_code == 1 assert "mock get_credential\n" in result.stdout @@ -36,9 +91,9 @@ def test_auth_failure( def test_no_shm( self, capfd, - runner, + runner: CliRunner, mock_shm_config_from_remote_fails, # noqa: ARG002 - ): + ) -> None: result = runner.invoke(sre_command_group, ["deploy", "sandbox"]) out, _ = capfd.readouterr() assert result.exit_code == 1 @@ -48,19 +103,19 @@ def test_no_shm( class TestTeardownSRE: def test_teardown( self, - runner, + runner: CliRunner, mock_graph_api_token, # noqa: ARG002 mock_ip_1_2_3_4, # noqa: ARG002 mock_pulumi_config_from_remote, # noqa: ARG002 mock_shm_config_from_remote, # noqa: ARG002 mock_sre_config_from_remote, # noqa: ARG002 mock_sre_project_manager_teardown_then_exit, # noqa: ARG002 - ): + ) -> None: result = runner.invoke(sre_command_group, ["teardown", "sandbox"]) assert result.exit_code == 1 assert "mock teardown" in result.stdout - def test_no_context_file(self, runner_no_context_file): + def test_no_context_file(self, runner_no_context_file) -> None: result = runner_no_context_file.invoke( sre_command_group, ["teardown", "sandbox"] ) @@ -69,10 +124,10 @@ def test_no_context_file(self, runner_no_context_file): def test_no_shm( self, - capfd, - runner, + capfd: CaptureFixture, + runner: CliRunner, mock_shm_config_from_remote_fails, # noqa: ARG002 - ): + ) -> None: result = runner.invoke(sre_command_group, ["teardown", "sandbox"]) out, _ = capfd.readouterr() assert result.exit_code == 1 @@ -80,9 +135,9 @@ def test_no_shm( def test_auth_failure( self, - runner, + runner: CliRunner, mock_azuresdk_get_credential_failure, # noqa: ARG002 - ): + ) -> None: result = runner.invoke(sre_command_group, ["teardown", "sandbox"]) assert result.exit_code == 1 assert "mock get_credential\n" in result.stdout diff --git a/tests/commands/test_users.py 
b/tests/commands/test_users.py index a9feb25ee2..c1b183c922 100644 --- a/tests/commands/test_users.py +++ b/tests/commands/test_users.py @@ -26,6 +26,17 @@ def test_invalid_shm( assert result.exit_code == 1 assert "Have you deployed the SHM?" in result.stdout + def test_invalid_sre( + self, + mock_pulumi_config_from_remote, # noqa: ARG002 + mock_shm_config_from_remote, # noqa: ARG002 + runner, + ): + result = runner.invoke(users_command_group, ["list", "my_sre"]) + + assert result.exit_code == 1 + assert "Is the SRE deployed?" in result.stdout + class TestRegister: def test_invalid_shm( diff --git a/tests/config/test_config_sections.py b/tests/config/test_config_sections.py index 0363e41e38..6528b130fa 100644 --- a/tests/config/test_config_sections.py +++ b/tests/config/test_config_sections.py @@ -9,7 +9,11 @@ ConfigSubsectionRemoteDesktopOpts, ConfigSubsectionStorageQuotaGB, ) -from data_safe_haven.types import DatabaseSystem, SoftwarePackageCategory +from data_safe_haven.types import ( + AzureServiceTag, + DatabaseSystem, + SoftwarePackageCategory, +) class TestConfigSectionAzure: @@ -184,6 +188,24 @@ def test_ip_overlap_research_user(self): research_user_ip_addresses=["1.2.3.4", "1.2.3.4"], ) + def test_research_user_tag_internet( + self, + config_subsection_remote_desktop: ConfigSubsectionRemoteDesktopOpts, + config_subsection_storage_quota_gb: ConfigSubsectionStorageQuotaGB, + ): + sre_config = ConfigSectionSRE( + admin_email_address="admin@example.com", + remote_desktop=config_subsection_remote_desktop, + storage_quota_gb=config_subsection_storage_quota_gb, + research_user_ip_addresses="Internet", + ) + assert isinstance(sre_config.research_user_ip_addresses, AzureServiceTag) + assert sre_config.research_user_ip_addresses == "Internet" + + def test_research_user_tag_invalid(self): + with pytest.raises(ValueError, match="Input should be 'Internet'"): + ConfigSectionSRE(research_user_ip_addresses="Not a tag") + @pytest.mark.parametrize( "addresses", [ diff --git a/tests/config/test_context_manager.py b/tests/config/test_context_manager.py index 1118d1bbb3..c4a6b5e39c 100644 --- a/tests/config/test_context_manager.py +++ b/tests/config/test_context_manager.py @@ -4,10 +4,12 @@ from data_safe_haven.config import Context, ContextManager from data_safe_haven.exceptions import ( + DataSafeHavenAzureError, DataSafeHavenConfigError, DataSafeHavenTypeError, DataSafeHavenValueError, ) +from data_safe_haven.external import AzureSdk from data_safe_haven.version import __version__ @@ -29,6 +31,35 @@ def test_invalid_subscription_name(self, context_dict): ): Context(**context_dict) + def test_entra_application_name(self, context: Context) -> None: + assert ( + context.entra_application_name + == "Data Safe Haven (acmedeployment) Pulumi Service Principal" + ) + + def test_entra_application_secret(self, context: Context, mocker) -> None: + mocker.patch.object( + AzureSdk, "get_keyvault_secret", return_value="secret-value" + ) + assert context.entra_application_secret == "secret-value" # noqa: S105 + + def test_entra_application_secret_missing(self, context: Context, mocker) -> None: + mocker.patch.object( + AzureSdk, + "get_keyvault_secret", + side_effect=DataSafeHavenAzureError("Error message"), + ) + assert context.entra_application_secret == "" + + def test_entra_application_secret_setter(self, context: Context, mocker) -> None: + mock_set_keyvault_secret = mocker.patch.object(AzureSdk, "set_keyvault_secret") + context.entra_application_secret = "secret-value" # noqa: S105 + 
mock_set_keyvault_secret.assert_called_once_with( + key_vault_name="shm-acmedeployment-kv", + secret_name="pulumi-deployment-secret", + secret_value="secret-value", + ) + def test_tags(self, context): assert context.tags["description"] == "Acme Deployment" assert context.tags["project"] == "Data Safe Haven" @@ -114,7 +145,7 @@ def test_missing_selected(self, context_yaml): ) with pytest.raises( DataSafeHavenTypeError, - match="Could not load ContextManager configuration.", + match="ContextManager configuration is invalid.", ): ContextManager.from_yaml(context_yaml) @@ -124,7 +155,7 @@ def test_invalid_selected_input(self, context_yaml): ) with pytest.raises( DataSafeHavenTypeError, - match="Could not load ContextManager configuration.", + match="ContextManager configuration is invalid.", ): ContextManager.from_yaml(context_yaml) diff --git a/tests/config/test_pulumi.py b/tests/config/test_pulumi.py index 3bb57b1a38..e93e0f982a 100644 --- a/tests/config/test_pulumi.py +++ b/tests/config/test_pulumi.py @@ -114,9 +114,7 @@ def test_from_yaml_not_dict(self): def test_from_yaml_validation_error(self): not_valid = "projects: -3" - with raises( - DataSafeHavenTypeError, match="Could not load Pulumi configuration." - ): + with raises(DataSafeHavenTypeError, match="Pulumi configuration is invalid."): DSHPulumiConfig.from_yaml(not_valid) def test_upload(self, mocker, pulumi_config, context): diff --git a/tests/conftest.py b/tests/conftest.py index 055ee0ad47..5a8ce42847 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -38,6 +38,7 @@ def pytest_configure(config): """Define constants for use across multiple tests""" config.guid_admin = "00edec65-b071-4d26-8779-a9fe791c6e14" + config.guid_application = "aa78dceb-4116-4713-8554-cf2b3027e119" config.guid_entra = "48b2425b-5f2c-4cbd-9458-0441daa8994c" config.guid_subscription = "35ebced1-4e7a-4c1f-b634-c0886937085d" config.guid_tenant = "d5c5c439-1115-4cb6-ab50-b8e547b6c8dd" @@ -205,6 +206,15 @@ def mock_azuresdk_get_subscription(mocker, request): ) +@fixture +def mock_azuresdk_get_subscription_name(mocker): + mocker.patch.object( + AzureSdk, + "get_subscription_name", + return_value="Data Safe Haven Acme", + ) + + @fixture def mock_azuresdk_get_credential(mocker): class MockCredential(TokenCredential): @@ -524,6 +534,12 @@ def sre_config_yaml(request): return yaml.dump(yaml.safe_load(content)) +@fixture +def sre_config_yaml_missing_field(sre_config_yaml): + content = sre_config_yaml.replace("admin_email_address: admin@example.com", "") + return yaml.dump(yaml.safe_load(content)) + + @fixture def sre_project_manager( context_no_secrets, diff --git a/tests/infrastructure/common/test_ip_ranges.py b/tests/infrastructure/common/test_ip_ranges.py index 3dab535193..2c4dc749d3 100644 --- a/tests/infrastructure/common/test_ip_ranges.py +++ b/tests/infrastructure/common/test_ip_ranges.py @@ -14,10 +14,8 @@ def test_vnet_and_subnets(self): assert SREIpRanges.data_configuration == AzureIPv4Range( "10.0.1.16", "10.0.1.23" ) - assert SREIpRanges.data_desired_state == AzureIPv4Range( - "10.0.1.24", "10.0.1.31" - ) - assert SREIpRanges.data_private == AzureIPv4Range("10.0.1.32", "10.0.1.39") + assert SREIpRanges.data_private == AzureIPv4Range("10.0.1.24", "10.0.1.31") + assert SREIpRanges.desired_state == AzureIPv4Range("10.0.1.32", "10.0.1.39") assert SREIpRanges.firewall == AzureIPv4Range("10.0.1.64", "10.0.1.127") assert SREIpRanges.firewall_management == AzureIPv4Range( "10.0.1.128", "10.0.1.191" diff --git 
diff --git a/tests/conftest.py b/tests/conftest.py
index 055ee0ad47..5a8ce42847 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -38,6 +38,7 @@
 def pytest_configure(config):
     """Define constants for use across multiple tests"""
     config.guid_admin = "00edec65-b071-4d26-8779-a9fe791c6e14"
+    config.guid_application = "aa78dceb-4116-4713-8554-cf2b3027e119"
     config.guid_entra = "48b2425b-5f2c-4cbd-9458-0441daa8994c"
     config.guid_subscription = "35ebced1-4e7a-4c1f-b634-c0886937085d"
     config.guid_tenant = "d5c5c439-1115-4cb6-ab50-b8e547b6c8dd"
@@ -205,6 +206,15 @@ def mock_azuresdk_get_subscription(mocker, request):
     )
 
 
+@fixture
+def mock_azuresdk_get_subscription_name(mocker):
+    mocker.patch.object(
+        AzureSdk,
+        "get_subscription_name",
+        return_value="Data Safe Haven Acme",
+    )
+
+
 @fixture
 def mock_azuresdk_get_credential(mocker):
     class MockCredential(TokenCredential):
@@ -524,6 +534,12 @@ def sre_config_yaml(request):
     return yaml.dump(yaml.safe_load(content))
 
 
+@fixture
+def sre_config_yaml_missing_field(sre_config_yaml):
+    content = sre_config_yaml.replace("admin_email_address: admin@example.com", "")
+    return yaml.dump(yaml.safe_load(content))
+
+
 @fixture
 def sre_project_manager(
     context_no_secrets,
diff --git a/tests/infrastructure/common/test_ip_ranges.py b/tests/infrastructure/common/test_ip_ranges.py
index 3dab535193..2c4dc749d3 100644
--- a/tests/infrastructure/common/test_ip_ranges.py
+++ b/tests/infrastructure/common/test_ip_ranges.py
@@ -14,10 +14,8 @@ def test_vnet_and_subnets(self):
         assert SREIpRanges.data_configuration == AzureIPv4Range(
             "10.0.1.16", "10.0.1.23"
         )
-        assert SREIpRanges.data_desired_state == AzureIPv4Range(
-            "10.0.1.24", "10.0.1.31"
-        )
-        assert SREIpRanges.data_private == AzureIPv4Range("10.0.1.32", "10.0.1.39")
+        assert SREIpRanges.data_private == AzureIPv4Range("10.0.1.24", "10.0.1.31")
+        assert SREIpRanges.desired_state == AzureIPv4Range("10.0.1.32", "10.0.1.39")
         assert SREIpRanges.firewall == AzureIPv4Range("10.0.1.64", "10.0.1.127")
         assert SREIpRanges.firewall_management == AzureIPv4Range(
             "10.0.1.128", "10.0.1.191"
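[Aside, not part of the diff: the updated SREIpRanges assertions above keep data_configuration, data_private and the renamed desired_state as back-to-back /29 blocks. A stdlib-only check of that contiguous layout:]

# Illustration of the contiguous /29 layout asserted in test_vnet_and_subnets.
import ipaddress

data_configuration = ipaddress.ip_network("10.0.1.16/29")  # 10.0.1.16 - 10.0.1.23
data_private = ipaddress.ip_network("10.0.1.24/29")        # 10.0.1.24 - 10.0.1.31
desired_state = ipaddress.ip_network("10.0.1.32/29")       # 10.0.1.32 - 10.0.1.39

# Each block starts immediately after the previous one ends.
assert data_private.network_address == data_configuration.broadcast_address + 1
assert desired_state.network_address == data_private.broadcast_address + 1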
diff --git a/tests/infrastructure/programs/sre/test_workspaces.py b/tests/infrastructure/programs/sre/test_workspaces.py
index 108ce162d0..bfa360ae53 100644
--- a/tests/infrastructure/programs/sre/test_workspaces.py
+++ b/tests/infrastructure/programs/sre/test_workspaces.py
@@ -6,10 +6,10 @@
 class TestTemplateCloudInit:
     def test_template_cloudinit(self):
         cloudinit = SREWorkspacesComponent.template_cloudinit(
-            storage_account_data_desired_state_name="storageaccount",
+            storage_account_desired_state_name="sadesiredstate",
         )
         assert (
-            '- ["storageaccount.blob.core.windows.net:/storageaccount/desiredstate", /desired_state, nfs, "ro,'
+            '- ["sadesiredstate.blob.core.windows.net:/sadesiredstate/desiredstate", /var/local/ansible, nfs, "ro,'
             in cloudinit
         )
 
diff --git a/tests/infrastructure/test_project_manager.py b/tests/infrastructure/test_project_manager.py
index 259c5f1b37..8c74a83808 100644
--- a/tests/infrastructure/test_project_manager.py
+++ b/tests/infrastructure/test_project_manager.py
@@ -49,6 +49,22 @@ def test_cleanup(
         )
         assert "Purged Azure Key Vault shmacmedsresandbosecrets." in stdout
 
+    def test_ensure_config(self, sre_project_manager):
+        sre_project_manager.ensure_config(
+            "azure-native:location", "uksouth", secret=False
+        )
+        sre_project_manager.ensure_config("data-safe-haven:variable", "8", secret=False)
+
+    def test_ensure_config_exception(self, sre_project_manager):
+
+        with raises(
+            DataSafeHavenPulumiError,
+            match=r"Unchangeable configuration option 'azure-native:location'.*your configuration: 'ukwest', Pulumi workspace: 'uksouth'",
+        ):
+            sre_project_manager.ensure_config(
+                "azure-native:location", "ukwest", secret=False
+            )
+
     def test_new_project(
         self,
         context_no_secrets,
diff --git a/tests/logging/test_plain_file_handler.py b/tests/logging/test_plain_file_handler.py
index a2bf60ad81..90f7c6ca70 100644
--- a/tests/logging/test_plain_file_handler.py
+++ b/tests/logging/test_plain_file_handler.py
@@ -1,6 +1,15 @@
+import pytest
+
 from data_safe_haven.logging.plain_file_handler import PlainFileHandler
 
 
 class TestPlainFileHandler:
-    def test_strip_formatting(self):
-        assert PlainFileHandler.strip_formatting("[green]hello[/]") == "hello"
+    def test_strip_rich_formatting(self):
+        assert PlainFileHandler.strip_rich_formatting("[green]Hello[/]") == "Hello"
+
+    @pytest.mark.parametrize("escape", ["\033", "\x1B", "\u001b", "\x1B"])
+    def test_strip_ansi_escapes(self, escape):
+        assert (
+            PlainFileHandler.strip_ansi_escapes(f"{escape}[31;1;4mHello{escape}[0m")
+            == "Hello"
+        )
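[Aside, not part of the diff: the new PlainFileHandler test above passes the same ESC character written in several notations through strip_ansi_escapes. A minimal regex-based sketch of stripping simple colour/style escape sequences; it covers CSI sequences like the one in the test, not every ANSI escape, and is not the project's implementation:]

# Illustrative sketch only: a simple regex for CSI colour/style sequences.
import re

CSI_PATTERN = re.compile(r"\x1b\[[0-9;]*m")


def strip_simple_ansi(text: str) -> str:
    return CSI_PATTERN.sub("", text)


assert strip_simple_ansi("\x1b[31;1;4mHello\x1b[0m") == "Hello"
assert strip_simple_ansi("plain text") == "plain text"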
diff --git a/tests/serialisers/test_azure_serialisable_model.py b/tests/serialisers/test_azure_serialisable_model.py
index 093d4f70d9..22d8a16e3f 100644
--- a/tests/serialisers/test_azure_serialisable_model.py
+++ b/tests/serialisers/test_azure_serialisable_model.py
@@ -118,7 +118,7 @@ def test_from_yaml_validation_error(self):
 
         with raises(
             DataSafeHavenTypeError,
-            match="Could not load Example configuration.",
+            match="Example configuration is invalid.",
         ):
             ExampleAzureSerialisableModel.from_yaml(yaml)
 
@@ -137,3 +137,12 @@ def test_from_remote(self, mocker, context, example_config_yaml):
             context.storage_account_name,
             context.storage_container_name,
         )
+
+    def test_from_remote_validation_error(self, mocker, context, example_config_yaml):
+        example_config_yaml = example_config_yaml.replace("5", "abc")
+        mocker.patch.object(AzureSdk, "download_blob", return_value=example_config_yaml)
+        with raises(
+            DataSafeHavenTypeError,
+            match="'file.yaml' does not contain a valid Example configuration.",
+        ):
+            ExampleAzureSerialisableModel.from_remote(context)
diff --git a/tests/serialisers/test_yaml_serialisable_model.py b/tests/serialisers/test_yaml_serialisable_model.py
index cd27ea612b..8850123bb1 100644
--- a/tests/serialisers/test_yaml_serialisable_model.py
+++ b/tests/serialisers/test_yaml_serialisable_model.py
@@ -68,15 +68,24 @@ def test_from_yaml_not_dict(self):
         ):
             ExampleYAMLSerialisableModel.from_yaml(yaml)
 
-    def test_from_yaml_validation_error(self):
+    def test_from_yaml_validation_errors(self, caplog):
         yaml = "\n".join(
-            ["string: 'abc'", "integer: 'not an integer'", "list_of_integers: [-1,0,1]"]
+            [
+                "string: 'abc'",
+                "integer: 'not an integer'",
+                "list_of_integers: [-1,0,z,1]",
+            ]
         )
         with raises(
             DataSafeHavenTypeError,
-            match="Could not load Example configuration.",
+            match="Example configuration is invalid.",
         ):
             ExampleYAMLSerialisableModel.from_yaml(yaml)
+        assert "Input should be a valid integer" in caplog.text
+        assert "Original input: not an integer" in caplog.text
+        assert "unable to parse string as an integer" in caplog.text
+        assert "list_of_integers.2" in caplog.text
+        assert "Original input: z" in caplog.text
 
     def test_to_filepath(self, tmp_path, example_config_class):
         filepath = tmp_path / "test.yaml"
diff --git a/tests/validators/test_validators.py b/tests/validators/test_validators.py
index 1c38e551f8..18d2fd31b5 100644
--- a/tests/validators/test_validators.py
+++ b/tests/validators/test_validators.py
@@ -86,6 +86,36 @@ def test_fqdn_fail(self, fqdn):
             validators.fqdn(fqdn)
 
 
+class TestValidateIpAddress:
+    @pytest.mark.parametrize(
+        "ip_address,output",
+        [
+            ("127.0.0.1", "127.0.0.1/32"),
+            ("0.0.0.0/0", "0.0.0.0/0"),
+            ("192.168.171.1/32", "192.168.171.1/32"),
+        ],
+    )
+    def test_ip_address(self, ip_address, output):
+        assert validators.ip_address(ip_address) == output
+
+    @pytest.mark.parametrize(
+        "ip_address",
+        [
+            "example.com",
+            "University of Life",
+            "999.999.999.999",
+            "0.0.0.0/-1",
+            "255.255.255.0/2",
+        ],
+    )
+    def test_ip_address_fail(self, ip_address):
+        with pytest.raises(
+            ValueError,
+            match="Expected valid IPv4 address, for example '1.1.1.1', or 'Internet'.",
+        ):
+            validators.ip_address(ip_address)
+
+
 class TestValidateSafeString:
     @pytest.mark.parametrize(
         "safe_string",
diff --git a/typings/pulumi/__init__.pyi b/typings/pulumi/__init__.pyi
index e1468220dd..0b80efa17e 100644
--- a/typings/pulumi/__init__.pyi
+++ b/typings/pulumi/__init__.pyi
@@ -1,6 +1,6 @@
 import pulumi.automation as automation
 import pulumi.dynamic as dynamic
-from pulumi.asset import FileAsset
+from pulumi.asset import FileAsset, StringAsset
 from pulumi.config import (
     Config,
 )
@@ -11,6 +11,7 @@ from pulumi.output import (
 )
 from pulumi.resource import (
     ComponentResource,
+    InvokeOptions,
     Resource,
     ResourceOptions,
     export,
@@ -24,8 +25,10 @@ __all__ = [
     "export",
     "FileAsset",
     "Input",
+    "InvokeOptions",
     "Output",
     "Resource",
     "ResourceOptions",
+    "StringAsset",
     "UNKNOWN",
 ]
diff --git a/typings/pulumi_azure_native/__init__.pyi b/typings/pulumi_azure_native/__init__.pyi
index 56be0a1e3a..598b0a9e6c 100644
--- a/typings/pulumi_azure_native/__init__.pyi
+++ b/typings/pulumi_azure_native/__init__.pyi
@@ -12,6 +12,7 @@ import pulumi_azure_native.managedidentity as managedidentity
 import pulumi_azure_native.network as network
 import pulumi_azure_native.operationalinsights as operationalinsights
 import pulumi_azure_native.operationsmanagement as operationsmanagement
+import pulumi_azure_native.provider as provider
 import pulumi_azure_native.resources as resources
 import pulumi_azure_native.sql as sql
 import pulumi_azure_native.storage as storage
@@ -30,6 +31,7 @@ __all__ = [
     "network",
     "operationalinsights",
     "operationsmanagement",
+    "provider",
     "resources",
     "sql",
     "storage",