From b3dbc96729f2d23c65eb5fab6a8b2f9f8d1e695c Mon Sep 17 00:00:00 2001 From: James Robinson Date: Tue, 19 Sep 2023 11:31:59 +0100 Subject: [PATCH 01/12] :sparkles: Install a basic bats version --- .../resources/workspace/workspace.cloud_init.mustache.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/data_safe_haven/resources/workspace/workspace.cloud_init.mustache.yaml b/data_safe_haven/resources/workspace/workspace.cloud_init.mustache.yaml index 17471221de..c4216adb76 100644 --- a/data_safe_haven/resources/workspace/workspace.cloud_init.mustache.yaml +++ b/data_safe_haven/resources/workspace/workspace.cloud_init.mustache.yaml @@ -104,6 +104,8 @@ packages: - libpq-dev # interact with PostgreSQL databases - msodbcsql17 # interact with Microsoft SQL databases - unixodbc-dev # interact with Microsoft SQL databases + # Bash testing + - bats package_update: true package_upgrade: true From f37b3055f7a7f6bd192f73227d3423898596c4e5 Mon Sep 17 00:00:00 2001 From: James Robinson Date: Thu, 14 Sep 2023 13:19:43 +0100 Subject: [PATCH 02/12] :sparkles: Add a skeleton bats script --- .../resources/workspace/run_all_tests.bats | 92 +++++++++++++++++++ 1 file changed, 92 insertions(+) create mode 100644 data_safe_haven/resources/workspace/run_all_tests.bats diff --git a/data_safe_haven/resources/workspace/run_all_tests.bats b/data_safe_haven/resources/workspace/run_all_tests.bats new file mode 100644 index 0000000000..b843d046f3 --- /dev/null +++ b/data_safe_haven/resources/workspace/run_all_tests.bats @@ -0,0 +1,92 @@ +#! /usr/bin/env bats +load "../bats/bats-assert/load" +load "../bats/bats-file/load" +load "../bats/bats-support/load" + + +# Helper functions +# ---------------- +install_requirements_python() { + pip install pandas psycopg pymssql +} + +install_requirements_R() { + Rscript -e "install.packages(c('DBI', 'odbc', 'RPostgres'))" +} + + +# Python +# ------ +# Test Python functionality +@test "Python functionality" { + run python tests/test_functionality_python.py 2>&1 + assert_output --partial 'All functionality tests passed' +} +# Test Python package repository +@test "Python package repository" { + run bash tests/test_repository_python.sh 2>&1 + assert_output --partial 'All package installations behaved as expected' +} + + +# R +# - +# Test R packages +# Test R functionality +@test "R functionality" { + run Rscript tests/test_functionality_R.R + assert_output --partial 'All functionality tests passed' +} + +# Test R package repository +@test "R package repository" { + run bash tests/test_repository_R.sh + assert_output --partial 'All package installations behaved as expected' +} + + +# Databases +# --------- +# Test MS SQL database +@test "MS SQL database (Python)" { + install_requirements_python + run bash tests/test_databases.sh -d mssql -l python + assert_output --partial 'All database tests passed' +} +@test "MS SQL database (R)" { + install_requirements_R + run bash tests/test_databases.sh -d mssql -l R + assert_output --partial 'All database tests passed' +} + +# Test Postgres database +@test "Postgres database (Python)" { + install_requirements_python + run bash tests/test_databases.sh -d postgresql -l python + assert_output --partial 'All database tests passed' +} +@test "Postgres database (R)" { + install_requirements_R + run bash tests/test_databases.sh -d postgresql -l R + assert_output --partial 'All database tests passed' +} + + +# Mounted drives +# -------------- +@test "Mounted drives (/data)" { + run bash tests/test_mounted_drives.sh -d data + assert_output --partial 
'All tests passed' +} +@test "Mounted drives (/home)" { + run bash tests/test_mounted_drives.sh -d home + assert_output --partial 'All tests passed' +} +@test "Mounted drives (/output)" { + run bash tests/test_mounted_drives.sh -d output + assert_output --partial 'All tests passed' +} +@test "Mounted drives (/shared)" { + run bash tests/test_mounted_drives.sh -d shared + assert_output --partial 'All tests passed' +} From 29983957baaf30f03fa9e43155bf47b113f40bd3 Mon Sep 17 00:00:00 2001 From: James Robinson Date: Fri, 15 Sep 2023 11:41:25 +0100 Subject: [PATCH 03/12] :truck: Rename remote_powershell.py to remote_script.py --- data_safe_haven/infrastructure/components/dynamic/__init__.py | 2 +- .../dynamic/{remote_powershell.py => remote_script.py} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename data_safe_haven/infrastructure/components/dynamic/{remote_powershell.py => remote_script.py} (100%) diff --git a/data_safe_haven/infrastructure/components/dynamic/__init__.py b/data_safe_haven/infrastructure/components/dynamic/__init__.py index 2fe0f8decb..7c51c1d231 100644 --- a/data_safe_haven/infrastructure/components/dynamic/__init__.py +++ b/data_safe_haven/infrastructure/components/dynamic/__init__.py @@ -2,7 +2,7 @@ from .blob_container_acl import BlobContainerAcl, BlobContainerAclProps from .compiled_dsc import CompiledDsc, CompiledDscProps from .file_share_file import FileShareFile, FileShareFileProps -from .remote_powershell import RemoteScript, RemoteScriptProps +from .remote_script import RemoteScript, RemoteScriptProps from .ssl_certificate import SSLCertificate, SSLCertificateProps __all__ = [ diff --git a/data_safe_haven/infrastructure/components/dynamic/remote_powershell.py b/data_safe_haven/infrastructure/components/dynamic/remote_script.py similarity index 100% rename from data_safe_haven/infrastructure/components/dynamic/remote_powershell.py rename to data_safe_haven/infrastructure/components/dynamic/remote_script.py From 415c68a4a53ca5f6fb36afb706d60724c3590c56 Mon Sep 17 00:00:00 2001 From: James Robinson Date: Tue, 19 Sep 2023 12:18:51 +0100 Subject: [PATCH 04/12] :sparkles: Add a FileUpload provider --- .../components/dynamic/file_upload.py | 117 ++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 data_safe_haven/infrastructure/components/dynamic/file_upload.py diff --git a/data_safe_haven/infrastructure/components/dynamic/file_upload.py b/data_safe_haven/infrastructure/components/dynamic/file_upload.py new file mode 100644 index 0000000000..067d150dd4 --- /dev/null +++ b/data_safe_haven/infrastructure/components/dynamic/file_upload.py @@ -0,0 +1,117 @@ +"""Pulumi dynamic component for running remote scripts on an Azure VM.""" +from typing import Any + +from pulumi import Input, Output, ResourceOptions +from pulumi.dynamic import CreateResult, DiffResult, Resource + +from data_safe_haven.external import AzureApi +from data_safe_haven.functions import b64encode + +from .dsh_resource_provider import DshResourceProvider + + +class FileUploadProps: + """Props for the FileUpload class""" + + def __init__( + self, + file_contents: Input[str], + file_hash: Input[str], + file_permissions: Input[str], + file_target: Input[str], + subscription_name: Input[str], + vm_name: Input[str], + vm_resource_group_name: Input[str], + force_refresh: Input[bool] | None = None, + ) -> None: + self.file_contents = file_contents + self.file_hash = file_hash + self.file_target = file_target + self.file_permissions = file_permissions + self.force_refresh = force_refresh + 
self.subscription_name = subscription_name + self.vm_name = vm_name + self.vm_resource_group_name = vm_resource_group_name + + +class FileUploadProvider(DshResourceProvider): + def create(self, props: dict[str, Any]) -> CreateResult: + """Run a remote script to create a file on a VM""" + outs = dict(**props) + azure_api = AzureApi(props["subscription_name"], disable_logging=True) + script_contents = f""" + target_dir=$(dirname "$target"); + mkdir -p $target_dir 2> /dev/null; + echo $contents_b64 | base64 --decode > $target; + chmod {props['file_permissions']} $target; + echo "Wrote file to $target"; + """ + script_parameters = { + "contents_b64": b64encode(props["file_contents"]), + "target": props["file_target"], + } + # Run remote script + outs["script_output"] = azure_api.run_remote_script( + props["vm_resource_group_name"], + script_contents, + script_parameters, + props["vm_name"], + ) + return CreateResult( + f"FileUpload-{props['file_hash']}", + outs=outs, + ) + + def delete(self, id_: str, props: dict[str, Any]) -> None: + """Delete the remote file from the VM""" + # Use `id` as a no-op to avoid ARG002 while maintaining function signature + id((id_, props)) + outs = dict(**props) + azure_api = AzureApi(props["subscription_name"], disable_logging=True) + script_contents = """ + rm -f "$target"; + echo "Removed file at $target"; + """ + script_parameters = { + "target": props["file_target"], + } + # Run remote script + outs["script_output"] = azure_api.run_remote_script( + props["vm_resource_group_name"], + script_contents, + script_parameters, + props["vm_name"], + ) + + def diff( + self, + id_: str, + old_props: dict[str, Any], + new_props: dict[str, Any], + ) -> DiffResult: + """Calculate diff between old and new state""" + # Use `id` as a no-op to avoid ARG002 while maintaining function signature + id(id_) + if new_props["force_refresh"]: + return DiffResult( + changes=True, + replaces=list(new_props.keys()), + stables=[], + delete_before_replace=True, + ) + return self.partial_diff(old_props, new_props, []) + + +class FileUpload(Resource): + script_output: Output[str] + _resource_type_name = "dsh:common:FileUpload" # set resource type + + def __init__( + self, + name: str, + props: FileUploadProps, + opts: ResourceOptions | None = None, + ): + super().__init__( + FileUploadProvider(), name, {"script_output": None, **vars(props)}, opts + ) From f4c3d701ab81989226cf2b7756499f9e0e73b7ab Mon Sep 17 00:00:00 2001 From: James Robinson Date: Tue, 19 Sep 2023 11:21:08 +0100 Subject: [PATCH 05/12] :sparkles: Use FileUpload provider for smoke tests --- .../infrastructure/components/__init__.py | 4 ++++ .../components/dynamic/__init__.py | 3 +++ .../infrastructure/stacks/declarative_sre.py | 1 + .../infrastructure/stacks/sre/workspaces.py | 24 +++++++++++++++++++ 4 files changed, 32 insertions(+) diff --git a/data_safe_haven/infrastructure/components/__init__.py b/data_safe_haven/infrastructure/components/__init__.py index ee872fabe0..6fcb8d3f9b 100644 --- a/data_safe_haven/infrastructure/components/__init__.py +++ b/data_safe_haven/infrastructure/components/__init__.py @@ -20,6 +20,8 @@ CompiledDscProps, FileShareFile, FileShareFileProps, + FileUpload, + FileUploadProps, RemoteScript, RemoteScriptProps, SSLCertificate, @@ -41,6 +43,8 @@ "CompiledDscProps", "FileShareFile", "FileShareFileProps", + "FileUpload", + "FileUploadProps", "LinuxVMComponentProps", "LocalDnsRecordComponent", "LocalDnsRecordProps", diff --git a/data_safe_haven/infrastructure/components/dynamic/__init__.py 
b/data_safe_haven/infrastructure/components/dynamic/__init__.py index 7c51c1d231..4fdfb12dfc 100644 --- a/data_safe_haven/infrastructure/components/dynamic/__init__.py +++ b/data_safe_haven/infrastructure/components/dynamic/__init__.py @@ -2,6 +2,7 @@ from .blob_container_acl import BlobContainerAcl, BlobContainerAclProps from .compiled_dsc import CompiledDsc, CompiledDscProps from .file_share_file import FileShareFile, FileShareFileProps +from .file_upload import FileUpload, FileUploadProps from .remote_script import RemoteScript, RemoteScriptProps from .ssl_certificate import SSLCertificate, SSLCertificateProps @@ -14,6 +15,8 @@ "CompiledDscProps", "FileShareFile", "FileShareFileProps", + "FileUpload", + "FileUploadProps", "RemoteScript", "RemoteScriptProps", "SSLCertificate", diff --git a/data_safe_haven/infrastructure/stacks/declarative_sre.py b/data_safe_haven/infrastructure/stacks/declarative_sre.py index 439da4b8ca..907ebfe667 100644 --- a/data_safe_haven/infrastructure/stacks/declarative_sre.py +++ b/data_safe_haven/infrastructure/stacks/declarative_sre.py @@ -253,6 +253,7 @@ def run(self) -> None: storage_account_data_private_user_name=data.storage_account_data_private_user_name, storage_account_data_private_sensitive_name=data.storage_account_data_private_sensitive_name, subnet_workspaces=networking.subnet_workspaces, + subscription_name=self.cfg.subscription_name, virtual_network_resource_group=networking.resource_group, virtual_network=networking.virtual_network, vm_details=list(enumerate(self.cfg.sres[self.sre_name].workspace_skus)), diff --git a/data_safe_haven/infrastructure/stacks/sre/workspaces.py b/data_safe_haven/infrastructure/stacks/sre/workspaces.py index 2821b6f669..64e0c6d4f3 100644 --- a/data_safe_haven/infrastructure/stacks/sre/workspaces.py +++ b/data_safe_haven/infrastructure/stacks/sre/workspaces.py @@ -14,10 +14,13 @@ get_name_from_vnet, ) from data_safe_haven.infrastructure.components import ( + FileUpload, + FileUploadProps, LinuxVMComponentProps, VMComponent, ) from data_safe_haven.resources import resources_path +from data_safe_haven.utility import FileReader class SREWorkspacesProps: @@ -43,6 +46,7 @@ def __init__( storage_account_data_private_user_name: Input[str], storage_account_data_private_sensitive_name: Input[str], subnet_workspaces: Input[network.GetSubnetResult], + subscription_name: Input[str], virtual_network_resource_group: Input[resources.ResourceGroup], virtual_network: Input[network.VirtualNetwork], vm_details: list[tuple[int, str]], # this must *not* be passed as an Input[T] @@ -69,6 +73,7 @@ def __init__( self.storage_account_data_private_sensitive_name = ( storage_account_data_private_sensitive_name ) + self.subscription_name = subscription_name self.virtual_network_name = Output.from_input(virtual_network).apply( get_name_from_vnet ) @@ -170,6 +175,25 @@ def __init__( for vm in vms ] + # Upload smoke tests + run_all_tests = FileReader(resources_path / "workspace" / "run_all_tests.bats") + for vm, vm_output in zip(vms, vm_outputs, strict=True): + file_run_all_tests = FileUpload( + f"{self._name}_file_run_all_tests", + FileUploadProps( + file_contents=run_all_tests.file_contents(), + file_hash=run_all_tests.sha256(), + file_permissions="0444", + file_target=f"/opt/tests/{run_all_tests.name}", + force_refresh=True, + subscription_name=props.subscription_name, + vm_name=vm.vm_name, + vm_resource_group_name=resource_group.name, + ), + opts=child_opts, + ) + vm_output["run_all_tests"] = file_run_all_tests.script_output + # Register outputs 
self.resource_group = resource_group From cccecbfc4b1af26bdd917223f4d4ca033dcca429 Mon Sep 17 00:00:00 2001 From: James Robinson Date: Tue, 19 Sep 2023 11:22:01 +0100 Subject: [PATCH 06/12] :truck: Use one MSAL cache per tenant --- data_safe_haven/external/api/graph_api.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/data_safe_haven/external/api/graph_api.py b/data_safe_haven/external/api/graph_api.py index a329b50437..1b1a00b677 100644 --- a/data_safe_haven/external/api/graph_api.py +++ b/data_safe_haven/external/api/graph_api.py @@ -361,7 +361,9 @@ def create_token_administrator(self) -> str: result = None try: # Load local token cache - local_token_cache = LocalTokenCache(pathlib.Path.home() / ".msal_cache") + local_token_cache = LocalTokenCache( + pathlib.Path.home() / f".msal_cache_{self.tenant_id}" + ) # Use the Powershell application by default as this should be pre-installed app = PublicClientApplication( authority=f"https://login.microsoftonline.com/{self.tenant_id}", From d9edec2b5f449b9b14694cd9db7e670074701bd6 Mon Sep 17 00:00:00 2001 From: James Robinson Date: Tue, 19 Sep 2023 11:22:54 +0100 Subject: [PATCH 07/12] :recycle: Simplify SHA256 calculation --- data_safe_haven/functions/strings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data_safe_haven/functions/strings.py b/data_safe_haven/functions/strings.py index a3dac5e106..27089eeaed 100644 --- a/data_safe_haven/functions/strings.py +++ b/data_safe_haven/functions/strings.py @@ -90,7 +90,7 @@ def seeded_uuid(seed: str) -> uuid.UUID: def sha256hash(input_string: str) -> str: """Return the SHA256 hash of a string as a string.""" - return hashlib.sha256(str.encode(input_string, encoding="utf-8")).hexdigest() + return hashlib.sha256(input_string.encode("utf-8")).hexdigest() def truncate_tokens(tokens: Sequence[str], max_length: int) -> list[str]: From 29249df465109af3e3904247d38ac9acc69fb33a Mon Sep 17 00:00:00 2001 From: James Robinson Date: Wed, 20 Sep 2023 09:34:08 +0100 Subject: [PATCH 08/12] :alien: Allow multiple FileUploads to run at once, with each waiting until it is possible to run --- data_safe_haven/external/api/azure_api.py | 36 +++++++++++++++++++ .../components/dynamic/file_upload.py | 30 ++++++++++++---- 2 files changed, 60 insertions(+), 6 deletions(-) diff --git a/data_safe_haven/external/api/azure_api.py b/data_safe_haven/external/api/azure_api.py index 552bc35713..0ce8044bce 100644 --- a/data_safe_haven/external/api/azure_api.py +++ b/data_safe_haven/external/api/azure_api.py @@ -1084,6 +1084,42 @@ def run_remote_script( msg = f"Failed to run command on '{vm_name}'.\n{exc}" raise DataSafeHavenAzureError(msg) from exc + def run_remote_script_waiting( + self, + resource_group_name: str, + script: str, + script_parameters: dict[str, str], + vm_name: str, + ) -> str: + """Run a script on a remote virtual machine waiting for other scripts to complete + + Returns: + str: The script output + + Raises: + DataSafeHavenAzureError if running the script failed + """ + while True: + try: + script_output = self.run_remote_script( + resource_group_name=resource_group_name, + script=script, + script_parameters=script_parameters, + vm_name=vm_name, + ) + break + except DataSafeHavenAzureError as exc: + if all( + reason not in str(exc) + for reason in ( + "The request failed due to conflict with a concurrent request", + "Run command extension execution is in progress", + ) + ): + raise + time.sleep(5) + return script_output + def set_blob_container_acl( self, container_name: str, 
diff --git a/data_safe_haven/infrastructure/components/dynamic/file_upload.py b/data_safe_haven/infrastructure/components/dynamic/file_upload.py index 067d150dd4..231b583430 100644 --- a/data_safe_haven/infrastructure/components/dynamic/file_upload.py +++ b/data_safe_haven/infrastructure/components/dynamic/file_upload.py @@ -2,7 +2,7 @@ from typing import Any from pulumi import Input, Output, ResourceOptions -from pulumi.dynamic import CreateResult, DiffResult, Resource +from pulumi.dynamic import CreateResult, DiffResult, Resource, UpdateResult from data_safe_haven.external import AzureApi from data_safe_haven.functions import b64encode @@ -51,12 +51,19 @@ def create(self, props: dict[str, Any]) -> CreateResult: "target": props["file_target"], } # Run remote script - outs["script_output"] = azure_api.run_remote_script( + script_output = azure_api.run_remote_script_waiting( props["vm_resource_group_name"], script_contents, script_parameters, props["vm_name"], ) + outs["script_output"] = "\n".join( + [ + line.strip() + for line in script_output.replace("Enable succeeded:", "").split("\n") + if line + ] + ) return CreateResult( f"FileUpload-{props['file_hash']}", outs=outs, @@ -65,8 +72,7 @@ def create(self, props: dict[str, Any]) -> CreateResult: def delete(self, id_: str, props: dict[str, Any]) -> None: """Delete the remote file from the VM""" # Use `id` as a no-op to avoid ARG002 while maintaining function signature - id((id_, props)) - outs = dict(**props) + id(id_) azure_api = AzureApi(props["subscription_name"], disable_logging=True) script_contents = """ rm -f "$target"; @@ -76,7 +82,7 @@ def delete(self, id_: str, props: dict[str, Any]) -> None: "target": props["file_target"], } # Run remote script - outs["script_output"] = azure_api.run_remote_script( + azure_api.run_remote_script_waiting( props["vm_resource_group_name"], script_contents, script_parameters, @@ -97,10 +103,22 @@ def diff( changes=True, replaces=list(new_props.keys()), stables=[], - delete_before_replace=True, + delete_before_replace=False, ) return self.partial_diff(old_props, new_props, []) + def update( + self, + id_: str, + old_props: dict[str, Any], + new_props: dict[str, Any], + ) -> UpdateResult: + """Updating is creating without the need to delete.""" + # Use `id` as a no-op to avoid ARG002 while maintaining function signature + id((id_, old_props)) + updated = self.create(new_props) + return UpdateResult(outs=updated.outs) + class FileUpload(Resource): script_output: Output[str] From 7a889ccf9aefb4bbb9a520d39f80475158806765 Mon Sep 17 00:00:00 2001 From: James Robinson Date: Wed, 20 Sep 2023 19:22:02 +0100 Subject: [PATCH 09/12] :bug: Signal if the FileUpload could not be created --- .../infrastructure/components/dynamic/file_upload.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/data_safe_haven/infrastructure/components/dynamic/file_upload.py b/data_safe_haven/infrastructure/components/dynamic/file_upload.py index 231b583430..cb9f5152f1 100644 --- a/data_safe_haven/infrastructure/components/dynamic/file_upload.py +++ b/data_safe_haven/infrastructure/components/dynamic/file_upload.py @@ -4,6 +4,7 @@ from pulumi import Input, Output, ResourceOptions from pulumi.dynamic import CreateResult, DiffResult, Resource, UpdateResult +from data_safe_haven.exceptions import DataSafeHavenAzureError from data_safe_haven.external import AzureApi from data_safe_haven.functions import b64encode @@ -44,7 +45,11 @@ def create(self, props: dict[str, Any]) -> CreateResult: mkdir -p $target_dir 2> 
/dev/null; echo $contents_b64 | base64 --decode > $target; chmod {props['file_permissions']} $target; - echo "Wrote file to $target"; + if [ -f "$target" ]; then + echo "Wrote file to $target"; + else + echo "Failed to write file to $target"; + fi """ script_parameters = { "contents_b64": b64encode(props["file_contents"]), @@ -64,6 +69,8 @@ def create(self, props: dict[str, Any]) -> CreateResult: if line ] ) + if "Failed to write" in outs["script_output"]: + raise DataSafeHavenAzureError(outs["script_output"]) return CreateResult( f"FileUpload-{props['file_hash']}", outs=outs, From 05c8a160ab5478cc638bc7b3cb0b33f9c7579ddd Mon Sep 17 00:00:00 2001 From: James Robinson Date: Wed, 20 Sep 2023 17:55:45 +0100 Subject: [PATCH 10/12] :white_check_mark: Add smoke tests --- .../components/dynamic/file_upload.py | 2 +- .../infrastructure/stacks/sre/workspaces.py | 42 +++-- .../resources/workspace/run_all_tests.bats | 156 +++++++++++------- .../resources/workspace/test_databases.sh | 51 ++++++ .../resources/workspace/test_databases_R.R | 51 ++++++ .../workspace/test_databases_python.py | 55 ++++++ .../workspace/test_functionality_R.R | 39 +++++ .../workspace/test_functionality_python.py | 37 +++++ .../workspace/test_mounted_drives.sh | 66 ++++++++ .../workspace/test_repository_R.mustache.sh | 49 ++++++ .../test_repository_python.mustache.sh | 42 +++++ 11 files changed, 512 insertions(+), 78 deletions(-) create mode 100644 data_safe_haven/resources/workspace/test_databases.sh create mode 100644 data_safe_haven/resources/workspace/test_databases_R.R create mode 100644 data_safe_haven/resources/workspace/test_databases_python.py create mode 100644 data_safe_haven/resources/workspace/test_functionality_R.R create mode 100644 data_safe_haven/resources/workspace/test_functionality_python.py create mode 100644 data_safe_haven/resources/workspace/test_mounted_drives.sh create mode 100644 data_safe_haven/resources/workspace/test_repository_R.mustache.sh create mode 100644 data_safe_haven/resources/workspace/test_repository_python.mustache.sh diff --git a/data_safe_haven/infrastructure/components/dynamic/file_upload.py b/data_safe_haven/infrastructure/components/dynamic/file_upload.py index cb9f5152f1..731a662899 100644 --- a/data_safe_haven/infrastructure/components/dynamic/file_upload.py +++ b/data_safe_haven/infrastructure/components/dynamic/file_upload.py @@ -29,7 +29,7 @@ def __init__( self.file_hash = file_hash self.file_target = file_target self.file_permissions = file_permissions - self.force_refresh = force_refresh + self.force_refresh = Output.from_input(force_refresh).apply(lambda force: force if force else False) self.subscription_name = subscription_name self.vm_name = vm_name self.vm_resource_group_name = vm_resource_group_name diff --git a/data_safe_haven/infrastructure/stacks/sre/workspaces.py b/data_safe_haven/infrastructure/stacks/sre/workspaces.py index 64e0c6d4f3..fa854168f1 100644 --- a/data_safe_haven/infrastructure/stacks/sre/workspaces.py +++ b/data_safe_haven/infrastructure/stacks/sre/workspaces.py @@ -1,3 +1,4 @@ +import pathlib from collections.abc import Mapping from typing import Any @@ -166,7 +167,7 @@ def __init__( ] # Get details for each deployed VM - vm_outputs = [ + vm_outputs: list[dict[str, Any]] = [ { "ip_address": vm.ip_address_private, "name": vm.vm_name, @@ -176,23 +177,30 @@ def __init__( ] # Upload smoke tests - run_all_tests = FileReader(resources_path / "workspace" / "run_all_tests.bats") + mustache_values={ + "check_uninstallable_packages": "0", + } + file_uploads 
= [(FileReader(resources_path / "workspace" / "run_all_tests.bats"), "0444")] + for test_file in pathlib.Path(resources_path / "workspace").glob("test*"): + file_uploads.append((FileReader(test_file), "0444")) for vm, vm_output in zip(vms, vm_outputs, strict=True): - file_run_all_tests = FileUpload( - f"{self._name}_file_run_all_tests", - FileUploadProps( - file_contents=run_all_tests.file_contents(), - file_hash=run_all_tests.sha256(), - file_permissions="0444", - file_target=f"/opt/tests/{run_all_tests.name}", - force_refresh=True, - subscription_name=props.subscription_name, - vm_name=vm.vm_name, - vm_resource_group_name=resource_group.name, - ), - opts=child_opts, - ) - vm_output["run_all_tests"] = file_run_all_tests.script_output + outputs: dict[str, Output[str]] = {} + for file_upload, file_permissions in file_uploads: + file_smoke_test = FileUpload( + replace_separators(f"{self._name}_file_{file_upload.name}", "_"), + FileUploadProps( + file_contents=file_upload.file_contents(mustache_values=mustache_values), + file_hash=file_upload.sha256(), + file_permissions=file_permissions, + file_target=f"/opt/tests/{file_upload.name}", + subscription_name=props.subscription_name, + vm_name=vm.vm_name, + vm_resource_group_name=resource_group.name, + ), + opts=child_opts, + ) + outputs[file_upload.name] = file_smoke_test.script_output + vm_output["file_uploads"] = outputs # Register outputs self.resource_group = resource_group diff --git a/data_safe_haven/resources/workspace/run_all_tests.bats b/data_safe_haven/resources/workspace/run_all_tests.bats index b843d046f3..800a55cd3d 100644 --- a/data_safe_haven/resources/workspace/run_all_tests.bats +++ b/data_safe_haven/resources/workspace/run_all_tests.bats @@ -1,47 +1,92 @@ #! /usr/bin/env bats -load "../bats/bats-assert/load" -load "../bats/bats-file/load" -load "../bats/bats-support/load" # Helper functions # ---------------- -install_requirements_python() { - pip install pandas psycopg pymssql +initialise_python_environment() { + ENV_PATH="${HOME}/.local/bats-python-environment" + rm -rf "$ENV_PATH" + python -m venv "$ENV_PATH" + source "${ENV_PATH}/bin/activate" + pip install --upgrade pip --quiet } -install_requirements_R() { - Rscript -e "install.packages(c('DBI', 'odbc', 'RPostgres'))" +initialise_r_environment() { + ENV_PATH="${HOME}/.local/bats-r-environment" + rm -rf "$ENV_PATH" + mkdir -p "$ENV_PATH" } +install_r_package() { + PACKAGE_NAME="$1" + ENV_PATH="${HOME}/.local/bats-r-environment" + Rscript -e "install.packages('$PACKAGE_NAME', lib='$ENV_PATH');" +} -# Python -# ------ -# Test Python functionality -@test "Python functionality" { - run python tests/test_functionality_python.py 2>&1 - assert_output --partial 'All functionality tests passed' +install_r_package_version() { + PACKAGE_NAME="$1" + PACKAGE_VERSION="$2" + ENV_PATH="${HOME}/.local/bats-r-environment" + Rscript -e "install.packages('remotes', lib='$ENV_PATH');" + Rscript -e "library('remotes', lib='$ENV_PATH'); remotes::install_version(package='$PACKAGE_NAME', version='$PACKAGE_VERSION', lib='$ENV_PATH');" } -# Test Python package repository -@test "Python package repository" { - run bash tests/test_repository_python.sh 2>&1 - assert_output --partial 'All package installations behaved as expected' + +check_db_credentials() { + db_credentials="${HOME}/.local/db.dsh" + if [ -f "$db_credentials" ]; then + return 0 + fi + return 1 } -# R -# - -# Test R packages -# Test R functionality -@test "R functionality" { - run Rscript tests/test_functionality_R.R - assert_output 
--partial 'All functionality tests passed' +# Mounted drives +# -------------- +@test "Mounted drives (/data)" { + run bash test_mounted_drives.sh -d data + [ "$status" -eq 0 ] +} +@test "Mounted drives (/home)" { + run bash test_mounted_drives.sh -d home + [ "$status" -eq 0 ] +} +@test "Mounted drives (/output)" { + run bash test_mounted_drives.sh -d output + [ "$status" -eq 0 ] +} +@test "Mounted drives (/shared)" { + run bash test_mounted_drives.sh -d shared + [ "$status" -eq 0 ] } -# Test R package repository + +# Package repositories +# -------------------- +@test "Python package repository" { + initialise_python_environment + run bash test_repository_python.sh 2>&1 + [ "$status" -eq 0 ] +} @test "R package repository" { - run bash tests/test_repository_R.sh - assert_output --partial 'All package installations behaved as expected' + initialise_r_environment + run bash test_repository_R.sh + [ "$status" -eq 0 ] +} + + +# Language functionality +# ---------------------- +@test "Python functionality" { + initialise_python_environment + pip install numpy pandas scikit-learn --quiet + run python test_functionality_python.py 2>&1 + [ "$status" -eq 0 ] +} +@test "R functionality" { + initialise_r_environment + install_r_package_version "MASS" "7.3-52" + run Rscript test_functionality_R.R + [ "$status" -eq 0 ] } @@ -49,44 +94,35 @@ install_requirements_R() { # --------- # Test MS SQL database @test "MS SQL database (Python)" { - install_requirements_python - run bash tests/test_databases.sh -d mssql -l python - assert_output --partial 'All database tests passed' + check_db_credentials || skip "No database credentials available" + initialise_python_environment + pip install pandas psycopg pymssql --quiet + run bash test_databases.sh -d mssql -l python + [ "$status" -eq 0 ] } @test "MS SQL database (R)" { - install_requirements_R - run bash tests/test_databases.sh -d mssql -l R - assert_output --partial 'All database tests passed' + check_db_credentials || skip "No database credentials available" + initialise_r_environment + install_r_package "DBI" + install_r_package "odbc" + install_r_package "RPostgres" + run bash test_databases.sh -d mssql -l R + [ "$status" -eq 0 ] } - # Test Postgres database @test "Postgres database (Python)" { - install_requirements_python - run bash tests/test_databases.sh -d postgresql -l python - assert_output --partial 'All database tests passed' + check_db_credentials || skip "No database credentials available" + initialise_python_environment + pip install pandas psycopg pymssql --quiet + run bash test_databases.sh -d postgresql -l python + [ "$status" -eq 0 ] } @test "Postgres database (R)" { - install_requirements_R - run bash tests/test_databases.sh -d postgresql -l R - assert_output --partial 'All database tests passed' -} - - -# Mounted drives -# -------------- -@test "Mounted drives (/data)" { - run bash tests/test_mounted_drives.sh -d data - assert_output --partial 'All tests passed' -} -@test "Mounted drives (/home)" { - run bash tests/test_mounted_drives.sh -d home - assert_output --partial 'All tests passed' -} -@test "Mounted drives (/output)" { - run bash tests/test_mounted_drives.sh -d output - assert_output --partial 'All tests passed' -} -@test "Mounted drives (/shared)" { - run bash tests/test_mounted_drives.sh -d shared - assert_output --partial 'All tests passed' + check_db_credentials || skip "No database credentials available" + initialise_r_environment + install_r_package "DBI" + install_r_package "odbc" + install_r_package "RPostgres" + run 
bash test_databases.sh -d postgresql -l R + [ "$status" -eq 0 ] } diff --git a/data_safe_haven/resources/workspace/test_databases.sh b/data_safe_haven/resources/workspace/test_databases.sh new file mode 100644 index 0000000000..69fd7a456c --- /dev/null +++ b/data_safe_haven/resources/workspace/test_databases.sh @@ -0,0 +1,51 @@ +#! /bin/bash +db_type="" +language="" +while getopts d:l: flag; do + case "${flag}" in + d) db_type=${OPTARG} ;; + l) language=${OPTARG} ;; + *) + echo "Invalid option ${OPTARG}" + exit 1 + ;; + esac +done + +db_credentials="${HOME}/.local/db.dsh" +if [ -f "$db_credentials" ]; then + username="databaseadmin" + password="$(cat "$db_credentials")" +else + echo "Credentials file ($db_credentials) not found." + exit 1 +fi + +sre_fqdn="$(grep trusted /etc/pip.conf | cut -d "." -f 2-99)" +sre_prefix="$(hostname | cut -d "-" -f 1-4)" +if [ "$db_type" == "mssql" ]; then + db_name="master" + port="1433" + server_name="mssql.${sre_fqdn}" + hostname="${sre_prefix}-db-server-mssql" +elif [ "$db_type" == "postgresql" ]; then + db_name="postgres" + port="5432" + server_name="postgresql.${sre_fqdn}" + hostname="${sre_prefix}-db-server-postgresql" +else + echo "Did not recognise database type '$db_type'" + exit 1 +fi + +if [ "$port" == "" ]; then + echo "Database type '$db_type' is not part of this SRE" + exit 1 +else + script_path=$(dirname "$(readlink -f "$0")") + if [ "$language" == "python" ]; then + python "${script_path}"/test_databases_python.py --db-type "$db_type" --db-name "$db_name" --port "$port" --server-name "$server_name" --hostname "$hostname" --username "$username" --password "$password" || exit 1 + elif [ "$language" == "R" ]; then + Rscript "${script_path}"/test_databases_R.R "$db_type" "$db_name" "$port" "$server_name" "$hostname" "$username" "$password" || exit 1 + fi +fi diff --git a/data_safe_haven/resources/workspace/test_databases_R.R b/data_safe_haven/resources/workspace/test_databases_R.R new file mode 100644 index 0000000000..a261f21532 --- /dev/null +++ b/data_safe_haven/resources/workspace/test_databases_R.R @@ -0,0 +1,51 @@ +#!/usr/bin/env Rscript +library(DBI, lib.loc='~/.local/bats-r-environment') +library(odbc, lib.loc='~/.local/bats-r-environment') +library(RPostgres, lib.loc='~/.local/bats-r-environment') + +# Parse command line arguments +args = commandArgs(trailingOnly=TRUE) +if (length(args)!=7) { + stop("Exactly seven arguments are required: db_type, db_name, port, server_name, hostname, username and password") +} +db_type = args[1] +db_name = args[2] +port = args[3] +server_name = args[4] +hostname = args[5] +username = args[6] +password = args[7] + +# Connect to the database +print(paste("Attempting to connect to '", db_name, "' on '", server_name, "' via port '", port, sep="")) +if (db_type == "mssql") { + cnxn <- DBI::dbConnect( + odbc::odbc(), + Driver = "ODBC Driver 17 for SQL Server", + Server = paste(server_name, port, sep=","), + Database = db_name, + # Trusted_Connection = "yes", + UID = paste(username, "@", hostname, sep=""), + PWD = password + ) +} else if (db_type == "postgresql") { + cnxn <- DBI::dbConnect( + RPostgres::Postgres(), + host = server_name, + port = port, + dbname = db_name, + user = paste(username, "@", hostname, sep=""), + password = password + ) +} else { + stop(paste("Database type '", db_type, "' was not recognised", sep="")) +} + +# Run a query and save the output into a dataframe +df <- dbGetQuery(cnxn, "SELECT * FROM information_schema.tables;") +if (dim(df)[1] > 0) { + print(head(df, 5)) + print("All 
database tests passed") +} else { + stop(paste("Reading from database '", db_name, "' failed", sep="")) +} diff --git a/data_safe_haven/resources/workspace/test_databases_python.py b/data_safe_haven/resources/workspace/test_databases_python.py new file mode 100644 index 0000000000..37a37acb91 --- /dev/null +++ b/data_safe_haven/resources/workspace/test_databases_python.py @@ -0,0 +1,55 @@ +#! /usr/bin/env python +import argparse + +import pandas as pd +import psycopg +import pymssql + + +def test_database(server_name, hostname, port, db_type, db_name, username, password): + print(f"Attempting to connect to '{db_name}' on '{server_name}' via port {port}") + username_full = f"{username}@{hostname}" + cnxn = None + if db_type == "mssql": + cnxn = pymssql.connect( + server=server_name, user=username_full, password=password, database=db_name + ) + elif db_type == "postgresql": + connection_string = f"host={server_name} port={port} dbname={db_name} user={username_full} password={password}" + cnxn = psycopg.connect(connection_string) + else: + raise ValueError(f"Database type '{db_type}' was not recognised") + df = pd.read_sql("SELECT * FROM information_schema.tables;", cnxn) + if df.size: + print(df.head(5)) + print("All database tests passed") + else: + raise ValueError(f"Reading from database '{db_name}' failed.") + + +# Parse command line arguments +parser = argparse.ArgumentParser() +parser.add_argument( + "--db-type", + type=str, + choices=["mssql", "postgresql"], + help="Which database type to use", +) +parser.add_argument("--db-name", type=str, help="Which database to connect to") +parser.add_argument("--port", type=str, help="Which port to connect to") +parser.add_argument("--server-name", type=str, help="Which server to connect to") +parser.add_argument("--username", type=str, help="Database username") +parser.add_argument("--hostname", type=str, help="Azure hostname of the server") +parser.add_argument("--password", type=str, help="Database user password") +args = parser.parse_args() + +# Run database test +test_database( + args.server_name, + args.hostname, + args.port, + args.db_type, + args.db_name, + args.username, + args.password, +) diff --git a/data_safe_haven/resources/workspace/test_functionality_R.R b/data_safe_haven/resources/workspace/test_functionality_R.R new file mode 100644 index 0000000000..94c351e7c3 --- /dev/null +++ b/data_safe_haven/resources/workspace/test_functionality_R.R @@ -0,0 +1,39 @@ +# Test logistic regression using R +library('MASS', lib.loc='~/.local/bats-r-environment') +library('stats') + +gen_data <- function(n = 100, p = 3) { + set.seed(1) + weights <- stats::rgamma(n = n, shape = rep(1, n), rate = rep(1, n)) + y <- stats::rbinom(n = n, size = 1, prob = 0.5) + theta <- stats::rnorm(n = p, mean = 0, sd = 1) + means <- colMeans(as.matrix(y) %*% theta) + x <- MASS::mvrnorm(n = n, means, diag(1, p, p)) + return(list(x = x, y = y, weights = weights, theta = theta)) +} + +run_logistic_regression <- function(data) { + fit <- stats::glm.fit(x = data$x, + y = data$y, + weights = data$weights, + family = stats::quasibinomial(link = "logit")) + return(fit$coefficients) +} + +data <- gen_data() +theta <- run_logistic_regression(data) +print("Logistic regression ran OK") + + +# Test clustering of random data using R +num_clusters <- 5 +N <- 10 +set.seed(0, kind = "Mersenne-Twister") +cluster_means <- runif(num_clusters, 0, 10) +means_selector <- as.integer(runif(N, 1, num_clusters + 1)) +data_means <- cluster_means[means_selector] +data <- rnorm(n = N, mean = 
data_means, sd = 0.5) +hc <- hclust(dist(data)) +print("Clustering ran OK") + +print("All functionality tests passed") diff --git a/data_safe_haven/resources/workspace/test_functionality_python.py b/data_safe_haven/resources/workspace/test_functionality_python.py new file mode 100644 index 0000000000..9ca9662d98 --- /dev/null +++ b/data_safe_haven/resources/workspace/test_functionality_python.py @@ -0,0 +1,37 @@ +"""Test logistic regression using python""" +import numpy as np +import pandas as pd +from sklearn.linear_model import LogisticRegression + + +def gen_data(n_samples, n_points): + """Generate data for fitting""" + target = np.random.binomial(n=1, p=0.5, size=(n_samples, 1)) + theta = np.random.normal(loc=0.0, scale=1.0, size=(1, n_points)) + means = np.mean(np.multiply(target, theta), axis=0) + values = np.random.multivariate_normal( + means, np.diag([1] * n_points), size=n_samples + ).T + data = dict(("x{}".format(n), values[n]) for n in range(n_points)) + data["y"] = target.reshape((n_samples,)) + data["weights"] = np.random.gamma(shape=1, scale=1.0, size=n_samples) + return pd.DataFrame(data=data) + + +def main(): + """Logistic regression""" + data = gen_data(100, 3) + input_data = data.iloc[:, :-2] + output_data = data["y"] + weights = data["weights"] + + logit = LogisticRegression(solver="liblinear") + logit.fit(input_data, output_data, sample_weight=weights) + logit.score(input_data, output_data, sample_weight=weights) + + print("Logistic model ran OK") + print("All functionality tests passed") + + +if __name__ == "__main__": + main() diff --git a/data_safe_haven/resources/workspace/test_mounted_drives.sh b/data_safe_haven/resources/workspace/test_mounted_drives.sh new file mode 100644 index 0000000000..a1812934b9 --- /dev/null +++ b/data_safe_haven/resources/workspace/test_mounted_drives.sh @@ -0,0 +1,66 @@ +#! 
/bin/bash +while getopts d: flag +do + case "${flag}" in + d) directory=${OPTARG};; + *) + echo "Usage: $0 -d [directory]" + exit 1 + esac +done + +nfailed=0 +if [[ "$directory" = "home" ]]; then directory_path=$(echo ~); else directory_path="/${directory}"; fi +testfile="$(tr -dc 'a-zA-Z0-9' < /dev/urandom | fold -w 32 | head -n 1)" + +# Check that directory exists +if [ "$(ls "${directory_path}" 2>&1 1>/dev/null)" ]; then + echo "Could not find mount '${directory_path}'" + nfailed=$((nfailed + 1)) +fi + +# Test operations +CAN_CREATE="$([[ "$(touch "${directory_path}/${testfile}" 2>&1 1>/dev/null)" = "" ]] && echo '1' || echo '0')" +CAN_WRITE="$([[ -w "${directory_path}/${testfile}" ]] && echo '1' || echo '0')" +CAN_DELETE="$([[ "$(touch "${directory_path}/${testfile}" 2>&1 1>/dev/null && rm "${directory_path}/${testfile}" 2>&1)" ]] && echo '0' || echo '1')" + +# Check that permissions are as expected for each directory +case "$directory" in + data) + if [ "$CAN_CREATE" = 1 ]; then echo "Able to create files in ${directory_path}!"; nfailed=$((nfailed + 1)); fi + if [ "$CAN_WRITE" = 1 ]; then echo "Able to write files in ${directory_path}!"; nfailed=$((nfailed + 1)); fi + if [ "$CAN_DELETE" = 1 ]; then echo "Able to delete files in ${directory_path}!"; nfailed=$((nfailed + 1)); fi + ;; + + home) + if [ "$CAN_CREATE" = 0 ]; then echo "Unable to create files in ${directory_path}!"; nfailed=$((nfailed + 1)); fi + if [ "$CAN_WRITE" = 0 ]; then echo "Unable to write files in ${directory_path}!"; nfailed=$((nfailed + 1)); fi + if [ "$CAN_DELETE" = 0 ]; then echo "Unable to delete files in ${directory_path}!"; nfailed=$((nfailed + 1)); fi + ;; + + output) + if [ "$CAN_CREATE" = 0 ]; then echo "Unable to create files in ${directory_path}!"; nfailed=$((nfailed + 1)); fi + if [ "$CAN_WRITE" = 0 ]; then echo "Unable to write files in ${directory_path}!"; nfailed=$((nfailed + 1)); fi + if [ "$CAN_DELETE" = 0 ]; then echo "Unable to delete files in ${directory_path}!"; nfailed=$((nfailed + 1)); fi + ;; + + shared) + if [ "$CAN_CREATE" = 0 ]; then echo "Unable to create files in ${directory_path}!"; nfailed=$((nfailed + 1)); fi + if [ "$CAN_WRITE" = 0 ]; then echo "Unable to write files in ${directory_path}!"; nfailed=$((nfailed + 1)); fi + if [ "$CAN_DELETE" = 0 ]; then echo "Unable to delete files in ${directory_path}!"; nfailed=$((nfailed + 1)); fi + ;; + + *) + echo "Usage: $0 -d [directory]" + exit 1 +esac + +# Cleanup and print output +rm -f "${directory_path}/${testfile}" 2> /dev/null +if [ $nfailed = 0 ]; then + echo "All tests passed for '${directory_path}'" + exit 0 +else + echo "$nfailed tests failed for '${directory_path}'!" + exit $nfailed +fi diff --git a/data_safe_haven/resources/workspace/test_repository_R.mustache.sh b/data_safe_haven/resources/workspace/test_repository_R.mustache.sh new file mode 100644 index 0000000000..03568b1e62 --- /dev/null +++ b/data_safe_haven/resources/workspace/test_repository_R.mustache.sh @@ -0,0 +1,49 @@ +#! 
/bin/bash
+# We need to test packages that are:
+# - *not* pre-installed
+# - on the tier-3 list (so we can test all tiers)
+# - alphabetically early and late (so we can test the progress of the mirror synchronisation)
+packages=("askpass" "zeallot")
+uninstallable_packages=("aws.s3")
+
+# Use the shared test library that run_all_tests.bats initialises.
+# Note that this directory is removed at the end of this script, so
+# run_all_tests.bats must reinitialise it before any later test that
+# installs R packages.
+TEST_INSTALL_PATH="${HOME}/.local/bats-r-environment"
+
+# Install sample packages into the test library
+N_FAILURES=0
+for package in "${packages[@]}"; do
+    echo "Attempting to install ${package}..."
+    Rscript -e "options(warn=-1); install.packages('${package}', lib='${TEST_INSTALL_PATH}', quiet=TRUE)"
+    if (Rscript -e "library('${package}', lib.loc='${TEST_INSTALL_PATH}')"); then
+        echo "... $package installation succeeded"
+    else
+        echo "... $package installation failed"
+        N_FAILURES=$((N_FAILURES + 1))
+    fi
+done
+# If requested, demonstrate that installation fails for packages *not* on the approved list
+TEST_FAILURE="{{check_uninstallable_packages}}"
+if [ $TEST_FAILURE -eq 1 ]; then
+    for package in "${uninstallable_packages[@]}"; do
+        echo "Attempting to install ${package}..."
+        Rscript -e "options(warn=-1); install.packages('${package}', lib='${TEST_INSTALL_PATH}', quiet=TRUE)"
+        if (Rscript -e "library('${package}', lib.loc='${TEST_INSTALL_PATH}')"); then
+            echo "... $package installation unexpectedly succeeded!"
+            N_FAILURES=$((N_FAILURES + 1))
+        else
+            echo "... $package installation failed as expected"
+        fi
+    done
+fi
+rm -rf "$TEST_INSTALL_PATH"
+
+if [ $N_FAILURES -eq 0 ]; then
+    echo "All package installations behaved as expected"
+    exit 0
+else
+    echo "One or more package installations did not behave as expected!"
+    exit $N_FAILURES
+fi
diff --git a/data_safe_haven/resources/workspace/test_repository_python.mustache.sh b/data_safe_haven/resources/workspace/test_repository_python.mustache.sh
new file mode 100644
index 0000000000..28e46a23e1
--- /dev/null
+++ b/data_safe_haven/resources/workspace/test_repository_python.mustache.sh
@@ -0,0 +1,42 @@
+#! /bin/bash
+
+# We need to test packages that are:
+# - *not* pre-installed
+# - on the allowlist (so we can test this is working)
+# - alphabetically early and late (so we can test the progress of the mirror synchronisation)
+installable_packages=("contourpy" "tzdata")
+uninstallable_packages=("awscli")
+
+# Install sample packages into the active Python environment
+N_FAILURES=0
+for package in "${installable_packages[@]}"; do
+    echo "Attempting to install ${package}..."
+    if (pip install "$package" --quiet); then
+        echo "... $package installation succeeded"
+    else
+        echo "... $package installation failed"
+        N_FAILURES=$((N_FAILURES + 1))
+    fi
+done
+# If requested, demonstrate that installation fails for packages *not* on the approved list
+TEST_FAILURE="{{check_uninstallable_packages}}"
+if [ $TEST_FAILURE -eq 1 ]; then
+    for package in "${uninstallable_packages[@]}"; do
+        echo "Attempting to install ${package}..."
+        if (pip install "$package" --quiet); then
+            echo "... $package installation unexpectedly succeeded!"
+            N_FAILURES=$((N_FAILURES + 1))
+        else
+            echo "... $package installation failed as expected"
+        fi
+    done
+fi
+# Nothing to clean up: packages went into the disposable virtual environment created by run_all_tests.bats
+
+if [ $N_FAILURES -eq 0 ]; then
+    echo "All package installations behaved as expected"
+    exit 0
+else
+    echo "One or more package installations did not behave as expected!"
+ exit $N_FAILURES +fi From 15668ecbf56c6d1ac0a0becc59221af2984039ec Mon Sep 17 00:00:00 2001 From: James Robinson Date: Wed, 20 Sep 2023 19:23:14 +0100 Subject: [PATCH 11/12] :arrow_up: Add dependencies needed for smoke tests to allowlists --- .../resources/software_repositories/allowlists/cran.allowlist | 1 + .../resources/software_repositories/allowlists/pypi.allowlist | 1 + 2 files changed, 2 insertions(+) diff --git a/data_safe_haven/resources/software_repositories/allowlists/cran.allowlist b/data_safe_haven/resources/software_repositories/allowlists/cran.allowlist index d65ef196ea..9624ec7060 100644 --- a/data_safe_haven/resources/software_repositories/allowlists/cran.allowlist +++ b/data_safe_haven/resources/software_repositories/allowlists/cran.allowlist @@ -1,4 +1,5 @@ DBI +MASS RPostgres Rcpp bit diff --git a/data_safe_haven/resources/software_repositories/allowlists/pypi.allowlist b/data_safe_haven/resources/software_repositories/allowlists/pypi.allowlist index 3ab3c07dfe..704937893f 100644 --- a/data_safe_haven/resources/software_repositories/allowlists/pypi.allowlist +++ b/data_safe_haven/resources/software_repositories/allowlists/pypi.allowlist @@ -15,6 +15,7 @@ pyodbc pyparsing python-dateutil pytz +scikit-learn six typing-extensions tzdata From b039cd7eaabdd76da11467e04aeb0cae07f06b9c Mon Sep 17 00:00:00 2001 From: James Robinson Date: Wed, 20 Sep 2023 19:32:59 +0100 Subject: [PATCH 12/12] :rotating_light: Fix linting errors in smoke tests --- .../components/dynamic/file_upload.py | 4 +++- .../infrastructure/stacks/sre/workspaces.py | 10 +++++--- .../workspace/test_databases_python.py | 23 ++++++++++++++----- .../workspace/test_functionality_python.py | 10 ++++---- pyproject.toml | 4 ++++ 5 files changed, 36 insertions(+), 15 deletions(-) diff --git a/data_safe_haven/infrastructure/components/dynamic/file_upload.py b/data_safe_haven/infrastructure/components/dynamic/file_upload.py index 731a662899..4f1f259c47 100644 --- a/data_safe_haven/infrastructure/components/dynamic/file_upload.py +++ b/data_safe_haven/infrastructure/components/dynamic/file_upload.py @@ -29,7 +29,9 @@ def __init__( self.file_hash = file_hash self.file_target = file_target self.file_permissions = file_permissions - self.force_refresh = Output.from_input(force_refresh).apply(lambda force: force if force else False) + self.force_refresh = Output.from_input(force_refresh).apply( + lambda force: force if force else False + ) self.subscription_name = subscription_name self.vm_name = vm_name self.vm_resource_group_name = vm_resource_group_name diff --git a/data_safe_haven/infrastructure/stacks/sre/workspaces.py b/data_safe_haven/infrastructure/stacks/sre/workspaces.py index fa854168f1..fdf3d46c4d 100644 --- a/data_safe_haven/infrastructure/stacks/sre/workspaces.py +++ b/data_safe_haven/infrastructure/stacks/sre/workspaces.py @@ -177,10 +177,12 @@ def __init__( ] # Upload smoke tests - mustache_values={ + mustache_values = { "check_uninstallable_packages": "0", } - file_uploads = [(FileReader(resources_path / "workspace" / "run_all_tests.bats"), "0444")] + file_uploads = [ + (FileReader(resources_path / "workspace" / "run_all_tests.bats"), "0444") + ] for test_file in pathlib.Path(resources_path / "workspace").glob("test*"): file_uploads.append((FileReader(test_file), "0444")) for vm, vm_output in zip(vms, vm_outputs, strict=True): @@ -189,7 +191,9 @@ def __init__( file_smoke_test = FileUpload( replace_separators(f"{self._name}_file_{file_upload.name}", "_"), FileUploadProps( - 
file_contents=file_upload.file_contents(mustache_values=mustache_values), + file_contents=file_upload.file_contents( + mustache_values=mustache_values + ), file_hash=file_upload.sha256(), file_permissions=file_permissions, file_target=f"/opt/tests/{file_upload.name}", diff --git a/data_safe_haven/resources/workspace/test_databases_python.py b/data_safe_haven/resources/workspace/test_databases_python.py index 37a37acb91..ab0f01a3fe 100644 --- a/data_safe_haven/resources/workspace/test_databases_python.py +++ b/data_safe_haven/resources/workspace/test_databases_python.py @@ -6,8 +6,17 @@ import pymssql -def test_database(server_name, hostname, port, db_type, db_name, username, password): - print(f"Attempting to connect to '{db_name}' on '{server_name}' via port {port}") +def test_database( + server_name: str, + hostname: str, + port: int, + db_type: str, + db_name: str, + username: str, + password: str, +) -> None: + msg = f"Attempting to connect to '{db_name}' on '{server_name}' via port {port}" + print(msg) # noqa: T201 username_full = f"{username}@{hostname}" cnxn = None if db_type == "mssql": @@ -18,13 +27,15 @@ def test_database(server_name, hostname, port, db_type, db_name, username, passw connection_string = f"host={server_name} port={port} dbname={db_name} user={username_full} password={password}" cnxn = psycopg.connect(connection_string) else: - raise ValueError(f"Database type '{db_type}' was not recognised") + msg = f"Database type '{db_type}' was not recognised" + raise ValueError(msg) df = pd.read_sql("SELECT * FROM information_schema.tables;", cnxn) if df.size: - print(df.head(5)) - print("All database tests passed") + print(df.head(5)) # noqa: T201 + print("All database tests passed") # noqa: T201 else: - raise ValueError(f"Reading from database '{db_name}' failed.") + msg = f"Reading from database '{db_name}' failed." 
+ raise ValueError(msg) # Parse command line arguments diff --git a/data_safe_haven/resources/workspace/test_functionality_python.py b/data_safe_haven/resources/workspace/test_functionality_python.py index 9ca9662d98..855e5e5f15 100644 --- a/data_safe_haven/resources/workspace/test_functionality_python.py +++ b/data_safe_haven/resources/workspace/test_functionality_python.py @@ -4,7 +4,7 @@ from sklearn.linear_model import LogisticRegression -def gen_data(n_samples, n_points): +def gen_data(n_samples: int, n_points: int) -> pd.DataFrame: """Generate data for fitting""" target = np.random.binomial(n=1, p=0.5, size=(n_samples, 1)) theta = np.random.normal(loc=0.0, scale=1.0, size=(1, n_points)) @@ -12,13 +12,13 @@ def gen_data(n_samples, n_points): values = np.random.multivariate_normal( means, np.diag([1] * n_points), size=n_samples ).T - data = dict(("x{}".format(n), values[n]) for n in range(n_points)) + data = {f"x{n}": values[n] for n in range(n_points)} data["y"] = target.reshape((n_samples,)) data["weights"] = np.random.gamma(shape=1, scale=1.0, size=n_samples) return pd.DataFrame(data=data) -def main(): +def main() -> None: """Logistic regression""" data = gen_data(100, 3) input_data = data.iloc[:, :-2] @@ -29,8 +29,8 @@ def main(): logit.fit(input_data, output_data, sample_weight=weights) logit.score(input_data, output_data, sample_weight=weights) - print("Logistic model ran OK") - print("All functionality tests passed") + print("Logistic model ran OK") # noqa: T201 + print("All functionality tests passed") # noqa: T201 if __name__ == "__main__": diff --git a/pyproject.toml b/pyproject.toml index e033e8459b..45d64e4052 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -152,11 +152,15 @@ module = [ "cryptography.*", "dns.*", "msal.*", + "numpy.*", + "pandas.*", "psycopg.*", "pulumi.*", "pulumi_azure_native.*", + "pymssql.*", "rich.*", "simple_acme_dns.*", + "sklearn.*", "typer.*", "websocket.*", ]
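
A closing usage note, separate from the patches above: once the series is deployed, the uploaded suite can also be run by hand on a workspace VM. This is a minimal sketch, assuming the FileUpload resources succeeded and left the files under /opt/tests (the file_target used above) and that the logged-in user can read that directory; both depend on the deployment.

    # Run from the upload directory so that the relative "bash test_*.sh"
    # calls inside run_all_tests.bats resolve correctly
    cd /opt/tests
    bats run_all_tests.bats

bats prints one ok/not-ok line per @test; the database tests skip themselves when no credentials file is present at ~/.local/db.dsh.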