From b3dbc96729f2d23c65eb5fab6a8b2f9f8d1e695c Mon Sep 17 00:00:00 2001 From: James Robinson Date: Tue, 19 Sep 2023 11:31:59 +0100 Subject: [PATCH 01/12] :sparkles: Install a basic bats version --- .../resources/workspace/workspace.cloud_init.mustache.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/data_safe_haven/resources/workspace/workspace.cloud_init.mustache.yaml b/data_safe_haven/resources/workspace/workspace.cloud_init.mustache.yaml index 17471221de..c4216adb76 100644 --- a/data_safe_haven/resources/workspace/workspace.cloud_init.mustache.yaml +++ b/data_safe_haven/resources/workspace/workspace.cloud_init.mustache.yaml @@ -104,6 +104,8 @@ packages: - libpq-dev # interact with PostgreSQL databases - msodbcsql17 # interact with Microsoft SQL databases - unixodbc-dev # interact with Microsoft SQL databases + # Bash testing + - bats package_update: true package_upgrade: true From f37b3055f7a7f6bd192f73227d3423898596c4e5 Mon Sep 17 00:00:00 2001 From: James Robinson Date: Thu, 14 Sep 2023 13:19:43 +0100 Subject: [PATCH 02/12] :sparkles: Add a skeleton bats script --- .../resources/workspace/run_all_tests.bats | 92 +++++++++++++++++++ 1 file changed, 92 insertions(+) create mode 100644 data_safe_haven/resources/workspace/run_all_tests.bats diff --git a/data_safe_haven/resources/workspace/run_all_tests.bats b/data_safe_haven/resources/workspace/run_all_tests.bats new file mode 100644 index 0000000000..b843d046f3 --- /dev/null +++ b/data_safe_haven/resources/workspace/run_all_tests.bats @@ -0,0 +1,92 @@ +#! /usr/bin/env bats +load "../bats/bats-assert/load" +load "../bats/bats-file/load" +load "../bats/bats-support/load" + + +# Helper functions +# ---------------- +install_requirements_python() { + pip install pandas psycopg pymssql +} + +install_requirements_R() { + Rscript -e "install.packages(c('DBI', 'odbc', 'RPostgres'))" +} + + +# Python +# ------ +# Test Python functionality +@test "Python functionality" { + run python tests/test_functionality_python.py 2>&1 + assert_output --partial 'All functionality tests passed' +} +# Test Python package repository +@test "Python package repository" { + run bash tests/test_repository_python.sh 2>&1 + assert_output --partial 'All package installations behaved as expected' +} + + +# R +# - +# Test R packages +# Test R functionality +@test "R functionality" { + run Rscript tests/test_functionality_R.R + assert_output --partial 'All functionality tests passed' +} + +# Test R package repository +@test "R package repository" { + run bash tests/test_repository_R.sh + assert_output --partial 'All package installations behaved as expected' +} + + +# Databases +# --------- +# Test MS SQL database +@test "MS SQL database (Python)" { + install_requirements_python + run bash tests/test_databases.sh -d mssql -l python + assert_output --partial 'All database tests passed' +} +@test "MS SQL database (R)" { + install_requirements_R + run bash tests/test_databases.sh -d mssql -l R + assert_output --partial 'All database tests passed' +} + +# Test Postgres database +@test "Postgres database (Python)" { + install_requirements_python + run bash tests/test_databases.sh -d postgresql -l python + assert_output --partial 'All database tests passed' +} +@test "Postgres database (R)" { + install_requirements_R + run bash tests/test_databases.sh -d postgresql -l R + assert_output --partial 'All database tests passed' +} + + +# Mounted drives +# -------------- +@test "Mounted drives (/data)" { + run bash tests/test_mounted_drives.sh -d data + assert_output --partial 
'All tests passed' +} +@test "Mounted drives (/home)" { + run bash tests/test_mounted_drives.sh -d home + assert_output --partial 'All tests passed' +} +@test "Mounted drives (/output)" { + run bash tests/test_mounted_drives.sh -d output + assert_output --partial 'All tests passed' +} +@test "Mounted drives (/shared)" { + run bash tests/test_mounted_drives.sh -d shared + assert_output --partial 'All tests passed' +} From 29983957baaf30f03fa9e43155bf47b113f40bd3 Mon Sep 17 00:00:00 2001 From: James Robinson Date: Fri, 15 Sep 2023 11:41:25 +0100 Subject: [PATCH 03/12] :truck: Rename remote_powershell.py to remote_script.py --- data_safe_haven/infrastructure/components/dynamic/__init__.py | 2 +- .../dynamic/{remote_powershell.py => remote_script.py} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename data_safe_haven/infrastructure/components/dynamic/{remote_powershell.py => remote_script.py} (100%) diff --git a/data_safe_haven/infrastructure/components/dynamic/__init__.py b/data_safe_haven/infrastructure/components/dynamic/__init__.py index 2fe0f8decb..7c51c1d231 100644 --- a/data_safe_haven/infrastructure/components/dynamic/__init__.py +++ b/data_safe_haven/infrastructure/components/dynamic/__init__.py @@ -2,7 +2,7 @@ from .blob_container_acl import BlobContainerAcl, BlobContainerAclProps from .compiled_dsc import CompiledDsc, CompiledDscProps from .file_share_file import FileShareFile, FileShareFileProps -from .remote_powershell import RemoteScript, RemoteScriptProps +from .remote_script import RemoteScript, RemoteScriptProps from .ssl_certificate import SSLCertificate, SSLCertificateProps __all__ = [ diff --git a/data_safe_haven/infrastructure/components/dynamic/remote_powershell.py b/data_safe_haven/infrastructure/components/dynamic/remote_script.py similarity index 100% rename from data_safe_haven/infrastructure/components/dynamic/remote_powershell.py rename to data_safe_haven/infrastructure/components/dynamic/remote_script.py From 415c68a4a53ca5f6fb36afb706d60724c3590c56 Mon Sep 17 00:00:00 2001 From: James Robinson Date: Tue, 19 Sep 2023 12:18:51 +0100 Subject: [PATCH 04/12] :sparkles: Add a FileUpload provider --- .../components/dynamic/file_upload.py | 117 ++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 data_safe_haven/infrastructure/components/dynamic/file_upload.py diff --git a/data_safe_haven/infrastructure/components/dynamic/file_upload.py b/data_safe_haven/infrastructure/components/dynamic/file_upload.py new file mode 100644 index 0000000000..067d150dd4 --- /dev/null +++ b/data_safe_haven/infrastructure/components/dynamic/file_upload.py @@ -0,0 +1,117 @@ +"""Pulumi dynamic component for running remote scripts on an Azure VM.""" +from typing import Any + +from pulumi import Input, Output, ResourceOptions +from pulumi.dynamic import CreateResult, DiffResult, Resource + +from data_safe_haven.external import AzureApi +from data_safe_haven.functions import b64encode + +from .dsh_resource_provider import DshResourceProvider + + +class FileUploadProps: + """Props for the FileUpload class""" + + def __init__( + self, + file_contents: Input[str], + file_hash: Input[str], + file_permissions: Input[str], + file_target: Input[str], + subscription_name: Input[str], + vm_name: Input[str], + vm_resource_group_name: Input[str], + force_refresh: Input[bool] | None = None, + ) -> None: + self.file_contents = file_contents + self.file_hash = file_hash + self.file_target = file_target + self.file_permissions = file_permissions + self.force_refresh = force_refresh + 
self.subscription_name = subscription_name + self.vm_name = vm_name + self.vm_resource_group_name = vm_resource_group_name + + +class FileUploadProvider(DshResourceProvider): + def create(self, props: dict[str, Any]) -> CreateResult: + """Run a remote script to create a file on a VM""" + outs = dict(**props) + azure_api = AzureApi(props["subscription_name"], disable_logging=True) + script_contents = f""" + target_dir=$(dirname "$target"); + mkdir -p $target_dir 2> /dev/null; + echo $contents_b64 | base64 --decode > $target; + chmod {props['file_permissions']} $target; + echo "Wrote file to $target"; + """ + script_parameters = { + "contents_b64": b64encode(props["file_contents"]), + "target": props["file_target"], + } + # Run remote script + outs["script_output"] = azure_api.run_remote_script( + props["vm_resource_group_name"], + script_contents, + script_parameters, + props["vm_name"], + ) + return CreateResult( + f"FileUpload-{props['file_hash']}", + outs=outs, + ) + + def delete(self, id_: str, props: dict[str, Any]) -> None: + """Delete the remote file from the VM""" + # Use `id` as a no-op to avoid ARG002 while maintaining function signature + id((id_, props)) + outs = dict(**props) + azure_api = AzureApi(props["subscription_name"], disable_logging=True) + script_contents = """ + rm -f "$target"; + echo "Removed file at $target"; + """ + script_parameters = { + "target": props["file_target"], + } + # Run remote script + outs["script_output"] = azure_api.run_remote_script( + props["vm_resource_group_name"], + script_contents, + script_parameters, + props["vm_name"], + ) + + def diff( + self, + id_: str, + old_props: dict[str, Any], + new_props: dict[str, Any], + ) -> DiffResult: + """Calculate diff between old and new state""" + # Use `id` as a no-op to avoid ARG002 while maintaining function signature + id(id_) + if new_props["force_refresh"]: + return DiffResult( + changes=True, + replaces=list(new_props.keys()), + stables=[], + delete_before_replace=True, + ) + return self.partial_diff(old_props, new_props, []) + + +class FileUpload(Resource): + script_output: Output[str] + _resource_type_name = "dsh:common:FileUpload" # set resource type + + def __init__( + self, + name: str, + props: FileUploadProps, + opts: ResourceOptions | None = None, + ): + super().__init__( + FileUploadProvider(), name, {"script_output": None, **vars(props)}, opts + ) From f4c3d701ab81989226cf2b7756499f9e0e73b7ab Mon Sep 17 00:00:00 2001 From: James Robinson Date: Tue, 19 Sep 2023 11:21:08 +0100 Subject: [PATCH 05/12] :sparkles: Use FileUpload provider for smoke tests --- .../infrastructure/components/__init__.py | 4 ++++ .../components/dynamic/__init__.py | 3 +++ .../infrastructure/stacks/declarative_sre.py | 1 + .../infrastructure/stacks/sre/workspaces.py | 24 +++++++++++++++++++ 4 files changed, 32 insertions(+) diff --git a/data_safe_haven/infrastructure/components/__init__.py b/data_safe_haven/infrastructure/components/__init__.py index ee872fabe0..6fcb8d3f9b 100644 --- a/data_safe_haven/infrastructure/components/__init__.py +++ b/data_safe_haven/infrastructure/components/__init__.py @@ -20,6 +20,8 @@ CompiledDscProps, FileShareFile, FileShareFileProps, + FileUpload, + FileUploadProps, RemoteScript, RemoteScriptProps, SSLCertificate, @@ -41,6 +43,8 @@ "CompiledDscProps", "FileShareFile", "FileShareFileProps", + "FileUpload", + "FileUploadProps", "LinuxVMComponentProps", "LocalDnsRecordComponent", "LocalDnsRecordProps", diff --git a/data_safe_haven/infrastructure/components/dynamic/__init__.py 
b/data_safe_haven/infrastructure/components/dynamic/__init__.py index 7c51c1d231..4fdfb12dfc 100644 --- a/data_safe_haven/infrastructure/components/dynamic/__init__.py +++ b/data_safe_haven/infrastructure/components/dynamic/__init__.py @@ -2,6 +2,7 @@ from .blob_container_acl import BlobContainerAcl, BlobContainerAclProps from .compiled_dsc import CompiledDsc, CompiledDscProps from .file_share_file import FileShareFile, FileShareFileProps +from .file_upload import FileUpload, FileUploadProps from .remote_script import RemoteScript, RemoteScriptProps from .ssl_certificate import SSLCertificate, SSLCertificateProps @@ -14,6 +15,8 @@ "CompiledDscProps", "FileShareFile", "FileShareFileProps", + "FileUpload", + "FileUploadProps", "RemoteScript", "RemoteScriptProps", "SSLCertificate", diff --git a/data_safe_haven/infrastructure/stacks/declarative_sre.py b/data_safe_haven/infrastructure/stacks/declarative_sre.py index 439da4b8ca..907ebfe667 100644 --- a/data_safe_haven/infrastructure/stacks/declarative_sre.py +++ b/data_safe_haven/infrastructure/stacks/declarative_sre.py @@ -253,6 +253,7 @@ def run(self) -> None: storage_account_data_private_user_name=data.storage_account_data_private_user_name, storage_account_data_private_sensitive_name=data.storage_account_data_private_sensitive_name, subnet_workspaces=networking.subnet_workspaces, + subscription_name=self.cfg.subscription_name, virtual_network_resource_group=networking.resource_group, virtual_network=networking.virtual_network, vm_details=list(enumerate(self.cfg.sres[self.sre_name].workspace_skus)), diff --git a/data_safe_haven/infrastructure/stacks/sre/workspaces.py b/data_safe_haven/infrastructure/stacks/sre/workspaces.py index 2821b6f669..64e0c6d4f3 100644 --- a/data_safe_haven/infrastructure/stacks/sre/workspaces.py +++ b/data_safe_haven/infrastructure/stacks/sre/workspaces.py @@ -14,10 +14,13 @@ get_name_from_vnet, ) from data_safe_haven.infrastructure.components import ( + FileUpload, + FileUploadProps, LinuxVMComponentProps, VMComponent, ) from data_safe_haven.resources import resources_path +from data_safe_haven.utility import FileReader class SREWorkspacesProps: @@ -43,6 +46,7 @@ def __init__( storage_account_data_private_user_name: Input[str], storage_account_data_private_sensitive_name: Input[str], subnet_workspaces: Input[network.GetSubnetResult], + subscription_name: Input[str], virtual_network_resource_group: Input[resources.ResourceGroup], virtual_network: Input[network.VirtualNetwork], vm_details: list[tuple[int, str]], # this must *not* be passed as an Input[T] @@ -69,6 +73,7 @@ def __init__( self.storage_account_data_private_sensitive_name = ( storage_account_data_private_sensitive_name ) + self.subscription_name = subscription_name self.virtual_network_name = Output.from_input(virtual_network).apply( get_name_from_vnet ) @@ -170,6 +175,25 @@ def __init__( for vm in vms ] + # Upload smoke tests + run_all_tests = FileReader(resources_path / "workspace" / "run_all_tests.bats") + for vm, vm_output in zip(vms, vm_outputs, strict=True): + file_run_all_tests = FileUpload( + f"{self._name}_file_run_all_tests", + FileUploadProps( + file_contents=run_all_tests.file_contents(), + file_hash=run_all_tests.sha256(), + file_permissions="0444", + file_target=f"/opt/tests/{run_all_tests.name}", + force_refresh=True, + subscription_name=props.subscription_name, + vm_name=vm.vm_name, + vm_resource_group_name=resource_group.name, + ), + opts=child_opts, + ) + vm_output["run_all_tests"] = file_run_all_tests.script_output + # Register outputs 
self.resource_group = resource_group From cccecbfc4b1af26bdd917223f4d4ca033dcca429 Mon Sep 17 00:00:00 2001 From: James Robinson Date: Tue, 19 Sep 2023 11:22:01 +0100 Subject: [PATCH 06/12] :truck: Use one MSAL cache per tenant --- data_safe_haven/external/api/graph_api.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/data_safe_haven/external/api/graph_api.py b/data_safe_haven/external/api/graph_api.py index a329b50437..1b1a00b677 100644 --- a/data_safe_haven/external/api/graph_api.py +++ b/data_safe_haven/external/api/graph_api.py @@ -361,7 +361,9 @@ def create_token_administrator(self) -> str: result = None try: # Load local token cache - local_token_cache = LocalTokenCache(pathlib.Path.home() / ".msal_cache") + local_token_cache = LocalTokenCache( + pathlib.Path.home() / f".msal_cache_{self.tenant_id}" + ) # Use the Powershell application by default as this should be pre-installed app = PublicClientApplication( authority=f"https://login.microsoftonline.com/{self.tenant_id}", From d9edec2b5f449b9b14694cd9db7e670074701bd6 Mon Sep 17 00:00:00 2001 From: James Robinson Date: Tue, 19 Sep 2023 11:22:54 +0100 Subject: [PATCH 07/12] :recycle: Simplify SHA256 calculation --- data_safe_haven/functions/strings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data_safe_haven/functions/strings.py b/data_safe_haven/functions/strings.py index a3dac5e106..27089eeaed 100644 --- a/data_safe_haven/functions/strings.py +++ b/data_safe_haven/functions/strings.py @@ -90,7 +90,7 @@ def seeded_uuid(seed: str) -> uuid.UUID: def sha256hash(input_string: str) -> str: """Return the SHA256 hash of a string as a string.""" - return hashlib.sha256(str.encode(input_string, encoding="utf-8")).hexdigest() + return hashlib.sha256(input_string.encode("utf-8")).hexdigest() def truncate_tokens(tokens: Sequence[str], max_length: int) -> list[str]: From 29249df465109af3e3904247d38ac9acc69fb33a Mon Sep 17 00:00:00 2001 From: James Robinson Date: Wed, 20 Sep 2023 09:34:08 +0100 Subject: [PATCH 08/12] :alien: Allow multiple FileUploads to run at once, with each waiting until it is possible to run --- data_safe_haven/external/api/azure_api.py | 36 +++++++++++++++++++ .../components/dynamic/file_upload.py | 30 ++++++++++++---- 2 files changed, 60 insertions(+), 6 deletions(-) diff --git a/data_safe_haven/external/api/azure_api.py b/data_safe_haven/external/api/azure_api.py index 552bc35713..0ce8044bce 100644 --- a/data_safe_haven/external/api/azure_api.py +++ b/data_safe_haven/external/api/azure_api.py @@ -1084,6 +1084,42 @@ def run_remote_script( msg = f"Failed to run command on '{vm_name}'.\n{exc}" raise DataSafeHavenAzureError(msg) from exc + def run_remote_script_waiting( + self, + resource_group_name: str, + script: str, + script_parameters: dict[str, str], + vm_name: str, + ) -> str: + """Run a script on a remote virtual machine waiting for other scripts to complete + + Returns: + str: The script output + + Raises: + DataSafeHavenAzureError if running the script failed + """ + while True: + try: + script_output = self.run_remote_script( + resource_group_name=resource_group_name, + script=script, + script_parameters=script_parameters, + vm_name=vm_name, + ) + break + except DataSafeHavenAzureError as exc: + if all( + reason not in str(exc) + for reason in ( + "The request failed due to conflict with a concurrent request", + "Run command extension execution is in progress", + ) + ): + raise + time.sleep(5) + return script_output + def set_blob_container_acl( self, container_name: str, 
diff --git a/data_safe_haven/infrastructure/components/dynamic/file_upload.py b/data_safe_haven/infrastructure/components/dynamic/file_upload.py index 067d150dd4..231b583430 100644 --- a/data_safe_haven/infrastructure/components/dynamic/file_upload.py +++ b/data_safe_haven/infrastructure/components/dynamic/file_upload.py @@ -2,7 +2,7 @@ from typing import Any from pulumi import Input, Output, ResourceOptions -from pulumi.dynamic import CreateResult, DiffResult, Resource +from pulumi.dynamic import CreateResult, DiffResult, Resource, UpdateResult from data_safe_haven.external import AzureApi from data_safe_haven.functions import b64encode @@ -51,12 +51,19 @@ def create(self, props: dict[str, Any]) -> CreateResult: "target": props["file_target"], } # Run remote script - outs["script_output"] = azure_api.run_remote_script( + script_output = azure_api.run_remote_script_waiting( props["vm_resource_group_name"], script_contents, script_parameters, props["vm_name"], ) + outs["script_output"] = "\n".join( + [ + line.strip() + for line in script_output.replace("Enable succeeded:", "").split("\n") + if line + ] + ) return CreateResult( f"FileUpload-{props['file_hash']}", outs=outs, @@ -65,8 +72,7 @@ def create(self, props: dict[str, Any]) -> CreateResult: def delete(self, id_: str, props: dict[str, Any]) -> None: """Delete the remote file from the VM""" # Use `id` as a no-op to avoid ARG002 while maintaining function signature - id((id_, props)) - outs = dict(**props) + id(id_) azure_api = AzureApi(props["subscription_name"], disable_logging=True) script_contents = """ rm -f "$target"; @@ -76,7 +82,7 @@ def delete(self, id_: str, props: dict[str, Any]) -> None: "target": props["file_target"], } # Run remote script - outs["script_output"] = azure_api.run_remote_script( + azure_api.run_remote_script_waiting( props["vm_resource_group_name"], script_contents, script_parameters, @@ -97,10 +103,22 @@ def diff( changes=True, replaces=list(new_props.keys()), stables=[], - delete_before_replace=True, + delete_before_replace=False, ) return self.partial_diff(old_props, new_props, []) + def update( + self, + id_: str, + old_props: dict[str, Any], + new_props: dict[str, Any], + ) -> UpdateResult: + """Updating is creating without the need to delete.""" + # Use `id` as a no-op to avoid ARG002 while maintaining function signature + id((id_, old_props)) + updated = self.create(new_props) + return UpdateResult(outs=updated.outs) + class FileUpload(Resource): script_output: Output[str] From 7a889ccf9aefb4bbb9a520d39f80475158806765 Mon Sep 17 00:00:00 2001 From: James Robinson Date: Wed, 20 Sep 2023 19:22:02 +0100 Subject: [PATCH 09/12] :bug: Signal if the FileUpload could not be created --- .../infrastructure/components/dynamic/file_upload.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/data_safe_haven/infrastructure/components/dynamic/file_upload.py b/data_safe_haven/infrastructure/components/dynamic/file_upload.py index 231b583430..cb9f5152f1 100644 --- a/data_safe_haven/infrastructure/components/dynamic/file_upload.py +++ b/data_safe_haven/infrastructure/components/dynamic/file_upload.py @@ -4,6 +4,7 @@ from pulumi import Input, Output, ResourceOptions from pulumi.dynamic import CreateResult, DiffResult, Resource, UpdateResult +from data_safe_haven.exceptions import DataSafeHavenAzureError from data_safe_haven.external import AzureApi from data_safe_haven.functions import b64encode @@ -44,7 +45,11 @@ def create(self, props: dict[str, Any]) -> CreateResult: mkdir -p $target_dir 2> 
/dev/null; echo $contents_b64 | base64 --decode > $target; chmod {props['file_permissions']} $target; - echo "Wrote file to $target"; + if [ -f "$target" ]; then + echo "Wrote file to $target"; + else + echo "Failed to write file to $target"; + fi """ script_parameters = { "contents_b64": b64encode(props["file_contents"]), @@ -64,6 +69,8 @@ def create(self, props: dict[str, Any]) -> CreateResult: if line ] ) + if "Failed to write" in outs["script_output"]: + raise DataSafeHavenAzureError(outs["script_output"]) return CreateResult( f"FileUpload-{props['file_hash']}", outs=outs, From 05c8a160ab5478cc638bc7b3cb0b33f9c7579ddd Mon Sep 17 00:00:00 2001 From: James Robinson Date: Wed, 20 Sep 2023 17:55:45 +0100 Subject: [PATCH 10/12] :white_check_mark: Add smoke tests --- .../components/dynamic/file_upload.py | 2 +- .../infrastructure/stacks/sre/workspaces.py | 42 +++-- .../resources/workspace/run_all_tests.bats | 156 +++++++++++------- .../resources/workspace/test_databases.sh | 51 ++++++ .../resources/workspace/test_databases_R.R | 51 ++++++ .../workspace/test_databases_python.py | 55 ++++++ .../workspace/test_functionality_R.R | 39 +++++ .../workspace/test_functionality_python.py | 37 +++++ .../workspace/test_mounted_drives.sh | 66 ++++++++ .../workspace/test_repository_R.mustache.sh | 49 ++++++ .../test_repository_python.mustache.sh | 42 +++++ 11 files changed, 512 insertions(+), 78 deletions(-) create mode 100644 data_safe_haven/resources/workspace/test_databases.sh create mode 100644 data_safe_haven/resources/workspace/test_databases_R.R create mode 100644 data_safe_haven/resources/workspace/test_databases_python.py create mode 100644 data_safe_haven/resources/workspace/test_functionality_R.R create mode 100644 data_safe_haven/resources/workspace/test_functionality_python.py create mode 100644 data_safe_haven/resources/workspace/test_mounted_drives.sh create mode 100644 data_safe_haven/resources/workspace/test_repository_R.mustache.sh create mode 100644 data_safe_haven/resources/workspace/test_repository_python.mustache.sh diff --git a/data_safe_haven/infrastructure/components/dynamic/file_upload.py b/data_safe_haven/infrastructure/components/dynamic/file_upload.py index cb9f5152f1..731a662899 100644 --- a/data_safe_haven/infrastructure/components/dynamic/file_upload.py +++ b/data_safe_haven/infrastructure/components/dynamic/file_upload.py @@ -29,7 +29,7 @@ def __init__( self.file_hash = file_hash self.file_target = file_target self.file_permissions = file_permissions - self.force_refresh = force_refresh + self.force_refresh = Output.from_input(force_refresh).apply(lambda force: force if force else False) self.subscription_name = subscription_name self.vm_name = vm_name self.vm_resource_group_name = vm_resource_group_name diff --git a/data_safe_haven/infrastructure/stacks/sre/workspaces.py b/data_safe_haven/infrastructure/stacks/sre/workspaces.py index 64e0c6d4f3..fa854168f1 100644 --- a/data_safe_haven/infrastructure/stacks/sre/workspaces.py +++ b/data_safe_haven/infrastructure/stacks/sre/workspaces.py @@ -1,3 +1,4 @@ +import pathlib from collections.abc import Mapping from typing import Any @@ -166,7 +167,7 @@ def __init__( ] # Get details for each deployed VM - vm_outputs = [ + vm_outputs: list[dict[str, Any]] = [ { "ip_address": vm.ip_address_private, "name": vm.vm_name, @@ -176,23 +177,30 @@ def __init__( ] # Upload smoke tests - run_all_tests = FileReader(resources_path / "workspace" / "run_all_tests.bats") + mustache_values={ + "check_uninstallable_packages": "0", + } + file_uploads 
= [(FileReader(resources_path / "workspace" / "run_all_tests.bats"), "0444")] + for test_file in pathlib.Path(resources_path / "workspace").glob("test*"): + file_uploads.append((FileReader(test_file), "0444")) for vm, vm_output in zip(vms, vm_outputs, strict=True): - file_run_all_tests = FileUpload( - f"{self._name}_file_run_all_tests", - FileUploadProps( - file_contents=run_all_tests.file_contents(), - file_hash=run_all_tests.sha256(), - file_permissions="0444", - file_target=f"/opt/tests/{run_all_tests.name}", - force_refresh=True, - subscription_name=props.subscription_name, - vm_name=vm.vm_name, - vm_resource_group_name=resource_group.name, - ), - opts=child_opts, - ) - vm_output["run_all_tests"] = file_run_all_tests.script_output + outputs: dict[str, Output[str]] = {} + for file_upload, file_permissions in file_uploads: + file_smoke_test = FileUpload( + replace_separators(f"{self._name}_file_{file_upload.name}", "_"), + FileUploadProps( + file_contents=file_upload.file_contents(mustache_values=mustache_values), + file_hash=file_upload.sha256(), + file_permissions=file_permissions, + file_target=f"/opt/tests/{file_upload.name}", + subscription_name=props.subscription_name, + vm_name=vm.vm_name, + vm_resource_group_name=resource_group.name, + ), + opts=child_opts, + ) + outputs[file_upload.name] = file_smoke_test.script_output + vm_output["file_uploads"] = outputs # Register outputs self.resource_group = resource_group diff --git a/data_safe_haven/resources/workspace/run_all_tests.bats b/data_safe_haven/resources/workspace/run_all_tests.bats index b843d046f3..800a55cd3d 100644 --- a/data_safe_haven/resources/workspace/run_all_tests.bats +++ b/data_safe_haven/resources/workspace/run_all_tests.bats @@ -1,47 +1,92 @@ #! /usr/bin/env bats -load "../bats/bats-assert/load" -load "../bats/bats-file/load" -load "../bats/bats-support/load" # Helper functions # ---------------- -install_requirements_python() { - pip install pandas psycopg pymssql +initialise_python_environment() { + ENV_PATH="${HOME}/.local/bats-python-environment" + rm -rf "$ENV_PATH" + python -m venv "$ENV_PATH" + source "${ENV_PATH}/bin/activate" + pip install --upgrade pip --quiet } -install_requirements_R() { - Rscript -e "install.packages(c('DBI', 'odbc', 'RPostgres'))" +initialise_r_environment() { + ENV_PATH="${HOME}/.local/bats-r-environment" + rm -rf "$ENV_PATH" + mkdir -p "$ENV_PATH" } +install_r_package() { + PACKAGE_NAME="$1" + ENV_PATH="${HOME}/.local/bats-r-environment" + Rscript -e "install.packages('$PACKAGE_NAME', lib='$ENV_PATH');" +} -# Python -# ------ -# Test Python functionality -@test "Python functionality" { - run python tests/test_functionality_python.py 2>&1 - assert_output --partial 'All functionality tests passed' +install_r_package_version() { + PACKAGE_NAME="$1" + PACKAGE_VERSION="$2" + ENV_PATH="${HOME}/.local/bats-r-environment" + Rscript -e "install.packages('remotes', lib='$ENV_PATH');" + Rscript -e "library('remotes', lib='$ENV_PATH'); remotes::install_version(package='$PACKAGE_NAME', version='$PACKAGE_VERSION', lib='$ENV_PATH');" } -# Test Python package repository -@test "Python package repository" { - run bash tests/test_repository_python.sh 2>&1 - assert_output --partial 'All package installations behaved as expected' + +check_db_credentials() { + db_credentials="${HOME}/.local/db.dsh" + if [ -f "$db_credentials" ]; then + return 0 + fi + return 1 } -# R -# - -# Test R packages -# Test R functionality -@test "R functionality" { - run Rscript tests/test_functionality_R.R - assert_output 
--partial 'All functionality tests passed' +# Mounted drives +# -------------- +@test "Mounted drives (/data)" { + run bash test_mounted_drives.sh -d data + [ "$status" -eq 0 ] +} +@test "Mounted drives (/home)" { + run bash test_mounted_drives.sh -d home + [ "$status" -eq 0 ] +} +@test "Mounted drives (/output)" { + run bash test_mounted_drives.sh -d output + [ "$status" -eq 0 ] +} +@test "Mounted drives (/shared)" { + run bash test_mounted_drives.sh -d shared + [ "$status" -eq 0 ] } -# Test R package repository + +# Package repositories +# -------------------- +@test "Python package repository" { + initialise_python_environment + run bash test_repository_python.sh 2>&1 + [ "$status" -eq 0 ] +} @test "R package repository" { - run bash tests/test_repository_R.sh - assert_output --partial 'All package installations behaved as expected' + initialise_r_environment + run bash test_repository_R.sh + [ "$status" -eq 0 ] +} + + +# Language functionality +# ---------------------- +@test "Python functionality" { + initialise_python_environment + pip install numpy pandas scikit-learn --quiet + run python test_functionality_python.py 2>&1 + [ "$status" -eq 0 ] +} +@test "R functionality" { + initialise_r_environment + install_r_package_version "MASS" "7.3-52" + run Rscript test_functionality_R.R + [ "$status" -eq 0 ] } @@ -49,44 +94,35 @@ install_requirements_R() { # --------- # Test MS SQL database @test "MS SQL database (Python)" { - install_requirements_python - run bash tests/test_databases.sh -d mssql -l python - assert_output --partial 'All database tests passed' + check_db_credentials || skip "No database credentials available" + initialise_python_environment + pip install pandas psycopg pymssql --quiet + run bash test_databases.sh -d mssql -l python + [ "$status" -eq 0 ] } @test "MS SQL database (R)" { - install_requirements_R - run bash tests/test_databases.sh -d mssql -l R - assert_output --partial 'All database tests passed' + check_db_credentials || skip "No database credentials available" + initialise_r_environment + install_r_package "DBI" + install_r_package "odbc" + install_r_package "RPostgres" + run bash test_databases.sh -d mssql -l R + [ "$status" -eq 0 ] } - # Test Postgres database @test "Postgres database (Python)" { - install_requirements_python - run bash tests/test_databases.sh -d postgresql -l python - assert_output --partial 'All database tests passed' + check_db_credentials || skip "No database credentials available" + initialise_python_environment + pip install pandas psycopg pymssql --quiet + run bash test_databases.sh -d postgresql -l python + [ "$status" -eq 0 ] } @test "Postgres database (R)" { - install_requirements_R - run bash tests/test_databases.sh -d postgresql -l R - assert_output --partial 'All database tests passed' -} - - -# Mounted drives -# -------------- -@test "Mounted drives (/data)" { - run bash tests/test_mounted_drives.sh -d data - assert_output --partial 'All tests passed' -} -@test "Mounted drives (/home)" { - run bash tests/test_mounted_drives.sh -d home - assert_output --partial 'All tests passed' -} -@test "Mounted drives (/output)" { - run bash tests/test_mounted_drives.sh -d output - assert_output --partial 'All tests passed' -} -@test "Mounted drives (/shared)" { - run bash tests/test_mounted_drives.sh -d shared - assert_output --partial 'All tests passed' + check_db_credentials || skip "No database credentials available" + initialise_r_environment + install_r_package "DBI" + install_r_package "odbc" + install_r_package "RPostgres" + run 
bash test_databases.sh -d postgresql -l R + [ "$status" -eq 0 ] } diff --git a/data_safe_haven/resources/workspace/test_databases.sh b/data_safe_haven/resources/workspace/test_databases.sh new file mode 100644 index 0000000000..69fd7a456c --- /dev/null +++ b/data_safe_haven/resources/workspace/test_databases.sh @@ -0,0 +1,51 @@ +#! /bin/bash +db_type="" +language="" +while getopts d:l: flag; do + case "${flag}" in + d) db_type=${OPTARG} ;; + l) language=${OPTARG} ;; + *) + echo "Invalid option ${OPTARG}" + exit 1 + ;; + esac +done + +db_credentials="${HOME}/.local/db.dsh" +if [ -f "$db_credentials" ]; then + username="databaseadmin" + password="$(cat "$db_credentials")" +else + echo "Credentials file ($db_credentials) not found." + exit 1 +fi + +sre_fqdn="$(grep trusted /etc/pip.conf | cut -d "." -f 2-99)" +sre_prefix="$(hostname | cut -d "-" -f 1-4)" +if [ "$db_type" == "mssql" ]; then + db_name="master" + port="1433" + server_name="mssql.${sre_fqdn}" + hostname="${sre_prefix}-db-server-mssql" +elif [ "$db_type" == "postgresql" ]; then + db_name="postgres" + port="5432" + server_name="postgresql.${sre_fqdn}" + hostname="${sre_prefix}-db-server-postgresql" +else + echo "Did not recognise database type '$db_type'" + exit 1 +fi + +if [ "$port" == "" ]; then + echo "Database type '$db_type' is not part of this SRE" + exit 1 +else + script_path=$(dirname "$(readlink -f "$0")") + if [ "$language" == "python" ]; then + python "${script_path}"/test_databases_python.py --db-type "$db_type" --db-name "$db_name" --port "$port" --server-name "$server_name" --hostname "$hostname" --username "$username" --password "$password" || exit 1 + elif [ "$language" == "R" ]; then + Rscript "${script_path}"/test_databases_R.R "$db_type" "$db_name" "$port" "$server_name" "$hostname" "$username" "$password" || exit 1 + fi +fi diff --git a/data_safe_haven/resources/workspace/test_databases_R.R b/data_safe_haven/resources/workspace/test_databases_R.R new file mode 100644 index 0000000000..a261f21532 --- /dev/null +++ b/data_safe_haven/resources/workspace/test_databases_R.R @@ -0,0 +1,51 @@ +#!/usr/bin/env Rscript +library(DBI, lib.loc='~/.local/bats-r-environment') +library(odbc, lib.loc='~/.local/bats-r-environment') +library(RPostgres, lib.loc='~/.local/bats-r-environment') + +# Parse command line arguments +args = commandArgs(trailingOnly=TRUE) +if (length(args)!=7) { + stop("Exactly seven arguments are required: db_type, db_name, port, server_name, hostname, username and password") +} +db_type = args[1] +db_name = args[2] +port = args[3] +server_name = args[4] +hostname = args[5] +username = args[6] +password = args[7] + +# Connect to the database +print(paste("Attempting to connect to '", db_name, "' on '", server_name, "' via port '", port, sep="")) +if (db_type == "mssql") { + cnxn <- DBI::dbConnect( + odbc::odbc(), + Driver = "ODBC Driver 17 for SQL Server", + Server = paste(server_name, port, sep=","), + Database = db_name, + # Trusted_Connection = "yes", + UID = paste(username, "@", hostname, sep=""), + PWD = password + ) +} else if (db_type == "postgresql") { + cnxn <- DBI::dbConnect( + RPostgres::Postgres(), + host = server_name, + port = port, + dbname = db_name, + user = paste(username, "@", hostname, sep=""), + password = password + ) +} else { + stop(paste("Database type '", db_type, "' was not recognised", sep="")) +} + +# Run a query and save the output into a dataframe +df <- dbGetQuery(cnxn, "SELECT * FROM information_schema.tables;") +if (dim(df)[1] > 0) { + print(head(df, 5)) + print("All 
database tests passed") +} else { + stop(paste("Reading from database '", db_name, "' failed", sep="")) +} diff --git a/data_safe_haven/resources/workspace/test_databases_python.py b/data_safe_haven/resources/workspace/test_databases_python.py new file mode 100644 index 0000000000..37a37acb91 --- /dev/null +++ b/data_safe_haven/resources/workspace/test_databases_python.py @@ -0,0 +1,55 @@ +#! /usr/bin/env python +import argparse + +import pandas as pd +import psycopg +import pymssql + + +def test_database(server_name, hostname, port, db_type, db_name, username, password): + print(f"Attempting to connect to '{db_name}' on '{server_name}' via port {port}") + username_full = f"{username}@{hostname}" + cnxn = None + if db_type == "mssql": + cnxn = pymssql.connect( + server=server_name, user=username_full, password=password, database=db_name + ) + elif db_type == "postgresql": + connection_string = f"host={server_name} port={port} dbname={db_name} user={username_full} password={password}" + cnxn = psycopg.connect(connection_string) + else: + raise ValueError(f"Database type '{db_type}' was not recognised") + df = pd.read_sql("SELECT * FROM information_schema.tables;", cnxn) + if df.size: + print(df.head(5)) + print("All database tests passed") + else: + raise ValueError(f"Reading from database '{db_name}' failed.") + + +# Parse command line arguments +parser = argparse.ArgumentParser() +parser.add_argument( + "--db-type", + type=str, + choices=["mssql", "postgresql"], + help="Which database type to use", +) +parser.add_argument("--db-name", type=str, help="Which database to connect to") +parser.add_argument("--port", type=str, help="Which port to connect to") +parser.add_argument("--server-name", type=str, help="Which server to connect to") +parser.add_argument("--username", type=str, help="Database username") +parser.add_argument("--hostname", type=str, help="Azure hostname of the server") +parser.add_argument("--password", type=str, help="Database user password") +args = parser.parse_args() + +# Run database test +test_database( + args.server_name, + args.hostname, + args.port, + args.db_type, + args.db_name, + args.username, + args.password, +) diff --git a/data_safe_haven/resources/workspace/test_functionality_R.R b/data_safe_haven/resources/workspace/test_functionality_R.R new file mode 100644 index 0000000000..94c351e7c3 --- /dev/null +++ b/data_safe_haven/resources/workspace/test_functionality_R.R @@ -0,0 +1,39 @@ +# Test logistic regression using R +library('MASS', lib.loc='~/.local/bats-r-environment') +library('stats') + +gen_data <- function(n = 100, p = 3) { + set.seed(1) + weights <- stats::rgamma(n = n, shape = rep(1, n), rate = rep(1, n)) + y <- stats::rbinom(n = n, size = 1, prob = 0.5) + theta <- stats::rnorm(n = p, mean = 0, sd = 1) + means <- colMeans(as.matrix(y) %*% theta) + x <- MASS::mvrnorm(n = n, means, diag(1, p, p)) + return(list(x = x, y = y, weights = weights, theta = theta)) +} + +run_logistic_regression <- function(data) { + fit <- stats::glm.fit(x = data$x, + y = data$y, + weights = data$weights, + family = stats::quasibinomial(link = "logit")) + return(fit$coefficients) +} + +data <- gen_data() +theta <- run_logistic_regression(data) +print("Logistic regression ran OK") + + +# Test clustering of random data using R +num_clusters <- 5 +N <- 10 +set.seed(0, kind = "Mersenne-Twister") +cluster_means <- runif(num_clusters, 0, 10) +means_selector <- as.integer(runif(N, 1, num_clusters + 1)) +data_means <- cluster_means[means_selector] +data <- rnorm(n = N, mean = 
data_means, sd = 0.5) +hc <- hclust(dist(data)) +print("Clustering ran OK") + +print("All functionality tests passed") diff --git a/data_safe_haven/resources/workspace/test_functionality_python.py b/data_safe_haven/resources/workspace/test_functionality_python.py new file mode 100644 index 0000000000..9ca9662d98 --- /dev/null +++ b/data_safe_haven/resources/workspace/test_functionality_python.py @@ -0,0 +1,37 @@ +"""Test logistic regression using python""" +import numpy as np +import pandas as pd +from sklearn.linear_model import LogisticRegression + + +def gen_data(n_samples, n_points): + """Generate data for fitting""" + target = np.random.binomial(n=1, p=0.5, size=(n_samples, 1)) + theta = np.random.normal(loc=0.0, scale=1.0, size=(1, n_points)) + means = np.mean(np.multiply(target, theta), axis=0) + values = np.random.multivariate_normal( + means, np.diag([1] * n_points), size=n_samples + ).T + data = dict(("x{}".format(n), values[n]) for n in range(n_points)) + data["y"] = target.reshape((n_samples,)) + data["weights"] = np.random.gamma(shape=1, scale=1.0, size=n_samples) + return pd.DataFrame(data=data) + + +def main(): + """Logistic regression""" + data = gen_data(100, 3) + input_data = data.iloc[:, :-2] + output_data = data["y"] + weights = data["weights"] + + logit = LogisticRegression(solver="liblinear") + logit.fit(input_data, output_data, sample_weight=weights) + logit.score(input_data, output_data, sample_weight=weights) + + print("Logistic model ran OK") + print("All functionality tests passed") + + +if __name__ == "__main__": + main() diff --git a/data_safe_haven/resources/workspace/test_mounted_drives.sh b/data_safe_haven/resources/workspace/test_mounted_drives.sh new file mode 100644 index 0000000000..a1812934b9 --- /dev/null +++ b/data_safe_haven/resources/workspace/test_mounted_drives.sh @@ -0,0 +1,66 @@ +#! 
/bin/bash +while getopts d: flag +do + case "${flag}" in + d) directory=${OPTARG};; + *) + echo "Usage: $0 -d [directory]" + exit 1 + esac +done + +nfailed=0 +if [[ "$directory" = "home" ]]; then directory_path=$(echo ~); else directory_path="/${directory}"; fi +testfile="$(tr -dc 'a-zA-Z0-9' < /dev/urandom | fold -w 32 | head -n 1)" + +# Check that directory exists +if [ "$(ls "${directory_path}" 2>&1 1>/dev/null)" ]; then + echo "Could not find mount '${directory_path}'" + nfailed=$((nfailed + 1)) +fi + +# Test operations +CAN_CREATE="$([[ "$(touch "${directory_path}/${testfile}" 2>&1 1>/dev/null)" = "" ]] && echo '1' || echo '0')" +CAN_WRITE="$([[ -w "${directory_path}/${testfile}" ]] && echo '1' || echo '0')" +CAN_DELETE="$([[ "$(touch "${directory_path}/${testfile}" 2>&1 1>/dev/null && rm "${directory_path}/${testfile}" 2>&1)" ]] && echo '0' || echo '1')" + +# Check that permissions are as expected for each directory +case "$directory" in + data) + if [ "$CAN_CREATE" = 1 ]; then echo "Able to create files in ${directory_path}!"; nfailed=$((nfailed + 1)); fi + if [ "$CAN_WRITE" = 1 ]; then echo "Able to write files in ${directory_path}!"; nfailed=$((nfailed + 1)); fi + if [ "$CAN_DELETE" = 1 ]; then echo "Able to delete files in ${directory_path}!"; nfailed=$((nfailed + 1)); fi + ;; + + home) + if [ "$CAN_CREATE" = 0 ]; then echo "Unable to create files in ${directory_path}!"; nfailed=$((nfailed + 1)); fi + if [ "$CAN_WRITE" = 0 ]; then echo "Unable to write files in ${directory_path}!"; nfailed=$((nfailed + 1)); fi + if [ "$CAN_DELETE" = 0 ]; then echo "Unable to delete files in ${directory_path}!"; nfailed=$((nfailed + 1)); fi + ;; + + output) + if [ "$CAN_CREATE" = 0 ]; then echo "Unable to create files in ${directory_path}!"; nfailed=$((nfailed + 1)); fi + if [ "$CAN_WRITE" = 0 ]; then echo "Unable to write files in ${directory_path}!"; nfailed=$((nfailed + 1)); fi + if [ "$CAN_DELETE" = 0 ]; then echo "Unable to delete files in ${directory_path}!"; nfailed=$((nfailed + 1)); fi + ;; + + shared) + if [ "$CAN_CREATE" = 0 ]; then echo "Unable to create files in ${directory_path}!"; nfailed=$((nfailed + 1)); fi + if [ "$CAN_WRITE" = 0 ]; then echo "Unable to write files in ${directory_path}!"; nfailed=$((nfailed + 1)); fi + if [ "$CAN_DELETE" = 0 ]; then echo "Unable to delete files in ${directory_path}!"; nfailed=$((nfailed + 1)); fi + ;; + + *) + echo "Usage: $0 -d [directory]" + exit 1 +esac + +# Cleanup and print output +rm -f "${directory_path}/${testfile}" 2> /dev/null +if [ $nfailed = 0 ]; then + echo "All tests passed for '${directory_path}'" + exit 0 +else + echo "$nfailed tests failed for '${directory_path}'!" + exit $nfailed +fi diff --git a/data_safe_haven/resources/workspace/test_repository_R.mustache.sh b/data_safe_haven/resources/workspace/test_repository_R.mustache.sh new file mode 100644 index 0000000000..03568b1e62 --- /dev/null +++ b/data_safe_haven/resources/workspace/test_repository_R.mustache.sh @@ -0,0 +1,49 @@ +#! 
/bin/bash
+# We need to test packages that are:
+# - *not* pre-installed
+# - on the tier-3 list (so we can test all tiers)
+# - alphabetically early and late (so we can test the progress of the mirror synchronisation)
+packages=("askpass" "zeallot")
+uninstallable_packages=("aws.s3")
+
+# Use the shared test library that run_all_tests.bats initialises.
+# Note that this directory is removed at the end of this script, so
+# run_all_tests.bats must reinitialise it before any later test that
+# installs R packages.
+TEST_INSTALL_PATH="${HOME}/.local/bats-r-environment"
+
+# Install sample packages into the test library
+N_FAILURES=0
+for package in "${packages[@]}"; do
+    echo "Attempting to install ${package}..."
+    Rscript -e "options(warn=-1); install.packages('${package}', lib='${TEST_INSTALL_PATH}', quiet=TRUE)"
+    if (Rscript -e "library('${package}', lib.loc='${TEST_INSTALL_PATH}')"); then
+        echo "... $package installation succeeded"
+    else
+        echo "... $package installation failed"
+        N_FAILURES=$((N_FAILURES + 1))
+    fi
+done
+# If requested, demonstrate that installation fails for packages *not* on the approved list
+TEST_FAILURE="{{check_uninstallable_packages}}"
+if [ $TEST_FAILURE -eq 1 ]; then
+    for package in "${uninstallable_packages[@]}"; do
+        echo "Attempting to install ${package}..."
+        Rscript -e "options(warn=-1); install.packages('${package}', lib='${TEST_INSTALL_PATH}', quiet=TRUE)"
+        if (Rscript -e "library('${package}', lib.loc='${TEST_INSTALL_PATH}')"); then
+            echo "... $package installation unexpectedly succeeded!"
+            N_FAILURES=$((N_FAILURES + 1))
+        else
+            echo "... $package installation failed as expected"
+        fi
+    done
+fi
+rm -rf "$TEST_INSTALL_PATH"
+
+if [ $N_FAILURES -eq 0 ]; then
+    echo "All package installations behaved as expected"
+    exit 0
+else
+    echo "One or more package installations did not behave as expected!"
+    exit $N_FAILURES
+fi
diff --git a/data_safe_haven/resources/workspace/test_repository_python.mustache.sh b/data_safe_haven/resources/workspace/test_repository_python.mustache.sh
new file mode 100644
index 0000000000..28e46a23e1
--- /dev/null
+++ b/data_safe_haven/resources/workspace/test_repository_python.mustache.sh
@@ -0,0 +1,42 @@
+#! /bin/bash
+
+# We need to test packages that are:
+# - *not* pre-installed
+# - on the allowlist (so we can test this is working)
+# - alphabetically early and late (so we can test the progress of the mirror synchronisation)
+installable_packages=("contourpy" "tzdata")
+uninstallable_packages=("awscli")
+
+# Install sample packages into the active Python environment
+N_FAILURES=0
+for package in "${installable_packages[@]}"; do
+    echo "Attempting to install ${package}..."
+    if (pip install "$package" --quiet); then
+        echo "... $package installation succeeded"
+    else
+        echo "... $package installation failed"
+        N_FAILURES=$((N_FAILURES + 1))
+    fi
+done
+# If requested, demonstrate that installation fails for packages *not* on the approved list
+TEST_FAILURE="{{check_uninstallable_packages}}"
+if [ $TEST_FAILURE -eq 1 ]; then
+    for package in "${uninstallable_packages[@]}"; do
+        echo "Attempting to install ${package}..."
+        if (pip install "$package" --quiet); then
+            echo "... $package installation unexpectedly succeeded!"
+            N_FAILURES=$((N_FAILURES + 1))
+        else
+            echo "... $package installation failed as expected"
+        fi
+    done
+fi
+# Nothing to clean up: packages went into the disposable virtual environment created by run_all_tests.bats
+
+if [ $N_FAILURES -eq 0 ]; then
+    echo "All package installations behaved as expected"
+    exit 0
+else
+    echo "One or more package installations did not behave as expected!"
+ exit $N_FAILURES +fi From 15668ecbf56c6d1ac0a0becc59221af2984039ec Mon Sep 17 00:00:00 2001 From: James Robinson Date: Wed, 20 Sep 2023 19:23:14 +0100 Subject: [PATCH 11/12] :arrow_up: Add dependencies needed for smoke tests to allowlists --- .../resources/software_repositories/allowlists/cran.allowlist | 1 + .../resources/software_repositories/allowlists/pypi.allowlist | 1 + 2 files changed, 2 insertions(+) diff --git a/data_safe_haven/resources/software_repositories/allowlists/cran.allowlist b/data_safe_haven/resources/software_repositories/allowlists/cran.allowlist index d65ef196ea..9624ec7060 100644 --- a/data_safe_haven/resources/software_repositories/allowlists/cran.allowlist +++ b/data_safe_haven/resources/software_repositories/allowlists/cran.allowlist @@ -1,4 +1,5 @@ DBI +MASS RPostgres Rcpp bit diff --git a/data_safe_haven/resources/software_repositories/allowlists/pypi.allowlist b/data_safe_haven/resources/software_repositories/allowlists/pypi.allowlist index 3ab3c07dfe..704937893f 100644 --- a/data_safe_haven/resources/software_repositories/allowlists/pypi.allowlist +++ b/data_safe_haven/resources/software_repositories/allowlists/pypi.allowlist @@ -15,6 +15,7 @@ pyodbc pyparsing python-dateutil pytz +scikit-learn six typing-extensions tzdata From b039cd7eaabdd76da11467e04aeb0cae07f06b9c Mon Sep 17 00:00:00 2001 From: James Robinson Date: Wed, 20 Sep 2023 19:32:59 +0100 Subject: [PATCH 12/12] :rotating_light: Fix linting errors in smoke tests --- .../components/dynamic/file_upload.py | 4 +++- .../infrastructure/stacks/sre/workspaces.py | 10 +++++--- .../workspace/test_databases_python.py | 23 ++++++++++++++----- .../workspace/test_functionality_python.py | 10 ++++---- pyproject.toml | 4 ++++ 5 files changed, 36 insertions(+), 15 deletions(-) diff --git a/data_safe_haven/infrastructure/components/dynamic/file_upload.py b/data_safe_haven/infrastructure/components/dynamic/file_upload.py index 731a662899..4f1f259c47 100644 --- a/data_safe_haven/infrastructure/components/dynamic/file_upload.py +++ b/data_safe_haven/infrastructure/components/dynamic/file_upload.py @@ -29,7 +29,9 @@ def __init__( self.file_hash = file_hash self.file_target = file_target self.file_permissions = file_permissions - self.force_refresh = Output.from_input(force_refresh).apply(lambda force: force if force else False) + self.force_refresh = Output.from_input(force_refresh).apply( + lambda force: force if force else False + ) self.subscription_name = subscription_name self.vm_name = vm_name self.vm_resource_group_name = vm_resource_group_name diff --git a/data_safe_haven/infrastructure/stacks/sre/workspaces.py b/data_safe_haven/infrastructure/stacks/sre/workspaces.py index fa854168f1..fdf3d46c4d 100644 --- a/data_safe_haven/infrastructure/stacks/sre/workspaces.py +++ b/data_safe_haven/infrastructure/stacks/sre/workspaces.py @@ -177,10 +177,12 @@ def __init__( ] # Upload smoke tests - mustache_values={ + mustache_values = { "check_uninstallable_packages": "0", } - file_uploads = [(FileReader(resources_path / "workspace" / "run_all_tests.bats"), "0444")] + file_uploads = [ + (FileReader(resources_path / "workspace" / "run_all_tests.bats"), "0444") + ] for test_file in pathlib.Path(resources_path / "workspace").glob("test*"): file_uploads.append((FileReader(test_file), "0444")) for vm, vm_output in zip(vms, vm_outputs, strict=True): @@ -189,7 +191,9 @@ def __init__( file_smoke_test = FileUpload( replace_separators(f"{self._name}_file_{file_upload.name}", "_"), FileUploadProps( - 
file_contents=file_upload.file_contents(mustache_values=mustache_values), + file_contents=file_upload.file_contents( + mustache_values=mustache_values + ), file_hash=file_upload.sha256(), file_permissions=file_permissions, file_target=f"/opt/tests/{file_upload.name}", diff --git a/data_safe_haven/resources/workspace/test_databases_python.py b/data_safe_haven/resources/workspace/test_databases_python.py index 37a37acb91..ab0f01a3fe 100644 --- a/data_safe_haven/resources/workspace/test_databases_python.py +++ b/data_safe_haven/resources/workspace/test_databases_python.py @@ -6,8 +6,17 @@ import pymssql -def test_database(server_name, hostname, port, db_type, db_name, username, password): - print(f"Attempting to connect to '{db_name}' on '{server_name}' via port {port}") +def test_database( + server_name: str, + hostname: str, + port: int, + db_type: str, + db_name: str, + username: str, + password: str, +) -> None: + msg = f"Attempting to connect to '{db_name}' on '{server_name}' via port {port}" + print(msg) # noqa: T201 username_full = f"{username}@{hostname}" cnxn = None if db_type == "mssql": @@ -18,13 +27,15 @@ def test_database(server_name, hostname, port, db_type, db_name, username, passw connection_string = f"host={server_name} port={port} dbname={db_name} user={username_full} password={password}" cnxn = psycopg.connect(connection_string) else: - raise ValueError(f"Database type '{db_type}' was not recognised") + msg = f"Database type '{db_type}' was not recognised" + raise ValueError(msg) df = pd.read_sql("SELECT * FROM information_schema.tables;", cnxn) if df.size: - print(df.head(5)) - print("All database tests passed") + print(df.head(5)) # noqa: T201 + print("All database tests passed") # noqa: T201 else: - raise ValueError(f"Reading from database '{db_name}' failed.") + msg = f"Reading from database '{db_name}' failed." 
+ raise ValueError(msg) # Parse command line arguments diff --git a/data_safe_haven/resources/workspace/test_functionality_python.py b/data_safe_haven/resources/workspace/test_functionality_python.py index 9ca9662d98..855e5e5f15 100644 --- a/data_safe_haven/resources/workspace/test_functionality_python.py +++ b/data_safe_haven/resources/workspace/test_functionality_python.py @@ -4,7 +4,7 @@ from sklearn.linear_model import LogisticRegression -def gen_data(n_samples, n_points): +def gen_data(n_samples: int, n_points: int) -> pd.DataFrame: """Generate data for fitting""" target = np.random.binomial(n=1, p=0.5, size=(n_samples, 1)) theta = np.random.normal(loc=0.0, scale=1.0, size=(1, n_points)) @@ -12,13 +12,13 @@ def gen_data(n_samples, n_points): values = np.random.multivariate_normal( means, np.diag([1] * n_points), size=n_samples ).T - data = dict(("x{}".format(n), values[n]) for n in range(n_points)) + data = {f"x{n}": values[n] for n in range(n_points)} data["y"] = target.reshape((n_samples,)) data["weights"] = np.random.gamma(shape=1, scale=1.0, size=n_samples) return pd.DataFrame(data=data) -def main(): +def main() -> None: """Logistic regression""" data = gen_data(100, 3) input_data = data.iloc[:, :-2] @@ -29,8 +29,8 @@ def main(): logit.fit(input_data, output_data, sample_weight=weights) logit.score(input_data, output_data, sample_weight=weights) - print("Logistic model ran OK") - print("All functionality tests passed") + print("Logistic model ran OK") # noqa: T201 + print("All functionality tests passed") # noqa: T201 if __name__ == "__main__": diff --git a/pyproject.toml b/pyproject.toml index e033e8459b..45d64e4052 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -152,11 +152,15 @@ module = [ "cryptography.*", "dns.*", "msal.*", + "numpy.*", + "pandas.*", "psycopg.*", "pulumi.*", "pulumi_azure_native.*", + "pymssql.*", "rich.*", "simple_acme_dns.*", + "sklearn.*", "typer.*", "websocket.*", ]
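
A closing usage note, separate from the patches above: once the series is deployed, the uploaded suite can also be run by hand on a workspace VM. This is a minimal sketch, assuming the FileUpload resources succeeded and left the files under /opt/tests (the file_target used above) and that the logged-in user can read that directory; both depend on the deployment.

    # Run from the upload directory so that the relative "bash test_*.sh"
    # calls inside run_all_tests.bats resolve correctly
    cd /opt/tests
    bats run_all_tests.bats

bats prints one ok/not-ok line per @test; the database tests skip themselves when no credentials file is present at ~/.local/db.dsh.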