Skip to content

Commit

Permalink
MIDRC-536 MIDRC-653 Add expiration bucket lifecycle rule (#112)
Browse files Browse the repository at this point in the history
  • Loading branch information
paulineribeyre authored Jul 18, 2024
1 parent 9509951 commit 6ec23fa
Show file tree
Hide file tree
Showing 6 changed files with 45 additions and 19 deletions.
2 changes: 2 additions & 0 deletions doc/howto/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ An example manifest entry may look like
"lifecycle-pre-stop": ["su", "-c", "cd /data; for f in *; do fusermount -u $f; rm -rf $f; done", "-s", "/bin/sh", "jovyan"]
},
"nextflow-global": {
"s3-objects-expiration-days": 30,
"sample-config-public-image": "",
"imagebuilder-reader-role-arn": ""
},
Expand Down Expand Up @@ -126,6 +127,7 @@ An example manifest entry may look like
* `command` a string array as the command to run in the container overriding the default.
* `lifecycle-pre-stop` a string array as the container prestop command.
* `nextflow-global` is for global configuration specific to Nextflow containers.
* `s3-objects-expiration-days` (int, default 30): objects created in S3 by Nextflow are deleted after the specified number of days.
* `sample-config-public-image`: a publicly-accessible image that any user can pull to test Nextflow workflows. Will be mentioned in the auto-generated sample configuration and documentation when a user launches a Nextflow workspace.
* `imagebuilder-reader-role-arn`: see the [nextflow-global.imagebuilder-reader-role-arn section](/doc/explanation/nextflow.md#nextflow-globalimagebuilder-reader-role-arn) of the Nextflow workspaces documentation.
* `containers` is the list of workspaces available to be run by this instance of Hatchery. Each container must be a single image and expose a web server.
Expand Down
13 changes: 3 additions & 10 deletions hatchery/authz.go
Original file line number Diff line number Diff line change
Expand Up @@ -233,14 +233,14 @@ var isUserAuthorizedForPayModels = func(userName string, allowedPayModels []stri
var isUserAuthorizedForResourcePaths = func(userName string, accessToken string, resourcePaths []string) (bool, error) {
Config.Logger.Printf("DEBUG: Checking user '%s' access to resource paths %v (service 'jupyterhub', method 'launch')", userName, resourcePaths)

body := "{ \"requests\": ["
body := fmt.Sprintf("{\"user\": {\"token\": \"%s\"}, \"requests\": [", accessToken)
for _, resource := range resourcePaths {
body += fmt.Sprintf("{\"resource\": \"%s\", \"action\": {\"service\": \"jupyterhub\", \"method\": \"launch\"}},", resource)
}
body = body[:len(body)-1] // remove the last trailing comma
body += "]}"

authorized, err := arboristAuthRequest(accessToken, body)
authorized, err := arboristAuthRequest(body)
if err != nil {
Config.Logger.Printf("something went wrong when making a call to arborist's `/auth/request` endpoint. Denying access. Details: %v", err.Error())
return false, nil
Expand All @@ -249,20 +249,13 @@ var isUserAuthorizedForResourcePaths = func(userName string, accessToken string,
return authorized, nil
}

var arboristAuthRequest = func(accessToken string, body string) (bool, error) {
var arboristAuthRequest = func(body string) (bool, error) {
arboristUrl := "http://arborist-service/auth/request"
req, err := http.NewRequest("POST", arboristUrl, bytes.NewBufferString(body))
if err != nil {
return false, errors.New("Error occurred while generating HTTP request: " + err.Error())
}

headers := map[string]string{
"Authorization": fmt.Sprintf("Bearer %s", accessToken),
}
for k, v := range headers {
req.Header.Add(k, v)
}

client := &http.Client{Timeout: 10 * time.Second}
resp, err := client.Do(req)
if err != nil {
Expand Down
4 changes: 2 additions & 2 deletions hatchery/authz_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ func TestIsUserAuthorizedForResourcePaths(t *testing.T) {
}

resourcePaths := []string{"/workspace/abc", "/workspace/xyz"}
expectedRequestBody := "{ \"requests\": [{\"resource\": \"/workspace/abc\", \"action\": {\"service\": \"jupyterhub\", \"method\": \"launch\"}},{\"resource\": \"/workspace/xyz\", \"action\": {\"service\": \"jupyterhub\", \"method\": \"launch\"}}]}"
expectedRequestBody := "{\"user\": {\"token\": \"accessToken\"}, \"requests\": [{\"resource\": \"/workspace/abc\", \"action\": {\"service\": \"jupyterhub\", \"method\": \"launch\"}},{\"resource\": \"/workspace/xyz\", \"action\": {\"service\": \"jupyterhub\", \"method\": \"launch\"}}]}"

originalArboristAuthRequest := arboristAuthRequest
defer func() {
Expand All @@ -267,7 +267,7 @@ func TestIsUserAuthorizedForResourcePaths(t *testing.T) {
t.Logf("Running test case: '%s'", testCase.name)

// mock the call to arborist
arboristAuthRequest = func(accessToken string, body string) (bool, error) {
arboristAuthRequest = func(body string) (bool, error) {
if testCase.arboristError {
return false, fmt.Errorf("mocking an error while making call to arborist")
}
Expand Down
1 change: 1 addition & 0 deletions hatchery/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (

// Global configuration shared by all Nextflow containers
type NextflowGlobalConfig struct {
S3ObjectsExpirationDays int `json:"s3-objects-expiration-days"`
SampleConfigPublicImage string `json:"sample-config-public-image"`
ImageBuilderReaderRoleArn string `json:"imagebuilder-reader-role-arn"`
}
Expand Down
3 changes: 3 additions & 0 deletions hatchery/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,9 @@ func getAwsAccountId(sess *session.Session, awsConfig *aws.Config) (string, erro
if err != nil {
return "", err
}
if *req.Account == "" {
return "", fmt.Errorf("unable to find AWS account ID: STS GetCallerIdentity returned: %v", *req)
}
return *req.Account, nil
}

Expand Down
41 changes: 34 additions & 7 deletions hatchery/nextflow.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ import (
General TODOS:
- Make the AWS region configurable in the hatchery config (although ideally, the user should be able to choose) (MIDRC-743)
- Make the `roleArn` configurable (MIDRC-744)
- The contents of `s3://<nextflow bucket>/<username>` are not deleted because researchers may need to keep the intermediary files. We should set bucket lifecycle rules to delete after X days. (MIDRC-653 and MIDRC-536)
- Can we do this long setup as a separate workspace launch step, instead of in the launch() function? (MIDRC-745)
*/

Expand Down Expand Up @@ -118,14 +117,14 @@ func createNextflowResources(userName string, nextflowGlobalConfig NextflowGloba
}

// Create nextflow compute environment if it does not exist
batchComputeEnvArn, err := createBatchComputeEnvironment(nextflowGlobalConfig, nextflowConfig, userName, hostname, tagsMap, batchSvc, ec2Svc, iamSvc, *vpcid, *subnetids, payModel, awsAccountId)
batchComputeEnvArn, err := createBatchComputeEnvironment(nextflowGlobalConfig, nextflowConfig, userName, hostname, tagsMap, batchSvc, ec2Svc, iamSvc, *vpcid, *subnetids)
if err != nil {
Config.Logger.Printf("Error creating compute environment for user %s: %s", userName, err.Error())
return "", "", err
}

// Create S3 bucket
kmsKeyArn, err := createS3bucket(s3Svc, kmsSvc, bucketName, kmsTags)
kmsKeyArn, err := createS3bucket(nextflowGlobalConfig, s3Svc, kmsSvc, bucketName, kmsTags)
if err != nil {
Config.Logger.Printf("Error creating S3 bucket '%s': %v", bucketName, err)
return "", "", err
Expand Down Expand Up @@ -445,10 +444,11 @@ var getNextflowAwsSettings = func(sess *session.Session, payModel *PayModel, use
Config.Logger.Printf("Info: pay model disabled for user '%s': %s Nextflow resources in main AWS account", userName, action)
awsConfig = aws.Config{}
Config.Logger.Printf("Debug: Getting AWS account ID...")
awsAccountId, err := getAwsAccountId(sess, &awsConfig)
var err error
awsAccountId, err = getAwsAccountId(sess, &awsConfig)
if err != nil {
Config.Logger.Printf("Error getting AWS account ID: %v", err)
return awsAccountId, awsConfig, err
return "", awsConfig, err
}
}
return awsAccountId, awsConfig, nil
Expand Down Expand Up @@ -605,7 +605,7 @@ func ensureLaunchTemplate(ec2Svc *ec2.EC2, userName string, hostname string, job
}

// Create AWS Batch compute environment
func createBatchComputeEnvironment(nextflowGlobalConfig NextflowGlobalConfig, nextflowConfig NextflowConfig, userName string, hostname string, tagsMap map[string]*string, batchSvc *batch.Batch, ec2Svc *ec2.EC2, iamSvc *iam.IAM, vpcid string, subnetids []string, payModel *PayModel, awsAccountId string) (string, error) {
func createBatchComputeEnvironment(nextflowGlobalConfig NextflowGlobalConfig, nextflowConfig NextflowConfig, userName string, hostname string, tagsMap map[string]*string, batchSvc *batch.Batch, ec2Svc *ec2.EC2, iamSvc *iam.IAM, vpcid string, subnetids []string) (string, error) {
instanceProfileArn, err := createEcsInstanceProfile(iamSvc, fmt.Sprintf("%s-nf-ecsInstanceRole", hostname))
if err != nil {
Config.Logger.Printf("Unable to create ECS instance profile: %s", err.Error())
Expand Down Expand Up @@ -692,6 +692,7 @@ func createBatchComputeEnvironment(nextflowGlobalConfig NextflowGlobalConfig, ne
return "", err
}
} else { // compute environment does not exist, create it
Config.Logger.Printf("Debug: Batch compute environment '%s' does not exist, creating it", batchComputeEnvName)
subnets := []*string{}
for _, subnet := range subnetids {
s := subnet
Expand Down Expand Up @@ -861,7 +862,7 @@ func createEcsInstanceProfile(iamSvc *iam.IAM, name string) (*string, error) {
return instanceProfile.InstanceProfile.Arn, nil
}

func createS3bucket(s3Svc *s3.S3, kmsSvc *kms.KMS, bucketName string, kmsTags []*kms.Tag) (string, error) {
func createS3bucket(nextflowGlobalConfig NextflowGlobalConfig, s3Svc *s3.S3, kmsSvc *kms.KMS, bucketName string, kmsTags []*kms.Tag) (string, error) {
// create S3 bucket for nextflow input, output and intermediate files
_, err := s3Svc.CreateBucket(&s3.CreateBucketInput{
Bucket: &bucketName,
Expand Down Expand Up @@ -964,6 +965,32 @@ func createS3bucket(s3Svc *s3.S3, kmsSvc *kms.KMS, bucketName string, kmsTags []
return "", err
}

expirationDays := nextflowGlobalConfig.S3ObjectsExpirationDays
if expirationDays <= 0 {
expirationDays = 30
}
Config.Logger.Printf("DEBUG: Setting bucket objects expiration to %d days", expirationDays)
_, err = s3Svc.PutBucketLifecycleConfiguration(&s3.PutBucketLifecycleConfigurationInput{
Bucket: &bucketName,
LifecycleConfiguration: &s3.BucketLifecycleConfiguration{
Rules: []*s3.LifecycleRule{
{
Expiration: &s3.LifecycleExpiration{
Days: aws.Int64(int64(expirationDays)),
},
Status: aws.String("Enabled"),
Filter: &s3.LifecycleRuleFilter{
Prefix: aws.String(""), // apply to all objects
},
},
},
},
})
if err != nil {
Config.Logger.Printf("Unable to set lifecycle configuration: %v", err)
return "", err
}

Config.Logger.Printf("DEBUG: Done setting up S3 bucket!")
return *kmsKeyArn, nil
}
Expand Down

0 comments on commit 6ec23fa

Please sign in to comment.