Skip to content

Commit

Permalink
Add delete environment Jenkins job
Browse files Browse the repository at this point in the history
  • Loading branch information
lewijacn committed Oct 8, 2024
1 parent 5709add commit 7fdefb1
Show file tree
Hide file tree
Showing 6 changed files with 310 additions and 0 deletions.
9 changes: 9 additions & 0 deletions jenkins/migrationIntegPipelines/cleanupDeploymentCover.groovy
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
// Thin Jenkins job wrapper: loads the shared migrations library from the
// requested repo/branch and delegates to its cleanupDeployment() pipeline.
def branch = params.GIT_BRANCH ?: 'main'
def repoUrl = params.GIT_REPO_URL ?: 'https://github.com/opensearch-project/opensearch-migrations.git'

// Fetch the shared library at the chosen branch directly from Git.
library identifier: "migrations-lib@${branch}",
        retriever: modernSCM([$class: 'GitSCMSource', remote: "${repoUrl}"])

// Shared library function (location from root: vars/cleanupDeployment.groovy)
cleanupDeployment()
11 changes: 11 additions & 0 deletions test/cleanupDeployment/Pipfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
[[source]]
url = "https://pypi.org/simple"
verify_ssl = true
name = "pypi"

[packages]
boto3 = "*"
# NOTE: the 'argparse' dependency was removed. argparse has been part of the
# Python standard library since 3.2 (this project requires 3.11), and the PyPI
# 'argparse' package is an unmaintained backport that should not be installed.

[requires]
python_version = "3.11"
78 changes: 78 additions & 0 deletions test/cleanupDeployment/Pipfile.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 20 additions & 0 deletions test/cleanupDeployment/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Cleanup Deployment

Utility tool for removing deployed resources

### Running Tool

If pipenv is not installed, install it with the command below
```shell
python3 -m pip install --upgrade pipenv
```

Install dependencies
```shell
pipenv install --deploy
```

Run the cleanup deployment script
```shell
pipenv run python3 cleanup_deployment.py --stage rfs-integ1
```
144 changes: 144 additions & 0 deletions test/cleanupDeployment/cleanup_deployment.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
import argparse
import boto3
import logging
import re
import time
from typing import List

from botocore.exceptions import ClientError

# Configure logging once at module load; all messages share this timestamped format.
logging.basicConfig(format='%(asctime)s [%(levelname)s] %(message)s', level=logging.INFO)
logger = logging.getLogger(__name__)

# Stack-name markers for stacks with no cross-stack ordering constraints;
# delete_stacks() removes these first, together, in a single batch.
INDEPENDENT_STACKS = ['MigrationConsole', 'ReindexFromSnapshot', 'TrafficReplayer', 'TargetClusterProxy',
                      'CaptureProxy', 'KafkaBroker', 'OpenSearchContainer', 'CaptureProxyES', 'Elasticsearch']
# Stack-name markers for stacks deleted afterwards, strictly in this list order.
CORE_STACKS_ORDERED = ['MigrationInfra', 'OpenSearchDomain', 'NetworkInfra', 'infra-stack', 'network-stack']
# Stacks already deleting (or deleted) are excluded when collecting stacks to remove.
CFN_INITIAL_STATUS_SKIP = ['DELETE_IN_PROGRESS', 'DELETE_COMPLETE']
# Retry and polling limits for stack deletion.
MAX_DELETE_STACK_RETRIES = 3
MAX_WAIT_MINUTES = 45
WAIT_INTERVAL_SECONDS = 15


class StackDeletionRequest:
    """Tracks one CloudFormation stack deletion attempt and its retry count."""

    def __init__(self, stack_name, client_request_token=None):
        # Name of the CFN stack this request is deleting.
        self.stack_name = stack_name
        # Optional idempotency token for the delete call (not set by callers here).
        self.client_request_token = client_request_token
        # Number of delete retries performed so far; starts at zero.
        self.retry_count = 0


def delete_stack(cfn_client, stack_name: str) -> StackDeletionRequest:
    """Initiate deletion of one CFN stack and return a tracking request.

    Warns (but still proceeds) if the stack is already in an *_IN_PROGRESS
    state, since issuing a delete on top of another operation is unusual.
    """
    current_status = cfn_client.describe_stacks(StackName=stack_name)['Stacks'][0]['StackStatus']
    if 'IN_PROGRESS' in current_status:
        logger.warning(f"Unexpected status: {current_status} for {stack_name} when preparing to delete stack")
    logger.info(f"Deleting stack: {stack_name}")
    cfn_client.delete_stack(StackName=stack_name)
    return StackDeletionRequest(stack_name=stack_name)


def retry_delete_stack(cfn_client, deletion_request: StackDeletionRequest):
    """Re-issue a failed stack delete, raising once the retry budget is spent.

    Increments the request's retry_count after each re-issued delete and
    returns the same (mutated) request object.
    """
    attempts_used = deletion_request.retry_count
    if attempts_used >= MAX_DELETE_STACK_RETRIES:
        raise RuntimeError(f"Max attempts of {MAX_DELETE_STACK_RETRIES} have failed to delete stack: "
                           f"{deletion_request.stack_name}. Please see CFN stack logs for more details")
    logger.info(f"Retry attempt {attempts_used + 1} of {MAX_DELETE_STACK_RETRIES} for "
                f"stack: {deletion_request.stack_name}")
    delete_stack(cfn_client=cfn_client, stack_name=deletion_request.stack_name)
    deletion_request.retry_count = attempts_used + 1
    return deletion_request


def wait_for_stack_deletion(cfn_client, stack_delete_requests: List[StackDeletionRequest]):
    """Poll CloudFormation until every requested stack deletion finishes.

    Checks each stack every WAIT_INTERVAL_SECONDS for up to MAX_WAIT_MINUTES.
    DELETE_FAILED stacks are retried via retry_delete_stack (which raises after
    MAX_DELETE_STACK_RETRIES). Logs an error if the timeout is reached with
    stacks still outstanding.
    """
    wait_time_seconds = 0
    remaining_requests = stack_delete_requests[:]

    while remaining_requests and wait_time_seconds < (MAX_WAIT_MINUTES * 60):
        # Temporary list for stacks that are still being deleted
        in_progress_requests = []

        for delete_request in remaining_requests:
            stack_status = ""
            try:
                describe_stack_response = cfn_client.describe_stacks(StackName=delete_request.stack_name)
                stack_status = describe_stack_response['Stacks'][0].get('StackStatus')
            except ClientError as client_error:
                # Stack vanished between polls: deletion completed, drop it.
                if 'does not exist' in client_error.response['Error']['Message']:
                    continue
                # Any other API error (e.g. throttling) leaves stack_status
                # empty and the request is kept for the next polling round.

            if stack_status == 'DELETE_COMPLETE':
                logger.info(f"Stack {delete_request.stack_name} deletion completed.")
            elif stack_status == 'DELETE_FAILED':
                logger.error(f"Stack {delete_request.stack_name} deletion failed, retrying...")
                retry_delete_stack(cfn_client=cfn_client, deletion_request=delete_request)
                in_progress_requests.append(delete_request)  # Keep for further checks after retry
            elif stack_status == 'DELETE_IN_PROGRESS':
                logger.info(f"Stack {delete_request.stack_name} is currently DELETE_IN_PROGRESS.")
                in_progress_requests.append(delete_request)  # Still in progress
            else:
                logger.warning(f"Unexpected status: {stack_status} for stack: {delete_request.stack_name}")
                in_progress_requests.append(delete_request)  # Unexpected status but still in progress

        remaining_requests = in_progress_requests
        if remaining_requests:
            logger.info(f"Waiting for the following stacks: {[r.stack_name for r in remaining_requests]}")
            # Sleep only while work remains; previously this slept one extra
            # full interval after the last stack had already completed.
            time.sleep(WAIT_INTERVAL_SECONDS)
            wait_time_seconds += WAIT_INTERVAL_SECONDS

    if remaining_requests:
        logger.error(f"Timeout reached. The following stacks were still in progress: "
                     f"{[r.stack_name for r in remaining_requests]}")
    else:
        logger.info(f"The following stacks have been deleted successfully: "
                    f"{[s.stack_name for s in stack_delete_requests]}")


def delete_stacks(cfn_client, stack_names):
    """Delete the given stacks: independent stacks first in one batch, then
    core stacks in their required order (batched per core stack type)."""
    # Independent stacks have no ordering constraints, so kick them all off
    # together and wait for the whole batch.
    independent_requests = []
    for name in stack_names:
        if any(marker in name for marker in INDEPENDENT_STACKS):
            independent_requests.append(delete_stack(cfn_client, name))
    if independent_requests:
        wait_for_stack_deletion(cfn_client=cfn_client, stack_delete_requests=independent_requests)

    # Core stacks must come down in CORE_STACKS_ORDERED order; all stacks of a
    # given type are deleted as one batch before moving to the next type.
    for core_id in CORE_STACKS_ORDERED:
        batch_requests = [delete_stack(cfn_client, name) for name in stack_names if core_id in name]
        if batch_requests:
            wait_for_stack_deletion(cfn_client=cfn_client, stack_delete_requests=batch_requests)


def delete_stacks_for_environment(stage_name: str):
    """Collect and delete all CFN stacks belonging to the given stage.

    A stack belongs to the stage when its name contains '-<stage>-' or ends
    with '-<stage>'. Stacks already deleting/deleted are skipped.
    """
    client = boto3.client('cloudformation')
    # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/cloudformation/client/list_stacks.html
    list_stacks_response = client.list_stacks()
    stack_names = [stack['StackName'] for stack in list_stacks_response['StackSummaries']
                   if stack['StackStatus'] not in CFN_INITIAL_STATUS_SKIP]
    next_token = list_stacks_response.get("NextToken", None)
    # If list stacks response is paginated, continue till all stacks are retrieved
    while next_token is not None:
        next_list_stacks_response = client.list_stacks(NextToken=next_token)
        stack_names.extend(stack['StackName'] for stack in next_list_stacks_response['StackSummaries']
                           if stack['StackStatus'] not in CFN_INITIAL_STATUS_SKIP)
        # BUG FIX: capture the token from the *latest* page. The original
        # discarded the .get() result, so next_token never advanced and the
        # loop spun forever whenever results were paginated.
        next_token = next_list_stacks_response.get("NextToken", None)

    stage_stack_names = [name for name in stack_names
                         if re.match(rf".*-{stage_name}-.*|.*-{stage_name}$", name)]
    # Use the module logger for consistency (original called root logging.info).
    logger.info(f"Collected the following stacks to delete: {stage_stack_names}")
    delete_stacks(cfn_client=client, stack_names=stage_stack_names)


def main():
    """CLI entry point: parse the target stage and delete its environment."""
    parser = argparse.ArgumentParser(description="Cleanup an opensearch-migrations deployment environment.")
    # required=True: without it a missing --stage would flow through as None
    # and the stage-name regex would match nothing (or the wrong stacks).
    parser.add_argument("--stage", type=str, required=True,
                        help="The deployment stage environment to delete")
    args = parser.parse_args()

    start_time = time.time()
    delete_stacks_for_environment(args.stage)
    print(f"Total running time: {time.time() - start_time} seconds")


if __name__ == "__main__":
    main()
48 changes: 48 additions & 0 deletions vars/cleanupDeployment.groovy
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
// Shared-library pipeline: deletes every CloudFormation stack for a chosen
// deployment stage by running test/cleanupDeployment/cleanup_deployment.py
// under an assumed AWS deployment role.
def call(Map config = [:]) {

    pipeline {
        // Worker agent label is overridable via config; defaults to the standard single-host agent.
        agent { label config.workerAgent ?: 'Jenkins-Default-Agent-X64-C5xlarge-Single-Host' }

        parameters {
            string(name: 'GIT_REPO_URL', defaultValue: 'https://github.com/opensearch-project/opensearch-migrations.git', description: 'Git repository url')
            string(name: 'GIT_BRANCH', defaultValue: 'main', description: 'Git branch to use for repository')
            string(name: 'STAGE_GROUP', defaultValue: 'rfs-integ', description: 'Deployment stage group name (e.g. rfs-integ)')
            string(name: 'STAGE', description: 'Deployment stage name in group to delete (e.g. rfs-integ1)')
        }

        options {
            // Acquire lock on a given deployment stage so cleanup never races a deployment job.
            lock(label: params.STAGE_GROUP, resource: params.STAGE, quantity: 1, variable: 'stage')
            timeout(time: 1, unit: 'HOURS')
            buildDiscarder(logRotator(daysToKeepStr: '30'))
        }

        stages {
            // Check out the repo that contains the cleanup script at the requested branch.
            stage('Checkout') {
                steps {
                    script {
                        git branch: "${params.GIT_BRANCH}", url: "${params.GIT_REPO_URL}"
                    }
                }
            }

            stage('Cleanup Deployment') {
                // NOTE(review): this stage timeout duplicates the 1-hour pipeline
                // timeout in options above — confirm whether both are intended.
                steps {
                    timeout(time: 1, unit: 'HOURS') {
                        dir('test/cleanupDeployment') {
                            script {
                                sh "sudo --preserve-env pipenv install --deploy"
                                // 'stage' is the lock variable bound in options, i.e. the locked STAGE value.
                                def command = "pipenv run python3 cleanup_deployment.py --stage ${stage}"
                                // Assume the deployment role in the test account before deleting stacks.
                                withCredentials([string(credentialsId: 'migrations-test-account-id', variable: 'MIGRATIONS_TEST_ACCOUNT_ID')]) {
                                    withAWS(role: 'JenkinsDeploymentRole', roleAccount: "${MIGRATIONS_TEST_ACCOUNT_ID}", duration: 3600, roleSessionName: 'jenkins-session') {
                                        sh "sudo --preserve-env ${command}"
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }
}

0 comments on commit 7fdefb1

Please sign in to comment.