-
Notifications
You must be signed in to change notification settings - Fork 1
262 lines (251 loc) · 11.4 KB
/
nightly_aws_operational_procedure_snapshot.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
---
# Creates a cluster (cluster-creation), configures it (cluster-configuration),
# runs the reusable operational procedure workflow once per Helm version
# (operational-procedure) and finally tears everything down (tf-teardown).
name: Nightly AWS EKS Operational Procedure Test Snapshot

on:
  schedule:
    # 02:00 UTC on Monday
    - cron: '0 2 * * 1'
  # NOTE(review): every job in this workflow is gated on
  # `github.event_name == 'schedule' || contains(github.head_ref, 'SNAPSHOT')`.
  # `github.head_ref` is only populated for pull request events, so a manual
  # dispatch looks like it skips all jobs and never uses `helm-versions` -
  # confirm whether that is intended.
  workflow_dispatch:
    inputs:
      helm-versions:
        type: string
        description: The Helm versions to use as comma separated list
  pull_request:
    branches-ignore:
      - 'stable/**'
    # Automatic execution is kept to a minimum for now; a run for a branch can
    # always be started manually via workflow_dispatch.
    paths:
      - '.github/workflows/nightly_aws_operational_procedure_snapshot.yml'
      - '.github/workflows/reuseable_aws_operational_procedure.yml'
      - 'aws/dual-region/kubernetes/**'
      - 'aws/dual-region/terraform/**'
      - 'test/**'

# Only one run of this workflow per ref (branch); a newer run cancels the
# one that is still in progress.
concurrency:
  cancel-in-progress: true
  group: '${{ github.workflow }}-${{ github.ref }}'

# Environment variables shared by all jobs of this workflow.
env:
  AWS_PROFILE: infex
  TESTS_TF_BINARY_NAME: terraform

jobs:
# Generates a random cluster name, exposes it as the `cluster_name` job output
# and runs the Terratest `TestSetupTerraform` test (Terraform init and apply,
# per the step name).
cluster-creation:
  if: ${{ github.event_name == 'schedule' || contains(github.head_ref, 'SNAPSHOT') }}
  runs-on: ubuntu-latest
  timeout-minutes: 60
  outputs:
    cluster_name: ${{ steps.random.outputs.CLUSTER_NAME }}
  steps:
    ################## Checkout ##################
    - name: Checkout repository
      uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4

    ############# Tool Installation ##############
    - name: Setup AWS and Tools
      uses: ./.github/actions/setup-aws
      with:
        secrets: ${{ toJSON(secrets) }}

    ################ Env Helper ###################
    # The name is written to both $GITHUB_ENV (later steps of this job) and
    # $GITHUB_OUTPUT (job output consumed by the downstream jobs).
    - id: random
      name: Generate random cluster_name
      run: |
        suffix="$(head /dev/urandom | tr -dc 'a-z0-9' | head -c 8)"
        echo "CLUSTER_NAME=nightly-${suffix}" | tee -a "$GITHUB_ENV" "$GITHUB_OUTPUT"

    ############# Terraform Apply ################
    - name: Configure Terraform Backend
      run: >-
        .github/workflows/scripts/tf_configure_remote_backend.sh
        ${{ github.workspace }}/aws/dual-region/terraform/config.tf

    # The step timeout is one minute above the `go test` timeout so that the
    # Go test runner gets the chance to time out first.
    - name: Terratest Terraform Init And Apply
      timeout-minutes: 46
      working-directory: ./test
      run: go test --count=1 -v -timeout 45m -run TestSetupTerraform

    # Runs regardless of the outcome of the previous steps.
    - name: Remove profile credentials from ~/.aws/credentials
      if: always()
      run: rm -rf ~/.aws/credentials
# Prepares the freshly created cluster for the operational procedure tests:
# generates the kubeconfig, exports the S3 credentials from the Terraform
# outputs, creates namespaces and secrets and runs the DNS chaining test.
cluster-configuration:
  runs-on: ubuntu-latest
  timeout-minutes: 30
  if: github.event_name == 'schedule' || contains(github.head_ref, 'SNAPSHOT')
  needs:
    - cluster-creation
  steps:
    ################## Checkout ##################
    - name: Checkout repository
      uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4

    ############# Tool Installation ##############
    - name: Setup AWS and Tools
      uses: ./.github/actions/setup-aws
      with:
        secrets: ${{ toJSON(secrets) }}

    ########### KubeConfig Generation ############
    # Make the cluster name produced by `cluster-creation` available to all
    # following steps as $CLUSTER_NAME.
    - name: Export Cluster Name
      run: |
        echo "CLUSTER_NAME=${{ needs.cluster-creation.outputs.cluster_name }}" >> "$GITHUB_ENV"

    - name: KubeConfig generation
      working-directory: ./test
      timeout-minutes: 5
      run: |
        go test --count=1 -v -timeout 4m -run TestAWSKubeConfigCreation

    ########### Parse GHA for versions ###########
    - name: Parse GHA for namespace setup
      run: .github/workflows/scripts/c8_namespace_parser.sh ${{ github.workspace }}/.github/workflows/nightly_aws_operational_procedure_snapshot.yml

    ############ Export S3 credentials ############
    - name: Configure Terraform Backend
      run: |
        .github/workflows/scripts/tf_configure_remote_backend.sh ${{ github.workspace }}/aws/dual-region/terraform/config.tf

    # Each Terraform output is read exactly once into a variable. A failing
    # `terraform output` therefore aborts the step instead of silently
    # exporting an empty credential.
    - name: Get S3 credentials
      id: s3-credentials
      working-directory: ./aws/dual-region/terraform
      run: |
        terraform init
        access_key="$(terraform output -raw s3_aws_access_key)"
        secret_key="$(terraform output -raw s3_aws_secret_access_key)"
        # adding mask to treat the value as secret
        echo "::add-mask::${access_key}"
        echo "::add-mask::${secret_key}"
        {
          echo "S3_AWS_ACCESS_KEY=${access_key}"
          echo "S3_AWS_SECRET_KEY=${secret_key}"
        } >> "$GITHUB_OUTPUT"

    - name: Create all required namespaces and secrets
      timeout-minutes: 10
      working-directory: ./test
      env:
        S3_AWS_ACCESS_KEY: ${{ steps.s3-credentials.outputs.S3_AWS_ACCESS_KEY }}
        S3_AWS_SECRET_KEY: ${{ steps.s3-credentials.outputs.S3_AWS_SECRET_KEY }}
      run: |
        go test --count=1 -v -timeout 9m -run TestClusterPrerequisites

    ########### Namespace and DNS setup #########
    # The `go test` timeout is kept one minute below the step timeout, as in
    # every other step of this workflow, so that the Go test runner times out
    # first instead of the step being killed by the runner.
    - name: Do the DNS chaining for all required namespaces
      working-directory: ./test
      timeout-minutes: 15
      env:
        # Pick a known namespace for cross cluster testing
        CLUSTER_0_NAMESPACE: snapshot-cluster-0
        CLUSTER_0_NAMESPACE_FAILOVER: snapshot-cluster-0-failover
        CLUSTER_1_NAMESPACE: snapshot-cluster-1
        CLUSTER_1_NAMESPACE_FAILOVER: snapshot-cluster-1-failover
      run: |
        go test --count=1 -v -timeout 14m -run TestAWSDNSChaining

    # The two cleanup steps below run regardless of earlier failures.
    - name: KubeConfig Removal
      working-directory: ./test
      if: always()
      timeout-minutes: 5
      run: |
        go test --count=1 -v -timeout 4m -run TestAWSKubeConfigRemoval

    - name: Remove profile credentials from ~/.aws/credentials
      if: always()
      run: |
        rm -rf ~/.aws/credentials
# Builds the JSON matrix of Helm versions consumed by `operational-procedure`.
# Without a `helm-versions` input the matrix contains only `SNAPSHOT`;
# otherwise the comma separated input is turned into a JSON string array.
dynamic-matrix:
  runs-on: ubuntu-latest
  if: github.event_name == 'schedule' || contains(github.head_ref, 'SNAPSHOT')
  outputs:
    matrix: ${{ steps.generate-matrix.outputs.matrix }}
  steps:
    - name: Generate matrix
      id: generate-matrix
      env:
        # The user supplied input is handed over as an environment variable
        # instead of being interpolated into the script text, so its content
        # can neither break the shell quoting nor inject commands.
        HELM_VERSIONS: ${{ inputs.helm-versions }}
      run: |
        c87=SNAPSHOT
        if [ -z "${HELM_VERSIONS}" ]; then
          versions='{"helm-version":["'${c87}'"]}'
        else
          # Trim blanks around every entry and wrap it in double quotes
          # unless it is already quoted.
          output=$(printf '%s\n' "${HELM_VERSIONS}" | awk -F, '{
            for (i = 1; i <= NF; i++) {
              item = $i
              gsub(/^[ \t]+|[ \t]+$/, "", item)
              sep = (i == NF) ? "" : ","
              if (item ~ /^".*"$/) {
                printf("%s%s", item, sep)
              } else {
                printf("\"%s\"%s", item, sep)
              }
            }
          }')
          versions='{"helm-version":['"${output}"']}'
        fi
        echo "${versions}"
        echo "matrix=${versions}" >> "$GITHUB_OUTPUT"
# Calls the reusable operational procedure workflow once per Helm version of
# the matrix generated by `dynamic-matrix`, against the cluster created by
# `cluster-creation`.
operational-procedure:
  needs:
    - cluster-creation
    - cluster-configuration
    - dynamic-matrix
  if: ${{ github.event_name == 'schedule' || contains(github.head_ref, 'SNAPSHOT') }}
  uses: ./.github/workflows/reuseable_aws_operational_procedure.yml
  secrets: inherit
  with:
    cluster-name: ${{ needs.cluster-creation.outputs.cluster_name }}
    helm-version: ${{ matrix.helm-version }}
  strategy:
    fail-fast: false
    matrix:
      helm-version: ${{ fromJson(needs.dynamic-matrix.outputs.matrix).helm-version }}
      # SNAPSHOT is unstable by nature, so it is left out of pull request
      # runs unless the head ref explicitly mentions SNAPSHOT.
      isPR:
        - ${{ github.event_name == 'pull_request' && !contains(github.head_ref, 'SNAPSHOT') }}
      exclude:
        - isPR: true
          helm-version: SNAPSHOT
# Tears the test environment down again. Because of `always()` the job also
# runs when one of the jobs it depends on failed.
tf-teardown:
  runs-on: ubuntu-latest
  timeout-minutes: 60
  needs:
    - operational-procedure
    - cluster-creation
  if: (github.event_name == 'schedule' || contains(github.head_ref, 'SNAPSHOT')) && always()
  steps:
    ################## Checkout ##################
    - name: Checkout repository
      uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4

    ############# Tool Installation ##############
    - name: Setup AWS and Tools
      uses: ./.github/actions/setup-aws
      with:
        secrets: ${{ toJSON(secrets) }}

    ########### KubeConfig Generation ############
    # Make the cluster name produced by `cluster-creation` available to all
    # following steps as $CLUSTER_NAME.
    - name: Export Cluster Name
      run: |
        echo "CLUSTER_NAME=${{ needs.cluster-creation.outputs.cluster_name }}" >> "$GITHUB_ENV"

    - name: KubeConfig generation
      working-directory: ./test
      timeout-minutes: 5
      run: |
        go test --count=1 -v -timeout 4m -run TestAWSKubeConfigCreation

    ########### Load Balancer Removal ############
    - name: Delete LBs to unblock teardown
      working-directory: ./test
      timeout-minutes: 5
      run: |
        go test --count=1 -v -timeout 4m -run TestClusterCleanup

    ############# Terratest Teardown #############
    # Must run whenever `Terraform Destroy` runs: the destroy step is marked
    # `always()`, so the backend has to be configured even if the kubeconfig
    # generation or the load balancer cleanup above failed.
    # NOTE(review): presumably the destroy cannot reach the remote state
    # without this script - confirm against tf_configure_remote_backend.sh.
    - name: Configure Terraform Backend
      if: always()
      run: |
        .github/workflows/scripts/tf_configure_remote_backend.sh ${{ github.workspace }}/aws/dual-region/terraform/config.tf

    # The step timeout is one minute above the `go test` timeout so that the
    # Go test runner gets the chance to time out first.
    - name: Terraform Destroy
      id: terraform-destroy
      working-directory: ./test
      if: always()
      timeout-minutes: 46
      run: |
        go test --count=1 -v -timeout 45m -run TestTeardownTerraform

    - name: KubeConfig Removal
      working-directory: ./test
      if: always()
      timeout-minutes: 5
      run: |
        go test --count=1 -v -timeout 4m -run TestAWSKubeConfigRemoval

    # The state file is only deleted after a successful destroy.
    - name: Cleanup S3 state bucket
      if: always() && steps.terraform-destroy.outcome == 'success'
      run: |
        aws s3 rm "s3://tf-state-multi-reg/state/$CLUSTER_NAME/terraform.tfstate"

    - name: Remove profile credentials from ~/.aws/credentials
      if: always()
      run: |
        rm -rf ~/.aws/credentials
# Reports to Slack when one of the jobs this job depends on has failed
# (`failure()`), for scheduled runs and for SNAPSHOT head refs only.
notify-on-failure:
  needs:
    - cluster-creation
    - cluster-configuration
    - operational-procedure
    - tf-teardown
  if: ${{ (github.event_name == 'schedule' || contains(github.head_ref, 'SNAPSHOT')) && failure() }}
  runs-on: ubuntu-latest
  steps:
    - id: slack-notification
      name: Notify in Slack in case of failure
      uses: camunda/infraex-common-config/.github/actions/report-failure-on-slack@d809dbc4cd95c04cb3a00fc043aa31ef6b590515 # 1.2.7
      with:
        vault_addr: ${{ secrets.VAULT_ADDR }}
        vault_role_id: ${{ secrets.VAULT_ROLE_ID }}
        vault_secret_id: ${{ secrets.VAULT_SECRET_ID }}