Skip to content

Commit

Permalink
Implement etcd lifecycle management for vineyardd (#1932)
Browse files Browse the repository at this point in the history
Fixes #1922

Signed-off-by: Ye Cao <[email protected]>
  • Loading branch information
dashanji authored Jul 18, 2024
1 parent 0f8e617 commit 781e4d1
Show file tree
Hide file tree
Showing 25 changed files with 1,385 additions and 116 deletions.
20 changes: 20 additions & 0 deletions .github/workflows/vineyard-operator.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ jobs:
- e2e-tests-mars-examples
- e2e-tests-vineyardctl
- e2e-tests-deploy-raw-backup-and-recover
- e2e-tests-three-etcd-nodes-failover
- e2e-tests-five-etcd-nodes-failover
steps:
- uses: actions/checkout@v3
with:
Expand Down Expand Up @@ -185,6 +187,8 @@ jobs:
e2e-tests-mars-examples \
e2e-tests-vineyardctl \
e2e-tests-deploy-raw-backup-and-recover \
e2e-tests-three-etcd-nodes-failover \
e2e-tests-five-etcd-nodes-failover \
unit-tests; do
mkdir -p $job-logs
chmod a+rwx $job-logs
Expand Down Expand Up @@ -280,6 +284,16 @@ jobs:
run: |
make -C k8s/test/e2e e2e-tests-deploy-raw-backup-and-recover
- name: e2e-tests-three-etcd-nodes-failover
if: ${{ matrix.job == 'e2e-tests-three-etcd-nodes-failover' }}
run: |
make -C k8s/test/e2e e2e-tests-three-etcd-nodes-failover
- name: e2e-tests-five-etcd-nodes-failover
if: ${{ matrix.job == 'e2e-tests-five-etcd-nodes-failover' }}
run: |
make -C k8s/test/e2e e2e-tests-five-etcd-nodes-failover
- name: Stop to export kubernetes logs
uses: dashanji/kubernetes-log-export-action@v6
if: ${{ failure() }}
Expand Down Expand Up @@ -346,6 +360,12 @@ jobs:
e2e-tests-airflow-integration)
signed_url_key=2e12
;;
e2e-tests-three-etcd-nodes-failover)
signed_url_key=5c91
;;
e2e-tests-five-etcd-nodes-failover)
signed_url_key=7793
;;
esac
echo "::notice:: ${{ matrix.job }}: https://www.stoat.dev/file-viewer?root=https://v6d-io--v6d--$short_sha--e2e-tests-$signed_url_key.stoat.page"
10 changes: 10 additions & 0 deletions .stoat/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -77,4 +77,14 @@ plugins:
metadata:
name: "schedule-workload-logs"
path: e2e-tests-schedule-workload-logs
file_viewer: true
e2e-tests-three-etcd-nodes-failover-logs:
metadata:
name: "three-etcd-nodes-failover-logs"
path: e2e-tests-three-etcd-nodes-failover-logs
file_viewer: true
e2e-tests-five-etcd-nodes-failover-logs:
metadata:
name: "five-etcd-nodes-failover-logs"
path: e2e-tests-five-etcd-nodes-failover-logs
file_viewer: true
2 changes: 2 additions & 0 deletions docker/Dockerfile.vineyardd
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ RUN export arch="$PLATFORM" && \
curl -LO https://github.com/etcd-io/etcd/releases/download/v3.5.9/etcd-v3.5.9-linux-$arch.tar.gz && \
tar zxf etcd-v3.5.9-linux-$arch.tar.gz && \
mv /tmp/etcd-v3.5.9-linux-$arch/etcd /usr/bin/etcd && \
mv /tmp/etcd-v3.5.9-linux-$arch/etcdctl /usr/bin/etcdctl && \
curl -LO https://dl.k8s.io/release/v1.24.0/bin/linux/$arch/kubectl && \
chmod +x kubectl && \
mv /tmp/kubectl /usr/bin/kubectl
Expand Down Expand Up @@ -81,6 +82,7 @@ SHELL ["/bin/bash", "-c"]
COPY --from=builder /usr/bin/bash-linux /bin/bash
COPY --from=builder /usr/bin/dumb-init /usr/bin/dumb-init
COPY --from=builder /usr/bin/etcd /usr/bin/etcd
COPY --from=builder /usr/bin/etcdctl /usr/bin/etcdctl
COPY --from=builder /usr/bin/kubectl /usr/bin/kubectl
COPY --from=builder /work/v6d/build/bin/vineyardd /usr/local/bin/vineyardd
RUN ln -s /busybox/env /usr/bin/env
Expand Down
15 changes: 15 additions & 0 deletions k8s/test/e2e/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,21 @@ e2e-tests-failover: prepare-e2e-test install-vineyard-cluster
@make delete-local-cluster
.PHONY: e2e-tests-failover

############# etcd failover testing #############################################

# Runs the three-etcd-nodes failover e2e scenario described by
# etcd-failover/three-etcd-nodes-failover-e2e.yaml, after the prepare-e2e-test,
# build-local-cluster and load-vineyardd-image prerequisites have completed.
# The local cluster is deleted only when the e2e run succeeds, because make
# stops at the first failing recipe line.
e2e-tests-three-etcd-nodes-failover: prepare-e2e-test build-local-cluster load-vineyardd-image
	@echo "Running three etcd nodes failover e2e test..."
	@cd ${ROOT_DIR} && ${GOBIN}/e2e run --config=${E2E_DIR}/etcd-failover/three-etcd-nodes-failover-e2e.yaml
	@echo "three etcd nodes failover e2e test passed."
	@make delete-local-cluster
# Declared phony (like e2e-tests-failover) so a file or directory with the
# same name can never make this target look up to date.
.PHONY: e2e-tests-three-etcd-nodes-failover

# Runs the five-etcd-nodes failover e2e scenario described by
# etcd-failover/five-etcd-nodes-failover-e2e.yaml, after the prepare-e2e-test,
# build-local-cluster and load-vineyardd-image prerequisites have completed.
# The local cluster is deleted only when the e2e run succeeds, because make
# stops at the first failing recipe line.
e2e-tests-five-etcd-nodes-failover: prepare-e2e-test build-local-cluster load-vineyardd-image
	@echo "Running five etcd nodes failover e2e test..."
	@cd ${ROOT_DIR} && ${GOBIN}/e2e run --config=${E2E_DIR}/etcd-failover/five-etcd-nodes-failover-e2e.yaml
	@echo "five etcd nodes failover e2e test passed."
	@make delete-local-cluster
# Declared phony (like e2e-tests-failover) so a file or directory with the
# same name can never make this target look up to date.
.PHONY: e2e-tests-five-etcd-nodes-failover


############# dask repartition testing #############################################

repartition-images: dask-repartition dask-repartition-job1 dask-repartition-job2 dask-worker-with-vineyard
Expand Down
28 changes: 28 additions & 0 deletions k8s/test/e2e/etcd-failover/consumer.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Copyright 2020-2023 Alibaba Group Holding Limited.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Consumer Job for the etcd failover e2e tests.
# It connects to vineyardd through the vineyardd-svc service, looks up the
# name "test_data" and prints the result as the last line of its log.
apiVersion: batch/v1
kind: Job
metadata:
  name: consumer
spec:
  parallelism: 1
  template:
    metadata:
      labels:
        # the e2e verify query selects the pod with `-l app=consumer`
        app: consumer
    spec:
      restartPolicy: Never
      containers:
        - name: consumer
          image: python:3.10
          # The script is written with a quoted heredoc delimiter so the shell
          # never expands `$` or backticks inside the Python source, and with
          # `>` so the file always holds exactly one copy of the script.
          command:
            - bash
            - -c
            - |
              pip install vineyard numpy pandas --index-url https://pypi.tuna.tsinghua.edu.cn/simple;
              cat << 'EOF' > consumer.py
              import vineyard
              client = vineyard.connect(host="vineyardd-svc.default.svc.cluster.local",port=9600)
              obj_id = client.get_name("test_data")
              print(obj_id)
              client.close()
              EOF
              python consumer.py;
68 changes: 68 additions & 0 deletions k8s/test/e2e/etcd-failover/five-etcd-nodes-failover-e2e.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# Copyright 2020-2023 Alibaba Group Holding Limited.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# e2e scenario: store a named object, force-delete two of the vineyardd pods
# with ordinals 0-4 five times in a row, then check that a consumer can still
# resolve the name.
setup:
  env: kind
  kubeconfig: /tmp/e2e-k8s.config
  steps:
    - name: deploy vineyardd
      command: |
        kubectl apply -f k8s/test/e2e/etcd-failover/vineyardd.yaml
        kubectl scale statefulsets vineyardd --replicas=7
        kubectl rollout status statefulset/vineyardd
    - name: deploy producer
      command: |
        kubectl apply -f k8s/test/e2e/etcd-failover/producer.yaml
        kubectl wait --for=condition=complete job/producer --timeout=300s
    # Only ordinals 0-4 are candidates; per the step name these are the pods
    # that have launched etcd.
    - name: random delete two different pods with launched etcd for 5 times
      command: |
        for i in {1..5}; do
          # shuf never repeats a value unless --repeat is given, so -n 2
          # always yields two different ordinals
          for ordinal in $(shuf -i 0-4 -n 2); do
            kubectl delete pod "vineyardd-$ordinal" -n default --force
          done
          # wait for the instance quit messages to be propagated
          sleep 240
          kubectl rollout status statefulset/vineyardd
        done
    - name: install consumer
      command: |
        kubectl apply -f k8s/test/e2e/etcd-failover/consumer.yaml
        kubectl wait --for=condition=complete job/consumer --timeout=300s
  timeout: 60m

cleanup:
  # when to clean up the environment; one of: always, never, success, failure
  on: success

verify:
  # verify with retry strategy
  retry:
    # max retry count
    count: 10
    # the interval between two attempts, e.g. 10s, 1m.
    interval: 10s
  cases:
    # first field of the last log line of the first consumer pod, compared
    # against the expectation in ../verify/object-id.yaml
    - query: |
        kubectl get pod -l app=consumer -oname | \
        awk -F '/' '{print $2}' | \
        head -n 1 | \
        xargs kubectl logs | \
        tail -n 1 | \
        awk '{print $1}'
      expected: ../verify/object-id.yaml
45 changes: 45 additions & 0 deletions k8s/test/e2e/etcd-failover/producer.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Copyright 2020-2023 Alibaba Group Holding Limited.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Producer Job for the etcd failover e2e tests.
# It connects to vineyardd through the vineyardd-svc service and puts a
# 1000x1000 array of ones, persisted, under the name "test_data".
apiVersion: batch/v1
kind: Job
metadata:
  name: producer
spec:
  parallelism: 1
  template:
    metadata:
      labels:
        app: producer
    spec:
      restartPolicy: Never
      containers:
        - name: producer
          image: python:3.10
          # The script is written with a quoted heredoc delimiter so the shell
          # never expands `$` or backticks inside the Python source, and with
          # `>` so the file always holds exactly one copy of the script.
          # The unused pandas import and the stray trailing `;` were dropped
          # from the Python source; the pip install line is unchanged.
          command:
            - bash
            - -c
            - |
              pip install vineyard numpy pandas --index-url https://pypi.tuna.tsinghua.edu.cn/simple;
              cat << 'EOF' > producer.py
              import vineyard
              import numpy as np
              client = vineyard.connect(host="vineyardd-svc.default.svc.cluster.local",port=9600)
              data = np.ones((1000, 1000))
              client.put(data, persist=True, name="test_data")
              client.close()
              EOF
              python producer.py;
63 changes: 63 additions & 0 deletions k8s/test/e2e/etcd-failover/three-etcd-nodes-failover-e2e.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# Copyright 2020-2023 Alibaba Group Holding Limited.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# e2e scenario: store a named object, force-delete one randomly chosen
# vineyardd pod (ordinal 0-2) five times in a row, then check that a consumer
# can still resolve the name.
setup:
  env: kind
  kubeconfig: /tmp/e2e-k8s.config
  steps:
    # bring up vineyardd with three replicas and wait for the rollout
    - name: deploy vineyardd
      command: |
        kubectl apply -f k8s/test/e2e/etcd-failover/vineyardd.yaml
        kubectl scale statefulsets vineyardd --replicas=3
        kubectl rollout status statefulset/vineyardd
    # run the producer job to completion (at most 300s)
    - name: deploy producer
      command: |
        kubectl apply -f k8s/test/e2e/etcd-failover/producer.yaml
        kubectl wait --for=condition=complete job/producer --timeout=300s
    # each round: delete one pod, wait for the rollout, sleep, wait again
    - name: random delete one pod for 5 times
      command: |
        for i in {1..5}; do
          kubectl delete pod vineyardd-$(shuf -i 0-2 -n 1) -n default --force
          kubectl rollout status statefulset/vineyardd
          # wait for the instance quit messages to be propagated
          sleep 60
          kubectl rollout status statefulset/vineyardd
        done
    # run the consumer job to completion (at most 300s)
    - name: install consumer
      command: |
        kubectl apply -f k8s/test/e2e/etcd-failover/consumer.yaml
        kubectl wait --for=condition=complete job/consumer --timeout=300s
  timeout: 60m

cleanup:
  # accepted values: always, never, success, failure
  on: success

verify:
  # the cases below are retried on failure
  retry:
    # maximum number of attempts
    count: 10
    # pause between two attempts, e.g. 10s, 1m.
    interval: 10s
  cases:
    # first field of the last log line of the first consumer pod, compared
    # against the expectation in ../verify/object-id.yaml
    - query: |
        kubectl get pod -l app=consumer -oname | \
        awk -F '/' '{print $2}' | \
        head -n 1 | \
        xargs kubectl logs | \
        tail -n 1 | \
        awk '{print $1}'
      expected: ../verify/object-id.yaml
Loading

0 comments on commit 781e4d1

Please sign in to comment.