e2e: add fuzz test sources for hbm/dram/pmem pods

Adds a fuzz test generator script, model, and runner for topology-aware policy reliability tests on an HBM+DRAM+PMEM platform.

Signed-off-by: Antti Kervinen <[email protected]>
containers · Oct 2, 2024 · bbdd080 · bbdd080
1 parent e938c21
commit bbdd080
Show file tree

Hide file tree

Showing 5 changed files with 287 additions and 0 deletions.
diff --git a/test/e2e/policies.test-suite/topology-aware/n6-hbm-cxl/test02-fuzz-memallocs/code.var.sh b/test/e2e/policies.test-suite/topology-aware/n6-hbm-cxl/test02-fuzz-memallocs/code.var.sh
@@ -0,0 +1,53 @@
+# Stop the running deployment and launch the topology-aware policy with the
+# Helm configuration instantiated from helm-config.yaml.
+helm-terminate
+helm_config=$(instantiate helm-config.yaml) helm-launch topology-aware
+
+# Import the container helper functions used by the generated test scripts.
+# TEST_DIR is quoted, like in the generated*.sh loop below, so that a path
+# with whitespace or glob characters is not split or expanded.
+source "$TEST_DIR/codelib.sh" || {
+    echo "error importing codelib.sh"
+    exit 1
+}
+
+# Clean test pods from the kube-system namespace
+cleanup-test-pods() {
+    ( vm-command "kubectl delete pods -n kube-system \$(kubectl get pods -n kube-system | awk '/t[0-9]r[gb][ue]/{print \$1}')" ) || true
+    ( vm-command "kubectl delete pods -n default \$(kubectl get pods -n default | awk '/t[0-9][rgb][ue][0-9]/{print \$1}')" ) || true
+}
+cleanup-test-pods
+
+# Source every generated*.sh script of this directory in a background
+# subshell of its own, so that the generated tests run in parallel.
+genscriptcount=0
+for genscript in "$TEST_DIR"/generated*.sh; do
+    # An unmatched glob stays a literal pattern: skip anything that is not a file.
+    [ -f "$genscript" ] || continue
+    (
+        # Every script gets a private output directory, recreated from scratch.
+        paralleloutdir="$outdir/parallel$genscriptcount"
+        if [ -d "$paralleloutdir" ]; then
+            rm -rf "$paralleloutdir"
+        fi
+        mkdir "$paralleloutdir"
+        OUTPUT_DIR="$paralleloutdir"
+        COMMAND_OUTPUT_DIR="$OUTPUT_DIR/commands"
+        mkdir "$COMMAND_OUTPUT_DIR"
+        # Prefix each output line with the script name to tell apart the
+        # interleaved output of the parallel scripts.
+        source "$genscript" 2>&1 | sed -u -e "s/^/$(basename "$genscript"): /g"
+    ) &
+    genscriptcount=$(( genscriptcount + 1 ))
+done
+
+if [[ "$genscriptcount" == "0" ]]; then
+    echo "WARNING:"
+    echo "WARNING: Skipping fuzz tests:"
+    echo "WARNING: - Generated tests not found."
+    echo "WARNING: - Generate a test by running:"
+    echo "WARNING:   $TEST_DIR/generate.sh"
+    echo "WARNING: - See test generation options:"
+    echo "WARNING:   $TEST_DIR/generate.sh --help"
+    echo "WARNING:"
+    sleep 5
+    exit 0
+fi
+
+echo "waiting for $genscriptcount generated tests to finish..."
+wait
+
+cleanup-test-pods
+
+# Restore default test configuration, restart nri-resource-policy.
+helm-terminate
diff --git a/test/e2e/policies.test-suite/topology-aware/n6-hbm-cxl/test02-fuzz-memallocs/codelib.sh b/test/e2e/policies.test-suite/topology-aware/n6-hbm-cxl/test02-fuzz-memallocs/codelib.sh
@@ -0,0 +1,12 @@
+# container-exit0 CONTNAME
+#
+# Terminate a container by killing the "sleep inf" child process in
+# echo CONTNAME $(sleep inf)
+#
+# The command runs on the VM through vm-command. The PID is the first column
+# of the last line that remains from `ps axf | grep -A1 'echo CONTNAME'`
+# after lines containing "grep" are dropped. The kill is traced with set -x.
+container-exit0() {
+    local contname
+    contname="$1"
+    # Pipeline that prints the PID; \$1 stays literal for awk on the VM.
+    local find_pid="ps axf | grep -A1 'echo $contname' | grep -v grep | tail -n 1 | awk '{print \$1}'"
+    vm-command "contpid=\$($find_pid); ( set -x; kill -KILL \$contpid; )"
+}
+
+# container-signal CONTNAME SIGNAL
+#
+# On the VM, send SIGNAL with pkill to every process whose full command line
+# matches 'echo CONTNAME'.
+container-signal() {
+    local contname signal
+    contname="$1"
+    signal="$2"
+    local pkill_cmd="pkill -$signal -f 'echo $contname'"
+    vm-command "$pkill_cmd"
+}
diff --git a/test/e2e/policies.test-suite/topology-aware/n6-hbm-cxl/test02-fuzz-memallocs/fuzz.aal b/test/e2e/policies.test-suite/topology-aware/n6-hbm-cxl/test02-fuzz-memallocs/fuzz.aal
@@ -0,0 +1,153 @@
+language python {
+    # Resource budgets of the model: the guards of the create inputs keep the
+    # total requests of existing pods strictly below these limits.
+    max_mem=13500  # maximum memory on VM in MB ## NOTE: generate.sh will overwrite this
+    max_cpu=7000 # maximum CPUs on node in mCPU ## NOTE: generate.sh will overwrite this
+    max_reserved_cpu=1000 # maximum reserved CPUs on node in mCPU ## NOTE: generate.sh will overwrite this
+    class Vars:
+        # namespace for variables in input names
+        def __repr__(self):
+            # Render the public attributes as {name:value,...} sorted by name.
+            return "{" + ",".join("%s:%s" % (a, getattr(self, a)) for a in sorted(self.__dict__.keys()) if not a.startswith("_")) + "}\n"
+    def inputvars(input_name):
+        # parse VAR=VALUE's from input_name
+        #
+        # Returns a Vars object with one attribute per VAR=VALUE word; words
+        # that do not contain exactly one "=" are ignored. A trailing "m" or
+        # "M" unit that directly follows a digit is dropped (CPU=200m -> 200,
+        # MEM=1500M -> 1500). Values that parse as integers are stored as
+        # int, all other values (for example MEMTYPE=hbm) as strings.
+        v = Vars()
+        for word in input_name.split():
+            keyvalue = word.split("=")
+            if len(keyvalue) == 2:
+                if (keyvalue[1].endswith("m") or keyvalue[1].endswith("M")) and len(keyvalue[1]) > 1 and keyvalue[1][-2] in '0123456789':
+                    keyvalue[1] = keyvalue[1][:-1]
+                try:
+                    setattr(v, keyvalue[0], int(keyvalue[1]))
+                except ValueError:
+                    # Not an integer: keep the value as a string. Only
+                    # ValueError is expected from int() on a string, so other
+                    # exceptions (e.g. KeyboardInterrupt) are not swallowed.
+                    setattr(v, keyvalue[0], keyvalue[1])
+        return v
+}
+
+# Model state, updated by the bodies of the create and delete inputs:
+# - mem:          sum of MEM * CONTCOUNT over existing pods
+# - cpu:          sum of CPU * CONTCOUNT over existing pods that are not in
+#                 the kube-system namespace
+# - reserved_cpu: sum of CPU * CONTCOUNT over existing kube-system pods
+# - pods:         maps the NAME of an existing pod to the variables parsed
+#                 from its create input
+variables {
+    mem, cpu, reserved_cpu, pods
+}
+
+initial_state {
+    mem=0
+    cpu=0
+    reserved_cpu=0
+    pods={}
+}
+
+# Create non-reserved CPU pods
+#
+# Every input name carries the pod parameters as VAR=VALUE words, followed by
+# the action text ("create guaranteed", "create burstable", ...).
+# - guard: the pod NAME must not exist yet, and adding CONTCOUNT containers
+#   with MEM and CPU each must keep the totals strictly below max_mem and
+#   max_cpu.
+# - body: adds the pod's memory and CPU to the totals and records the pod,
+#   with namespace "default" unless the input sets another one.
+input
+    "NAME=gu0 CONTCOUNT=1 CPU=200m MEM=1500M create guaranteed",
+    "NAME=gu1 CONTCOUNT=2 CPU=1000m MEM=500M create guaranteed",
+    "NAME=gu1hbm CONTCOUNT=2 CPU=1000m MEM=500M MEMTYPE=hbm create guaranteed",
+    "NAME=gu2 CONTCOUNT=2 CPU=1200m MEM=4500M create guaranteed",
+    "NAME=gu2pmem CONTCOUNT=2 CPU=1200m MEM=4500M MEMTYPE=pmem create guaranteed",
+    "NAME=gu3 CONTCOUNT=3 CPU=2000m MEM=500M create guaranteed",
+    "NAME=gu3dram CONTCOUNT=3 CPU=2000m MEM=500M MEMTYPE=dram create guaranteed",
+    "NAME=gu4 CONTCOUNT=1 CPU=4200m MEM=100M create guaranteed",
+    "NAME=bu0 CONTCOUNT=1 CPU=1200m MEM=50M CPUREQ=900m MEMREQ=49M CPULIM=1200m MEMLIM=50M create burstable",
+    "NAME=bu0hbmpmem CONTCOUNT=1 CPU=1200m MEM=50M CPUREQ=900m MEMREQ=49M CPULIM=1200m MEMLIM=50M MEMTYPE=hbm,pmem create burstable",
+    "NAME=bu1 CONTCOUNT=2 CPU=1900m MEM=300M CPUREQ=1800m MEMREQ=299M CPULIM=1900m MEMLIM=300M create burstable",
+    "NAME=bu1hbmdram CONTCOUNT=2 CPU=1900m MEM=300M CPUREQ=1800m MEMREQ=299M CPULIM=1900m MEMLIM=300M MEMTYPE=hbm,dram create burstable",
+    "NAME=be0 CONTCOUNT=1 CPU=0 MEM=0 create besteffort",
+    "NAME=be1 CONTCOUNT=3 CPU=0 MEM=0 create besteffort"
+{
+    guard {
+        v = inputvars(input_name)
+        return (v.NAME not in pods
+                and (mem + v.MEM * v.CONTCOUNT < max_mem)
+                and (cpu + v.CPU * v.CONTCOUNT < max_cpu))
+    }
+    body {
+        v = inputvars(input_name)
+        v.namespace = getattr(v, "namespace", "default")
+        mem += v.MEM * v.CONTCOUNT
+        cpu += v.CPU * v.CONTCOUNT
+        pods[v.NAME] = v
+    }
+}
+
+# Create pods to the kube-system namespace
+#
+# Same scheme as for the non-reserved pods, except that the CPU of these pods
+# is accounted in reserved_cpu and limited by max_reserved_cpu. Memory shares
+# the same mem total and max_mem limit with all other pods. The namespace is
+# taken from the namespace=kube-system word of the input name.
+input
+    "NAME=rgu0 CONTCOUNT=2 CPU=100m MEM=1000M namespace=kube-system create guaranteed",
+    "NAME=rgu0pmem CONTCOUNT=2 CPU=100m MEM=1000M namespace=kube-system MEMTYPE=pmem create guaranteed",
+    "NAME=rbu0 CONTCOUNT=1 CPU=100m MEM=100M CPUREQ=99m MEMREQ=99M CPULIM=100m MEMLIM=100M namespace=kube-system create burstable",
+    "NAME=rbe0 CONTCOUNT=2 CPU=0 MEM=0 namespace=kube-system create besteffort"
+{
+    guard {
+        v = inputvars(input_name)
+        return (v.NAME not in pods
+                and (mem + v.MEM * v.CONTCOUNT < max_mem)
+                and (reserved_cpu + v.CPU * v.CONTCOUNT < max_reserved_cpu))
+
+    }
+    body {
+        v = inputvars(input_name)
+        mem += v.MEM * v.CONTCOUNT
+        reserved_cpu += v.CPU * v.CONTCOUNT
+        pods[v.NAME] = v
+    }
+}
+
+# Kill a process in a container
+# - "echo gu0c1" matches and kills process only in container gu0c1 in pod gu0
+# - "echo gu0" matches and kills processes in all containers of pod gu0
+#
+# The guard enables an action only while the pod given in NAME exists, so the
+# container named in the action must belong to that pod. The be1 action used
+# to name container be0c0 of pod be0, which may not exist while be1 does; it
+# now names be1c0.
+# NOTE(review): "gu3" and "gu4c" are name prefixes rather than full container
+# names; "gu3" presumably also matches the containers of pod gu3dram. Confirm
+# that this is intended.
+input
+    "NAME=gu0 container-exit0 gu0c0",
+    "NAME=gu1 container-exit0 gu1c0",
+    "NAME=gu1hbm container-exit0 gu1hbmc0",
+    "NAME=gu2 container-exit0 gu2c0",
+    "NAME=gu2pmem container-exit0 gu2pmemc0",
+    "NAME=gu3 container-exit0 gu3",
+    "NAME=gu3dram container-exit0 gu3dramc0",
+    "NAME=gu4 container-exit0 gu4c",
+    "NAME=bu0 container-exit0 bu0c0",
+    "NAME=bu0hbmpmem container-exit0 bu0hbmpmemc0",
+    "NAME=bu1 container-exit0 bu1c0",
+    "NAME=bu1hbmdram container-exit0 bu1hbmdramc0",
+    "NAME=be0 container-exit0 be0c0",
+    "NAME=be1 container-exit0 be1c0",
+    "NAME=rgu0 container-exit0 rgu0c0",
+    "NAME=rgu0pmem container-exit0 rgu0pmemc0",
+    "NAME=rbu0 container-exit0 rbu0c0",
+    "NAME=rbe0 container-exit0 rbe0c0"
+{
+    guard {
+        v = inputvars(input_name)
+        return v.NAME in pods
+    }
+}
+
+# Delete single pod
+#
+# - guard: the pod given in NAME must exist in the model.
+# - body: releases the memory of the pod from mem and its CPU from
+#   reserved_cpu (kube-system pods) or cpu (all other pods), using the
+#   values stored when the pod was created, then forgets the pod.
+input
+    "NAME=gu0 vm-command 'kubectl delete pod gu0 --now'",
+    "NAME=gu1 vm-command 'kubectl delete pod gu1 --now'",
+    "NAME=gu1hbm vm-command 'kubectl delete pod gu1hbm --now'",
+    "NAME=gu2 vm-command 'kubectl delete pod gu2 --now'",
+    "NAME=gu2pmem vm-command 'kubectl delete pod gu2pmem --now'",
+    "NAME=gu3 vm-command 'kubectl delete pod gu3 --now'",
+    "NAME=gu3dram vm-command 'kubectl delete pod gu3dram --now'",
+    "NAME=gu4 vm-command 'kubectl delete pod gu4 --now'",
+    "NAME=bu0 vm-command 'kubectl delete pod bu0 --now'",
+    "NAME=bu0hbmpmem vm-command 'kubectl delete pod bu0hbmpmem --now'",
+    "NAME=bu1 vm-command 'kubectl delete pod bu1 --now'",
+    "NAME=bu1hbmdram vm-command 'kubectl delete pod bu1hbmdram --now'",
+    "NAME=be0 vm-command 'kubectl delete pod be0 --now'",
+    "NAME=be1 vm-command 'kubectl delete pod be1 --now'",
+    "NAME=rgu0 vm-command 'kubectl delete pod rgu0 -n kube-system --now'",
+    "NAME=rgu0pmem vm-command 'kubectl delete pod rgu0pmem -n kube-system --now'",
+    "NAME=rbu0 vm-command 'kubectl delete pod rbu0 -n kube-system --now'",
+    "NAME=rbe0 vm-command 'kubectl delete pod rbe0 -n kube-system --now'"
+{
+    guard {
+        v = inputvars(input_name)
+        return v.NAME in pods
+    }
+    body {
+        v = inputvars(input_name)
+        p = pods[v.NAME]
+        mem -= p.MEM * p.CONTCOUNT
+        if getattr(p, "namespace", "") == "kube-system":
+            reserved_cpu -= p.CPU * p.CONTCOUNT
+        else:
+            cpu -= p.CPU * p.CONTCOUNT
+        del pods[v.NAME]
+    }
+}
diff --git a/test/e2e/policies.test-suite/topology-aware/n6-hbm-cxl/test02-fuzz-memallocs/fuzz.fmbt.conf b/test/e2e/policies.test-suite/topology-aware/n6-hbm-cxl/test02-fuzz-memallocs/fuzz.fmbt.conf
@@ -0,0 +1,6 @@
+# fmbt test generation configuration. generate.sh instantiates this file: it
+# points the model to a temporary copy and replaces the step limit below.
+# NOTE(review): the meaning of the heuristic, coverage and pass expressions is
+# defined by fmbt and cannot be told from this file; see the fmbt docs.
+model = aal_remote(remote_pyaal --verbose-fmbt-log fuzz.aal)
+heuristic = mrandom(80,lookahead(1:2),20,random)
+coverage = perm(2)
+
+pass = coverage(10)
+pass = steps(100)
diff --git a/test/e2e/policies.test-suite/topology-aware/n6-hbm-cxl/test02-fuzz-memallocs/generate.sh b/test/e2e/policies.test-suite/topology-aware/n6-hbm-cxl/test02-fuzz-memallocs/generate.sh
@@ -0,0 +1,63 @@
+#!/bin/bash
+
+# Print the help text to standard output and exit with status 0.
+usage() {
+    cat <<EOF
+generate.sh - generate fuzz tests.
+
+Configuring test generation with environment variables:
+  TESTCOUNT=<NUM>       Number of generated test scripts that run in parallel.
+  MEM=<NUM>             Memory [MB] available for test pods in the system.
+  CPU=<NUM>             Non-reserved CPU [mCPU] available for test pods in the system.
+  RESERVED_CPU=<NUM>    Reserved CPU [mCPU] available for test pods in the system.
+  STEPS=<NUM>           Total number of test steps in all parallel tests.
+
+  FMBT_IMAGE=<IMG:TAG>  Generate the test using fmbt from docker image IMG:TAG.
+                        The default is fmbt-cli:latest.
+EOF
+    exit 0
+}
+
+if [ -n "$1" ]; then
+    usage
+fi
+
+TESTCOUNT=${TESTCOUNT:-1}
+MEM=${MEM:-11000}
+# 950 mCPU taken by the control plane, split the remaining 15050 mCPU
+# available for test pods to CPU and RESERVED_CPU pods.
+CPU=${CPU:-7000}
+RESERVED_CPU=${RESERVED_CPU:-1000}
+STEPS=${STEPS:-100}
+FMBT_IMAGE=${FMBT_IMAGE:-"fmbt-cli:latest"}
+
+mem_per_test=$(( MEM / TESTCOUNT ))
+cpu_per_test=$(( CPU / TESTCOUNT ))
+reserved_cpu_per_test=$(( RESERVED_CPU / TESTCOUNT ))
+steps_per_test=$(( STEPS / TESTCOUNT ))
+
+# Check fmbt Docker image
+docker run "$FMBT_IMAGE" fmbt --version 2>&1 | grep ^Version: || {
+    echo "error: cannot run fmbt from Docker image '$FMBT_IMAGE'"
+    echo "You can build the image locally by running:"
+    echo "( cd /tmp && git clone --branch devel https://github.com/intel/fmbt && cd fmbt && docker build . -t $FMBT_IMAGE -f Dockerfile.fmbt-cli )"
+    exit 1
+}
+
+cd "$(dirname "$0")" || {
+    echo "cannot cd to the directory of $0"
+    exit 1
+}
+
+for testnum in $(seq 1 "$TESTCOUNT"); do
+    testid=$(( testnum - 1))
+    sed -e "s/max_mem=.*/max_mem=${mem_per_test}/" \
+        -e "s/max_cpu=.*/max_cpu=${cpu_per_test}/" \
+        -e "s/max_reserved_cpu=.*/max_reserved_cpu=${reserved_cpu_per_test}/" \
+        < fuzz.aal > tmp.fuzz.aal
+    sed -e "s/fuzz\.aal/tmp.fuzz.aal/" \
+        -e "s/pass = steps(.*/pass = steps(${steps_per_test})/" \
+        < fuzz.fmbt.conf > tmp.fuzz.fmbt.conf
+    OUTFILE=generated${testid}.sh
+    echo "generating $OUTFILE..."
+    docker run -v "$(pwd):/mnt/models" "$FMBT_IMAGE" sh -c 'cd /mnt/models; fmbt tmp.fuzz.fmbt.conf 2>/dev/null | fmbt-log -f STEP\$sn\$as\$al' | grep -v AAL | sed -e 's/^, /  /g' -e '/^STEP/! s/\(^.*\)/echo "TESTGEN: \1"/g' -e 's/^STEP\([0-9]*\)i:\(.*\)/echo "TESTGEN: STEP \1"; vm-command "date +%T.%N"; \2; vm-command "date +%T.%N; kubectl get pods -A"/g' | sed "s/\([^a-z0-9]\)\(r\?\)\(gu\|bu\|be\)\([0-9]\)/\1t${testid}\2\3\4/g" > "$OUTFILE"
+done