Skip to content

Commit

Permalink
e2e: add fuzz test sources for hbm/dram/pmem pods
Browse files Browse the repository at this point in the history
Adds fuzz test generator script, model and runner for topology-aware
policy reliability tests on HBM+DRAM+PMEM platform.

Signed-off-by: Antti Kervinen <[email protected]>
  • Loading branch information
askervin authored and klihub committed Oct 2, 2024
1 parent e938c21 commit bbdd080
Show file tree
Hide file tree
Showing 5 changed files with 287 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Stop the currently running policy deployment, then launch the
# topology-aware policy with the helm configuration instantiated from
# helm-config.yaml.
# NOTE(review): helm-terminate, helm-launch and instantiate are provided
# by the surrounding e2e framework - confirm their exact semantics there.
helm-terminate
helm_config=$(instantiate helm-config.yaml) helm-launch topology-aware

# Import test helper functions. The path is quoted so that a TEST_DIR
# containing whitespace or glob characters cannot break the import.
source "$TEST_DIR/codelib.sh" || {
    echo "error importing codelib.sh"
    exit 1
}

# Delete leftover fuzz test pods, first from kube-system and then from
# the default namespace. Pods are selected on the VM by matching the
# output of "kubectl get pods" against a per-namespace name pattern.
cleanup-test-pods() {
    local _ns _podname_re
    for _ns in kube-system default; do
        case "$_ns" in
            kube-system) _podname_re='t[0-9]r[gb][ue]' ;;
            *) _podname_re='t[0-9][rgb][ue][0-9]' ;;
        esac
        # Best effort: run in a subshell and ignore the result.
        # NOTE(review): presumably vm-command may fail or exit when there
        # is nothing to delete - confirm against the framework.
        ( vm-command "kubectl delete pods -n $_ns \$(kubectl get pods -n $_ns | awk '/$_podname_re/{print \$1}')" ) || true
    done
}
cleanup-test-pods

# Launch every generated*.sh script found in TEST_DIR as a background
# job. Each job gets its own output directory under $outdir and its
# output lines are prefixed with the script file name.
genscriptcount=0
for genscript in "$TEST_DIR"/generated*.sh; do
    # An unmatched glob stays literal, so skip anything that is not a file.
    [ -f "$genscript" ] || continue
    (
        # Start from an empty per-job output directory.
        paralleloutdir="$outdir/parallel$genscriptcount"
        if [ -d "$paralleloutdir" ]; then
            rm -rf "$paralleloutdir"
        fi
        mkdir "$paralleloutdir"
        OUTPUT_DIR="$paralleloutdir"
        COMMAND_OUTPUT_DIR="$paralleloutdir/commands"
        mkdir "$COMMAND_OUTPUT_DIR"
        # sed -u keeps the prefixed output unbuffered while jobs interleave.
        source "$genscript" 2>&1 | sed -u -e "s/^/$(basename "$genscript"): /g"
    ) &
    genscriptcount=$(( genscriptcount + 1 ))
done

# No generated test script was launched: explain how to generate tests,
# pause so the message is noticed, and finish successfully.
if [[ "$genscriptcount" == "0" ]]; then
    cat <<EOF
WARNING:
WARNING: Skipping fuzz tests:
WARNING: - Generated tests not found.
WARNING: - Generate a test by running:
WARNING: $TEST_DIR/generate.sh
WARNING: - See test generation options:
WARNING: $TEST_DIR/generate.sh --help
WARNING:
EOF
    sleep 5
    exit 0
fi

# Block until every background test job started above has exited.
printf 'waiting for %s generated tests to finish...\n' "$genscriptcount"
wait

# Remove the pods that the tests left behind.
cleanup-test-pods

# Restore default test configuration, restart nri-resource-policy.
helm-terminate
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
container-exit0() {
    # Terminate a container whose command line has the form
    #     echo CONTNAME $(sleep inf)
    # by sending SIGKILL to the process that "ps axf" lists right after
    # the last line matching 'echo CONTNAME'.
    # Arguments: $1 - text that follows "echo" in the command line.
    local contname="$1"
    local find_pid kill_pid
    # Pipeline run on the VM; prints the PID column of the selected line.
    find_pid="ps axf | grep -A1 'echo $contname' | grep -v grep | tail -n 1 | awk '{print \$1}'"
    # set -x in a subshell traces the kill command only.
    kill_pid='( set -x; kill -KILL $contpid; )'
    vm-command "contpid=\$($find_pid); $kill_pid"
}

container-signal() {
    # Send a signal to every process on the VM whose full command line
    # matches 'echo CONTNAME'.
    # Arguments: $1 - text that follows "echo" in the command line
    #            $2 - signal name or number, passed to pkill as -SIGNAL
    local contname="$1" signal="$2"
    local pattern="echo $contname"
    vm-command "pkill -$signal -f '$pattern'"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
language python {
max_mem=13500 # maximum memory on VM in MB ## NOTE: generate.sh will overwrite this
max_cpu=7000 # maximum CPUs on node in mCPU ## NOTE: generate.sh will overwrite this
max_reserved_cpu=1000 # maximum reserved CPUs on node in mCPU ## NOTE: generate.sh will overwrite this
class Vars:
    """Attribute namespace for the variables parsed from an input name."""
    def __repr__(self):
        # List public attributes in sorted order so the output is stable.
        return "{" + ",".join("%s:%s" % (a, getattr(self, a)) for a in sorted(self.__dict__.keys()) if not a.startswith("_")) + "}\n"

def inputvars(input_name):
    """Parse VAR=VALUE words of input_name into a Vars object.

    Words that do not contain exactly one equals sign are ignored.
    A trailing m or M that directly follows a digit is dropped, so
    quantities like 200m or 1500M become the integers 200 and 1500.
    Values that are not integers after that are stored as strings.
    """
    v = Vars()
    for word in input_name.split():
        keyvalue = word.split("=")
        if len(keyvalue) != 2:
            continue
        key, value = keyvalue
        # Strip the unit suffix only when it follows a digit; this keeps
        # names and memory types that merely end with the letter m intact.
        if len(value) > 1 and value[-1] in "mM" and value[-2] in "0123456789":
            value = value[:-1]
        try:
            setattr(v, key, int(value))
        except ValueError:
            # Not a number: keep the text as is. Only ValueError is
            # caught so that interrupts and real errors still propagate.
            setattr(v, key, value)
    return v
}

# Model state shared by the guards and bodies of all inputs:
#   mem          - memory accounted to existing pods (MEM values, unit suffix stripped)
#   cpu          - CPU accounted to existing pods created outside kube-system
#   reserved_cpu - CPU accounted to existing pods created in kube-system
#   pods         - maps the name of each existing pod to its parsed input variables
variables {
mem, cpu, reserved_cpu, pods
}

# Every generated test starts with no pods and with no memory or CPU
# accounted to the model.
initial_state {
mem=0
cpu=0
reserved_cpu=0
pods={}
}

# Create non-reserved CPU pods
# Each input name carries VAR=VALUE settings followed by the create
# command; the settings are parsed with inputvars for the bookkeeping
# below. In the burstable rows CPU and MEM equal CPULIM and MEMLIM, and
# those are the values the model accounts for.
input
"NAME=gu0 CONTCOUNT=1 CPU=200m MEM=1500M create guaranteed",
"NAME=gu1 CONTCOUNT=2 CPU=1000m MEM=500M create guaranteed",
"NAME=gu1hbm CONTCOUNT=2 CPU=1000m MEM=500M MEMTYPE=hbm create guaranteed",
"NAME=gu2 CONTCOUNT=2 CPU=1200m MEM=4500M create guaranteed",
"NAME=gu2pmem CONTCOUNT=2 CPU=1200m MEM=4500M MEMTYPE=pmem create guaranteed",
"NAME=gu3 CONTCOUNT=3 CPU=2000m MEM=500M create guaranteed",
"NAME=gu3dram CONTCOUNT=3 CPU=2000m MEM=500M MEMTYPE=dram create guaranteed",
"NAME=gu4 CONTCOUNT=1 CPU=4200m MEM=100M create guaranteed",
"NAME=bu0 CONTCOUNT=1 CPU=1200m MEM=50M CPUREQ=900m MEMREQ=49M CPULIM=1200m MEMLIM=50M create burstable",
"NAME=bu0hbmpmem CONTCOUNT=1 CPU=1200m MEM=50M CPUREQ=900m MEMREQ=49M CPULIM=1200m MEMLIM=50M MEMTYPE=hbm,pmem create burstable",
"NAME=bu1 CONTCOUNT=2 CPU=1900m MEM=300M CPUREQ=1800m MEMREQ=299M CPULIM=1900m MEMLIM=300M create burstable",
"NAME=bu1hbmdram CONTCOUNT=2 CPU=1900m MEM=300M CPUREQ=1800m MEMREQ=299M CPULIM=1900m MEMLIM=300M MEMTYPE=hbm,dram create burstable",
"NAME=be0 CONTCOUNT=1 CPU=0 MEM=0 create besteffort",
"NAME=be1 CONTCOUNT=3 CPU=0 MEM=0 create besteffort"
{
guard {
# Enabled only when no pod with this name exists and the pod still fits:
# accounted memory and non-reserved CPU, summed over all containers of
# the pod, must stay strictly below max_mem and max_cpu.
v = inputvars(input_name)
return (v.NAME not in pods
and (mem + v.MEM * v.CONTCOUNT < max_mem)
and (cpu + v.CPU * v.CONTCOUNT < max_cpu))
}
body {
# Account the resources of the new pod and remember its variables so
# that deleting the pod can release the same amounts.
v = inputvars(input_name)
# These input names carry no namespace setting, so record the default.
v.namespace = getattr(v, "namespace", "default")
mem += v.MEM * v.CONTCOUNT
cpu += v.CPU * v.CONTCOUNT
pods[v.NAME] = v
}
}

# Create pods to the kube-system namespace
# CPU of these pods is accounted against the reserved CPU budget
# (reserved_cpu and max_reserved_cpu) instead of the non-reserved one.
input
"NAME=rgu0 CONTCOUNT=2 CPU=100m MEM=1000M namespace=kube-system create guaranteed",
"NAME=rgu0pmem CONTCOUNT=2 CPU=100m MEM=1000M namespace=kube-system MEMTYPE=pmem create guaranteed",
"NAME=rbu0 CONTCOUNT=1 CPU=100m MEM=100M CPUREQ=99m MEMREQ=99M CPULIM=100m MEMLIM=100M namespace=kube-system create burstable",
"NAME=rbe0 CONTCOUNT=2 CPU=0 MEM=0 namespace=kube-system create besteffort"
{
guard {
# Enabled only when no pod with this name exists and the pod still fits:
# accounted memory and reserved CPU, summed over all containers of the
# pod, must stay strictly below max_mem and max_reserved_cpu.
v = inputvars(input_name)
return (v.NAME not in pods
and (mem + v.MEM * v.CONTCOUNT < max_mem)
and (reserved_cpu + v.CPU * v.CONTCOUNT < max_reserved_cpu))

}
body {
# Account the resources of the new pod. The namespace attribute comes
# from the input name itself and is stored along with the other variables.
v = inputvars(input_name)
mem += v.MEM * v.CONTCOUNT
reserved_cpu += v.CPU * v.CONTCOUNT
pods[v.NAME] = v
}
}

# Kill a process in a container
# - "echo gu0c1" matches and kills process only in container gu0c1 in pod gu0
# - "echo gu0" matches and kills processes in all containers of pod gu0
# The argument of container-exit0 must name the pod given in NAME (or
# one of its containers), because the guard only checks that the pod in
# NAME exists. The gu3 and gu4c rows use the broader match on purpose.
input
"NAME=gu0 container-exit0 gu0c0",
"NAME=gu1 container-exit0 gu1c0",
"NAME=gu1hbm container-exit0 gu1hbmc0",
"NAME=gu2 container-exit0 gu2c0",
"NAME=gu2pmem container-exit0 gu2pmemc0",
"NAME=gu3 container-exit0 gu3",
"NAME=gu3dram container-exit0 gu3dramc0",
"NAME=gu4 container-exit0 gu4c",
"NAME=bu0 container-exit0 bu0c0",
"NAME=bu0hbmpmem container-exit0 bu0hbmpmemc0",
"NAME=bu1 container-exit0 bu1c0",
"NAME=bu1hbmdram container-exit0 bu1hbmdramc0",
"NAME=be0 container-exit0 be0c0",
"NAME=be1 container-exit0 be1c0",
"NAME=rgu0 container-exit0 rgu0c0",
"NAME=rgu0pmem container-exit0 rgu0pmemc0",
"NAME=rbu0 container-exit0 rbu0c0",
"NAME=rbe0 container-exit0 rbe0c0"
{
guard {
# A process can be killed only in a pod that currently exists. There
# is no body: killing a process does not change the accounted resources.
v = inputvars(input_name)
return v.NAME in pods
}
}

# Delete single pod
# Pods created in kube-system are deleted with -n kube-system; the other
# rows delete from the namespace kubectl uses by default.
input
"NAME=gu0 vm-command 'kubectl delete pod gu0 --now'",
"NAME=gu1 vm-command 'kubectl delete pod gu1 --now'",
"NAME=gu1hbm vm-command 'kubectl delete pod gu1hbm --now'",
"NAME=gu2 vm-command 'kubectl delete pod gu2 --now'",
"NAME=gu2pmem vm-command 'kubectl delete pod gu2pmem --now'",
"NAME=gu3 vm-command 'kubectl delete pod gu3 --now'",
"NAME=gu3dram vm-command 'kubectl delete pod gu3dram --now'",
"NAME=gu4 vm-command 'kubectl delete pod gu4 --now'",
"NAME=bu0 vm-command 'kubectl delete pod bu0 --now'",
"NAME=bu0hbmpmem vm-command 'kubectl delete pod bu0hbmpmem --now'",
"NAME=bu1 vm-command 'kubectl delete pod bu1 --now'",
"NAME=bu1hbmdram vm-command 'kubectl delete pod bu1hbmdram --now'",
"NAME=be0 vm-command 'kubectl delete pod be0 --now'",
"NAME=be1 vm-command 'kubectl delete pod be1 --now'",
"NAME=rgu0 vm-command 'kubectl delete pod rgu0 -n kube-system --now'",
"NAME=rgu0pmem vm-command 'kubectl delete pod rgu0pmem -n kube-system --now'",
"NAME=rbu0 vm-command 'kubectl delete pod rbu0 -n kube-system --now'",
"NAME=rbe0 vm-command 'kubectl delete pod rbe0 -n kube-system --now'"
{
guard {
# Only a pod that currently exists can be deleted.
v = inputvars(input_name)
return v.NAME in pods
}
body {
# Release what was accounted when the pod was created. The amounts come
# from the variables stored at creation time, not from this input name,
# which carries only NAME.
v = inputvars(input_name)
p = pods[v.NAME]
mem -= p.MEM * p.CONTCOUNT
# CPU goes back to the budget it was taken from.
if getattr(p, "namespace", "") == "kube-system":
reserved_cpu -= p.CPU * p.CONTCOUNT
else:
cpu -= p.CPU * p.CONTCOUNT
del pods[v.NAME]
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Test model: the AAL/Python model, served through remote_pyaal.
model = aal_remote(remote_pyaal --verbose-fmbt-log fuzz.aal)
# Test generation heuristic.
# NOTE(review): presumably an 80/20 weighted mix of 1-2 step lookahead
# and purely random choice - confirm against the fMBT documentation.
heuristic = mrandom(80,lookahead(1:2),20,random)
# Coverage measure used by the end condition below.
# NOTE(review): presumably permutations of two actions - confirm.
coverage = perm(2)

# End conditions that give a passing verdict. generate.sh rewrites the
# model file name and the step limit when it renders this file for a test.
pass = coverage(10)
pass = steps(100)
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#!/bin/bash

# Print help on configuring test generation to stdout and exit with
# status 0. The delimiter is quoted: the help text is literal.
usage() {
    cat <<'EOF'
generate.sh - generate fuzz tests.
Configuring test generation with environment variables:
TESTCOUNT=<NUM> Number of generated test scripts that run in parallel.
MEM=<NUM> Memory [MB] available for test pods in the system.
CPU=<NUM> Non-reserved CPU [mCPU] available for test pods in the system.
RESERVED_CPU=<NUM> Reserved CPU [mCPU] available for test pods in the system.
STEPS=<NUM> Total number of test steps in all parallel tests.
FMBT_IMAGE=<IMG:TAG> Generate the test using fmbt from docker image IMG:TAG.
The default is fmbt-cli:latest.
EOF
    exit 0
}

# Any non-empty first argument (for example --help) prints the usage
# text and exits; all configuration is taken from the environment.
if [ -n "$1" ]; then
    usage
fi

# require_int NAME VALUE [MIN]
# Exit with an error unless VALUE is a plain decimal integer that is at
# least MIN (default 0). Leading zeros are rejected because shell
# arithmetic would read them as octal.
require_int() {
    local name="$1" value="$2" min="${3:-0}"
    case "$value" in
        '' | *[!0-9]* | 0?*)
            echo "error: $name must be a decimal integer, got '$value'" >&2
            exit 1
            ;;
    esac
    if (( value < min )); then
        echo "error: $name must be at least $min, got '$value'" >&2
        exit 1
    fi
}

TESTCOUNT=${TESTCOUNT:-1}
MEM=${MEM:-11000}
# NOTE(review): the original comment here says that 950 mCPU is taken by
# the control plane and that the remaining 15050 mCPU is split between
# CPU and RESERVED_CPU pods, yet the defaults below add up to 8000 mCPU.
# Confirm the intended budget for this platform.
CPU=${CPU:-7000}
RESERVED_CPU=${RESERVED_CPU:-1000}
STEPS=${STEPS:-100}
FMBT_IMAGE=${FMBT_IMAGE:-"fmbt-cli:latest"}

# Reject bad values up front: TESTCOUNT is a divisor below, so zero
# would abort with a division error and text would silently count as 0.
require_int TESTCOUNT "$TESTCOUNT" 1
require_int MEM "$MEM"
require_int CPU "$CPU"
require_int RESERVED_CPU "$RESERVED_CPU"
require_int STEPS "$STEPS"

# Parallel tests share the system, so each one gets an equal slice of
# every budget (integer division).
mem_per_test=$(( MEM / TESTCOUNT ))
cpu_per_test=$(( CPU / TESTCOUNT ))
reserved_cpu_per_test=$(( RESERVED_CPU / TESTCOUNT ))
steps_per_test=$(( STEPS / TESTCOUNT ))

# Check fmbt Docker image: the image must be able to run fmbt and print
# a "Version:" line. --rm removes the probe container when it exits, so
# repeated runs do not pile up stopped containers.
docker run --rm "$FMBT_IMAGE" fmbt --version 2>&1 | grep ^Version: || {
    echo "error: cannot run fmbt from Docker image '$FMBT_IMAGE'"
    echo "You can build the image locally by running:"
    echo "( cd /tmp && git clone --branch devel https://github.com/intel/fmbt && cd fmbt && docker build . -t $FMBT_IMAGE -f Dockerfile.fmbt-cli )"
    exit 1
}

# Work in the directory of this script: the model, the configuration
# and the generated files are all addressed with relative paths.
if ! cd "$(dirname "$0")"; then
    echo "cannot cd to the directory of $0"
    exit 1
fi

# steps_to_script TESTID
# Filter that turns fmbt-log output (stdin) into a shell test script
# (stdout):
#   - lines containing AAL are dropped,
#   - a leading ", " is replaced with a space,
#   - every line that does not start with STEP becomes an echo of
#     "TESTGEN: <line>",
#   - "STEP<n>i:<command>" becomes an echo of the step number followed
#     by the command, with a timestamp on the VM before it and a
#     timestamp plus pod listing after it,
#   - pod names (optional r, then gu, bu or be, then a digit) that do
#     not follow a lowercase letter or a digit get the prefix t<TESTID>,
#     which keeps the pods of parallel tests apart.
# The patterns use GNU sed extensions (\? and \|).
steps_to_script() {
    local testid="$1"
    grep -v AAL \
        | sed -e 's/^, / /g' \
              -e '/^STEP/! s/\(^.*\)/echo "TESTGEN: \1"/g' \
              -e 's/^STEP\([0-9]*\)i:\(.*\)/echo "TESTGEN: STEP \1"; vm-command "date +%T.%N"; \2; vm-command "date +%T.%N; kubectl get pods -A"/g' \
        | sed "s/\([^a-z0-9]\)\(r\?\)\(gu\|bu\|be\)\([0-9]\)/\1t${testid}\2\3\4/g"
}

# Render the model and the fmbt configuration with the per-test limits.
# The result is the same for every test, so it is rendered once rather
# than on every loop iteration.
if (( TESTCOUNT > 0 )); then
    sed -e "s/max_mem=.*/max_mem=${mem_per_test}/" \
        -e "s/max_cpu=.*/max_cpu=${cpu_per_test}/" \
        -e "s/max_reserved_cpu=.*/max_reserved_cpu=${reserved_cpu_per_test}/" \
        < fuzz.aal > tmp.fuzz.aal
    sed -e "s/fuzz\.aal/tmp.fuzz.aal/" \
        -e "s/pass = steps(.*/pass = steps(${steps_per_test})/" \
        < fuzz.fmbt.conf > tmp.fuzz.fmbt.conf
fi

# Generate generated0.sh .. generated<TESTCOUNT-1>.sh. fmbt runs in a
# throw-away container (--rm) with this directory mounted at /mnt/models.
for (( testid = 0; testid < TESTCOUNT; testid++ )); do
    OUTFILE=generated${testid}.sh
    echo "generating $OUTFILE..."
    docker run --rm -v "$(pwd):/mnt/models" "$FMBT_IMAGE" \
        sh -c 'cd /mnt/models; fmbt tmp.fuzz.fmbt.conf 2>/dev/null | fmbt-log -f STEP\$sn\$as\$al' \
        | steps_to_script "$testid" > "$OUTFILE"
    # fmbt errors are discarded above, so an empty script is the only
    # visible sign that generation failed.
    if [ ! -s "$OUTFILE" ]; then
        echo "warning: $OUTFILE is empty, test generation may have failed" >&2
    fi
done

0 comments on commit bbdd080

Please sign in to comment.