From 34d4ce1ef713d04c46b64d9d5b2f67363b61abc5 Mon Sep 17 00:00:00 2001 From: Mohit Joshi Date: Wed, 11 Oct 2023 07:13:21 +0000 Subject: [PATCH] PSTRESS-154 - Add RR support in pstress Background: =========== What is RR: Record and Replay tool While using pstress, we come across bugs and crashes which are sporadic in nature. It may not be possible to easily reproduce these problems because of their flaky nature. We have gdb (GNU debugger), which is an excellent tool if we can reproduce the problems, but if we run into crashes that occur in a multi-threaded setup, we might not be able to repeat the crash deterministically. This is where RR comes in handy. It has the ability to capture and then replay bugs. This tool has the ability to debug recorded sessions in a precise, deterministic fashion. --- pstress/pstress-run-57.conf | 6 ++++ pstress/pstress-run-80.conf | 6 ++++ pstress/pstress-run-PXC57.conf | 6 ++++ pstress/pstress-run-PXC80.conf | 6 ++++ pstress/pstress-run-rocksdb.conf | 6 ++++ pstress/pstress-run.sh | 47 ++++++++++++++++++++++++++++---- 6 files changed, 71 insertions(+), 6 deletions(-) diff --git a/pstress/pstress-run-57.conf b/pstress/pstress-run-57.conf index 1fa08ec..f7fa963 100755 --- a/pstress/pstress-run-57.conf +++ b/pstress/pstress-run-57.conf @@ -185,3 +185,9 @@ GRP_RPL_CLUSTER_RUN=0 # Default GR configuration file for multi-node pstress runs # ################################################################################ GR_CLUSTER_CONFIG=${SCRIPT_PWD}/pstress-cluster-run.cfg + +################################################################################ +# To record and replay crashes, run pstress in RR mode # +# To enable set RR_MODE=1 # +################################################################################ +RR_MODE=0 diff --git a/pstress/pstress-run-80.conf b/pstress/pstress-run-80.conf index b6e6c1d..1f25e4d 100755 --- a/pstress/pstress-run-80.conf +++ b/pstress/pstress-run-80.conf @@ -189,3 +189,9 @@ GRP_RPL_CLUSTER_RUN=0 # Default GR
configuration file for multi-node pstress runs # ################################################################################ GR_CLUSTER_CONFIG=${SCRIPT_PWD}/pstress-cluster-run.cfg + +################################################################################ +# To record and replay crashes, run pstress in RR mode # +# To enable set RR_MODE=1 # +################################################################################ +RR_MODE=0 diff --git a/pstress/pstress-run-PXC57.conf b/pstress/pstress-run-PXC57.conf index 1f59036..7341661 100755 --- a/pstress/pstress-run-PXC57.conf +++ b/pstress/pstress-run-PXC57.conf @@ -204,3 +204,9 @@ PXC_WSREP_PROVIDER_ADD_RANDOM_WSREP_PROVIDER_CONFIG_OPTIONS=0 # Maximum number of PXC wsrep provider (Galera) configuration options to add # ################################################################################ PXC_WSREP_PROVIDER_MAX_NR_OF_RND_OPTS_TO_ADD=2 + +################################################################################ +# To record and replay crashes, run pstress in RR mode # +# To enable set RR_MODE=1 # +################################################################################ +RR_MODE=0 diff --git a/pstress/pstress-run-PXC80.conf b/pstress/pstress-run-PXC80.conf index 5d8464d..f1ab571 100755 --- a/pstress/pstress-run-PXC80.conf +++ b/pstress/pstress-run-PXC80.conf @@ -216,3 +216,9 @@ PXC_WSREP_PROVIDER_ADD_RANDOM_WSREP_PROVIDER_CONFIG_OPTIONS=0 # Maximum number of PXC wsrep provider (Galera) configuration options to add # ################################################################################ PXC_WSREP_PROVIDER_MAX_NR_OF_RND_OPTS_TO_ADD=2 + +################################################################################ +# To record and replay crashes, run pstress in RR mode # +# To enable set RR_MODE=1 # +################################################################################ +RR_MODE=0 diff --git a/pstress/pstress-run-rocksdb.conf b/pstress/pstress-run-rocksdb.conf 
index 87ad5f0..250ffef 100755 --- a/pstress/pstress-run-rocksdb.conf +++ b/pstress/pstress-run-rocksdb.conf @@ -152,3 +152,9 @@ MYINIT= # Extra options to pass to mysqld during server start # ################################################################################ MYEXTRA= + +################################################################################ +# To record and replay crashes, run pstress in RR mode # +# To enable set RR_MODE=1 # +################################################################################ +RR_MODE=0 diff --git a/pstress/pstress-run.sh b/pstress/pstress-run.sh index a42937a..da4f6c3 100755 --- a/pstress/pstress-run.sh +++ b/pstress/pstress-run.sh @@ -176,6 +176,24 @@ EOF fi } +# In case the user starts pstress in RR mode, check that rr is installed and that perf_event_paranoid (must be <= 1) allows recording +if [ "${RR_MODE:-0}" -eq 1 ]; then + echoit "Running pstress in RR mode. It is expected that pstress executions will be slower" + if ! command -v rr > /dev/null 2>&1; then + echo "rr package is not installed. Exiting" + echo "Install rr: https://github.com/rr-debugger/rr/wiki/Building-And-Installing" + exit 1 + else + perf_event_var=$(cat /proc/sys/kernel/perf_event_paranoid) + if [ "$perf_event_var" -gt 1 ]; then + echo "rr needs /proc/sys/kernel/perf_event_paranoid <=1, but it is $perf_event_var" + echo "Change it to 1, consider running sudo sysctl -w kernel.perf_event_paranoid=1" + echo "For more information https://github.com/rr-debugger/rr/wiki/Building-And-Installing" + exit 1 + fi + fi +fi + # Find mysqld binary if [ -r ${BASEDIR}/bin/mysqld ]; then BIN=${BASEDIR}/bin/mysqld @@ -480,15 +498,27 @@ pxc_startup(){ sed -i "2i wsrep_cluster_address=gcomm://${PXC_LADDRS[1]},${PXC_LADDRS[2]},${PXC_LADDRS[3]}" ${DATADIR}/n3.cnf get_error_socket_file 1 - ${BASEDIR}/bin/mysqld --defaults-file=${DATADIR}/n1.cnf $STARTUP_OPTION $MYEXTRA $PXC_MYEXTRA --wsrep-new-cluster > ${ERR_FILE} 2>&1 & + if [ "${RR_MODE:-0}" -eq 1 ]; then + rr ${BASEDIR}/bin/mysqld --defaults-file=${DATADIR}/n1.cnf $STARTUP_OPTION
$MYEXTRA $PXC_MYEXTRA --wsrep-new-cluster > ${ERR_FILE} 2>&1 & + else + ${BASEDIR}/bin/mysqld --defaults-file=${DATADIR}/n1.cnf $STARTUP_OPTION $MYEXTRA $PXC_MYEXTRA --wsrep-new-cluster > ${ERR_FILE} 2>&1 & + fi pxc_startup_status 1 get_error_socket_file 2 - ${BASEDIR}/bin/mysqld --defaults-file=${DATADIR}/n2.cnf $STARTUP_OPTION $MYEXTRA $PXC_MYEXTRA > ${ERR_FILE} 2>&1 & + if [ "${RR_MODE:-0}" -eq 1 ]; then + rr ${BASEDIR}/bin/mysqld --defaults-file=${DATADIR}/n2.cnf $STARTUP_OPTION $MYEXTRA $PXC_MYEXTRA > ${ERR_FILE} 2>&1 & + else + ${BASEDIR}/bin/mysqld --defaults-file=${DATADIR}/n2.cnf $STARTUP_OPTION $MYEXTRA $PXC_MYEXTRA > ${ERR_FILE} 2>&1 & + fi pxc_startup_status 2 get_error_socket_file 3 - ${BASEDIR}/bin/mysqld --defaults-file=${DATADIR}/n3.cnf $STARTUP_OPTION $MYEXTRA $PXC_MYEXTRA > ${ERR_FILE} 2>&1 & + if [ "${RR_MODE:-0}" -eq 1 ]; then + rr ${BASEDIR}/bin/mysqld --defaults-file=${DATADIR}/n3.cnf $STARTUP_OPTION $MYEXTRA $PXC_MYEXTRA > ${ERR_FILE} 2>&1 & + else + ${BASEDIR}/bin/mysqld --defaults-file=${DATADIR}/n3.cnf $STARTUP_OPTION $MYEXTRA $PXC_MYEXTRA > ${ERR_FILE} 2>&1 & + fi pxc_startup_status 3 if [ "$IS_STARTUP" == "startup" ]; then @@ -905,6 +935,9 @@ pstress_test(){ --log-output=none --log-error-verbosity=3 --log-error=${RUNDIR}/${TRIAL}/log/master.err" fi + if [ "${RR_MODE:-0}" -eq 1 ]; then + CMD="rr $CMD" + fi echo $CMD $CMD > ${RUNDIR}/${TRIAL}/log/master.err 2>&1 & MPID="$!" @@ -1481,19 +1514,21 @@ elif [[ ${PXC} -eq 1 || ${GRP_RPL} -eq 1 ]]; then if ${BASEDIR}/bin/mysqladmin -uroot -S${WORKDIR}/node1.template/node1_socket.sock ping > /dev/null 2>&1; then echoit "PXC node1.template started" ; else - echoit "Assert: PXC data template creation failed.." + echoit "Assert: PXC data template1 creation failed.."
exit 1 fi + sleep 2 if ${BASEDIR}/bin/mysqladmin -uroot -S${WORKDIR}/node2.template/node2_socket.sock ping > /dev/null 2>&1; then echoit "PXC node2.template started" ; else - echoit "Assert: PXC data template creation failed.." + echoit "Assert: PXC data template2 creation failed.." exit 1 fi + sleep 2 if ${BASEDIR}/bin/mysqladmin -uroot -S${WORKDIR}/node3.template/node3_socket.sock ping > /dev/null 2>&1; then echoit "PXC node3.template started" ; else - echoit "Assert: PXC data template creation failed.." + echoit "Assert: PXC data template3 creation failed.." exit 1 fi echoit "Created PXC data templates for pstress run.."