Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

automate a tcpdump process #104

Merged
merged 4 commits into from
Oct 30, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
143 changes: 143 additions & 0 deletions scripts/tcpdump.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
#!/bin/bash
#
# This script collects pcap files from a Kubernetes host, a Flannel or Calico VXLAN interface, and a pod.
#
# This script must be run as root.
#
# Provide the namespace and name of a pod to collect from. The script determines the host-side interface of the given Kubernetes pod (from `ip route` on Calico, or by matching the pod's veth peer index via crictl and `ip netns` on Flannel) and starts a tcpdump process attached to that interface. Then, another tcpdump process is attached to either the Flannel or the Calico VXLAN interface (plus the cni0 bridge on Flannel), and finally, a tcpdump process is attached to the primary interface of the host, filtering on the VXLAN UDP port.
#
# Usage:
# ./tcpdump.sh <namespace> <pod-name> [time frame]
# namespace: string, required
# pod-name: string, required
# time frame: duration string passed to timeout(1), optional; an integer with optional suffix 's', 'm', 'h', or 'd'; defaults to '5m'. Example: 100s, 5m, 1h.

# depends on kubectl, jq, tcpdump, timeout, bash, iproute2, grep, awk, hostname; crictl is additionally required on Flannel hosts

# TODO: error handling

# Flags passed to every tcpdump invocation in this script.
readonly VXLAN_FLAGS="-lttttnnvv"

# Strict mode first; the ${N:-} defaults below keep `set -u` from aborting
# before the usage text can be shown when arguments are missing.
set -euo pipefail

namespace=${1:-}
pod=${2:-}
# Optional third argument: capture duration handed to timeout(1).
timeframe=${3:-5m}

if [[ -z $namespace || -z $pod ]]; then
  # Usage is shown because of an error, so it goes to stderr.
  {
    echo "Usage: ./tcpdump.sh <namespace> <pod-name> [time frame]"
    echo " namespace: string, required"
    echo " pod-name: string, required"
    echo " time frame: integer with optional suffix 's', 'm', 'h', or 'd'; defaults to '5m'"
    echo " Example Usage: ./tcpdump.sh default coredns-coredns-74ff55c5d 300s"
    echo " "
  } >&2
  exit 1
fi

# if not root, then exit
if [[ $EUID -ne 0 ]]; then
  echo "This script must be run as root" >&2
  exit 1
fi

# check for required binaries
for binary in kubectl jq tcpdump ip timeout grep awk; do
  if ! command -v "$binary" &> /dev/null; then
    echo "Could not find $binary" >&2
    exit 1
  fi
done

# Validate the time frame up front by letting timeout(1) itself parse it.
# Otherwise a bad value only surfaces later, when every background capture
# fails and the script still reports success. Exit status 124 just means the
# no-op probe was cut short by a very small duration, so it counts as valid.
timeframe_rc=0
timeout "$timeframe" true 2> /dev/null || timeframe_rc=$?
if (( timeframe_rc != 0 && timeframe_rc != 124 )); then
  echo "Invalid time frame '$timeframe'; expected an integer with optional suffix 's', 'm', 'h', or 'd'" >&2
  exit 1
fi


#######################################
# Print the host interface that the routing table associates with a pod's IP.
# Arguments:
#   $1 - pod name
#   $2 - pod namespace
# Outputs:
#   Interface name on stdout; diagnostics on stderr.
# Returns:
#   0 on success, 1 if the pod IP or a matching route cannot be found.
#######################################
function getCalicoInterface () {
  local pod_name="$1"
  local pod_namespace="$2"
  local pod_ip pod_interface

  pod_ip=$(kubectl get pod "$pod_name" -n "$pod_namespace" -o json | jq -r '.status.podIP') || pod_ip=""
  # jq -r prints the literal string "null" when .status.podIP is absent.
  if [[ -z $pod_ip || $pod_ip == "null" ]]; then
    echo "Could not determine the IP of pod $pod_namespace/$pod_name" >&2
    return 1
  fi

  # Get the interface name from routing table.
  # The destination field is compared for exact equality: a substring/regex
  # match would let 10.42.0.1 also select the routes for 10.42.0.10 or
  # 10.42.0.100. awk reads the whole stream (no early exit) so `ip` is never
  # hit by SIGPIPE when the caller runs with pipefail.
  pod_interface=$(ip route | awk -v ip="$pod_ip" '
    !found && $1 == ip && $2 == "dev" { iface = $3; found = 1 }
    END { if (found) print iface }
  ') || pod_interface=""

  if [[ -z $pod_interface ]]; then
    echo "No route for pod IP $pod_ip found on this host" >&2
    return 1
  fi

  echo "$pod_interface"
}

#######################################
# Print the host-side veth interface paired with a pod's container interface.
# The pod's network namespace is reached through a temporary symlink under
# /var/run/netns so that `ip netns exec` can be used.
# Arguments:
#   $1 - pod name
#   $2 - pod namespace (accepted for parity with getCalicoInterface;
#        NOTE(review): the crictl lookup below matches on pod name only)
# Outputs:
#   Interface name on stdout; diagnostics on stderr. Diagnostics must not go
#   to stdout, since callers capture stdout as the interface name.
# Returns:
#   0 on success, 1 on any lookup failure.
#######################################
function getFlannelInterface () {
  local pod_name="$1"
  # shellcheck disable=SC2034  # kept so both lookup functions share one signature
  local pod_namespace="$2"
  local container_id pid netns_name c_index h_index

  # check for crictl binary
  if ! command -v crictl &> /dev/null; then
    echo "Could not find crictl; please install it" >&2
    return 1
  fi

  # Figure out the pod's container PID.
  # First `crictl ps` row mentioning the pod name, matched as a fixed string.
  container_id=$(crictl ps | awk -v pod="$pod_name" '
    !found && index($0, pod) { id = $1; found = 1 }
    END { if (found) print id }
  ') || container_id=""
  if [[ -z $container_id ]]; then
    echo "Could not find a running container for pod $pod_name" >&2
    return 1
  fi

  pid=$(crictl inspect "$container_id" | jq .info.pid) || pid=""
  if [[ ! $pid =~ ^[1-9][0-9]*$ ]]; then
    echo "Could not determine the PID of container $container_id" >&2
    return 1
  fi

  # link to /var/run/netns so we can use ip netns easily.
  # The link name is derived from this shell's PID instead of the pod name so
  # that creating and removing it can never clobber an existing named netns.
  netns_name="tcpdump-sh-$$"
  mkdir -p /var/run/netns
  if ! ln -sf "/proc/$pid/ns/net" "/var/run/netns/$netns_name"; then
    echo "Could not link network namespace of PID $pid" >&2
    return 1
  fi

  # Get the interface index of the container's eth0.
  # Inside the pod the first veth is printed as "<n>: eth0@if<h>: ..."; <h> is
  # the index of the corresponding host veth interface.
  c_index=$(ip netns exec "$netns_name" ip link show type veth | awk '
    NR == 1 { idx = $2; sub(/.*@if/, "", idx); sub(/:$/, "", idx); print idx }
  ') || c_index=""

  # Clean up the netns symlink, since we don't need it anymore
  rm -f "/var/run/netns/$netns_name"

  if [[ ! $c_index =~ ^[0-9]+$ ]]; then
    echo "Could not determine the veth peer index for pod $pod_name" >&2
    return 1
  fi

  # Find the host veth whose own index equals the peer index found above and
  # strip the "@if<n>:" suffix from its name. The index is compared exactly,
  # so index 12 cannot select interface 112.
  h_index=$(ip link show type veth | awk -v idx="${c_index}:" '
    !found && $1 == idx { name = $2; sub(/@.*/, "", name); sub(/:$/, "", name); found = 1 }
    END { if (found) print name }
  ') || h_index=""
  if [[ -z $h_index ]]; then
    echo "Could not find host veth with index $c_index for pod $pod_name" >&2
    return 1
  fi

  echo "$h_index"
}

# Figure out if we're using Flannel or Calico VXLAN.
# grep's output is redirected (rather than using `grep -q`) so that grep reads
# all of its input and `ip` is never killed by SIGPIPE under pipefail.
if ip link show | grep flannel > /dev/null; then
  vxlan_interface=flannel.1
  vxlan_port=8472
  cni=flannel
elif ip link show | grep cali > /dev/null; then
  vxlan_interface=vxlan.calico
  vxlan_port=4789
  cni=calico
else
  echo "Could not determine VXLAN interface on host" >&2
  exit 1
fi

# Resolve the pod's host-side interface with the CNI-specific lookup.
pod_interface=""
if [[ $cni == "calico" ]]; then
  pod_interface=$(getCalicoInterface "$pod" "$namespace") || pod_interface=""
elif [[ $cni == "flannel" ]]; then
  pod_interface=$(getFlannelInterface "$pod" "$namespace") || pod_interface=""
fi

# An empty or multi-word result cannot be handed to `tcpdump -i`; stop here
# instead of letting the capture fail unnoticed in the background.
if [[ -z $pod_interface || $pod_interface =~ [[:space:]] ]]; then
  echo "Could not determine pod interface from namespace $namespace and pod $pod" >&2
  exit 1
fi

# Figure out the host's primary interface before any capture is started, so a
# failure here does not leave background tcpdump processes behind.
# The device is the word following "dev" on the first default route; this
# tolerates several default routes and routes without a "via" gateway.
host_interface=$(ip route | awk '
  !found && $1 == "default" {
    for (i = 2; i < NF; i++) {
      if ($i == "dev") { dev = $(i + 1); found = 1; break }
    }
  }
  END { if (found) print dev }
') || host_interface=""
if [[ -z $host_interface ]]; then
  echo "Could not determine the host's primary interface from the default route" >&2
  exit 1
fi

host_name=$(hostname)
capture_pids=()

# Collect tcpdump from the pod's interface
echo "Collecting tcpdump from pod $pod on interface $pod_interface"
timeout "$timeframe" tcpdump "$VXLAN_FLAGS" -i "$pod_interface" -w "${host_name}-${pod}-pod.pcap" &
capture_pids+=("$!")

# Collect tcpdump from the VXLAN interface
echo "Collecting tcpdump from VXLAN interface $vxlan_interface on port $vxlan_port"
timeout "$timeframe" tcpdump "$VXLAN_FLAGS" -i "$vxlan_interface" -w "${host_name}-${vxlan_interface}.pcap" &
capture_pids+=("$!")

if [[ $cni == "flannel" ]]; then
  # Collect tcpdump also from the cni0 bridge
  echo "Collecting tcpdump from cni0 bridge"
  timeout "$timeframe" tcpdump "$VXLAN_FLAGS" -i "cni0" -w "${host_name}-cni0.pcap" &
  capture_pids+=("$!")
fi

# Collect tcpdump from the host's primary interface, restricted to the VXLAN
# port. All options precede the filter expression, which must come last.
echo "Collecting tcpdump from host interface $host_interface"
timeout "$timeframe" tcpdump "$VXLAN_FLAGS" -i "$host_interface" -T vxlan -w "${host_name}-${host_interface}.pcap" port "$vxlan_port" &
capture_pids+=("$!")

# Wait for all the tcpdump processes to finish.
# timeout exits with 124 when the time frame elapsed, which is the expected
# way for a capture to end; any other non-zero status means it stopped early.
for capture_pid in "${capture_pids[@]}"; do
  capture_rc=0
  wait "$capture_pid" || capture_rc=$?
  if (( capture_rc != 0 && capture_rc != 124 )); then
    echo "Warning: capture process $capture_pid exited with status $capture_rc" >&2
  fi
done

echo "Done collecting tcpdump files"
Loading