Skip to content

Commit

Permalink
release of field data type clustering, DCDS 2022
Browse files Browse the repository at this point in the history
  • Loading branch information
skleber committed Jun 25, 2022
1 parent bc8acec commit 545fac1
Show file tree
Hide file tree
Showing 65 changed files with 7,202 additions and 616 deletions.
1 change: 1 addition & 0 deletions .idea/dictionaries/stephan.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 2 additions & 14 deletions .idea/inspectionProfiles/Project_Default.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion .idea/misc.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion .idea/modules.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions .idea/vcs.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ All scripts provide these command line options:
### prep_*
PCAP preparation scripts:

* `prep_deduplicate-trace.py pcapfilename`
* `prep_deduplicate-trace.py pcapfilename`
Detect identical payloads and de-duplicate traces, ignoring encapsulation metadata.


Expand All @@ -121,7 +121,7 @@ Basic checks whether PCAPs are parseable:
The tshark-dissected fields that are contained in the PCAPs need to be known to the message parser.
Therefore, validation.messageParser.ParsingConstants needs to be made aware of any field occurring in the traces.

* `check_parse-pcap.py pcapfilename`
* `check_parse-pcap.py pcapfilename`
Parse a PCAP file and print its dissection for testing. This helps to verify whether there are any unknown fields
that need to be added to validation.messageParser.ParsingConstants.
Before starting to validate/use FMS with a new PCAP, first run this check and solve any errors.
Expand Down
78 changes: 78 additions & 0 deletions eval-nemeftr-clustering-iterateeps.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
#!/usr/bin/env bash
#
# NEMEFTR-full mode 1:
# Clustering of segments on similarity without ground truth.

# Glob selecting the PCAP traces to evaluate. It is stored as a string and
# expanded later by using ${input} unquoted.
input="input/maxdiff-fromOrig/*-100*.pcap"


# Segmenters to run (space-separated list).
segmenters="nemesys"

# Nemesys options: refinement methods to iterate over (space-separated list).
refines="none original nemetyl"

# Space-separated path patterns. Traces matching L2PROTOS are run with "-l 2"
# instead of "-r"; traces matching LEPROTOS additionally get "-e".
# NOTE(review): the patterns of the form input/<name>-* cannot match traces
# below input/maxdiff-fromOrig/ (only input/*/smb* can) - confirm that this
# is intended for the current ${input}.
L2PROTOS="input/awdl-* input/wlan-beacons-*"
LEPROTOS="input/awdl-* input/wlan-beacons-* input/smb* input/*/smb*"

# Name prefix of the report folders created by this script.
prefix="cft"

# Run number used when no earlier report folder with this prefix exists.
cftnpad="245"

#######################################
# Print the run number for the next report folder.
# Scans <dir>/<prefix>-<digits>-* entries, takes the numerically largest run
# number and adds one. Entries that do not follow this naming are ignored.
# Arguments:
#   $1 - directory containing the report folders
#   $2 - report name prefix
#   $3 - value to print if no numbered report folder exists yet
# Outputs:
#   The run number on stdout, zero-padded to at least three digits
#   (or $3 unchanged if nothing was found).
#######################################
next_report_number() {
  local dir=$1 pfx=$2 fallback=$3
  local entry name num max=-1
  for entry in "${dir}/${pfx}"-* ; do
    # An unmatched glob stays a literal pattern; skip it.
    [[ -e "${entry}" ]] || continue
    name=${entry##*/}
    if [[ "${name}" =~ ^"${pfx}"-([0-9]+)- ]] ; then
      # 10# forces base 10 so that zero-padded numbers are not read as octal.
      num=$((10#${BASH_REMATCH[1]}))
      if (( num > max )) ; then
        max=${num}
      fi
    fi
  done
  if (( max < 0 )) ; then
    printf '%s\n' "${fallback}"
  else
    printf '%03d\n' "$((max + 1))"
  fi
}

cftnpad=$(next_report_number reports "${prefix}" "${cftnpad}")
# The abbreviated hash of the current commit becomes part of the folder name.
currcomm=$(git log -1 --format="%h")
report=reports/${prefix}-${cftnpad}-clustering-${currcomm}
# -p also creates reports/ itself on a fresh checkout.
mkdir -p -- "${report}"


# Split the pattern lists on whitespace only. Looping over the unquoted
# variables would additionally pathname-expand every pattern against the
# working directory before it is used for matching.
read -r -a l2_patterns <<< "${L2PROTOS}"
read -r -a le_patterns <<< "${LEPROTOS}"

for seg in ${segmenters} ; do
  for ref in ${refines} ; do
    # The zeros segmenter is only combined with the refinements listed here.
    if [[ ${seg} == "zeros" ]] && [[ ! ${ref} =~ ^(none|PCA1|PCAmocoSF)$ ]] ; then
      echo "${ref} not suited for zeros segmenter. Ignoring."
      continue
    fi

    # Start one analysis per trace in the background and remember the PIDs.
    pids=()
    # ${input} is intentionally unquoted so that the glob expands here.
    for fn in ${input} ; do
      # An unmatched glob is left as a literal pattern; never process that.
      [[ -e "${fn}" ]] || continue

      optargs=(-r)
      for proto in "${l2_patterns[@]}" ; do
        # ${proto} stays unquoted on the right-hand side to act as a pattern.
        if [[ "${fn}" == ${proto} ]] ; then
          # replace
          optargs=(-l 2)
        fi
      done
      for proto in "${le_patterns[@]}" ; do
        if [[ "${fn}" == ${proto} ]] ; then
          # append
          optargs+=(-e)  # -e: little endian
          break          # one -e is enough, even if several patterns match
        fi
      done

      logname=${fn##*/}
      logname=${logname%.pcap}
      # fixed sigma 1.2 (nemeftr-paper: "constant σ of 1.2")
      python src/nemeftr_cluster-segments_iterate-eps.py -t "${seg}" -s 1.2 -p "${optargs[@]}" -f "${ref}" "${fn}" >> "${report}/${logname}.log" &
      pids+=( "$!" )
    done

    # Barrier: wait for all analyses of this segmenter/refinement combination
    # and report jobs that ended with a non-zero exit status.
    for pid in "${pids[@]}" ; do
      printf 'Waiting for %d...' "${pid}"
      status=0
      wait "${pid}" || status=$?
      if (( status == 0 )) ; then
        echo 'done.'
      else
        echo "failed with exit status ${status}."
      fi
    done

    # Collect everything in reports/ whose name starts with a trace name into
    # a folder specific to this segmenter/refinement combination.
    mkdir -p -- "${report}-${seg}-${ref}"
    # mv reports/*.pdf ${report}-${seg}-${ref}/
    for fn in ${input} ; do
      [[ -e "${fn}" ]] || continue
      bn=${fn##*/}
      bn=${bn%.pcap}
      mv -- reports/"${bn}"* "${report}-${seg}-${ref}/"
    done
  done
done

# Run the statistics transformation, then move the CSV files found in
# reports/ into the report folder of this run.
python src/transform_cluster-statistics.py
mv -- reports/*.csv "${report}/"

# Audible notification that the run is finished ("Bin fertig!" is German for
# "I'm done!"); skipped when speech-dispatcher is not installed.
if command -v spd-say > /dev/null ; then
  spd-say "Bin fertig!"
fi
89 changes: 89 additions & 0 deletions eval-nemeftr-clustering.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
#!/usr/bin/env bash
#
# NEMEFTR-full mode 1:
# Clustering of segments on similarity without ground truth.

# Glob selecting the PCAP traces to evaluate. It is stored as a string and
# expanded later by using ${input} unquoted.
input="input/maxdiff-fromOrig/*-100*.pcap"
#input="input/maxdiff-fromOrig/ntp_SMIA-20111010_maxdiff-100.pcap"


# Segmenters to run (space-separated list).
segmenters="nemesys"

# Nemesys options: refinement methods to iterate over (space-separated list).
refines="original nemetyl"


# Space-separated path patterns. Traces matching L2PROTOS are run with "-l 2"
# instead of "-r"; traces matching LEPROTOS additionally get "-e".
# NOTE(review): the patterns of the form input/<name>-* cannot match traces
# below input/maxdiff-fromOrig/ (only input/*/smb* can) - confirm that this
# is intended for the current ${input}.
L2PROTOS="input/awdl-* input/au-* input/wlan-beacons-*"
LEPROTOS="input/awdl-* input/au-* input/smb* input/*/smb* input/wlan-beacons-*"

# Name prefix of the report folders created by this script.
prefix="cft"

# Run number used when no earlier report folder with this prefix exists.
cftnpad="352"

#######################################
# Print the run number for the next report folder.
# Scans <dir>/<prefix>-<digits>-* entries, takes the numerically largest run
# number and adds one. Entries that do not follow this naming are ignored.
# Arguments:
#   $1 - directory containing the report folders
#   $2 - report name prefix
#   $3 - value to print if no numbered report folder exists yet
# Outputs:
#   The run number on stdout, zero-padded to at least three digits
#   (or $3 unchanged if nothing was found).
#######################################
next_report_number() {
  local dir=$1 pfx=$2 fallback=$3
  local entry name num max=-1
  for entry in "${dir}/${pfx}"-* ; do
    # An unmatched glob stays a literal pattern; skip it.
    [[ -e "${entry}" ]] || continue
    name=${entry##*/}
    if [[ "${name}" =~ ^"${pfx}"-([0-9]+)- ]] ; then
      # 10# forces base 10 so that zero-padded numbers are not read as octal.
      num=$((10#${BASH_REMATCH[1]}))
      if (( num > max )) ; then
        max=${num}
      fi
    fi
  done
  if (( max < 0 )) ; then
    printf '%s\n' "${fallback}"
  else
    printf '%03d\n' "$((max + 1))"
  fi
}

cftnpad=$(next_report_number reports "${prefix}" "${cftnpad}")
# The abbreviated hash of the current commit becomes part of the folder name.
currcomm=$(git log -1 --format="%h")
report=reports/${prefix}-${cftnpad}-clustering-${currcomm}
# -p also creates reports/ itself on a fresh checkout.
mkdir -p -- "${report}"


# Split the pattern lists on whitespace only. Looping over the unquoted
# variables would additionally pathname-expand every pattern against the
# working directory before it is used for matching.
read -r -a l2_patterns <<< "${L2PROTOS}"
read -r -a le_patterns <<< "${LEPROTOS}"

for seg in ${segmenters} ; do
  for ref in ${refines} ; do
    # The zeros segmenter is only combined with the refinements listed here.
    if [[ ${seg} == "zeros" ]] && [[ ! ${ref} =~ ^(none|PCA1|PCAmocoSF)$ ]] ; then
      echo "${ref} not suited for zeros segmenter. Ignoring."
      continue
    fi

    # PIDs of background analyses. Stays empty while the analysis below runs
    # in the foreground; see the note at the python call.
    pids=()
    # ${input} is intentionally unquoted so that the glob expands here.
    for fn in ${input} ; do
      # An unmatched glob is left as a literal pattern; never process that.
      [[ -e "${fn}" ]] || continue

      optargs=(-r)
      for proto in "${l2_patterns[@]}" ; do
        # ${proto} stays unquoted on the right-hand side to act as a pattern.
        if [[ "${fn}" == ${proto} ]] ; then
          # replace
          optargs=(-l 2)
        fi
      done
      for proto in "${le_patterns[@]}" ; do
        if [[ "${fn}" == ${proto} ]] ; then
          # append
          optargs+=(-e)  # -e: little endian
          break          # one -e is enough, even if several patterns match
        fi
      done
      bn=${fn##*/}
      strippedname="${bn%.*}"

      # fixed sigma 1.2 (nemeftr-paper: "constant σ of 1.2") ### add -p for plots
      # Runs in the foreground. To run the traces in parallel with one log per
      # trace, append
      #   >> "${report}/${strippedname}.log" &
      # to the command and record the job with: pids+=( "$!" )
      python src/nemeftr_cluster-segments.py -pt "${seg}" -s 1.2 "${optargs[@]}" -f "${ref}" "${fn}"
      # python src/nemeftr_cluster-segments.py -t ${seg} -s 1.2 -p -e ${optargs} -f ${ref} ${fn}

      # dynamic sigma:
      # python src/nemeftr_cluster-segments.py -p -f ${ref} ${fn}
    done

    # Barrier for background analyses; a no-op while pids is empty.
    for pid in "${pids[@]}" ; do
      printf 'Waiting for %d...' "${pid}"
      status=0
      wait "${pid}" || status=$?
      if (( status == 0 )) ; then
        echo 'done.'
      else
        echo "failed with exit status ${status}."
      fi
    done

    # Collect everything in reports/ whose name starts with a trace name into
    # a folder specific to this segmenter/refinement combination.
    mkdir -p -- "${report}-${seg}-${ref}"
    for fn in ${input} ; do
      [[ -e "${fn}" ]] || continue
      bn=${fn##*/}
      strippedname="${bn%.*}"
      mv -- reports/"${strippedname}"* "${report}-${seg}-${ref}/"
    done
  done
done

# Run the statistics transformation, then move the CSV files found in
# reports/ into the report folder of this run.
python src/transform_cluster-statistics.py
mv -- reports/*.csv "${report}/"

# Audible notification that the run is finished ("Bin fertig!" is German for
# "I'm done!"); skipped when speech-dispatcher is not installed.
if command -v spd-say > /dev/null ; then
  spd-say "Bin fertig!"
fi


49 changes: 49 additions & 0 deletions eval-nemeftr-truefield-iterateeps.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
#!/usr/bin/env bash
#
# NEMEFTR: Optimal-segmentation baseline

# Glob selecting the PCAP traces to evaluate. It is stored as a string and
# expanded later by using ${input} unquoted.
input="input/maxdiff-fromOrig/*-100*.pcap"


# Space-separated path patterns of traces that are run with "-l 2" instead
# of "-r".
# NOTE(review): these patterns cannot match traces below
# input/maxdiff-fromOrig/ - confirm that this is intended for the current
# ${input}.
L2PROTOS="input/awdl-* input/au-* input/wlan-beacons-*"

# Name prefix of the report folders created by this script.
prefix="tft"

# Run number used when no earlier report folder with this prefix exists.
numpad="200"

#######################################
# Print the run number for the next report folder.
# Scans <dir>/<prefix>-<digits>-* entries, takes the numerically largest run
# number and adds one. Entries that do not follow this naming are ignored.
# Arguments:
#   $1 - directory containing the report folders
#   $2 - report name prefix
#   $3 - value to print if no numbered report folder exists yet
# Outputs:
#   The run number on stdout, zero-padded to at least three digits
#   (or $3 unchanged if nothing was found).
#######################################
next_report_number() {
  local dir=$1 pfx=$2 fallback=$3
  local entry name num max=-1
  for entry in "${dir}/${pfx}"-* ; do
    # An unmatched glob stays a literal pattern; skip it.
    [[ -e "${entry}" ]] || continue
    name=${entry##*/}
    if [[ "${name}" =~ ^"${pfx}"-([0-9]+)- ]] ; then
      # 10# forces base 10 so that zero-padded numbers are not read as octal.
      num=$((10#${BASH_REMATCH[1]}))
      if (( num > max )) ; then
        max=${num}
      fi
    fi
  done
  if (( max < 0 )) ; then
    printf '%s\n' "${fallback}"
  else
    printf '%03d\n' "$((max + 1))"
  fi
}

numpad=$(next_report_number reports "${prefix}" "${numpad}")
# The abbreviated hash of the current commit becomes part of the folder name.
currcomm=$(git log -1 --format="%h")
report=reports/${prefix}-${numpad}-clustering-${currcomm}
# -p also creates reports/ itself on a fresh checkout.
mkdir -p -- "${report}"


# Split the pattern list on whitespace only. Looping over the unquoted
# variable would additionally pathname-expand every pattern against the
# working directory before it is used for matching.
read -r -a l2_patterns <<< "${L2PROTOS}"

# Analyse the traces one after the other.
# ${input} is intentionally unquoted so that the glob expands here.
for fn in ${input} ; do
  # An unmatched glob is left as a literal pattern; never process that.
  [[ -e "${fn}" ]] || continue

  # relative to IP layer
  optargs=(-r)
  for proto in "${l2_patterns[@]}" ; do
    # ${proto} stays unquoted on the right-hand side to act as a pattern.
    if [[ "${fn}" == ${proto} ]] ; then
      # replace
      optargs=(-l 2)
    fi
  done

  python src/nemeftr_cluster-true-fields_iterate-eps.py "${optargs[@]}" "${fn}"
done


# Move the CSV and PDF files found in reports/ into the report folder of
# this run.
mv -- reports/*.csv "${report}/"
mv -- reports/*.pdf "${report}/"


# Audible notification that the run is finished ("Bin fertig!" is German for
# "I'm done!"); skipped when speech-dispatcher is not installed.
if command -v spd-say > /dev/null ; then
  spd-say "Bin fertig!"
fi
64 changes: 64 additions & 0 deletions eval-nemeftr-truefield.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#!/usr/bin/env bash
#
# NEMEFTR: Optimal-segmentation baseline

# Glob selecting the PCAP traces to evaluate. It is stored as a string and
# expanded later by using ${input} unquoted. Alternatives are kept commented.
#input=input/*-100*.pcap
#input=input/*-1000.pcap
input="input/maxdiff-fromOrig/*-100*.pcap"
#input="input/maxdiff-fromOrig/ntp_SMIA-20111010_maxdiff-100.pcap"


# Space-separated path patterns of traces that are run with "-l 1" (L1PROTOS)
# or "-l 2" (L2PROTOS) instead of "-r".
# NOTE(review): these patterns cannot match traces below
# input/maxdiff-fromOrig/ - confirm that this is intended for the current
# ${input}.
L1PROTOS="input/ari_*"
L2PROTOS="input/awdl-* input/au-* input/wlan-beacons-*"

# Name prefix of the report folders created by this script.
prefix="tft"

# Run number used when no earlier report folder with this prefix exists.
numpad="350"

#######################################
# Print the run number for the next report folder.
# Scans <dir>/<prefix>-<digits>-* entries, takes the numerically largest run
# number and adds one. Entries that do not follow this naming are ignored.
# Arguments:
#   $1 - directory containing the report folders
#   $2 - report name prefix
#   $3 - value to print if no numbered report folder exists yet
# Outputs:
#   The run number on stdout, zero-padded to at least three digits
#   (or $3 unchanged if nothing was found).
#######################################
next_report_number() {
  local dir=$1 pfx=$2 fallback=$3
  local entry name num max=-1
  for entry in "${dir}/${pfx}"-* ; do
    # An unmatched glob stays a literal pattern; skip it.
    [[ -e "${entry}" ]] || continue
    name=${entry##*/}
    if [[ "${name}" =~ ^"${pfx}"-([0-9]+)- ]] ; then
      # 10# forces base 10 so that zero-padded numbers are not read as octal.
      num=$((10#${BASH_REMATCH[1]}))
      if (( num > max )) ; then
        max=${num}
      fi
    fi
  done
  if (( max < 0 )) ; then
    printf '%s\n' "${fallback}"
  else
    printf '%03d\n' "$((max + 1))"
  fi
}

numpad=$(next_report_number reports "${prefix}" "${numpad}")
# The abbreviated hash of the current commit becomes part of the folder name.
currcomm=$(git log -1 --format="%h")
report=reports/${prefix}-${numpad}-clustering-${currcomm}
# -p also creates reports/ itself on a fresh checkout.
mkdir -p -- "${report}"


# Split the pattern lists on whitespace only. Looping over the unquoted
# variables would additionally pathname-expand every pattern against the
# working directory before it is used for matching.
read -r -a l1_patterns <<< "${L1PROTOS}"
read -r -a l2_patterns <<< "${L2PROTOS}"

# Analyse the traces one after the other.
# ${input} is intentionally unquoted so that the glob expands here.
for fn in ${input} ; do
  # An unmatched glob is left as a literal pattern; never process that.
  [[ -e "${fn}" ]] || continue

  # relative to IP layer
  optargs=(-r)
  for proto in "${l2_patterns[@]}" ; do
    # ${proto} stays unquoted on the right-hand side to act as a pattern.
    if [[ "${fn}" == ${proto} ]] ; then
      # replace
      optargs=(-l 2)
    fi
  done
  # Checked after the layer-2 patterns, so a layer-1 match takes precedence.
  for proto in "${l1_patterns[@]}" ; do
    if [[ "${fn}" == ${proto} ]] ; then
      # replace
      optargs=(-l 1)
    fi
  done

  # add -p to write plots
  python src/nemeftr_cluster-true-fields.py "${optargs[@]}" "${fn}"
done


# Move the per-trace folder reports/<trace name without extension>/ of every
# trace into the report folder of this run.
for fn in ${input} ; do
  [[ -e "${fn}" ]] || continue
  bn=${fn##*/}
  strippedname="${bn%.*}"
  mv -- "reports/${strippedname}/" "${report}/"
done
# Move the CSV and PDF files found in reports/ as well.
mv -- reports/*.csv "${report}/"
mv -- reports/*.pdf "${report}/"


# Audible notification that the run is finished ("Bin fertig!" is German for
# "I'm done!"); skipped when speech-dispatcher is not installed.
if command -v spd-say > /dev/null ; then
  spd-say "Bin fertig!"
fi
Loading

0 comments on commit 545fac1

Please sign in to comment.