Add ccds setup
jmuecke committed Oct 11, 2024
1 parent 9f2f654 commit 00be25a
Showing 183 changed files with 85,497 additions and 6 deletions.
127 changes: 127 additions & 0 deletions 05-instant-ack-ccds/Makefile
@@ -0,0 +1,127 @@
#################################################################################
# GLOBALS #
#################################################################################

PROJECT_NAME = instant-ack
PYTHON_VERSION = 3.10
PYTHON_INTERPRETER = python

#################################################################################
# COMMANDS #
#################################################################################


## Install Python Dependencies
.PHONY: requirements
requirements:
$(PYTHON_INTERPRETER) -m pip install -U pip
$(PYTHON_INTERPRETER) -m pip install -r requirements.txt

## Delete all compiled Python files
.PHONY: clean
clean:
find . -type f -name "*.py[co]" -delete
find . -type d -name "__pycache__" -delete

## Lint using flake8 and black (use `make format` to do formatting)
.PHONY: lint
lint:
flake8 instant_ack
isort --check --diff --profile black instant_ack
black --check --config pyproject.toml instant_ack

## Format source code with black
.PHONY: format
format:
black --config pyproject.toml instant_ack

## Convert notebooks to html
notebooks=$(shell ls notebooks/0*/*.ipynb)
notebooks_html:=$(subst .ipynb,.html,$(notebooks))
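# The pattern rule below renders each notebooks/0*/<name>.ipynb to <name>.html
# alongside it, e.g. notebooks/01-numerical_analysis/foo.ipynb -> foo.html
# (filename illustrative).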

%.html: %.ipynb
jupyter nbconvert $(NBCONVERT_PARAMS) --to html $<

nbconvert: $(notebooks_html)

nbconvert-execute: NBCONVERT_PARAMS=--execute
nbconvert-execute: $(notebooks_html)

python_env:
python -m venv .venv && . .venv/bin/activate && pip install -r requirements.txt

#################################################################################
# PROJECT RULES #
#################################################################################


## Make Dataset
.PHONY: data
data: requirements cloudflare toplist qlog interop interop-servers pyasn
$(PYTHON_INTERPRETER) instant_ack/dataset.py

clean_cloudflare:
$(PYTHON_INTERPRETER) instant_ack/dataset.py clean-cloudflare

cloudflare:
$(PYTHON_INTERPRETER) instant_ack/dataset.py cloudflare

toplist:
$(PYTHON_INTERPRETER) instant_ack/dataset.py toplist --refresh

clean_toplist:
$(PYTHON_INTERPRETER) instant_ack/dataset.py clean-toplist

interop:
$(PYTHON_INTERPRETER) instant_ack/dataset.py interop

clean_interop:
$(PYTHON_INTERPRETER) instant_ack/dataset.py clean-interop

interop-servers:
$(PYTHON_INTERPRETER) instant_ack/dataset.py interop-servers --refresh

clean_interop-servers:
$(PYTHON_INTERPRETER) instant_ack/dataset.py clean-interop-servers

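# Build the pyasn IP-to-ASN lookup database from the MRT RIB dump
# (pyasn_util_convert.py ships with the pyasn package).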
pyasn:
pyasn_util_convert.py --single data/raw/rib.20240807.0600.bz2 data/interim/rib.20240807.0600.pyasn

## Extract and transform qlog in raw data
MAKEFILE_FOR_EXTRACTION:=$(CURDIR)/data/raw/Makefile

INTEROP_FOLDERS:=$(shell ls --quoting-style=shell-always -d ~+/data/raw/interop-runner/*/logs_delay=*)
INTEROP_FOLDERS_SELECTION:=$(shell ls --quoting-style=shell-always -d ~+/data/raw/interop-runner/*/logs_delay=*$(SELECTION)*)
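# GNU parallel processes three interop log folders at a time (-j3); inside each
# folder the shared extraction Makefile runs with -j 6, and --joblog interop.jl
# records per-folder exit codes so failed runs can be inspected.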
qlog:
-rm interop.jl
parallel -j3 --bar --joblog interop.jl 'cd {} && make -j 6 -f $(MAKEFILE_FOR_EXTRACTION) qlog_extraction' ::: $(INTEROP_FOLDERS)

qlog_selection:
-rm interop.jl
parallel -j3 --bar --joblog interop.jl 'cd {} && make -j 6 -f $(MAKEFILE_FOR_EXTRACTION) qlog_extraction' ::: $(INTEROP_FOLDERS_SELECTION)

clean_qlog:
-rm interop.jl
parallel -j20 --bar --joblog interop.jl 'cd {} && make -f $(MAKEFILE_FOR_EXTRACTION) clean_qlog_extraction' ::: $(INTEROP_FOLDERS)

clean_qlog_selection:
-rm interop.jl
parallel -j20 --bar --joblog interop.jl 'cd {} && make -f $(MAKEFILE_FOR_EXTRACTION) clean_qlog_extraction' ::: $(INTEROP_FOLDERS_SELECTION)


#################################################################################
# Self Documenting Commands #
#################################################################################

.DEFAULT_GOAL := help

define PRINT_HELP_PYSCRIPT
import re, sys; \
lines = '\n'.join([line for line in sys.stdin]); \
matches = re.findall(r'\n## (.*)\n[\s\S]+?\n([a-zA-Z_-]+):', lines); \
print('Available rules:\n'); \
print('\n'.join(['{:25}{}'.format(*reversed(match)) for match in matches]))
endef
export PRINT_HELP_PYSCRIPT

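# `make help` (the default goal) feeds this Makefile through the snippet above,
# pairing each `## description` comment with the rule name that follows it.
# Illustrative (abridged) output:
#   requirements             Install Python Dependencies
#   clean                    Delete all compiled Python files
#   format                   Format source code with black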
help:
@$(PYTHON_INTERPRETER) -c "${PRINT_HELP_PYSCRIPT}" < $(MAKEFILE_LIST)
84 changes: 84 additions & 0 deletions 05-instant-ack-ccds/README.md
@@ -0,0 +1,84 @@
# instant-ack

<a target="_blank" href="https://cookiecutter-data-science.drivendata.org/">
<img src="https://img.shields.io/badge/CCDS-Project%20template-328F97?logo=cookiecutter" />
</a>

Analysis of instant ACK in QUIC

## Project Organization

```
├── Makefile <- Makefile with convenience commands like `make data`. See section below.
├── README.md <- The top-level README for developers using this project.
├── data
│ ├── interim <- Data that has been transformed into parquet files.
│ └── raw <- The original data and slightly preprocessed data.
├── notebooks <- Jupyter notebooks. Structured by topic.
│ ├── 01-numerical_analysis/ <- Numerical analysis.
│ ├── 02-interop-runner/ <- Analysis of QIR emulations, i.e., TTFB, First PTO improvement, # of RTT samples.
│ ├── 03-toplist/ <- Analysis of QScanner connections to Tranco Top 1M.
│ ├── 04-cloudflare/ <- Analysis of Cloudflare-hosted, otherwise unused domains.
│ └── 05-all-interop-servers/ <- Analysis of ACK delay in QUIC server implementations of public QIR.
├── pyproject.toml <- Project configuration file with package metadata for instant_ack
│ and configuration for tools like black
├── reports
│ └── figures <- Generated graphics and figures to be used in reporting
├── requirements.txt <- The requirements file for reproducing the analysis environment, e.g.
│ generated with `pip freeze > requirements.txt`
├── setup.cfg <- Configuration file for flake8
└── instant_ack <- Source code for use in this project.
├── __init__.py <- Makes instant_ack a Python module
├── data/ <- Scripts to preprocess, load, store and validate data.
├── dataset.py <- Scripts to transform raw data into parquet files.
└── visualization/ <- Scripts to create exploratory and results-oriented visualizations.
```

--------

## Requirements
```
python3.10
tshark 4.0.15
```

## Makefile
> [!IMPORTANT]
> Make sure you downloaded the required data from: [https://doi.org/10.25532/OPARA-615](https://doi.org/10.25532/OPARA-615).
> Use `tar -xvf $FILENAME` and, for compressed files, `tar -xvzf $FILENAME`.

The Makefile is provided for convenience; the following commands are available:
```
# Python env
make python_env # Example of python env creation, use source .venv/bin/activate to activate the python environment.
# Data preprocessing
make cloudflare # Preprocess Cloudflare hosted domain interactions.
# -> requires raw-cloudflare.tar (140 GB) extracted into data/raw
make toplist # Preprocess data from Tranco Top 1M QUIC connection attempts.
# -> requires raw-toplist.tar (182 GB) extracted into data/raw
make interop # Preprocess data from QIR emulations (run `make qlog` first).
# -> requires raw-interop-runner.tar.gz (200 GB) extracted into data/raw
make interop-servers # Preprocess data from public QIR.
make qlog # Preprocess qlog files.
# -> requires raw-interop-runner.tar (200 GB) extracted into data/raw
make pyasn # Preprocess RIB dump
# -> requires rib.20240807.0600.bz2 (98 MB) extracted into data/raw
make data # Wrapper for all above.
# Reproducing jupyter notebook output
# -> requires either extraction of interim-qlog.tar (572 MB), interim-cloudflare.tar (43 GB), all-interop-servers.pq.zst (300 KB)
#    and download of rib.20240807.0600.pyasn (22 MB), toplist.pq.zst (3 GB) into data/interim,
#    or data preprocessing as described above
make nbconvert # Convert jupyter notebooks to HTML.
make nbconvert-execute # Convert jupyter notebooks to HTML, executing them first.
```
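For example, reproducing the toplist analysis end to end could look like the following sketch (illustrative; substitute the archives needed for the analyses you want to reproduce, and adjust extraction paths so the contents land in `data/raw`):

```
# download raw-toplist.tar from https://doi.org/10.25532/OPARA-615 beforehand
tar -xvf raw-toplist.tar -C data/raw   # use tar -xvzf for .tar.gz archives
make python_env                        # create .venv with pinned dependencies
source .venv/bin/activate
make toplist                           # preprocess raw data into data/interim
make nbconvert-execute                 # re-run notebooks and export HTML
```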


174 changes: 174 additions & 0 deletions 05-instant-ack-ccds/data/raw/Makefile
@@ -0,0 +1,174 @@
### PCAP processing for verification

PCAPS:=$(shell ls */*/*/sim/*.pcap | sort)
PCAPS_CSV:=$(subst .pcap,.pcap.csv,$(PCAPS))

### Existing extracted files
FIND_QLOG_EXTRACTED:=$(shell ls */*/*/client/qlog/*.qlog.extracted | sort)
### QLOG in incorrect folder
CLIENT_QLOG_OTHER_FOLDER:=$(shell ls */*/*/client/*.qlog | sort )
CLIENT_QLOG_OTHER_MOVED:=$(subst /client/,/client/qlog/,$(CLIENT_QLOG_OTHER_FOLDER))

### QLOG with different file extensions
CLIENT_SQLOG:=$(shell ls */*/*/client/qlog/*.sqlog | sort)
CLIENT_SQLOG_RENAMED:=$(patsubst %.sqlog,%.qlog,$(CLIENT_SQLOG))

### Chrome netlog to qlog
CLIENT_NETLOG=$(shell ls */*/*/client/chrome.json | sort)
CLIENT_NETLOG_CONVERTED=$(subst .json,.qlog,$(subst /client/,/client/qlog/,$(CLIENT_NETLOG)))


### All qlog files
CLIENT_QLOG:=$(shell ls */*/*/client/qlog/*.qlog | sort)
CLIENT_QLOG_EXTRACTED:=$(subst .qlog,.qlog.extracted,$(CLIENT_QLOG) $(CLIENT_QLOG_OTHER_MOVED) $(CLIENT_SQLOG_RENAMED))

SERVER=quic-go-instant-ack

echo:
echo $(PCAPS)
echo $(CLIENT_QLOG)
echo $(CLIENT_QLOG_EXTRACTED)


all: tshark_pcaps qlog_extraction

clean: clean_tshark_pcaps clean_qlog_extraction

clean_tshark_pcaps:
rm -f $(PCAPS_CSV)

clean_qlog_extraction:
rm -f $(FIND_QLOG_EXTRACTED)

debug: debug_pcaps debug_pcaps_csv debug_client_qlog debug_client_qlog_extracted debug_client_sqlog_renamed
debug_pcaps:
echo "########## PCAPS ##########"
echo $(PCAPS)
debug_pcaps_csv:
echo "########## PCAPS_CSV ##########"
echo $(PCAPS_CSV)
debug_client_qlog:
echo "########## QLOG ##########"
echo $(CLIENT_QLOG)
debug_client_qlog_extracted:
echo "########## QLOG_EXTRACTED ##########"
echo $(CLIENT_QLOG_EXTRACTED)
debug_client_sqlog_renamed:
echo "########## QLOG_RENAMED ##########"
echo $(CLIENT_SQLOG_RENAMED)

qlog_extraction: netlog_to_qlog qlog_move $(CLIENT_QLOG_EXTRACTED)

qlog_move: $(CLIENT_SQLOG_RENAMED) $(CLIENT_QLOG_OTHER_MOVED)

%.qlog: %.sqlog
mv "$<" "$@"

netlog_to_qlog: $(CLIENT_NETLOG_CONVERTED)

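# Convert Chrome netlogs (client/chrome.json) into qlog next to the other qlog
# files: the recipe bind-mounts the qlog directory and the netlog into the
# netlog-to-qlog container; the leading '-' lets make continue past failed
# conversions.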
$(CLIENT_NETLOG_CONVERTED): SOURCE=$(subst .qlog,.json,$(subst /client/qlog/,/client/,$@))
$(CLIENT_NETLOG_CONVERTED): QLOG_DIR=$(subst chrome.qlog,,$@)
$(CLIENT_NETLOG_CONVERTED):
-docker run --rm --mount "type=bind,\"source=$${PWD}/$(QLOG_DIR)\",destination=/data" --mount "type=bind,\"source=$${PWD}/$(SOURCE)\",destination=/data/netlog.netlog" netlog-to-qlog

$(CLIENT_QLOG_OTHER_MOVED):
mv "$(subst /client/qlog/,/client/,$@)" "$@"

tshark_pcaps: $(PCAPS_CSV)

# convert to csv with tshark
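# pick the matching TLS keylog: sim/trace_node_left.pcap -> server/keys.log,
# sim/trace_node_right.pcap -> client/keys.log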
%.pcap.csv: keyfile=$(subst sim/trace_node_right.pcap,client/keys.log,$(subst sim/trace_node_left.pcap,server/keys.log,$<))
%.pcap.csv: %.pcap
tshark -2 -d udp.port==443,quic -Y "quic" -r "$<" -T fields \
-o tls.keylog_file:$(keyfile)\
-e _ws.col.Time -t ud \
-e ip.src -e ip.dst \
-e ipv6.src -e ipv6.dst \
-e ip.len \
-e ipv6.plen \
-e udp.srcport -e udp.dstport \
-e quic.long.packet_type \
-e quic.long.packet_type_v2 \
-e quic.header_form \
-e quic.scid -e quic.dcid \
-e quic.packet_number \
-e quic.frame \
-e quic.ack.largest_acknowledged \
-e quic.ack.ack_delay \
-e quic.version -e quic.token_length \
-e quic.frame_type -e tls.handshake.type -e tls.handshake.extensions_server_name \
-e quic.retry_token \
-e quic.decryption_failed \
-e _ws.expert \
-E separator=\| > "$@.tmp"
mv "$@.tmp" "$@"

# Filter for specific events
EVENT_FILTER=.name == "recovery:metric_update" or .name == "recovery:metrics_updated" or .name == "transport:packet_sent" or .name == "transport:packet_received" or .name == "recovery:congestion_state_updated" or .name == "transport:parameters_set" or .name == "security:key_updated" or .name == "security:key_discarded" or .name == "recovery:loss_timer_updated" or .name == "recovery:ecn_state_updated" or .name == "connectivity:spin_bit_updated"
# flatten frame information
FRAME_INFO=["frame_type", "length", "id", "offset", "ack_delay", "reset_token", "connection_id", "retire_prior_to", "sequence_number", "acked_ranges", "fin", "stream_id", "maximum", "raw_error_code", "error_code", "error_space", "reason", "token"]
REMAP_FRAMES=map(del(.data.frames) +(.data.frames as $$frames| $(FRAME_INFO) as $$info | reduce ($$info|.[]) as $$k ({}; . +{"frame_\($$k)": [($$frames|.[]?[$$k]|tostring|sub("null"; ""; "g"))] | join(",")})))
# Flatten remaining information
FLATTEN_REMAINDER=map([leaf_paths as $$path | { "key": $$path | join("_"), "value": getpath($$path)}] | from_entries)
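# Illustrative effect of REMAP_FRAMES + FLATTEN_REMAINDER on one event
# (empty frame_* keys omitted):
#   {"time":1,"name":"transport:packet_sent",
#    "data":{"header":{"packet_type":"1RTT"},"frames":[{"frame_type":"ack","ack_delay":2}]}}
# becomes
#   {"time":1,"name":"transport:packet_sent","data_header_packet_type":"1RTT",
#    "frame_frame_type":"ack","frame_ack_delay":"2"}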

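# The jq invocations differ because each implementation wraps qlog differently:
# quiche/quic-go/go-x-net/ngtcp2 emit a stream of JSON records (hence --slurp),
# while neqo/picoquic/mvfst/chrome/aioquic emit one document with a .traces
# array. All rules converge on the same flattened JSON-lines output, which
# add-pto-info then annotates.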
$(SERVER)_quiche/%.qlog.extracted: $(SERVER)_quiche/%.qlog
cat "$<" | jq --slurp '[{meta: .[0], events: .[1:]}] | map(.events[] + del(.events)) | map(select($(EVENT_FILTER))) | $(REMAP_FRAMES) | $(FLATTEN_REMAINDER)' -c > $@.temp;
add-pto-info -i $@.temp -o $@.temp2
# Fix edge case where uint64 maximum value is interpreted as float64
sed 's/18446744073709552000/18446744073709551615/g' $@.temp2 > $@
rm -f $@.temp $@.temp2

# Same Qlog format
$(SERVER)_quic-go/%.qlog.extracted: $(SERVER)_quic-go/%.qlog
cat "$<" | jq --slurp '.' |jq 'map(select($(EVENT_FILTER))) | $(REMAP_FRAMES) | $(FLATTEN_REMAINDER)' -c > $@.temp;
add-pto-info -i $@.temp -o $@.temp2
# Fix edge case where uint64 maximum value is interpreted as float64
sed 's/18446744073709552000/18446744073709551615/g' $@.temp2 > $@
rm -f $@.temp $@.temp2

$(SERVER)_go-x-net/%.qlog.extracted: $(SERVER)_go-x-net/%.qlog
cat "$<" | jq --slurp '.' |jq 'map(select($(EVENT_FILTER))) | $(REMAP_FRAMES) | $(FLATTEN_REMAINDER)' -c > $@.temp;
add-pto-info -i $@.temp -o $@.temp2
# Fix edge case where uint64 maximum value is interpreted as float64
sed 's/18446744073709552000/18446744073709551615/g' $@.temp2 > $@
rm -f $@.temp $@.temp2

# Same Qlog format
$(SERVER)_neqo/%.qlog.extracted: $(SERVER)_neqo/%.qlog
cat "$<" | jq '.traces | to_entries | map_values(.value + {index: .key}) | map(.events[] + [del(.events)]) | map(.) | map({time: .[0], name: (.[1]+":"+.[2]), data: .[3]} + .[4])| map(.data.header.packet_type=.data.packet_type | del(.data.packet_type)) | map(select($(EVENT_FILTER))) | $(REMAP_FRAMES) | $(FLATTEN_REMAINDER)' -c > $@.temp;
add-pto-info -i $@.temp -o $@.temp2
# Fix edge case where uint64 maximum value is interpreted as float64
sed 's/18446744073709552000/18446744073709551615/g' $@.temp2 > $@
rm -f $@.temp $@.temp2

$(SERVER)_picoquic/%.qlog.extracted: $(SERVER)_picoquic/%.qlog
cat "$<" | jq '.traces | to_entries | map_values(.value + {index: .key}) | map(.events[] + [del(.events)]) | map(.) | map({time: .[0], name: (.[1]+":"+.[2]), data: .[3]} + .[4])| map(.data.header.packet_type=.data.packet_type | del(.data.packet_type)) | map(select($(EVENT_FILTER))) | $(REMAP_FRAMES) | $(FLATTEN_REMAINDER)' -c > $@.temp;
add-pto-info -i $@.temp -o $@.temp2
# Fix edge case where uint64 maximum value is interpreted as float64
sed 's/18446744073709552000/18446744073709551615/g' $@.temp2 > $@
rm -f $@.temp $@.temp2

$(SERVER)_mvfst/%.qlog.extracted: $(SERVER)_mvfst/%.qlog
cat "$<" | jq '.traces | to_entries | map_values(.value + {index: .key}) | map(.events[] + [del(.events)]) | map(.) | map({time: .[0], name: (.[1]+":"+.[2]), data: .[3]} + .[4])| map(.data.header.packet_type=.data.packet_type | del(.data.packet_type)) | map(select($(EVENT_FILTER))) | $(REMAP_FRAMES) | $(FLATTEN_REMAINDER)' -c > $@.temp;
add-pto-info -i $@.temp -o $@.temp2
# Fix edge case where uint64 maximum value is interpreted as float64
sed 's/18446744073709552000/18446744073709551615/g' $@.temp2 > $@
rm -f $@.temp $@.temp2

$(SERVER)_chrome/%.qlog.extracted: $(SERVER)_chrome/%.qlog
cat "$<" | jq '.traces | to_entries | map_values(.value + {index: .key}) | map(.events[] + [del(.events)]) | map(.) | map({time: .[0], name: (.[1]+":"+.[2]), data: .[3]} + .[4])| map(select($(EVENT_FILTER))) | map(.data.frames[]?.acked_ranges[]?[]? |= tonumber )| $(REMAP_FRAMES) | $(FLATTEN_REMAINDER)' -c > $@.temp;
add-pto-info -i $@.temp -o $@.temp2
# Fix edge case where uint64 maximum value is interpreted as float64
sed 's/18446744073709552000/18446744073709551615/g' $@.temp2 > $@
rm -f $@.temp $@.temp2

$(SERVER)_ngtcp2/%.qlog.extracted: $(SERVER)_ngtcp2/%.qlog
cat "$<" | jq --slurp '[{meta: .[0], events: .[1:]}] | map(.events[] + (.meta)) | map(.trace + del(.trace)) | map(select($(EVENT_FILTER))) | $(REMAP_FRAMES) | $(FLATTEN_REMAINDER)' -c > $@.temp;
add-pto-info -i $@.temp -o $@.temp2
# Fix edge case where uint64 maximum value is interpreted as float64
sed 's/18446744073709552000/18446744073709551615/g' $@.temp2 > $@
rm -f $@.temp $@.temp2

# aioquic
%.qlog.extracted: %.qlog
cat "$<" | jq '.traces | to_entries | map_values(.value + {index: .key}) | map(.events[] + del(.events)) | map(select($(EVENT_FILTER))) | $(REMAP_FRAMES) | $(FLATTEN_REMAINDER)' -c > $@.temp ;
add-pto-info -i $@.temp -o $@.temp2
# Fix edge case where uint64 maximum value is interpreted as float64
sed 's/18446744073709552000/18446744073709551615/g' $@.temp2 > $@
rm -f $@.temp $@.temp2
@@ -0,0 +1,4 @@
CLIENT_HANDSHAKE_TRAFFIC_SECRET fb6d359d0d184731468456dd11de2f3fb5ddc673277bec8c431853cd1b28b202 bbe47bea5c9c8109bdafcea8e1234e31d95dca47cad5005a7d5acff7cc643fcf25ee2fa22addf8dbb23ee5121f5787af
SERVER_HANDSHAKE_TRAFFIC_SECRET fb6d359d0d184731468456dd11de2f3fb5ddc673277bec8c431853cd1b28b202 d4272c0318eec4fa7993701c341bb8f92eb8f14d4b2e5fb6d0eca3fa7efdcfaaa3501a308ff72397bbbfe98e9a074dc6
CLIENT_TRAFFIC_SECRET_0 fb6d359d0d184731468456dd11de2f3fb5ddc673277bec8c431853cd1b28b202 654a732ca7db8b886e66f67e0ffaa2f8197963db332f1824bfe2b306f30c53d02f7bcac9013a39d7b47422064a9c7be5
SERVER_TRAFFIC_SECRET_0 fb6d359d0d184731468456dd11de2f3fb5ddc673277bec8c431853cd1b28b202 a591e96c577204389c42cdfb4983770b59e271608384a5623e337309a24cb366fd26b538d0795729b467480059d0e1a1