Skip to content

Commit

Permalink
artifacts for chain-of-trust paper
Browse files Browse the repository at this point in the history
  • Loading branch information
adamjanovsky committed May 1, 2024
1 parent a4aa019 commit d38b6d0
Show file tree
Hide file tree
Showing 57 changed files with 60,030 additions and 428 deletions.
320 changes: 320 additions & 0 deletions notebooks/cc/chain_of_trust_plots.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,320 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from pathlib import Path\n",
"\n",
"import matplotlib\n",
"import matplotlib.dates as mdates\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import pandas as pd\n",
"import seaborn as sns\n",
"from matplotlib import lines\n",
"from sklearn import metrics\n",
"\n",
"# LaTeX plotting\n",
"matplotlib.use(\"pgf\")\n",
"sns.set_palette(\"Set2\")\n",
"sns.set_context(\"paper\")\n",
"\n",
"plt.rcParams[\"pgf.texsystem\"] = \"pdflatex\"\n",
"plt.rcParams[\"font.family\"] = \"serif\"\n",
"plt.rcParams[\"text.usetex\"] = True\n",
"plt.rcParams[\"pgf.rcfonts\"] = False\n",
"\n",
"plt.rcParams[\"axes.linewidth\"] = 0.5\n",
"plt.rcParams[\"axes.labelsize\"] = 14\n",
"\n",
"plt.rcParams[\"xtick.labelsize\"] = 12\n",
"plt.rcParams[\"xtick.bottom\"] = True\n",
"plt.rcParams[\"xtick.major.size\"] = 5\n",
"plt.rcParams[\"xtick.major.width\"] = 0.5\n",
"plt.rcParams[\"xtick.major.pad\"] = 0.1\n",
"\n",
"plt.rcParams[\"ytick.labelsize\"] = 12\n",
"plt.rcParams[\"ytick.left\"] = True\n",
"plt.rcParams[\"ytick.major.size\"] = 5\n",
"plt.rcParams[\"ytick.major.width\"] = 0.5\n",
"plt.rcParams[\"ytick.major.pad\"] = 0.1\n",
"\n",
"plt.rcParams[\"legend.title_fontsize\"] = 12\n",
"plt.rcParams[\"legend.fontsize\"] = 12\n",
"plt.rcParams[\"legend.handletextpad\"] = 0.3\n",
"plt.rcParams[\"lines.markersize\"] = 0.5\n",
"plt.rcParams[\"savefig.pad_inches\"] = 0.01\n",
"\n",
"INPUT_DIR = Path(\"./paper_artifacts/chain_of_trust/data/plots/\")\n",
"OUTPUT_DIR = Path(\"./results/figures/\")\n",
"INPUT_DIR.mkdir(exist_ok=True, parents=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Average number of transitive references over time"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_to_plot = pd.read_csv(INPUT_DIR / \"avg_refs_over_time.csv\", parse_dates=[\"date\"])\n",
"df_to_plot[\"category\"] = df_to_plot[\"category\"].map(lambda x: \"others\" if x == \"others categories\" else x)\n",
"\n",
"plt.figure()\n",
"g = sns.lineplot(data=df_to_plot, x=\"date\", y=\"n_references\", hue=\"category\", errorbar=None)\n",
"plt.legend(frameon=True, handlelength=2, title=\"Product category\")\n",
"g.set_xlabel(\"\")\n",
"g.set_ylabel(\"Avg. \\# transitive refs.\")\n",
"\n",
"dtFmt = mdates.DateFormatter(\"%Y\")\n",
"g.xaxis.set_major_formatter(dtFmt)\n",
"g.set_xticks(\n",
" [\n",
" pd.to_datetime(\"1998-01-01\"),\n",
" pd.to_datetime(\"2003-01-01\"),\n",
" pd.to_datetime(\"2008-01-01\"),\n",
" pd.to_datetime(\"2013-01-01\"),\n",
" pd.to_datetime(\"2018-01-01\"),\n",
" pd.to_datetime(\"2023-01-01\"),\n",
" ]\n",
")\n",
"g.figure.set_size_inches(3.9, 3)\n",
"plt.tight_layout(pad=0.1)\n",
"g.figure.savefig(OUTPUT_DIR / \"lineplot_avg_refs.pdf\")\n",
"g.figure.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Average reach over time"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_to_plot = pd.read_csv(INPUT_DIR / \"avg_reach_over_time.csv\", parse_dates=[\"date\"])\n",
"\n",
"plt.figure()\n",
"g = sns.lineplot(data=df_to_plot, x=\"date\", y=\"n_references\", hue=\"category\", errorbar=None)\n",
"plt.legend(frameon=True, handlelength=2, title=\"Product category\")\n",
"g.set_xlabel(\"\")\n",
"g.set_ylabel(\"Average certificate reach\")\n",
"dtFmt = mdates.DateFormatter(\"%Y\")\n",
"g.xaxis.set_major_formatter(dtFmt)\n",
"g.set_xticks(\n",
" [\n",
" pd.to_datetime(\"1998-01-01\"),\n",
" pd.to_datetime(\"2003-01-01\"),\n",
" pd.to_datetime(\"2008-01-01\"),\n",
" pd.to_datetime(\"2013-01-01\"),\n",
" pd.to_datetime(\"2018-01-01\"),\n",
" pd.to_datetime(\"2023-01-01\"),\n",
" ]\n",
")\n",
"\n",
"g.figure.set_size_inches(3.9, 3)\n",
"plt.tight_layout(pad=0.1)\n",
"g.figure.savefig(OUTPUT_DIR / \"lineplot_avg_reach.pdf\")\n",
"g.figure.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Area under curve"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"plt.figure(figsize=(2.8, 1.8))\n",
"sns.set_palette(\"Set2\")\n",
"colors = plt.cm.Dark2(np.linspace(0, 1, 8))\n",
"\n",
"df_sent = pd.read_csv(INPUT_DIR / \"df_pred_sentence_transformers.csv\")\n",
"df_tf_idf = pd.read_csv(INPUT_DIR / \"df_pred_tf_idf.csv\")\n",
"df_baseline = pd.read_csv(INPUT_DIR / \"df_pred_baseline.csv\")\n",
"\n",
"fpr, tpr, thresholds = metrics.roc_curve(df_sent.y_true, df_sent.y_pred)\n",
"auc = metrics.roc_auc_score(df_sent.y_true, df_sent.y_pred)\n",
"plt.plot(fpr, tpr, label=f\"Sent. trans. (AUC={auc:.2f})\", color=colors[0])\n",
"\n",
"fpr, tpr, thresholds = metrics.roc_curve(df_tf_idf.y_true, df_tf_idf.y_pred)\n",
"auc = metrics.roc_auc_score(df_tf_idf.y_true, df_tf_idf.y_pred)\n",
"plt.plot(fpr, tpr, label=f\"TF-IDF (AUC={auc:.2f})\", color=colors[1])\n",
"\n",
"fpr, tpr, thresholds = metrics.roc_curve(df_baseline.y_true, df_baseline.y_pred)\n",
"auc = metrics.roc_auc_score(df_baseline.y_true, df_baseline.y_pred)\n",
"with plt.rc_context({\"legend.fontsize\": 8}):\n",
" plt.plot(fpr, tpr, label=f\"Random guess (AUC={auc:.2f})\", color=colors[2])\n",
"\n",
" plt.legend(loc=\"lower right\")\n",
" plt.savefig(OUTPUT_DIR / \"roc_auc.pdf\")\n",
" plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Stack-bar plot of annotations in categories"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df = pd.read_csv(INPUT_DIR / \"ref_categories_stackplot.csv\")\n",
"\n",
"ax = df.plot.barh(stacked=True, rot=0, width=0.95)\n",
"ax.set_ylim(-0.6, 2.6)\n",
"ax.set_xlabel(\"\\# references\", fontsize=12)\n",
"ax.set_yticklabels([\"Others\", \"Smartcard-related\", \"Smartcards\"])\n",
"ax.legend(title=\"Reference context\", loc=\"lower right\", frameon=True)\n",
"\n",
"plt.text(0.4, 0.8, df.iloc[2][\"Component reuse\"], transform=ax.transAxes, color=\"white\", fontsize=14)\n",
"plt.text(0.81, 0.8, df.iloc[2][\"Predecessor\"], transform=ax.transAxes, color=\"white\", fontsize=14)\n",
"\n",
"plt.axhline(y=1.21, xmin=0.05, xmax=0.18, color=\"black\", linewidth=0.75)\n",
"plt.axhline(y=0.9, xmin=0.12, xmax=0.18, color=\"black\", linewidth=0.75)\n",
"plt.text(0.2, 0.55, df.iloc[1][\"Component reuse\"], transform=ax.transAxes, color=\"black\", fontsize=14)\n",
"plt.text(0.2, 0.45, df.iloc[1][\"Predecessor\"], transform=ax.transAxes, color=\"black\", fontsize=14)\n",
"\n",
"plt.axhline(y=0.17, xmin=0.02, xmax=0.1, color=\"black\", linewidth=0.75)\n",
"plt.axhline(y=-0.1, xmin=0.05, xmax=0.1, color=\"black\", linewidth=0.75)\n",
"plt.text(0.12, 0.22, df.iloc[0][\"Component reuse\"], transform=ax.transAxes, color=\"black\", fontsize=14)\n",
"plt.text(0.12, 0.13, df.iloc[0][\"Predecessor\"], transform=ax.transAxes, color=\"black\", fontsize=14)\n",
"\n",
"ax.figure.set_size_inches(4, 3)\n",
"plt.tight_layout(pad=0.1)\n",
"plt.savefig(OUTPUT_DIR / \"stacked_barplot.pdf\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Archived certificate half-life"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"plt.figure()\n",
"\n",
"df = pd.read_csv(INPUT_DIR / \"archived_half_life.csv\")\n",
"\n",
"with plt.rc_context({\"legend.fontsize\": 10, \"legend.title_fontsize\": 10}):\n",
" g = sns.ecdfplot(data=df.n_days, complementary=True)\n",
"\n",
" plt.axvline(x=365, color=\"r\", linestyle=\"--\", linewidth=0.75)\n",
" vertical_line = lines.Line2D(\n",
" [], [], color=\"r\", marker=\"\", linestyle=\"--\", markersize=10, markeredgewidth=1.5, label=\"One year\"\n",
" )\n",
" plt.legend(handles=[vertical_line])\n",
"\n",
" g.figure.set_size_inches(3, 2)\n",
" g.set_xlim(0, 2000)\n",
"\n",
" g.set_xlabel(\"Number of days\")\n",
" g.set_ylabel(\"Proportion\")\n",
"\n",
" g.yaxis.set_major_formatter(matplotlib.ticker.PercentFormatter(xmax=1))\n",
" g.set_yticks([0, 0.25, 0.5, 0.75, 1])\n",
"\n",
" plt.tight_layout(pad=0.05)\n",
" g.figure.savefig(OUTPUT_DIR / \"cdf_half_life.pdf\")\n",
" g.figure.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Age of referenced certificate in composite-evaluation products"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"plt.figure()\n",
"\n",
"df = pd.read_csv(INPUT_DIR / \"ecdf_archival_data.csv\")\n",
"df = df.loc[df.scheme.isin({\"FR\", \"DE\", \"NL\"})]\n",
"\n",
"with plt.rc_context({\"legend.fontsize\": 10, \"legend.title_fontsize\": 10}):\n",
" g = sns.ecdfplot(data=df, x=\"date_diff\", hue=\"scheme\", legend=True)\n",
" plt.axvline(x=540, color=\"r\", linestyle=\"--\", linewidth=0.75)\n",
"\n",
" vertical_line = lines.Line2D([], [], color=\"r\", linestyle=\"--\", markersize=10, label=\"18 months\")\n",
" unique_hues = df[\"scheme\"].unique()\n",
" handles = [\n",
" plt.Line2D([], [], color=g.lines[color_idx].get_color(), label=label)\n",
" for color_idx, label in enumerate(unique_hues)\n",
" ]\n",
"\n",
" handles.append(vertical_line)\n",
" labels = list(unique_hues) + [\"18 months\"]\n",
"\n",
" g.legend(handles=handles, labels=labels)\n",
"\n",
" g.figure.set_size_inches(3, 2)\n",
" g.yaxis.set_major_formatter(matplotlib.ticker.PercentFormatter(xmax=1))\n",
" g.set_yticks([0, 0.25, 0.5, 0.75, 1])\n",
" g.set_xlim(0, 2000)\n",
" g.set_xlabel(\"Number of days\")\n",
" g.set_ylabel(\"Proportion\")\n",
" plt.tight_layout(pad=0.05)\n",
" g.figure.savefig(OUTPUT_DIR / \"ref_comp_age.pdf\")\n",
" plt.show()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Loading

0 comments on commit d38b6d0

Please sign in to comment.