diff --git a/filter_reports/analyze_PF_report.ipynb b/filter_reports/analyze_PF_report.ipynb index 50735f3..607c151 100644 --- a/filter_reports/analyze_PF_report.ipynb +++ b/filter_reports/analyze_PF_report.ipynb @@ -12,9 +12,7 @@ "import os\n", "\n", "# Load the CSV file into a DataFrame\n", - "file_path = \"nov9_fn_run_PF_single_theta_dual_radio_NN.csv\"\n", - "output = f\"output/{file_path}/\"\n", - "os.makedirs(output, exist_ok=True)\n", + "file_path = \"nov30_fn_run_PF_single_theta_dual_radio_NN.csv\"\n", "data = pd.read_csv(file_path)\n", "\n", "\n", @@ -24,106 +22,76 @@ "# Plot the heatmap\n", "checkpoint_fns = data[\"checkpoint_fn\"].unique() if \"checkpoint_fn\" in data else [None]\n", "for checkpoint_fn in checkpoint_fns:\n", - " for theta_err in data[\"theta_err\"].unique():\n", - " fig, axs = plt.subplots(1, 2, figsize=(12, 6))\n", - " for movement, ax in [(\"bounce\", axs[0]), (\"circle\", axs[1])]:\n", - " df = data[(data[\"movement\"] == movement) & (data[\"theta_err\"] == theta_err)]\n", - " if checkpoint_fn is not None:\n", - " df = df[df[\"checkpoint_fn\"] == checkpoint_fn]\n", - " df = df.copy()\n", - "\n", - " # Convert 'N' to integer for sorting\n", - " df[\"N\"] = df[\"N\"].astype(int)\n", - "\n", - " # Sort by 'N' as integers\n", - " df = df.sort_values(by=\"N\")\n", - "\n", - " # Convert 'N' back to string and set as a categorical type with ordered categories\n", - " df[\"N\"] = df[\"N\"].astype(str)\n", - " df[\"N\"] = pd.Categorical(\n", - " df[\"N\"], categories=sorted(df[\"N\"].unique(), key=int), ordered=True\n", - " )\n", - "\n", - " df[\"theta_dot_err\"] = df[\"theta_dot_err\"].astype(str)\n", - "\n", - " # Average other fields over 'mse_craft_theta' with observed=True to avoid the warning\n", - " heatmap_data = (\n", - " df.groupby([\"N\", \"theta_dot_err\"], observed=True)\n", - " .agg({target_value: \"mean\"})\n", - " .reset_index()\n", - " )\n", - "\n", - " # Pivot the data for the heatmap\n", - " heatmap_pivot = heatmap_data.pivot(\n", - " index=\"theta_dot_err\", columns=\"N\", values=target_value\n", - " )\n", - "\n", - " sns.heatmap(\n", - " heatmap_pivot,\n", - " annot=True,\n", - " fmt=\".2f\",\n", - " cmap=\"YlGnBu\",\n", - " cbar_kws={\"label\": f\"Mean {target_value}\"},\n", - " ax=ax,\n", - " )\n", - " checkpoint_fn_name = (\n", - " os.path.basename(os.path.dirname(checkpoint_fn))\n", - " if checkpoint_fn is not None\n", - " else \"no_NN\"\n", - " )\n", - " ax.set_title(f\"{checkpoint_fn_name} {movement} theta_err:{theta_err}\")\n", - " ax.set_xlabel(\"N\")\n", - " ax.set_ylabel(\"theta_dot_err\")\n", - " fig.set_tight_layout(True)\n", - " fig.savefig(\n", - " f\"{output}/{file_path}_{checkpoint_fn_name}_{movement}_theta_err{theta_err}.png\"\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "data.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "data.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "data.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "file_path = \"report_nov6_fn_run_EKF_single_theta_dual_radio.csv\"\n", - "data = pd.read_csv(file_path)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "data" + " output = f\"output/{file_path}/{os.path.basename(os.path.dirname(checkpoint_fn))}/\"\n", + " os.makedirs(output, exist_ok=True)\n", + " for segmentation_version in data[\"segmentation_version\"].unique():\n", + " for theta_err in data[\"theta_err\"].unique():\n", + " for frequency in data[\"frequency\"].unique():\n", + " fig, axs = plt.subplots(1, 2, figsize=(12, 6))\n", + " for movement, ax in [(\"bounce\", axs[0]), (\"circle\", axs[1])]:\n", + " df = data[\n", + " (data[\"movement\"] == movement)\n", + " & (data[\"theta_err\"] == theta_err)\n", + " & (data[\"frequency\"] == frequency)\n", + " & (data[\"segmentation_version\"] == segmentation_version)\n", + " ]\n", + " if len(df) == 0:\n", + " continue\n", + " if checkpoint_fn is not None:\n", + " df = df[df[\"checkpoint_fn\"] == checkpoint_fn]\n", + " df = df.copy()\n", + "\n", + " # Convert 'N' to integer for sorting\n", + " df[\"N\"] = df[\"N\"].astype(int)\n", + "\n", + " # Sort by 'N' as integers\n", + " df = df.sort_values(by=\"N\")\n", + "\n", + " # Convert 'N' back to string and set as a categorical type with ordered categories\n", + " df[\"N\"] = df[\"N\"].astype(str)\n", + " df[\"N\"] = pd.Categorical(\n", + " df[\"N\"],\n", + " categories=sorted(df[\"N\"].unique(), key=int),\n", + " ordered=True,\n", + " )\n", + "\n", + " df[\"theta_dot_err\"] = df[\"theta_dot_err\"].astype(str)\n", + "\n", + " # Average other fields over 'mse_craft_theta' with observed=True to avoid the warning\n", + " heatmap_data = (\n", + " df.groupby([\"N\", \"theta_dot_err\"], observed=True)\n", + " .agg({target_value: \"mean\"})\n", + " .reset_index()\n", + " )\n", + "\n", + " # Pivot the data for the heatmap\n", + " heatmap_pivot = heatmap_data.pivot(\n", + " index=\"theta_dot_err\", columns=\"N\", values=target_value\n", + " )\n", + "\n", + " sns.heatmap(\n", + " heatmap_pivot,\n", + " annot=True,\n", + " fmt=\".2f\",\n", + " cmap=\"YlGnBu\",\n", + " cbar_kws={\"label\": f\"Mean {target_value}\"},\n", + " ax=ax,\n", + " )\n", + " checkpoint_fn_name = (\n", + " os.path.basename(os.path.dirname(checkpoint_fn))\n", + " if checkpoint_fn is not None\n", + " else \"no_NN\"\n", + " )\n", + " ax.set_title(\n", + " f\"{checkpoint_fn_name} {movement} theta_err:{theta_err}\"\n", + " )\n", + " ax.set_xlabel(\"N\")\n", + " ax.set_ylabel(\"theta_dot_err\")\n", + " fig.set_tight_layout(True)\n", + " output_fn = f\"{output}/{segmentation_version:0.3f}/{frequency:0.4e}/{movement}_theta_err{theta_err}.png\"\n", + " os.makedirs(os.path.dirname(output_fn), exist_ok=True)\n", + " fig.savefig(output_fn)\n", + " plt.close(fig)" ] }, { @@ -137,7 +105,7 @@ "import pandas as pd\n", "\n", "# Load the CSV file into a DataFrame\n", - "file_path = \"nov9_fn_run_EKF_single_theta_dual_radio.csv\"\n", + "file_path = \"nov27_fn_run_EKF_single_theta_dual_radio.csv\"\n", "\n", "output = f\"output/{file_path}/\"\n", "os.makedirs(output, exist_ok=True)\n", @@ -145,61 +113,68 @@ "\n", "# Plot the heatmap\n", "for p in data[\"p\"].unique():\n", - " fig, axs = plt.subplots(1, 2, figsize=(12, 6))\n", - " for movement, ax in [(\"bounce\", axs[0]), (\"circle\", axs[1])]:\n", - " df = data[\n", - " (data[\"movement\"] == movement)\n", - " & (data[\"p\"] == p)\n", - " & (data[\"dynamic_R\"] == 0.0)\n", - " ].copy()\n", - "\n", - " # Convert 'N' to integer for sorting\n", - " df[\"noise_std\"] = df[\"noise_std\"].astype(float)\n", - " df[\"phi_std\"] = df[\"phi_std\"].astype(float)\n", - "\n", - " # Sort by 'N' as integers\n", - " df = df.sort_values(by=\"noise_std\")\n", - " df = df.sort_values(by=\"phi_std\")\n", - "\n", - " # Convert 'N' back to string and set as a categorical type with ordered categories\n", - " df[\"noise_std\"] = df[\"noise_std\"].astype(str)\n", - " df[\"noise_std\"] = pd.Categorical(\n", - " df[\"noise_std\"],\n", - " categories=sorted(df[\"noise_std\"].unique(), key=float),\n", - " ordered=True,\n", - " )\n", - "\n", - " df[\"phi_std\"] = pd.Categorical(\n", - " df[\"phi_std\"],\n", - " categories=sorted(df[\"phi_std\"].unique(), key=float),\n", - " ordered=True,\n", - " )\n", - "\n", - " # Average other fields over 'mse_craft_theta' with observed=True to avoid the warning\n", - " heatmap_data = (\n", - " df.groupby([\"noise_std\", \"phi_std\"], observed=True)\n", - " .agg({\"mse_craft_theta\": \"mean\"})\n", - " .reset_index()\n", - " )\n", - "\n", - " # Pivot the data for the heatmap\n", - " heatmap_pivot = heatmap_data.pivot(\n", - " index=\"phi_std\", columns=\"noise_std\", values=\"mse_craft_theta\"\n", - " )\n", - "\n", - " sns.heatmap(\n", - " heatmap_pivot,\n", - " annot=True,\n", - " fmt=\".2f\",\n", - " cmap=\"YlGnBu\",\n", - " cbar_kws={\"label\": \"Mean mse_craft_theta\"},\n", - " ax=ax,\n", - " )\n", - " ax.set_title(f\"{movement} p:{p}\")\n", - " ax.set_xlabel(\"noise_std\")\n", - " ax.set_ylabel(\"phi_std\")\n", - " fig.set_tight_layout(True)\n", - " fig.savefig(f\"{output}/{movement}_p{p}.png\")" + " for segmentation_version in data[\"segmentation_version\"].unique():\n", + " for frequency in data[\"frequency\"].unique():\n", + " fig, axs = plt.subplots(1, 2, figsize=(12, 6))\n", + " for movement, ax in [(\"bounce\", axs[0]), (\"circle\", axs[1])]:\n", + " df = data[\n", + " (data[\"movement\"] == movement)\n", + " & (data[\"p\"] == p)\n", + " & (data[\"dynamic_R\"] == 0.0)\n", + " & (data[\"frequency\"] == frequency)\n", + " & (data[\"segmentation_version\"] == segmentation_version)\n", + " ].copy()\n", + " if len(df) == 0:\n", + " continue\n", + "\n", + " # Convert 'N' to integer for sorting\n", + " df[\"noise_std\"] = df[\"noise_std\"].astype(float)\n", + " df[\"phi_std\"] = df[\"phi_std\"].astype(float)\n", + "\n", + " # Sort by 'N' as integers\n", + " df = df.sort_values(by=\"noise_std\")\n", + " df = df.sort_values(by=\"phi_std\")\n", + "\n", + " # Convert 'N' back to string and set as a categorical type with ordered categories\n", + " df[\"noise_std\"] = df[\"noise_std\"].astype(str)\n", + " df[\"noise_std\"] = pd.Categorical(\n", + " df[\"noise_std\"],\n", + " categories=sorted(df[\"noise_std\"].unique(), key=float),\n", + " ordered=True,\n", + " )\n", + "\n", + " df[\"phi_std\"] = pd.Categorical(\n", + " df[\"phi_std\"],\n", + " categories=sorted(df[\"phi_std\"].unique(), key=float),\n", + " ordered=True,\n", + " )\n", + "\n", + " # Average other fields over 'mse_craft_theta' with observed=True to avoid the warning\n", + " heatmap_data = (\n", + " df.groupby([\"noise_std\", \"phi_std\"], observed=True)\n", + " .agg({\"mse_craft_theta\": \"mean\"})\n", + " .reset_index()\n", + " )\n", + "\n", + " # Pivot the data for the heatmap\n", + " heatmap_pivot = heatmap_data.pivot(\n", + " index=\"phi_std\", columns=\"noise_std\", values=\"mse_craft_theta\"\n", + " )\n", + "\n", + " sns.heatmap(\n", + " heatmap_pivot,\n", + " annot=True,\n", + " fmt=\".2f\",\n", + " cmap=\"YlGnBu\",\n", + " cbar_kws={\"label\": \"Mean mse_craft_theta\"},\n", + " ax=ax,\n", + " )\n", + " ax.set_title(f\"{movement} p:{p}\")\n", + " ax.set_xlabel(\"noise_std\")\n", + " ax.set_ylabel(\"phi_std\")\n", + " fig.set_tight_layout(True)\n", + " fig.savefig(f\"{output}/{frequency:0.4e}_{movement}_p{p}.png\")\n", + " plt.close(fig)" ] }, { @@ -213,58 +188,70 @@ "import pandas as pd\n", "\n", "# Load the CSV file into a DataFrame\n", - "file_path = \"nov9_fn_run_PF_single_theta_dual_radio.csv\"\n", + "file_path = \"nov27_fn_run_PF_single_theta_dual_radio.csv\"\n", "data = pd.read_csv(file_path)\n", "\n", "\n", + "output = f\"output/{file_path}/\"\n", + "os.makedirs(output, exist_ok=True)\n", + "data = pd.read_csv(file_path)\n", + "\n", "target_value = \"runtime\"\n", "target_value = \"mse_craft_theta\"\n", "# Plot the heatmap\n", "for theta_err in data[\"theta_err\"].unique():\n", - " fig, axs = plt.subplots(1, 2, figsize=(12, 6))\n", - " for movement, ax in [(\"bounce\", axs[0]), (\"circle\", axs[1])]:\n", - " df = data[\n", - " (data[\"movement\"] == movement) & (data[\"theta_err\"] == theta_err)\n", - " ].copy()\n", + " for frequency in data[\"frequency\"].unique():\n", + " fig, axs = plt.subplots(1, 2, figsize=(12, 6))\n", + " for movement, ax in [(\"bounce\", axs[0]), (\"circle\", axs[1])]:\n", + " df = data[\n", + " (data[\"movement\"] == movement)\n", + " & (data[\"theta_err\"] == theta_err) * (data[\"frequency\"] == frequency)\n", + " ].copy()\n", + " if len(df) == 0:\n", + " continue\n", "\n", - " # Convert 'N' to integer for sorting\n", - " df[\"N\"] = df[\"N\"].astype(int)\n", + " # Convert 'N' to integer for sorting\n", + " df[\"N\"] = df[\"N\"].astype(int)\n", "\n", - " # Sort by 'N' as integers\n", - " df = df.sort_values(by=\"N\")\n", + " # Sort by 'N' as integers\n", + " df = df.sort_values(by=\"N\")\n", "\n", - " # Convert 'N' back to string and set as a categorical type with ordered categories\n", - " df[\"N\"] = df[\"N\"].astype(str)\n", - " df[\"N\"] = pd.Categorical(\n", - " df[\"N\"], categories=sorted(df[\"N\"].unique(), key=int), ordered=True\n", - " )\n", + " # Convert 'N' back to string and set as a categorical type with ordered categories\n", + " df[\"N\"] = df[\"N\"].astype(str)\n", + " df[\"N\"] = pd.Categorical(\n", + " df[\"N\"], categories=sorted(df[\"N\"].unique(), key=int), ordered=True\n", + " )\n", "\n", - " df[\"theta_dot_err\"] = df[\"theta_dot_err\"].astype(str)\n", + " df[\"theta_dot_err\"] = df[\"theta_dot_err\"].astype(str)\n", "\n", - " # Average other fields over 'mse_craft_theta' with observed=True to avoid the warning\n", - " heatmap_data = (\n", - " df.groupby([\"N\", \"theta_dot_err\"], observed=True)\n", - " .agg({target_value: \"mean\"})\n", - " .reset_index()\n", - " )\n", + " # Average other fields over 'mse_craft_theta' with observed=True to avoid the warning\n", + " heatmap_data = (\n", + " df.groupby([\"N\", \"theta_dot_err\"], observed=True)\n", + " .agg({target_value: \"mean\"})\n", + " .reset_index()\n", + " )\n", "\n", - " # Pivot the data for the heatmap\n", - " heatmap_pivot = heatmap_data.pivot(\n", - " index=\"theta_dot_err\", columns=\"N\", values=target_value\n", - " )\n", + " # Pivot the data for the heatmap\n", + " heatmap_pivot = heatmap_data.pivot(\n", + " index=\"theta_dot_err\", columns=\"N\", values=target_value\n", + " )\n", "\n", - " sns.heatmap(\n", - " heatmap_pivot,\n", - " annot=True,\n", - " fmt=\".2f\",\n", - " cmap=\"YlGnBu\",\n", - " cbar_kws={\"label\": f\"Mean {target_value}\"},\n", - " ax=ax,\n", + " sns.heatmap(\n", + " heatmap_pivot,\n", + " annot=True,\n", + " fmt=\".2f\",\n", + " cmap=\"YlGnBu\",\n", + " cbar_kws={\"label\": f\"Mean {target_value}\"},\n", + " ax=ax,\n", + " )\n", + " ax.set_title(f\"{movement} theta_err:{theta_err}\")\n", + " ax.set_xlabel(\"N\")\n", + " ax.set_ylabel(\"theta_dot_err\")\n", + " fig.set_tight_layout(True)\n", + " fig.savefig(\n", + " f\"{output}/{file_path}_{frequency:0.4e}_{movement}_theta_err{theta_err}.png\"\n", " )\n", - " ax.set_title(f\"{movement} theta_err:{theta_err}\")\n", - " ax.set_xlabel(\"N\")\n", - " ax.set_ylabel(\"theta_dot_err\")\n", - " fig.set_tight_layout(True)" + " plt.close(fig)" ] }, { diff --git a/latest_configs/dec1_single_config_sigma0p05_rerun_3p11.yaml b/latest_configs/dec1_single_config_sigma0p05_rerun_3p11.yaml new file mode 100644 index 0000000..bce13f4 --- /dev/null +++ b/latest_configs/dec1_single_config_sigma0p05_rerun_3p11.yaml @@ -0,0 +1,65 @@ +datasets: + batch_size: 256 + empirical_data_fn: /home/mouse9911/gits/spf/empirical_dists/full.pkl + empirical_individual_radio: false + empirical_symmetry: true + flip: true + double_flip: false + precompute_cache: /mnt/4tb_ssd/precompute_cache/ + scatter: continuous + scatter_k: 21 + shuffle: true + sigma: 0.05 + skip_qc: true + snapshots_adjacent_stride: 1 + train_snapshots_per_session: 1 + val_snapshots_per_session: 1 + random_snapshot_size: False + snapshots_stride: 1 + train_paths: + - /mnt/4tb_ssd/nosig_data/train.txt + val_paths: + - /mnt/4tb_ssd/nosig_data/val.txt + val_holdout_fraction: 0.2 + val_subsample_fraction: 0.2 + workers: 20 + segmentation_version: 3.11 +global: + beamformer_input: true + empirical_input: true + n_radios: 2 + nthetas: 65 + phase_input: true + rx_spacing_input: true + seed: 10 +logger: + log_every: 100 + name: wandb + plot_every: 15000 + project: 2024_nov22_single_paired_multi +model: + block: true + bn: true + depth: 4 + detach: true + dropout: 0.0 + hidden: 1024 + input_dropout: 0.3 + name: beamformer + norm: layer +optim: + amp: true + checkpoint_every: 5000 + device: cuda + direct_loss: false + dtype: torch.float32 + epochs: 30 + head_start: 0 + learning_rate: 0.002 + loss: mse + output: /home/mouse9911/gits/spf/nov28_checkpoints/single_checkpoints_inputdo0p3_sigma0p05_rerun_3p11 + resume_step: 0 + save_on: val/single_loss + scheduler_step: 7 + val_every: 10000 + weight_decay: 0.0 diff --git a/latest_configs/dec1_single_config_sigma0p05_rerun_3p31.yaml b/latest_configs/dec1_single_config_sigma0p05_rerun_3p31.yaml new file mode 100644 index 0000000..722d27e --- /dev/null +++ b/latest_configs/dec1_single_config_sigma0p05_rerun_3p31.yaml @@ -0,0 +1,65 @@ +datasets: + batch_size: 256 + empirical_data_fn: /home/mouse9911/gits/spf/empirical_dists/full.pkl + empirical_individual_radio: false + empirical_symmetry: true + flip: true + double_flip: false + precompute_cache: /mnt/4tb_ssd/precompute_cache_3p31/ + scatter: continuous + scatter_k: 21 + shuffle: true + sigma: 0.05 + skip_qc: true + snapshots_adjacent_stride: 1 + train_snapshots_per_session: 1 + val_snapshots_per_session: 1 + random_snapshot_size: False + snapshots_stride: 1 + train_paths: + - /mnt/4tb_ssd/nosig_data/train.txt + val_paths: + - /mnt/4tb_ssd/nosig_data/val.txt + val_holdout_fraction: 0.2 + val_subsample_fraction: 0.2 + workers: 20 + segmentation_version: 3.31 +global: + beamformer_input: true + empirical_input: true + n_radios: 2 + nthetas: 65 + phase_input: true + rx_spacing_input: true + seed: 10 +logger: + log_every: 100 + name: wandb + plot_every: 15000 + project: 2024_nov22_single_paired_multi +model: + block: true + bn: true + depth: 4 + detach: true + dropout: 0.0 + hidden: 1024 + input_dropout: 0.3 + name: beamformer + norm: layer +optim: + amp: true + checkpoint_every: 5000 + device: cuda + direct_loss: false + dtype: torch.float32 + epochs: 30 + head_start: 0 + learning_rate: 0.002 + loss: mse + output: /home/mouse9911/gits/spf/nov28_checkpoints/single_checkpoints_inputdo0p3_sigma0p05_rerun_3p31 + resume_step: 0 + save_on: val/single_loss + scheduler_step: 7 + val_every: 10000 + weight_decay: 0.0 diff --git a/latest_configs/dec1_single_config_sigma0p05_rerun_3p3_signal.yaml b/latest_configs/dec1_single_config_sigma0p05_rerun_3p3_signal.yaml new file mode 100644 index 0000000..c29b6ff --- /dev/null +++ b/latest_configs/dec1_single_config_sigma0p05_rerun_3p3_signal.yaml @@ -0,0 +1,68 @@ +datasets: + batch_size: 64 + empirical_data_fn: /home/mouse9911/gits/spf/empirical_dists/full.pkl + empirical_individual_radio: false + empirical_symmetry: true + flip: true + double_flip: false + precompute_cache: /mnt/4tb_ssd/precompute_cache_3p3/ + scatter: continuous + scatter_k: 21 + shuffle: true + sigma: 0.05 + skip_qc: true + snapshots_adjacent_stride: 1 + train_snapshots_per_session: 1 + val_snapshots_per_session: 1 + random_snapshot_size: False + snapshots_stride: 1 + train_paths: + - /mnt/4tb_ssd/nosig_data/train_fullpath.txt + val_paths: + - /mnt/4tb_ssd/nosig_data/val_fullpath.txt + val_holdout_fraction: 0.2 + val_subsample_fraction: 0.2 + workers: 8 + segmentation_version: 3.3 +global: + beamformer_input: true + empirical_input: true + n_radios: 2 + nthetas: 65 + phase_input: true + rx_spacing_input: true + signal_matrix_input: true + seed: 10 +logger: + log_every: 100 + name: wandb + plot_every: 15000 + project: 2024_nov22_single_paired_multi +model: + block: true + bn: true + depth: 4 + detach: true + dropout: 0.0 + hidden: 1024 + input_dropout: 0.3 + name: beamformer + norm: layer + signal_matrix_net: {} + windows_stats_net: {} +optim: + amp: true + checkpoint_every: 5000 + device: cuda + direct_loss: false + dtype: torch.float32 + epochs: 30 + head_start: 0 + learning_rate: 0.002 + loss: mse + output: /home/mouse9911/gits/spf/nov28_checkpoints/single_checkpoints_inputdo0p3_sigma0p05_rerun_3p3 + resume_step: 0 + save_on: val/single_loss + scheduler_step: 7 + val_every: 10000 + weight_decay: 0.0 diff --git a/latest_configs/dec1_single_config_sigma0p05_rerun_3p3_windows.yaml b/latest_configs/dec1_single_config_sigma0p05_rerun_3p3_windows.yaml new file mode 100644 index 0000000..64aabc3 --- /dev/null +++ b/latest_configs/dec1_single_config_sigma0p05_rerun_3p3_windows.yaml @@ -0,0 +1,67 @@ +datasets: + batch_size: 256 + empirical_data_fn: /home/mouse9911/gits/spf/empirical_dists/full.pkl + empirical_individual_radio: false + empirical_symmetry: true + flip: true + double_flip: false + precompute_cache: /mnt/4tb_ssd/precompute_cache_3p3/ + scatter: continuous + scatter_k: 21 + shuffle: true + sigma: 0.05 + skip_qc: true + snapshots_adjacent_stride: 1 + train_snapshots_per_session: 1 + val_snapshots_per_session: 1 + random_snapshot_size: False + snapshots_stride: 1 + train_paths: + - /mnt/4tb_ssd/nosig_data/train_fullpath.txt + val_paths: + - /mnt/4tb_ssd/nosig_data/val_fullpath.txt + val_holdout_fraction: 0.2 + val_subsample_fraction: 0.2 + workers: 8 + segmentation_version: 3.3 +global: + beamformer_input: true + empirical_input: true + n_radios: 2 + nthetas: 65 + phase_input: true + rx_spacing_input: true + signal_matrix_input: false + seed: 10 +logger: + log_every: 100 + name: wandb + plot_every: 15000 + project: 2024_nov22_single_paired_multi +model: + block: true + bn: true + depth: 4 + detach: true + dropout: 0.0 + hidden: 1024 + input_dropout: 0.3 + name: beamformer + norm: layer + windows_stats_net: {} +optim: + amp: true + checkpoint_every: 5000 + device: cuda + direct_loss: false + dtype: torch.float32 + epochs: 30 + head_start: 0 + learning_rate: 0.002 + loss: mse + output: /home/mouse9911/gits/spf/nov28_checkpoints/single_checkpoints_inputdo0p3_sigma0p05_rerun_3p3 + resume_step: 0 + save_on: val/single_loss + scheduler_step: 7 + val_every: 10000 + weight_decay: 0.0 diff --git a/latest_configs/nov28_paired_sigma0p05_rerun.yaml b/latest_configs/nov28_paired_sigma0p05_rerun.yaml new file mode 100644 index 0000000..cbefbdb --- /dev/null +++ b/latest_configs/nov28_paired_sigma0p05_rerun.yaml @@ -0,0 +1,75 @@ +datasets: + batch_size: 256 + empirical_data_fn: /home/mouse9911/gits/spf/empirical_dists/full.pkl + empirical_individual_radio: false + empirical_symmetry: true + flip: false + double_flip: True + precompute_cache: /mnt/4tb_ssd/precompute_cache_new/ + scatter: continuous + scatter_k: 21 + shuffle: true + sigma: 0.05 + skip_qc: true + snapshots_adjacent_stride: 1 + train_snapshots_per_session: 1 + val_snapshots_per_session: 1 + random_snapshot_size: False + snapshots_stride: 1 + train_paths: + - /mnt/4tb_ssd/nosig_data/train.txt + val_paths: + - /mnt/4tb_ssd/nosig_data/val.txt + val_holdout_fraction: 0.2 + val_subsample_fraction: 0.2 + workers: 20 + segmentation_version: 3.2 +global: + beamformer_input: true + empirical_input: true + n_radios: 2 + nthetas: 65 + phase_input: true + rx_spacing_input: true + seed: 10 +logger: + log_every: 100 + name: wandb + plot_every: 15000 + project: 2024_nov22_single_paired_multi +model: + block: true + bn: true + depth: 4 + detach: true + dropout: 0.0 + hidden: 1024 + load_single: true + name: pairedbeamformer + norm: layer + single: + block: true + bn: true + depth: 4 + detach: true + dropout: 0.0 + hidden: 1024 + input_dropout: 0.3 + norm: layer +optim: + amp: true + checkpoint: /home/mouse9911/gits/spf/nov28_checkpoints/single_checkpoints_inputdo0p3_sigma0p05_rerun/best.pth + checkpoint_every: 5000 + device: cuda + direct_loss: false + dtype: torch.float32 + epochs: 60 + head_start: 0 + learning_rate: 0.0002 + loss: mse + output: /home/mouse9911/gits/spf/nov28_checkpoints/paired_checkpoints_inputdo0p3_sigma0p05_rerun + resume_step: 0 + save_on: val/paired_loss + scheduler_step: 6 + val_every: 10000 + weight_decay: 0.0 diff --git a/latest_configs/nov28_paired_sigma0p05_rerun_3p11.yaml b/latest_configs/nov28_paired_sigma0p05_rerun_3p11.yaml new file mode 100644 index 0000000..3ae43f5 --- /dev/null +++ b/latest_configs/nov28_paired_sigma0p05_rerun_3p11.yaml @@ -0,0 +1,75 @@ +datasets: + batch_size: 256 + empirical_data_fn: /home/mouse9911/gits/spf/empirical_dists/full.pkl + empirical_individual_radio: false + empirical_symmetry: true + flip: false + double_flip: True + precompute_cache: /mnt/4tb_ssd/precompute_cache/ + scatter: continuous + scatter_k: 21 + shuffle: true + sigma: 0.05 + skip_qc: true + snapshots_adjacent_stride: 1 + train_snapshots_per_session: 1 + val_snapshots_per_session: 1 + random_snapshot_size: False + snapshots_stride: 1 + train_paths: + - /mnt/4tb_ssd/nosig_data/train.txt + val_paths: + - /mnt/4tb_ssd/nosig_data/val.txt + val_holdout_fraction: 0.2 + val_subsample_fraction: 0.2 + workers: 20 + segmentation_version: 3.11 +global: + beamformer_input: true + empirical_input: true + n_radios: 2 + nthetas: 65 + phase_input: true + rx_spacing_input: true + seed: 10 +logger: + log_every: 100 + name: wandb + plot_every: 15000 + project: 2024_nov22_single_paired_multi +model: + block: true + bn: true + depth: 4 + detach: true + dropout: 0.0 + hidden: 1024 + load_single: true + name: pairedbeamformer + norm: layer + single: + block: true + bn: true + depth: 4 + detach: true + dropout: 0.0 + hidden: 1024 + input_dropout: 0.3 + norm: layer +optim: + amp: true + checkpoint: /home/mouse9911/gits/spf/nov28_checkpoints/single_checkpoints_inputdo0p3_sigma0p05_rerun_3p11/best.pth + checkpoint_every: 5000 + device: cuda + direct_loss: false + dtype: torch.float32 + epochs: 60 + head_start: 0 + learning_rate: 0.0002 + loss: mse + output: /home/mouse9911/gits/spf/nov28_checkpoints/paired_checkpoints_inputdo0p3_sigma0p05_rerun_3p11 + resume_step: 0 + save_on: val/paired_loss + scheduler_step: 6 + val_every: 10000 + weight_decay: 0.0 diff --git a/latest_configs/nov28_paired_sigma0p05_rerun_3p3.yaml b/latest_configs/nov28_paired_sigma0p05_rerun_3p3.yaml new file mode 100644 index 0000000..139f661 --- /dev/null +++ b/latest_configs/nov28_paired_sigma0p05_rerun_3p3.yaml @@ -0,0 +1,75 @@ +datasets: + batch_size: 256 + empirical_data_fn: /home/mouse9911/gits/spf/empirical_dists/full.pkl + empirical_individual_radio: false + empirical_symmetry: true + flip: false + double_flip: True + precompute_cache: /mnt/4tb_ssd/precompute_cache_3p3/ + scatter: continuous + scatter_k: 21 + shuffle: true + sigma: 0.05 + skip_qc: true + snapshots_adjacent_stride: 1 + train_snapshots_per_session: 1 + val_snapshots_per_session: 1 + random_snapshot_size: False + snapshots_stride: 1 + train_paths: + - /mnt/4tb_ssd/nosig_data/train.txt + val_paths: + - /mnt/4tb_ssd/nosig_data/val.txt + val_holdout_fraction: 0.2 + val_subsample_fraction: 0.2 + workers: 20 + segmentation_version: 3.3 +global: + beamformer_input: true + empirical_input: true + n_radios: 2 + nthetas: 65 + phase_input: true + rx_spacing_input: true + seed: 10 +logger: + log_every: 100 + name: wandb + plot_every: 15000 + project: 2024_nov22_single_paired_multi +model: + block: true + bn: true + depth: 4 + detach: true + dropout: 0.0 + hidden: 1024 + load_single: true + name: pairedbeamformer + norm: layer + single: + block: true + bn: true + depth: 4 + detach: true + dropout: 0.0 + hidden: 1024 + input_dropout: 0.3 + norm: layer +optim: + amp: true + checkpoint: /home/mouse9911/gits/spf/nov28_checkpoints/single_checkpoints_inputdo0p3_sigma0p05_rerun_3p3/best.pth + checkpoint_every: 5000 + device: cuda + direct_loss: false + dtype: torch.float32 + epochs: 60 + head_start: 0 + learning_rate: 0.0002 + loss: mse + output: /home/mouse9911/gits/spf/nov28_checkpoints/paired_checkpoints_inputdo0p3_sigma0p05_rerun_3p3 + resume_step: 0 + save_on: val/paired_loss + scheduler_step: 6 + val_every: 10000 + weight_decay: 0.0 diff --git a/latest_configs/nov28_single_config_sigma0p05_rerun.yaml b/latest_configs/nov28_single_config_sigma0p05_rerun.yaml new file mode 100644 index 0000000..b60c640 --- /dev/null +++ b/latest_configs/nov28_single_config_sigma0p05_rerun.yaml @@ -0,0 +1,65 @@ +datasets: + batch_size: 256 + empirical_data_fn: /home/mouse9911/gits/spf/empirical_dists/full.pkl + empirical_individual_radio: false + empirical_symmetry: true + flip: true + double_flip: false + precompute_cache: /mnt/4tb_ssd/precompute_cache_new/ + scatter: continuous + scatter_k: 21 + shuffle: true + sigma: 0.05 + skip_qc: true + snapshots_adjacent_stride: 1 + train_snapshots_per_session: 1 + val_snapshots_per_session: 1 + random_snapshot_size: False + snapshots_stride: 1 + train_paths: + - /mnt/4tb_ssd/nosig_data/train.txt + val_paths: + - /mnt/4tb_ssd/nosig_data/val.txt + val_holdout_fraction: 0.2 + val_subsample_fraction: 0.2 + workers: 20 + segmentation_version: 3.2 +global: + beamformer_input: true + empirical_input: true + n_radios: 2 + nthetas: 65 + phase_input: true + rx_spacing_input: true + seed: 10 +logger: + log_every: 100 + name: wandb + plot_every: 15000 + project: 2024_nov22_single_paired_multi +model: + block: true + bn: true + depth: 4 + detach: true + dropout: 0.0 + hidden: 1024 + input_dropout: 0.3 + name: beamformer + norm: layer +optim: + amp: true + checkpoint_every: 5000 + device: cuda + direct_loss: false + dtype: torch.float32 + epochs: 30 + head_start: 0 + learning_rate: 0.002 + loss: mse + output: /home/mouse9911/gits/spf/nov28_checkpoints/single_checkpoints_inputdo0p3_sigma0p05_rerun + resume_step: 0 + save_on: val/single_loss + scheduler_step: 7 + val_every: 10000 + weight_decay: 0.0 diff --git a/latest_configs/nov28_single_config_sigma0p05_rerun_3p11.yaml b/latest_configs/nov28_single_config_sigma0p05_rerun_3p11.yaml new file mode 100644 index 0000000..bce13f4 --- /dev/null +++ b/latest_configs/nov28_single_config_sigma0p05_rerun_3p11.yaml @@ -0,0 +1,65 @@ +datasets: + batch_size: 256 + empirical_data_fn: /home/mouse9911/gits/spf/empirical_dists/full.pkl + empirical_individual_radio: false + empirical_symmetry: true + flip: true + double_flip: false + precompute_cache: /mnt/4tb_ssd/precompute_cache/ + scatter: continuous + scatter_k: 21 + shuffle: true + sigma: 0.05 + skip_qc: true + snapshots_adjacent_stride: 1 + train_snapshots_per_session: 1 + val_snapshots_per_session: 1 + random_snapshot_size: False + snapshots_stride: 1 + train_paths: + - /mnt/4tb_ssd/nosig_data/train.txt + val_paths: + - /mnt/4tb_ssd/nosig_data/val.txt + val_holdout_fraction: 0.2 + val_subsample_fraction: 0.2 + workers: 20 + segmentation_version: 3.11 +global: + beamformer_input: true + empirical_input: true + n_radios: 2 + nthetas: 65 + phase_input: true + rx_spacing_input: true + seed: 10 +logger: + log_every: 100 + name: wandb + plot_every: 15000 + project: 2024_nov22_single_paired_multi +model: + block: true + bn: true + depth: 4 + detach: true + dropout: 0.0 + hidden: 1024 + input_dropout: 0.3 + name: beamformer + norm: layer +optim: + amp: true + checkpoint_every: 5000 + device: cuda + direct_loss: false + dtype: torch.float32 + epochs: 30 + head_start: 0 + learning_rate: 0.002 + loss: mse + output: /home/mouse9911/gits/spf/nov28_checkpoints/single_checkpoints_inputdo0p3_sigma0p05_rerun_3p11 + resume_step: 0 + save_on: val/single_loss + scheduler_step: 7 + val_every: 10000 + weight_decay: 0.0 diff --git a/latest_configs/nov28_single_config_sigma0p05_rerun_3p3.yaml b/latest_configs/nov28_single_config_sigma0p05_rerun_3p3.yaml new file mode 100644 index 0000000..9a74791 --- /dev/null +++ b/latest_configs/nov28_single_config_sigma0p05_rerun_3p3.yaml @@ -0,0 +1,65 @@ +datasets: + batch_size: 256 + empirical_data_fn: /home/mouse9911/gits/spf/empirical_dists/full.pkl + empirical_individual_radio: false + empirical_symmetry: true + flip: true + double_flip: false + precompute_cache: /mnt/4tb_ssd/precompute_cache_3p3/ + scatter: continuous + scatter_k: 21 + shuffle: true + sigma: 0.05 + skip_qc: true + snapshots_adjacent_stride: 1 + train_snapshots_per_session: 1 + val_snapshots_per_session: 1 + random_snapshot_size: False + snapshots_stride: 1 + train_paths: + - /mnt/4tb_ssd/nosig_data/train.txt + val_paths: + - /mnt/4tb_ssd/nosig_data/val.txt + val_holdout_fraction: 0.2 + val_subsample_fraction: 0.2 + workers: 20 + segmentation_version: 3.3 +global: + beamformer_input: true + empirical_input: true + n_radios: 2 + nthetas: 65 + phase_input: true + rx_spacing_input: true + seed: 10 +logger: + log_every: 100 + name: wandb + plot_every: 15000 + project: 2024_nov22_single_paired_multi +model: + block: true + bn: true + depth: 4 + detach: true + dropout: 0.0 + hidden: 1024 + input_dropout: 0.3 + name: beamformer + norm: layer +optim: + amp: true + checkpoint_every: 5000 + device: cuda + direct_loss: false + dtype: torch.float32 + epochs: 30 + head_start: 0 + learning_rate: 0.002 + loss: mse + output: /home/mouse9911/gits/spf/nov28_checkpoints/single_checkpoints_inputdo0p3_sigma0p05_rerun_3p3 + resume_step: 0 + save_on: val/single_loss + scheduler_step: 7 + val_every: 10000 + weight_decay: 0.0 diff --git a/spf/data_collector.py b/spf/data_collector.py index 4b5a09f..dd594a4 100644 --- a/spf/data_collector.py +++ b/spf/data_collector.py @@ -167,7 +167,12 @@ def read_forever(self): idx = 0 while self.run: start_time = time.time() - data = self.get_data() + try: + data = self.get_data() + except Exception as e: + logging.error(f"Failed to read data , aborting {e}") + self.run = False + continue put_on_queue = False while self.run and not put_on_queue: try: @@ -412,21 +417,27 @@ def setup_record_matrix(self): def write_to_record_matrix(self, thread_idx, record_idx, read_thread: ThreadedRX): raise NotImplementedError - def run_collector_thread(self): - logging.info("Collector thread is running!") - # https://stackoverflow.com/questions/48263704/threadpoolexecutor-how-to-limit-the-queue-maxsize + def run_inner_collector_thread(self): with ThreadPoolExecutorWithQueueSizeLimit( max_workers=6, maxsize=12 ) as executor: for record_index in tqdm(range(self.yaml_config["n-records-per-receiver"])): for read_thread_idx, read_thread in enumerate(self.read_threads): data = read_thread.read_q.get() + if data is None: + return executor.submit( self.write_to_record_matrix, read_thread_idx, record_idx=record_index, data=data, ) + return + + def run_collector_thread(self): + logging.info("Collector thread is running!") + # https://stackoverflow.com/questions/48263704/threadpoolexecutor-how-to-limit-the-queue-maxsize + self.run_inner_collector_thread() # read_thread.read_q.shutdown() # py 3.13 logging.info("Collector thread is exiting!") self.finished_collecting = True diff --git a/spf/dataset/precompute_3p3_to_3p31.py b/spf/dataset/precompute_3p3_to_3p31.py new file mode 100644 index 0000000..e143d7c --- /dev/null +++ b/spf/dataset/precompute_3p3_to_3p31.py @@ -0,0 +1,59 @@ +import argparse +import concurrent +import pickle + +import numpy as np +import torch +from tqdm import tqdm + +# from spf.dataset.spf_dataset import v5spfdataset +from spf.utils import zarr_open_from_lmdb_store + + +def process_yarr(yarr_fn): + precomputed_zarr = zarr_open_from_lmdb_store(yarr_fn, mode="rw", map_size=2**32) + + precomputed_pkl = pickle.load(open(yarr_fn.replace(".yarr", ".pkl"), "rb")) + + for r_idx in range(2): + mean_phase = np.hstack( + [ + ( + torch.tensor( + [x["mean"] for x in result["simple_segmentation"]] + ).mean() + if len(result) > 0 + else torch.tensor(float("nan")) + ) + for result in precomputed_pkl["segmentation_by_receiver"][f"r{r_idx}"] + ] + ) + # TODO THIS SHOULD BE FIXED!!! + mean_phase[~np.isfinite(mean_phase)] = 0 + # diff_std = nanstd(precomputed_zarr[f"r{r_idx}"]["mean_phase"][:] - mean_phase) + # print(r_idx, diff_std) + precomputed_zarr[f"r{r_idx}"]["mean_phase"][:] = mean_phase + precomputed_zarr["version"][:] = 3.31 + + +def nanstd(x): + return x[np.isfinite(x)].std() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "-y", "--yarrs", type=str, help="yarrs", nargs="+", required=True + ) + parser.add_argument("--debug", default=False, action=argparse.BooleanOptionalAction) + parser.add_argument("-w", "--workers", type=int, default=16, help="n workers") + + args = parser.parse_args() + + if args.debug: + list(map(process_yarr, args.yarrs)) + else: + with concurrent.futures.ProcessPoolExecutor( + max_workers=args.workers + ) as executor: + list(tqdm(executor.map(process_yarr, args.yarrs), total=len(args.yarrs))) diff --git a/spf/rf.py b/spf/rf.py index 6d5fe7c..3aaa7a7 100644 --- a/spf/rf.py +++ b/spf/rf.py @@ -267,17 +267,21 @@ def torch_circular_mean_noweight(angles: torch.Tensor, trim: float): def mean_phase_mean(angles, weights): - if isinstance(weights, np.ndarray): - assert np.isfinite(weights).all() - elif isinstance(weights, torch.tensor): - assert weights.isfinite().all() + assert np.isfinite(weights).all() _sin_angles = np.sin(angles) * weights _cos_angles = np.cos(angles) * weights cm = np.arctan2(_sin_angles.sum(), _cos_angles.sum()) % (2 * np.pi) - return pi_norm(cm) +def torch_mean_phase_mean(angles, weights): + assert weights.isfinite().all() + _sin_angles = np.sin(angles) * weights + _cos_angles = np.cos(angles) * weights + cm = np.arctan2(_sin_angles.sum(), _cos_angles.sum()) % (2 * np.pi) + return torch_pi_norm(cm) + + def torch_circular_mean(angles: torch.Tensor, trim: float, weights=None): assert angles.ndim == 2 _sin_angles = torch.sin(angles) @@ -471,29 +475,47 @@ def drop_noise_windows(windows): return valid_windows -# 3.3 SEGMENTATION VERSION HAS THIS +# 3.11 has this def keep_signal_surrounded_by_noise(windows): valid_windows = [] for window_idx, window in enumerate(windows): if window["type"] == "signal": - if window["stddev"] > 0.03: - # # check if one before was signal - before_is_signal = False - if window_idx > 0 and windows[window_idx - 1]["type"] == "signal": - before_is_signal = True - # check if one after was signal - after_is_signal = False - if ( - window_idx + 1 < len(windows) - and windows[window_idx + 1]["type"] == "signal" - ): - after_is_signal = True - if before_is_signal and after_is_signal: - continue + # check if one before was signal + if window_idx > 0 and windows[window_idx - 1]["type"] == "signal": + continue + # check if one after was signal + if ( + window_idx + 1 < len(windows) + and windows[window_idx + 1]["type"] == "signal" + ): + continue valid_windows.append(window) return valid_windows +# 3.3 SEGMENTATION VERSION HAS THIS +# def keep_signal_surrounded_by_noise(windows): +# valid_windows = [] +# for window_idx, window in enumerate(windows): +# if window["type"] == "signal": +# if window["stddev"] > 0.03: +# # # check if one before was signal +# before_is_signal = False +# if window_idx > 0 and windows[window_idx - 1]["type"] == "signal": +# before_is_signal = True +# # check if one after was signal +# after_is_signal = False +# if ( +# window_idx + 1 < len(windows) +# and windows[window_idx + 1]["type"] == "signal" +# ): +# after_is_signal = True +# if before_is_signal and after_is_signal: +# continue +# valid_windows.append(window) +# return valid_windows + + # 3.2 SEGMENTATION VERSION HAD THIS # def keep_signal_surrounded_by_noise(windows): # valid_windows = [] @@ -560,6 +582,8 @@ def recompute_stats_for_windows(windows, v, pd, trim): for window in windows: _pd = pd[window["start_idx"] : window["end_idx"]] _v = v[:, window["start_idx"] : window["end_idx"]] + # TODO THIS ISNT RIGHT??? + # _v = v[window["start_idx"] : window["end_idx"]] r = get_stats_for_signal(_v, _pd, trim) window["mean"] = r[0] window["stddev"] = r[1] @@ -621,7 +645,7 @@ def simple_segment( # only keep signal windows surounded by noise candidate_windows = keep_signal_surrounded_by_noise(candidate_windows) # - candidate_windows = drop_noise_windows(candidate_windows) + # candidate_windows = drop_noise_windows(candidate_windows) simple_segmentation = recompute_stats_for_windows(candidate_windows, v, pd, trim) downsampled_segmentation_mask = compute_downsampled_segmentation_mask( diff --git a/spf/utils.py b/spf/utils.py index f782f3f..6899be3 100644 --- a/spf/utils.py +++ b/spf/utils.py @@ -13,7 +13,7 @@ from numcodecs import Blosc from torch.utils.data import BatchSampler, DistributedSampler -SEGMENTATION_VERSION = 3.3 +SEGMENTATION_VERSION = 3.4 warnings.simplefilter(action="ignore", category=FutureWarning)