From 91e4aa5b718fc814564541b013eb50bb70ba416e Mon Sep 17 00:00:00 2001 From: Eric Lipe Date: Tue, 1 Oct 2024 14:25:34 -0400 Subject: [PATCH 01/19] - Moved docs to new dir - Added connector app to support restoring DBs --- .../db-upgrade}/cloud-foundry-db-upgrade.md | 60 +++--- tdrs-backend/db-upgrade/manifest.yml | 12 ++ .../new-cloud-foundry-db-upgrade.md | 198 ++++++++++++++++++ 3 files changed, 241 insertions(+), 29 deletions(-) rename {docs/Technical-Documentation => tdrs-backend/db-upgrade}/cloud-foundry-db-upgrade.md (78%) create mode 100644 tdrs-backend/db-upgrade/manifest.yml create mode 100644 tdrs-backend/db-upgrade/new-cloud-foundry-db-upgrade.md diff --git a/docs/Technical-Documentation/cloud-foundry-db-upgrade.md b/tdrs-backend/db-upgrade/cloud-foundry-db-upgrade.md similarity index 78% rename from docs/Technical-Documentation/cloud-foundry-db-upgrade.md rename to tdrs-backend/db-upgrade/cloud-foundry-db-upgrade.md index 466b562f6..6fd95eb9e 100644 --- a/docs/Technical-Documentation/cloud-foundry-db-upgrade.md +++ b/tdrs-backend/db-upgrade/cloud-foundry-db-upgrade.md @@ -2,35 +2,37 @@ ## Process -If you are performing this process for the staging or production, you need to ensure you are performing the changes through the [HHS](https://github.com/HHS/TANF-app) repo and not the [Raft](https://github.com/raft-tech/TANF-app) repo. +If you are performing this process for the staging or production, you need to ensure you are performing the changes through the [HHS](https://github.com/HHS/TANF-app) repo and not the [Raft](https://github.com/raft-tech/TANF-app) repo. You also need to have the postgres client binaries installed on your local machine.
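If you still need the client binaries, the package name depends on your OS and on the Postgres major version you are targeting. A minimal sketch, assuming Debian/Ubuntu or macOS with Homebrew (adjust the version to the one you are upgrading to):

```
# Debian/Ubuntu: client tools for a specific major version
sudo apt-get install postgresql-client-15

# macOS: libpq ships psql, pg_dump and pg_restore (keg-only, so add it to your PATH)
brew install libpq
```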
-### 1. SSH into a backend app in your desired environment
-```bash
-cf ssh tdp-backend-
+### 1. Open an SSH tunnel to the service
+To execute commands on the RDS instance we can open an SSH tunnel to the service and run all our commands locally.
```
- -### 2. Create a backup of all the databases in the ENV's RDS instance -Note: you can get the required field values from `VCAP_SERVICES`. -```bash -/home/vcap/deps/0/apt/usr/lib/postgresql//bin/pg_dump -h -p -d -U -F c --no-acl --no-owner -f .pg +cf connect-to-service --no-client ``` -
- -### 3. Copy the backup(s) to your local machine -Note: This assumes you ran the backup command above in the home directory of the app. As an added bonus for later steps, you should execute this command from somewhere within `tdrs-backend` directory! Make sure not to commit the files/directories that are copied to your local directory. -```bash -cf ssh tdp-backend-- -c 'tar cfz - ~/app/*.pg' | tar xfz - -C . +You should see out put similar to: +``` +Finding the service instance details... +Setting up SSH tunnel... +SSH tunnel created. +Skipping call to client CLI. Connection information: + +Host: localhost +Port: 63634 +Username: +Password: +Name: + +Leave this terminal open while you want to use the SSH tunnel. Press Control-C to stop. ```
-### 4. Verify backup file size(s) match the backup size(s) in the app
-```bash
-ls -lh /home/vcap/app
+### 2. Create a backup of the database(s) in the RDS instance
+Note: the , , , and are the values you received from the output of the SSH tunnel. The parameter is the name of the DB you want to export, e.g `tdp_db_raft`. You will need to run this command for each DB in the instance.
```
-As an added verification step, you should consider restoring the backups into a local server and verifying the contents with `psql` or `pgAdmin`.
-

+pg_dump -h -p -d -U -F c --no-acl --no-owner -f .pg
+```
+
### 5. Update the `version` key in the `json_params` item in the `database` resource in the `main.tf` file in the environment(s) you're upgrading with the new database server version ```yaml @@ -57,7 +59,7 @@ Follow the instuctions in the `terraform/README.md` and proceed from there. Modi

### 9. Bind backend to the new RDS instance to get credentials -```bash +``` cf bind-service tdp-backend- tdp-db- ``` Be sure to re-stage the app when prompted @@ -65,37 +67,37 @@ Be sure to re-stage the app when prompted ### 10. Apply the backend manifest to begin the restore process If you copied the backups as mentioned in the note from step 3, the backups will be copied for you to the app instance in the command below. If not, you will need to use `scp` to copy the backups to the app instance after running the command below. -```bash +``` cf push tdp-backend- --no-route -f manifest.buildpack.yml -t 180 --strategy rolling ```
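If you did not keep the backups somewhere under `tdrs-backend` and need to get them onto the app instance manually, one option instead of `scp` is to reverse the tar pipe used in step 3. This is only a sketch; the app name is illustrative and the backups are assumed to be in your current directory:

```
tar cfz - *.pg | cf ssh tdp-backend-prod -c 'tar xfz - -C ~/app'
```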
### 11. SSH into the app you just pushed
-```bash
+```
cf ssh tdp-backend-
```
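The next two steps read their connection values from `VCAP_SERVICES`. Once inside the app, one way to print it in a readable form (assuming `python3` is on the path, which should be the case for the Python backend app):

```
echo "$VCAP_SERVICES" | python3 -m json.tool
```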
### 12. Create the appropriate database(s) in the new RDS server
Note: you can get the required field values from `VCAP_SERVICES`.
-```bash
+```
/home/vcap/deps/0/apt/usr/lib/postgresql//bin/createdb -U -h
```
### 13. Restore the backup(s) to the appropriate database(s) Note: you can get the required field values from `VCAP_SERVICES`. -```bash +``` /home/vcap/deps/0/apt/usr/lib/postgresql//bin/pg_restore -p -h -U -d .pg ``` During this step, you may see errors similar to the message below. Note `` is imputed in the message to avoid leaking environment specific usernames/roles. -```bash +``` pg_restore: from TOC entry 215; 1259 17313 SEQUENCE users_user_user_permissions_id_seq pg_restore: error: could not execute query: ERROR: role "" does not exist Command was: ALTER TABLE public.users_user_user_permissions_id_seq OWNER TO ; ``` and the result and total amount of these errors should be: -```bash +``` pg_restore: warning: errors ignored on restore: 68 ``` If this is what you see, everything is OK. This happens because the `pg_dump` doesn't remove owner associations on sequences for some reason. But you will see in the blocks above that `pg_restore` correctly alters the sequence owner to the new database user. @@ -103,7 +105,7 @@ If this is what you see, everything is OK. This happens because the `pg_dump` do ### 14. Use `psql` to get into the database to check state Note: you can get the required field values from `VCAP_SERVICES`. -```bash +``` /home/vcap/deps/0/apt/usr/lib/postgresql//bin/psql ```
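Once you are at the `psql` prompt, a few quick checks help confirm the restore looks complete; the table name below is only an example from this schema, and the counts should be compared against the old database:

```
\l
\dt
SELECT COUNT(*) FROM users_user;
```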
diff --git a/tdrs-backend/db-upgrade/manifest.yml b/tdrs-backend/db-upgrade/manifest.yml new file mode 100644 index 000000000..33f655e96 --- /dev/null +++ b/tdrs-backend/db-upgrade/manifest.yml @@ -0,0 +1,12 @@ +version: 1 +applications: +- name: db-connector + instances: 1 + memory: 512M + disk_quota: 2G + env: + POSTGRES_PASSWORD: password + docker: + image: postgres:15.7-alpine3.20 + services: + - diff --git a/tdrs-backend/db-upgrade/new-cloud-foundry-db-upgrade.md b/tdrs-backend/db-upgrade/new-cloud-foundry-db-upgrade.md new file mode 100644 index 000000000..13b794281 --- /dev/null +++ b/tdrs-backend/db-upgrade/new-cloud-foundry-db-upgrade.md @@ -0,0 +1,198 @@ +# Cloud Foundry, Cloud.gov AWS RDS Database Upgrade + +## Process + +If you are performing this process for the staging or production, you need to ensure you are performing the changes through the [HHS](https://github.com/HHS/TANF-app) repo and not the [Raft](https://github.com/raft-tech/TANF-app) repo. You also need to have the postgres client binaries installed on your local machine. + +### 1. Open an SSH tunnel to the service +To execute commands on the RDS instance we can open an SSH tunnel to the service and run all our commands locally. Keep this tunnel open in a separate terminal window until this process is complete! + +``` +cf connect-to-service --no-client +``` + +You should see out put similar to: + +``` +Finding the service instance details... +Setting up SSH tunnel... +SSH tunnel created. +Skipping call to client CLI. Connection information: + +Host: localhost +Port: 63634 +Username: +Password: +Name: + +Leave this terminal open while you want to use the SSH tunnel. Press Control-C to stop. +``` + +### 2. Create a backup of the database(s) in the RDS instance +In a separate terminal from your SSH tunnel terminal, generate the `pg_dump` files. +Note: the , , , and are the values you received from the output of the SSH tunnel. The parameter is the name of the DB you want to export, e.g `tdp_db_raft`. You will need to run this command for each DB in the instance. + +``` +pg_dump -h -p -d -U -F c --no-acl --no-owner -f .pg +``` + +After the command finishes, you should see .pg in your current working directory. Do some sanity checks on this backup file to assert it makes sense. Now that we have our backup(s), we need to begin making the Terraform changes required to support the upgrade. +
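A couple of quick ways to sanity check a dump before moving on; the file name below is illustrative, and `pg_restore --list` only prints the archive's table of contents without restoring anything:

```
ls -lh tdp_db_raft.pg
pg_restore --list tdp_db_raft.pg | head -n 20
```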
+ +### 3. Update Terraform to create a new RDS instance +Follow the instructions in the `terraform/README.md` to get Terraform configured. Modify the `main.tf` file in the `terraform/` to include a new RDS instance. E.g if you were updating `prod` to version 15.x you would add the following code to the `main.tf` file. We are NOT removing the existing `resource "cloudfoundry_service_instance" "database"` from the `main.tf` file. Note that the resource and the `name` of the new RDS instance are not the same as the original resource name and RDS name. This is on purpose and we will remedy this in later steps. + +```yaml +resource "cloudfoundry_service_instance" "new-database" { + name = "tdp-db-prod-new" + space = data.cloudfoundry_space.space.id + service_plan = data.cloudfoundry_service.rds.service_plans["medium-gp-psql"] + json_params = "{\"version\": \"15\", \"storage_type\": \"gp3\", \"storage\": 500}" + recursive_delete = true + timeouts { + create = "60m" + update = "60m" + delete = "2h" + } +} +``` +After adding the new RDS resource to `main.tf`, you can follow the rest of the instructions in the `terraform/README.md` to plan and then apply this change with Terraform. + +### 4. Bind an app to the new RDS instance +In the `tdrs-backend/db-upgrade` directory, open the `manifest.yml` file and update the `services` block to reference the new RDS service you just created: in the example this would be: `- tdp-db-prod-new`. Then deploy this manifest: `cf push --no-route -f manifest.yml -t 180`. Wait for the connector app to deploy. We need to deploy a temporary app to avoid too much downtime for the backend app(s) and so that we can start new SSH tunnel to the new RDS instance. You should now close the original SSH tunnel we opened in step 1. + +### 5. Open an SSH tunnel to the new RDS instance +Again, in a separate terminal execute the following command and leave that terminal/connection alive until further notice. +``` +cf connect-to-service --no-client db-connector +``` + +### 6. Create the appropriate database(s) in the new RDS server +Using the credentials from the new SSH tunnel, create the same DB(s) you dumped in the new RDS instance. +``` +createdb -U -h -p +``` + +### 7. Restore the backup(s) to the appropriate database(s) +Using the credentials from the new SSH tunnel, restore the backups to the appropriate DBs. +``` +pg_restore -p -h -U -d .pg +``` + +During this step, you may see errors similar to the message below. Note `` is imputed in the message to avoid leaking environment specific usernames/roles. + +``` +pg_restore: from TOC entry 215; 1259 17313 SEQUENCE users_user_user_permissions_id_seq +pg_restore: error: could not execute query: ERROR: role "" does not exist +Command was: ALTER TABLE public.users_user_user_permissions_id_seq OWNER TO ; +``` + +and the result and total amount of these errors should be something like: + +``` +pg_restore: warning: errors ignored on restore: 68 +``` + +If this is what you see, everything is OK. This happens because the `pg_dump` doesn't remove owner associations on sequences for some reason. But you will see in the blocks above that `pg_restore` correctly alters the sequence owner to the new database user. + +### 8. Use `psql` to get into the database(s) to check state +Using the credentials from the new SSH tunnel, use the psql cli to inspect the restored DBs. +``` +psql -p -h -U -d +``` +
+ +### 9. Rename and Move RDS instances +Now that we have verified that the data in our new RDS instance looks good. We need to lift and shift the backend app(s) to point to our new RDS instance as if it is the existing (now old) RDS instance. + +First we need to unbind the existing RDS instance from the backend app(s) so that way we can make name changes. +``` +cf unbind service +``` + +After unbinding the service we want to update the "old RDS" service `name` to something different, plan, and then apply those changes with Terraform. +```yaml +resource "cloudfoundry_service_instance" "database" { + name = "something-that-isnt-tdp-db-prod" + space = data.cloudfoundry_space.space.id + service_plan = data.cloudfoundry_service.rds.service_plans["medium-gp-psql"] + json_params = "{\"version\": \"15\", \"storage_type\": \"gp3\", \"storage\": 500}" + recursive_delete = true + timeouts { + create = "60m" + update = "60m" + delete = "2h" + } +} +``` + +Now we can name our "new RDS" service to the expected `name`. Then we can also plan and apply those changes with Terraform + +```yaml +resource "cloudfoundry_service_instance" "new-database" { + name = "tdp-db-prod" + space = data.cloudfoundry_space.space.id + service_plan = data.cloudfoundry_service.rds.service_plans["medium-gp-psql"] + json_params = "{\"version\": \"15\", \"storage_type\": \"gp3\", \"storage\": 500}" + recursive_delete = true + timeouts { + create = "60m" + update = "60m" + delete = "2h" + } +} +``` + +Now we will bind the new RDS service back to the backend app(s) and restage it. Be sure to monitor the app's logs to ensure it connects to the instance. + +``` +cf bind service +``` + +Then + +``` +cf restage +``` + +If the backend app is running with no issues, we can now safely remove the "old RDS" service from Terraform. Remove the entire resource block named `database` from `main.tf` re-plan and then apply the changes to remove that instance with Terraform. + +Finally, to get our Terraform state looking like it originally did, we want to rename our `new-database` resource back to `database`. That way we are consistent. To do so we rename the resource, and to avoid Terraform from deleting it (since `database` won't exist in the state) we want to inform Terraform that we have "moved" the resource. We do so by adding the following code to the `main.tf`. Note, when running `terraform plan ...` it will not show any infrastructure changes, only a name change. Ensure you still apply even if it looks like there are no changes! + +```yaml +moved { + from = cloudfoundry_service_instance.new-database + to = cloudfoundry_service_instance.database +} +``` + +After adding the above code, re-plan and apply the changes with Terrform. Once Terraform has successfully applied the change, remove the `moved` block from `main.tf`. Re-plan with Terraform and assert it agrees that there are no changes to be made. If Terraform reports changes, you have made a mistake and need to figure out where you made the mistake. + +### 10. Access the re-staged app(s) and run a smoke test +- Log in +- Submit a few datafiles +- Make sure new and existing submission histories populate correctly +- Checkout the DACs data + +If everything looks good, there is nothing to do. If apps aren't working/connecting to the new RDS instance, you will need to debug manually and determine if/where you made a mistake. + +### 11. 
Update the `postgresql-client` version to the new version in `tdrs-backend/apt.yml` +```yaml +- postgresql-client- +``` +Note: if the underlying OS for CloudFoundry is no longer `cflinuxfs4` (code name `jammy`) you may also need to update the repo we point to for the postgres client binaries. + +### 12. Update the postgres container version in `tdrs-backend/docker-compose.yml` +```yaml +postgres: +image: postgres: +``` + +### 13. Commit and push correct changes, revert unnecessary changes. +Commit and push the changes for: +- `main.tf` +- `tdrs-backend/apt.yml` +- `tdrs-backend/docker-compose.yml` + +Revert the changes for: +- `manifest.yml` From 2c83c9601716615591b0b4ab624dba87da31a8a9 Mon Sep 17 00:00:00 2001 From: Eric Lipe Date: Wed, 2 Oct 2024 12:06:29 -0400 Subject: [PATCH 02/19] - remove unnecessary document --- .../db-upgrade/cloud-foundry-db-upgrade.md | 184 +++++++++++----- .../new-cloud-foundry-db-upgrade.md | 198 ------------------ 2 files changed, 130 insertions(+), 252 deletions(-) delete mode 100644 tdrs-backend/db-upgrade/new-cloud-foundry-db-upgrade.md diff --git a/tdrs-backend/db-upgrade/cloud-foundry-db-upgrade.md b/tdrs-backend/db-upgrade/cloud-foundry-db-upgrade.md index 6fd95eb9e..13b794281 100644 --- a/tdrs-backend/db-upgrade/cloud-foundry-db-upgrade.md +++ b/tdrs-backend/db-upgrade/cloud-foundry-db-upgrade.md @@ -3,14 +3,16 @@ ## Process If you are performing this process for the staging or production, you need to ensure you are performing the changes through the [HHS](https://github.com/HHS/TANF-app) repo and not the [Raft](https://github.com/raft-tech/TANF-app) repo. You also need to have the postgres client binaries installed on your local machine. -
### 1. Open an SSH tunnel to the service -To execute commands on the RDS instance we can open an SSH tunnel to the service and run all our commands locally. +To execute commands on the RDS instance we can open an SSH tunnel to the service and run all our commands locally. Keep this tunnel open in a separate terminal window until this process is complete! + ``` cf connect-to-service --no-client ``` + You should see out put similar to: + ``` Finding the service instance details... Setting up SSH tunnel... @@ -25,98 +27,172 @@ Name: Leave this terminal open while you want to use the SSH tunnel. Press Control-C to stop. ``` -
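Before running any dumps, you can confirm the tunnel works by connecting with `psql` using the values printed above. The port shown is just the example value from the output, and `psql` will prompt for the password:

```
psql -h localhost -p 63634 -U <Username> -d <Name> -c 'SELECT version();'
```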
### 2. Create a backup of the database(s) in the RDS instance +In a separate terminal from your SSH tunnel terminal, generate the `pg_dump` files. Note: the , , , and are the values you received from the output of the SSH tunnel. The parameter is the name of the DB you want to export, e.g `tdp_db_raft`. You will need to run this command for each DB in the instance. + ``` pg_dump -h -p -d -U -F c --no-acl --no-owner -f .pg ``` -
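If the instance holds several databases, a small loop saves some repetition. This is a sketch: the database names are illustrative, and the username/password come from the tunnel output (`PGPASSWORD` lets `pg_dump` run without prompting):

```
DB_USER='<username from the tunnel output>'
export PGPASSWORD='<password from the tunnel output>'
for db in tdp_db_raft tdp_db_other; do
  pg_dump -h localhost -p 63634 -U "$DB_USER" -d "$db" -F c --no-acl --no-owner -f "$db.pg"
done
```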
-### 5. Update the `version` key in the `json_params` item in the `database` resource in the `main.tf` file in the environment(s) you're upgrading with the new database server version -```yaml -json_params = "{\"version\": \"\"}" -``` +After the command finishes, you should see .pg in your current working directory. Do some sanity checks on this backup file to assert it makes sense. Now that we have our backup(s), we need to begin making the Terraform changes required to support the upgrade.
-### 6. Update the `postgresql-client` version to the new version in `tdrs-backend/apt.yml` -```yaml -- postgresql-client- -``` -Note: if the underlying OS for CloudFoundry is no longer `cflinuxfs4` you may also need to update the repo we point to for the postgres client binaries. -

+### 3. Update Terraform to create a new RDS instance +Follow the instructions in the `terraform/README.md` to get Terraform configured. Modify the `main.tf` file in the `terraform/` to include a new RDS instance. E.g if you were updating `prod` to version 15.x you would add the following code to the `main.tf` file. We are NOT removing the existing `resource "cloudfoundry_service_instance" "database"` from the `main.tf` file. Note that the resource and the `name` of the new RDS instance are not the same as the original resource name and RDS name. This is on purpose and we will remedy this in later steps. -### 7. Update the postgres container version in `tdrs-backend/docker-compose.yml` ```yaml -postgres: -image: postgres: +resource "cloudfoundry_service_instance" "new-database" { + name = "tdp-db-prod-new" + space = data.cloudfoundry_space.space.id + service_plan = data.cloudfoundry_service.rds.service_plans["medium-gp-psql"] + json_params = "{\"version\": \"15\", \"storage_type\": \"gp3\", \"storage\": 500}" + recursive_delete = true + timeouts { + create = "60m" + update = "60m" + delete = "2h" + } +} ``` -
+After adding the new RDS resource to `main.tf`, you can follow the rest of the instructions in the `terraform/README.md` to plan and then apply this change with Terraform. -### 8. Update Terraform state to delete then re-create RDS instance -Follow the instuctions in the `terraform/README.md` and proceed from there. Modify the `main.tf` file in the `terraform/` directory to inform TF of the changes. To delete the existing RDS instance you can simply comment out the whole database `resource` in the file (even though you made changes in the steps above). TF will see that the resource is no longer there, delete it, and appropriately update it's state. Then you simply re-comment the database `resource` back in with the changes you made in previous steps. TF will create the new RDS instance with your new updates, and also update the state in S3. -

+### 4. Bind an app to the new RDS instance +In the `tdrs-backend/db-upgrade` directory, open the `manifest.yml` file and update the `services` block to reference the new RDS service you just created: in the example this would be: `- tdp-db-prod-new`. Then deploy this manifest: `cf push --no-route -f manifest.yml -t 180`. Wait for the connector app to deploy. We need to deploy a temporary app to avoid too much downtime for the backend app(s) and so that we can start new SSH tunnel to the new RDS instance. You should now close the original SSH tunnel we opened in step 1. -### 9. Bind backend to the new RDS instance to get credentials +### 5. Open an SSH tunnel to the new RDS instance +Again, in a separate terminal execute the following command and leave that terminal/connection alive until further notice. ``` -cf bind-service tdp-backend- tdp-db- +cf connect-to-service --no-client db-connector ``` -Be sure to re-stage the app when prompted -

-### 10. Apply the backend manifest to begin the restore process -If you copied the backups as mentioned in the note from step 3, the backups will be copied for you to the app instance in the command below. If not, you will need to use `scp` to copy the backups to the app instance after running the command below. +### 6. Create the appropriate database(s) in the new RDS server +Using the credentials from the new SSH tunnel, create the same DB(s) you dumped in the new RDS instance. ``` -cf push tdp-backend- --no-route -f manifest.buildpack.yml -t 180 --strategy rolling +createdb -U -h -p ``` -
-### 11. SSH into the app you just pushed +### 7. Restore the backup(s) to the appropriate database(s) +Using the credentials from the new SSH tunnel, restore the backups to the appropriate DBs. ``` -cf ssh tdp-backend- +pg_restore -p -h -U -d .pg ``` -
- -### 12. Create the appropriate database(s) in the new RDS server -Note: you can get the required field values from `VCAP_SERVICES`. -``` -/home/vcap/deps/0/apt/usr/lib/postgresql//bin/createdb -U -h -``` -
-### 13. Restore the backup(s) to the appropriate database(s) -Note: you can get the required field values from `VCAP_SERVICES`. -``` -/home/vcap/deps/0/apt/usr/lib/postgresql//bin/pg_restore -p -h -U -d .pg -``` During this step, you may see errors similar to the message below. Note `` is imputed in the message to avoid leaking environment specific usernames/roles. + ``` pg_restore: from TOC entry 215; 1259 17313 SEQUENCE users_user_user_permissions_id_seq pg_restore: error: could not execute query: ERROR: role "" does not exist Command was: ALTER TABLE public.users_user_user_permissions_id_seq OWNER TO ; ``` -and the result and total amount of these errors should be: + +and the result and total amount of these errors should be something like: + ``` pg_restore: warning: errors ignored on restore: 68 ``` + If this is what you see, everything is OK. This happens because the `pg_dump` doesn't remove owner associations on sequences for some reason. But you will see in the blocks above that `pg_restore` correctly alters the sequence owner to the new database user. -

-### 14. Use `psql` to get into the database to check state -Note: you can get the required field values from `VCAP_SERVICES`. +### 8. Use `psql` to get into the database(s) to check state +Using the credentials from the new SSH tunnel, use the psql cli to inspect the restored DBs. ``` -/home/vcap/deps/0/apt/usr/lib/postgresql//bin/psql +psql -p -h -U -d ```
-### 15. Re-deploy or Re-stage the backend and frontend apps -Pending your environment you can do this GitHub labels or you can re-stage the apps from Cloud.gov. -

+### 9. Rename and Move RDS instances +Now that we have verified that the data in our new RDS instance looks good. We need to lift and shift the backend app(s) to point to our new RDS instance as if it is the existing (now old) RDS instance. + +First we need to unbind the existing RDS instance from the backend app(s) so that way we can make name changes. +``` +cf unbind service +``` + +After unbinding the service we want to update the "old RDS" service `name` to something different, plan, and then apply those changes with Terraform. +```yaml +resource "cloudfoundry_service_instance" "database" { + name = "something-that-isnt-tdp-db-prod" + space = data.cloudfoundry_space.space.id + service_plan = data.cloudfoundry_service.rds.service_plans["medium-gp-psql"] + json_params = "{\"version\": \"15\", \"storage_type\": \"gp3\", \"storage\": 500}" + recursive_delete = true + timeouts { + create = "60m" + update = "60m" + delete = "2h" + } +} +``` + +Now we can name our "new RDS" service to the expected `name`. Then we can also plan and apply those changes with Terraform + +```yaml +resource "cloudfoundry_service_instance" "new-database" { + name = "tdp-db-prod" + space = data.cloudfoundry_space.space.id + service_plan = data.cloudfoundry_service.rds.service_plans["medium-gp-psql"] + json_params = "{\"version\": \"15\", \"storage_type\": \"gp3\", \"storage\": 500}" + recursive_delete = true + timeouts { + create = "60m" + update = "60m" + delete = "2h" + } +} +``` + +Now we will bind the new RDS service back to the backend app(s) and restage it. Be sure to monitor the app's logs to ensure it connects to the instance. + +``` +cf bind service +``` -### 16. Access the re-deployed/re-staged apps and run a smoke test +Then + +``` +cf restage +``` + +If the backend app is running with no issues, we can now safely remove the "old RDS" service from Terraform. Remove the entire resource block named `database` from `main.tf` re-plan and then apply the changes to remove that instance with Terraform. + +Finally, to get our Terraform state looking like it originally did, we want to rename our `new-database` resource back to `database`. That way we are consistent. To do so we rename the resource, and to avoid Terraform from deleting it (since `database` won't exist in the state) we want to inform Terraform that we have "moved" the resource. We do so by adding the following code to the `main.tf`. Note, when running `terraform plan ...` it will not show any infrastructure changes, only a name change. Ensure you still apply even if it looks like there are no changes! + +```yaml +moved { + from = cloudfoundry_service_instance.new-database + to = cloudfoundry_service_instance.database +} +``` + +After adding the above code, re-plan and apply the changes with Terrform. Once Terraform has successfully applied the change, remove the `moved` block from `main.tf`. Re-plan with Terraform and assert it agrees that there are no changes to be made. If Terraform reports changes, you have made a mistake and need to figure out where you made the mistake. + +### 10. Access the re-staged app(s) and run a smoke test - Log in - Submit a few datafiles - Make sure new and existing submission histories populate correctly - Checkout the DACs data -
+ +If everything looks good, there is nothing to do. If apps aren't working/connecting to the new RDS instance, you will need to debug manually and determine if/where you made a mistake. + +### 11. Update the `postgresql-client` version to the new version in `tdrs-backend/apt.yml` +```yaml +- postgresql-client- +``` +Note: if the underlying OS for CloudFoundry is no longer `cflinuxfs4` (code name `jammy`) you may also need to update the repo we point to for the postgres client binaries. + +### 12. Update the postgres container version in `tdrs-backend/docker-compose.yml` +```yaml +postgres: +image: postgres: +``` + +### 13. Commit and push correct changes, revert unnecessary changes. +Commit and push the changes for: +- `main.tf` +- `tdrs-backend/apt.yml` +- `tdrs-backend/docker-compose.yml` + +Revert the changes for: +- `manifest.yml` diff --git a/tdrs-backend/db-upgrade/new-cloud-foundry-db-upgrade.md b/tdrs-backend/db-upgrade/new-cloud-foundry-db-upgrade.md deleted file mode 100644 index 13b794281..000000000 --- a/tdrs-backend/db-upgrade/new-cloud-foundry-db-upgrade.md +++ /dev/null @@ -1,198 +0,0 @@ -# Cloud Foundry, Cloud.gov AWS RDS Database Upgrade - -## Process - -If you are performing this process for the staging or production, you need to ensure you are performing the changes through the [HHS](https://github.com/HHS/TANF-app) repo and not the [Raft](https://github.com/raft-tech/TANF-app) repo. You also need to have the postgres client binaries installed on your local machine. - -### 1. Open an SSH tunnel to the service -To execute commands on the RDS instance we can open an SSH tunnel to the service and run all our commands locally. Keep this tunnel open in a separate terminal window until this process is complete! - -``` -cf connect-to-service --no-client -``` - -You should see out put similar to: - -``` -Finding the service instance details... -Setting up SSH tunnel... -SSH tunnel created. -Skipping call to client CLI. Connection information: - -Host: localhost -Port: 63634 -Username: -Password: -Name: - -Leave this terminal open while you want to use the SSH tunnel. Press Control-C to stop. -``` - -### 2. Create a backup of the database(s) in the RDS instance -In a separate terminal from your SSH tunnel terminal, generate the `pg_dump` files. -Note: the , , , and are the values you received from the output of the SSH tunnel. The parameter is the name of the DB you want to export, e.g `tdp_db_raft`. You will need to run this command for each DB in the instance. - -``` -pg_dump -h -p -d -U -F c --no-acl --no-owner -f .pg -``` - -After the command finishes, you should see .pg in your current working directory. Do some sanity checks on this backup file to assert it makes sense. Now that we have our backup(s), we need to begin making the Terraform changes required to support the upgrade. -
- -### 3. Update Terraform to create a new RDS instance -Follow the instructions in the `terraform/README.md` to get Terraform configured. Modify the `main.tf` file in the `terraform/` to include a new RDS instance. E.g if you were updating `prod` to version 15.x you would add the following code to the `main.tf` file. We are NOT removing the existing `resource "cloudfoundry_service_instance" "database"` from the `main.tf` file. Note that the resource and the `name` of the new RDS instance are not the same as the original resource name and RDS name. This is on purpose and we will remedy this in later steps. - -```yaml -resource "cloudfoundry_service_instance" "new-database" { - name = "tdp-db-prod-new" - space = data.cloudfoundry_space.space.id - service_plan = data.cloudfoundry_service.rds.service_plans["medium-gp-psql"] - json_params = "{\"version\": \"15\", \"storage_type\": \"gp3\", \"storage\": 500}" - recursive_delete = true - timeouts { - create = "60m" - update = "60m" - delete = "2h" - } -} -``` -After adding the new RDS resource to `main.tf`, you can follow the rest of the instructions in the `terraform/README.md` to plan and then apply this change with Terraform. - -### 4. Bind an app to the new RDS instance -In the `tdrs-backend/db-upgrade` directory, open the `manifest.yml` file and update the `services` block to reference the new RDS service you just created: in the example this would be: `- tdp-db-prod-new`. Then deploy this manifest: `cf push --no-route -f manifest.yml -t 180`. Wait for the connector app to deploy. We need to deploy a temporary app to avoid too much downtime for the backend app(s) and so that we can start new SSH tunnel to the new RDS instance. You should now close the original SSH tunnel we opened in step 1. - -### 5. Open an SSH tunnel to the new RDS instance -Again, in a separate terminal execute the following command and leave that terminal/connection alive until further notice. -``` -cf connect-to-service --no-client db-connector -``` - -### 6. Create the appropriate database(s) in the new RDS server -Using the credentials from the new SSH tunnel, create the same DB(s) you dumped in the new RDS instance. -``` -createdb -U -h -p -``` - -### 7. Restore the backup(s) to the appropriate database(s) -Using the credentials from the new SSH tunnel, restore the backups to the appropriate DBs. -``` -pg_restore -p -h -U -d .pg -``` - -During this step, you may see errors similar to the message below. Note `` is imputed in the message to avoid leaking environment specific usernames/roles. - -``` -pg_restore: from TOC entry 215; 1259 17313 SEQUENCE users_user_user_permissions_id_seq -pg_restore: error: could not execute query: ERROR: role "" does not exist -Command was: ALTER TABLE public.users_user_user_permissions_id_seq OWNER TO ; -``` - -and the result and total amount of these errors should be something like: - -``` -pg_restore: warning: errors ignored on restore: 68 -``` - -If this is what you see, everything is OK. This happens because the `pg_dump` doesn't remove owner associations on sequences for some reason. But you will see in the blocks above that `pg_restore` correctly alters the sequence owner to the new database user. - -### 8. Use `psql` to get into the database(s) to check state -Using the credentials from the new SSH tunnel, use the psql cli to inspect the restored DBs. -``` -psql -p -h -U -d -``` -
- -### 9. Rename and Move RDS instances -Now that we have verified that the data in our new RDS instance looks good. We need to lift and shift the backend app(s) to point to our new RDS instance as if it is the existing (now old) RDS instance. - -First we need to unbind the existing RDS instance from the backend app(s) so that way we can make name changes. -``` -cf unbind service -``` - -After unbinding the service we want to update the "old RDS" service `name` to something different, plan, and then apply those changes with Terraform. -```yaml -resource "cloudfoundry_service_instance" "database" { - name = "something-that-isnt-tdp-db-prod" - space = data.cloudfoundry_space.space.id - service_plan = data.cloudfoundry_service.rds.service_plans["medium-gp-psql"] - json_params = "{\"version\": \"15\", \"storage_type\": \"gp3\", \"storage\": 500}" - recursive_delete = true - timeouts { - create = "60m" - update = "60m" - delete = "2h" - } -} -``` - -Now we can name our "new RDS" service to the expected `name`. Then we can also plan and apply those changes with Terraform - -```yaml -resource "cloudfoundry_service_instance" "new-database" { - name = "tdp-db-prod" - space = data.cloudfoundry_space.space.id - service_plan = data.cloudfoundry_service.rds.service_plans["medium-gp-psql"] - json_params = "{\"version\": \"15\", \"storage_type\": \"gp3\", \"storage\": 500}" - recursive_delete = true - timeouts { - create = "60m" - update = "60m" - delete = "2h" - } -} -``` - -Now we will bind the new RDS service back to the backend app(s) and restage it. Be sure to monitor the app's logs to ensure it connects to the instance. - -``` -cf bind service -``` - -Then - -``` -cf restage -``` - -If the backend app is running with no issues, we can now safely remove the "old RDS" service from Terraform. Remove the entire resource block named `database` from `main.tf` re-plan and then apply the changes to remove that instance with Terraform. - -Finally, to get our Terraform state looking like it originally did, we want to rename our `new-database` resource back to `database`. That way we are consistent. To do so we rename the resource, and to avoid Terraform from deleting it (since `database` won't exist in the state) we want to inform Terraform that we have "moved" the resource. We do so by adding the following code to the `main.tf`. Note, when running `terraform plan ...` it will not show any infrastructure changes, only a name change. Ensure you still apply even if it looks like there are no changes! - -```yaml -moved { - from = cloudfoundry_service_instance.new-database - to = cloudfoundry_service_instance.database -} -``` - -After adding the above code, re-plan and apply the changes with Terrform. Once Terraform has successfully applied the change, remove the `moved` block from `main.tf`. Re-plan with Terraform and assert it agrees that there are no changes to be made. If Terraform reports changes, you have made a mistake and need to figure out where you made the mistake. - -### 10. Access the re-staged app(s) and run a smoke test -- Log in -- Submit a few datafiles -- Make sure new and existing submission histories populate correctly -- Checkout the DACs data - -If everything looks good, there is nothing to do. If apps aren't working/connecting to the new RDS instance, you will need to debug manually and determine if/where you made a mistake. - -### 11. 
Update the `postgresql-client` version to the new version in `tdrs-backend/apt.yml` -```yaml -- postgresql-client- -``` -Note: if the underlying OS for CloudFoundry is no longer `cflinuxfs4` (code name `jammy`) you may also need to update the repo we point to for the postgres client binaries. - -### 12. Update the postgres container version in `tdrs-backend/docker-compose.yml` -```yaml -postgres: -image: postgres: -``` - -### 13. Commit and push correct changes, revert unnecessary changes. -Commit and push the changes for: -- `main.tf` -- `tdrs-backend/apt.yml` -- `tdrs-backend/docker-compose.yml` - -Revert the changes for: -- `manifest.yml` From e57e58ba8d0af0b7f1c4a677ca9309c2e02eea2d Mon Sep 17 00:00:00 2001 From: Eric Lipe Date: Wed, 2 Oct 2024 14:37:31 -0400 Subject: [PATCH 03/19] - add intro --- tdrs-backend/db-upgrade/cloud-foundry-db-upgrade.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tdrs-backend/db-upgrade/cloud-foundry-db-upgrade.md b/tdrs-backend/db-upgrade/cloud-foundry-db-upgrade.md index 13b794281..cec613351 100644 --- a/tdrs-backend/db-upgrade/cloud-foundry-db-upgrade.md +++ b/tdrs-backend/db-upgrade/cloud-foundry-db-upgrade.md @@ -1,9 +1,8 @@ # Cloud Foundry, Cloud.gov AWS RDS Database Upgrade +The process below provides a guide to roll our backend applications over to a new RDS version and instance. The entire process can take several hours and does involve downtime for the environment which you are upgrading. Be sure to take those factors into account when commencing the process. ## Process -If you are performing this process for the staging or production, you need to ensure you are performing the changes through the [HHS](https://github.com/HHS/TANF-app) repo and not the [Raft](https://github.com/raft-tech/TANF-app) repo. You also need to have the postgres client binaries installed on your local machine. - ### 1. Open an SSH tunnel to the service To execute commands on the RDS instance we can open an SSH tunnel to the service and run all our commands locally. Keep this tunnel open in a separate terminal window until this process is complete! From 17bdbcb364f41608d42b04c4dcc38b76e13262e0 Mon Sep 17 00:00:00 2001 From: Eric Lipe Date: Thu, 3 Oct 2024 09:47:32 -0400 Subject: [PATCH 04/19] - Clean up docs --- .../db-upgrade/cloud-foundry-db-upgrade.md | 39 +++++++++++-------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/tdrs-backend/db-upgrade/cloud-foundry-db-upgrade.md b/tdrs-backend/db-upgrade/cloud-foundry-db-upgrade.md index cec613351..abb9caa30 100644 --- a/tdrs-backend/db-upgrade/cloud-foundry-db-upgrade.md +++ b/tdrs-backend/db-upgrade/cloud-foundry-db-upgrade.md @@ -4,13 +4,13 @@ The process below provides a guide to roll our backend applications over to a ne ## Process ### 1. Open an SSH tunnel to the service -To execute commands on the RDS instance we can open an SSH tunnel to the service and run all our commands locally. Keep this tunnel open in a separate terminal window until this process is complete! +To execute commands on the RDS instance we can open an SSH tunnel to the service and run all our commands from our local machine. Keep this tunnel open in a separate terminal window until this process is complete! ``` cf connect-to-service --no-client ``` -You should see out put similar to: +You should see output similar to: ``` Finding the service instance details... @@ -29,17 +29,16 @@ Leave this terminal open while you want to use the SSH tunnel. Press Control-C t ### 2. 
Create a backup of the database(s) in the RDS instance
In a separate terminal from your SSH tunnel terminal, generate the `pg_dump` files.
-Note: the , , , and are the values you received from the output of the SSH tunnel. The parameter is the name of the DB you want to export, e.g `tdp_db_raft`. You will need to run this command for each DB in the instance.
+Note: the HOST, PORT, DB_USER, and PASSWORD are the values you received from the output of the SSH tunnel. The DB_NAME parameter is the name of the DB you want to export, e.g. `tdp_db_raft`. You will need to run this command for each DB in the instance.
```
pg_dump -h -p -d -U -F c --no-acl --no-owner -f .pg
```
-After the command finishes, you should see .pg in your current working directory. Do some sanity checks on this backup file to assert it makes sense. Now that we have our backup(s), we need to begin making the Terraform changes required to support the upgrade.
-
+After the command finishes, you should see .pg in your current working directory. ### 3. Update Terraform to create a new RDS instance -Follow the instructions in the `terraform/README.md` to get Terraform configured. Modify the `main.tf` file in the `terraform/` to include a new RDS instance. E.g if you were updating `prod` to version 15.x you would add the following code to the `main.tf` file. We are NOT removing the existing `resource "cloudfoundry_service_instance" "database"` from the `main.tf` file. Note that the resource and the `name` of the new RDS instance are not the same as the original resource name and RDS name. This is on purpose and we will remedy this in later steps. +Follow the instructions in the `terraform/README.md` to get Terraform configured. Modify the `main.tf` file in the `terraform/` to include a new RDS instance. E.g if you were updating `prod` to version 15.x you would add the following code to the `main.tf` file. We are **NOT** removing the existing `resource "cloudfoundry_service_instance" "database"` from the `main.tf` file. Note that the resource name (i.e. `new-database`) and the `name` of the new RDS instance are not the same as the original resource name and RDS name. This is on purpose and we will remedy this in later steps. ```yaml resource "cloudfoundry_service_instance" "new-database" { @@ -55,25 +54,29 @@ resource "cloudfoundry_service_instance" "new-database" { } } ``` + After adding the new RDS resource to `main.tf`, you can follow the rest of the instructions in the `terraform/README.md` to plan and then apply this change with Terraform. ### 4. Bind an app to the new RDS instance -In the `tdrs-backend/db-upgrade` directory, open the `manifest.yml` file and update the `services` block to reference the new RDS service you just created: in the example this would be: `- tdp-db-prod-new`. Then deploy this manifest: `cf push --no-route -f manifest.yml -t 180`. Wait for the connector app to deploy. We need to deploy a temporary app to avoid too much downtime for the backend app(s) and so that we can start new SSH tunnel to the new RDS instance. You should now close the original SSH tunnel we opened in step 1. +In the `tdrs-backend/db-upgrade` directory, open the `manifest.yml` file and update the `services` block to reference the new RDS service you just created: in the example this would be: `- tdp-db-prod-new`. Then deploy this manifest: `cf push --no-route -f manifest.yml -t 180`. Wait for the connector app to deploy. We need to deploy a temporary app to avoid too much downtime for the backend app(s), erroneous transactions on the new RDS instance, and so that we can start a new SSH tunnel to the new RDS instance. If you haven't already, you should now close the original SSH tunnel we opened in step 1. ### 5. Open an SSH tunnel to the new RDS instance Again, in a separate terminal execute the following command and leave that terminal/connection alive until further notice. + ``` cf connect-to-service --no-client db-connector ``` ### 6. Create the appropriate database(s) in the new RDS server Using the credentials from the new SSH tunnel, create the same DB(s) you dumped in the new RDS instance. + ``` createdb -U -h -p ``` ### 7. Restore the backup(s) to the appropriate database(s) Using the credentials from the new SSH tunnel, restore the backups to the appropriate DBs. 
+ ``` pg_restore -p -h -U -d .pg ``` @@ -92,24 +95,26 @@ and the result and total amount of these errors should be something like: pg_restore: warning: errors ignored on restore: 68 ``` -If this is what you see, everything is OK. This happens because the `pg_dump` doesn't remove owner associations on sequences for some reason. But you will see in the blocks above that `pg_restore` correctly alters the sequence owner to the new database user. +If this is what you see, everything is OK. This happens because the `pg_dump` doesn't remove all owner associations on DB objects for some reason. But you will see in the blocks above that `pg_restore` correctly alters the object owner to the new database user. ### 8. Use `psql` to get into the database(s) to check state -Using the credentials from the new SSH tunnel, use the psql cli to inspect the restored DBs. +Using the credentials from the new SSH tunnel, use the psql cli to inspect the restored DBs. You should consider counting the number of tables in the new and old DBs, counting some records across different tables, etc... + ``` psql -p -h -U -d ``` -
### 9. Rename and Move RDS instances -Now that we have verified that the data in our new RDS instance looks good. We need to lift and shift the backend app(s) to point to our new RDS instance as if it is the existing (now old) RDS instance. +Now that we have verified the data in our new RDS instance looks good, we need to lift and shift the backend app(s) to point to our new RDS instance as if it is the existing (now old) RDS instance. + +First we need to unbind the existing RDS instance from the backend app(s) it is bound to. -First we need to unbind the existing RDS instance from the backend app(s) so that way we can make name changes. ``` cf unbind service ``` After unbinding the service we want to update the "old RDS" service `name` to something different, plan, and then apply those changes with Terraform. + ```yaml resource "cloudfoundry_service_instance" "database" { name = "something-that-isnt-tdp-db-prod" @@ -125,7 +130,7 @@ resource "cloudfoundry_service_instance" "database" { } ``` -Now we can name our "new RDS" service to the expected `name`. Then we can also plan and apply those changes with Terraform +Now we can name our "new RDS" service to the expected `name` (i.e. the original `name` field from our old RDS instance). Then we plan and apply those changes with Terraform. ```yaml resource "cloudfoundry_service_instance" "new-database" { @@ -142,7 +147,7 @@ resource "cloudfoundry_service_instance" "new-database" { } ``` -Now we will bind the new RDS service back to the backend app(s) and restage it. Be sure to monitor the app's logs to ensure it connects to the instance. +Next we will bind the new RDS service back to the backend app(s) we unbound the old instance from and restage them. Be sure to monitor the backend app's logs to ensure it connects to the instance and starts as expected. ``` cf bind service @@ -154,7 +159,7 @@ Then cf restage ``` -If the backend app is running with no issues, we can now safely remove the "old RDS" service from Terraform. Remove the entire resource block named `database` from `main.tf` re-plan and then apply the changes to remove that instance with Terraform. +If the backend app(s) are running with no issues, we can now safely remove the "old RDS" service from Terraform. Remove the entire resource block named `database` from `main.tf`, plan and then apply the changes to remove that instance with Terraform. Finally, to get our Terraform state looking like it originally did, we want to rename our `new-database` resource back to `database`. That way we are consistent. To do so we rename the resource, and to avoid Terraform from deleting it (since `database` won't exist in the state) we want to inform Terraform that we have "moved" the resource. We do so by adding the following code to the `main.tf`. Note, when running `terraform plan ...` it will not show any infrastructure changes, only a name change. Ensure you still apply even if it looks like there are no changes! @@ -165,7 +170,7 @@ moved { } ``` -After adding the above code, re-plan and apply the changes with Terrform. Once Terraform has successfully applied the change, remove the `moved` block from `main.tf`. Re-plan with Terraform and assert it agrees that there are no changes to be made. If Terraform reports changes, you have made a mistake and need to figure out where you made the mistake. +After adding the above code, plan and apply the changes with Terrform. Once Terraform has successfully applied the change, remove the `moved` block from `main.tf`. 
Run `terraform plan ...` again and assert it agrees that there are no changes to be made. If Terraform reports changes, you have made a mistake and need to figure out where you made the mistake. ### 10. Access the re-staged app(s) and run a smoke test - Log in @@ -176,9 +181,11 @@ After adding the above code, re-plan and apply the changes with Terrform. Once T If everything looks good, there is nothing to do. If apps aren't working/connecting to the new RDS instance, you will need to debug manually and determine if/where you made a mistake. ### 11. Update the `postgresql-client` version to the new version in `tdrs-backend/apt.yml` + ```yaml - postgresql-client- ``` + Note: if the underlying OS for CloudFoundry is no longer `cflinuxfs4` (code name `jammy`) you may also need to update the repo we point to for the postgres client binaries. ### 12. Update the postgres container version in `tdrs-backend/docker-compose.yml` From 073c82ffaab17de41c37461831ff1b1946152df2 Mon Sep 17 00:00:00 2001 From: Eric Lipe Date: Thu, 3 Oct 2024 09:57:19 -0400 Subject: [PATCH 05/19] - remove credentials key --- docs/Technical-Documentation/nexus-repo.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/Technical-Documentation/nexus-repo.md b/docs/Technical-Documentation/nexus-repo.md index 5e504a384..2cf5190be 100644 --- a/docs/Technical-Documentation/nexus-repo.md +++ b/docs/Technical-Documentation/nexus-repo.md @@ -123,7 +123,7 @@ Now you will no longer have to enter the password when logging in. ## Local Docker Login After logging into the `tanf-dev` space with the `cf` cli, execute the following commands to authenticate your local docker daemon ``` -export NEXUS_DOCKER_PASSWORD=`cf service-key tanf-keys nexus-dev | tail -n +2 | jq .credentials.password` +export NEXUS_DOCKER_PASSWORD=`cf service-key tanf-keys nexus-dev | tail -n +2 | jq .password` echo "$NEXUS_DOCKER_PASSWORD" | docker login https://tdp-docker.dev.raftlabs.tech -u tdp-dev --password-stdin ``` From 7806be7e727eb6d130f11235ce369b85ad4700a7 Mon Sep 17 00:00:00 2001 From: Eric Lipe Date: Tue, 8 Oct 2024 08:43:40 -0400 Subject: [PATCH 06/19] - get rid of circular import --- .../tdpservice/data_files/admin/admin.py | 2 +- .../tdpservice/data_files/admin/filters.py | 2 +- .../migrations/0014_reparsefilemeta.py | 29 ++++++++++++++ .../migrations/0015_datafile_reparses.py | 20 ++++++++++ ...016_remove_datafile_reparse_meta_models.py | 38 +++++++++++++++++++ tdrs-backend/tdpservice/data_files/models.py | 29 ++++++++++++-- tdrs-backend/tdpservice/data_files/tasks.py | 8 ++-- .../data_files/test/test_stuck_files.py | 22 +++++------ tdrs-backend/tdpservice/parsers/parse.py | 4 +- .../tdpservice/scheduling/parser_task.py | 4 +- .../management/commands/clean_and_reparse.py | 2 +- .../search_indexes/models/reparse_meta.py | 18 ++++----- .../search_indexes/test/test_reparse.py | 24 ++++++------ 13 files changed, 155 insertions(+), 47 deletions(-) create mode 100644 tdrs-backend/tdpservice/data_files/migrations/0014_reparsefilemeta.py create mode 100644 tdrs-backend/tdpservice/data_files/migrations/0015_datafile_reparses.py create mode 100644 tdrs-backend/tdpservice/data_files/migrations/0016_remove_datafile_reparse_meta_models.py diff --git a/tdrs-backend/tdpservice/data_files/admin/admin.py b/tdrs-backend/tdpservice/data_files/admin/admin.py index 27c9b8868..09c6d9b30 100644 --- a/tdrs-backend/tdpservice/data_files/admin/admin.py +++ b/tdrs-backend/tdpservice/data_files/admin/admin.py @@ -15,7 +15,7 @@ class 
DataFileInline(admin.TabularInline): """Inline model for many to many relationship.""" - model = DataFile.reparse_meta_models.through + model = DataFile.reparses.through can_delete = False ordering = ["-pk"] diff --git a/tdrs-backend/tdpservice/data_files/admin/filters.py b/tdrs-backend/tdpservice/data_files/admin/filters.py index 0f991d882..dc547db88 100644 --- a/tdrs-backend/tdpservice/data_files/admin/filters.py +++ b/tdrs-backend/tdpservice/data_files/admin/filters.py @@ -56,7 +56,7 @@ def queryset(self, request, queryset): if self.value() is not None and queryset.exists(): latest_meta = ReparseMeta.get_latest() if latest_meta is not None: - queryset = queryset.filter(reparse_meta_models=latest_meta) + queryset = queryset.filter(reparses=latest_meta) return queryset diff --git a/tdrs-backend/tdpservice/data_files/migrations/0014_reparsefilemeta.py b/tdrs-backend/tdpservice/data_files/migrations/0014_reparsefilemeta.py new file mode 100644 index 000000000..d9cb6ee8b --- /dev/null +++ b/tdrs-backend/tdpservice/data_files/migrations/0014_reparsefilemeta.py @@ -0,0 +1,29 @@ +# Generated by Django 3.2.15 on 2024-10-08 12:18 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('search_indexes', '0031_alter_tribal_tanf_t4_closure_reason'), + ('data_files', '0013_datafile_reparse_meta'), + ] + + operations = [ + migrations.CreateModel( + name='ReparseFileMeta', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('finished', models.BooleanField(default=False)), + ('success', models.BooleanField(default=False)), + ('started_at', models.DateTimeField(null=True)), + ('finished_at', models.DateTimeField(null=True)), + ('num_records_created', models.PositiveIntegerField(default=0)), + ('cat_4_errors_generated', models.PositiveIntegerField(default=0)), + ('data_file', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='reparse_file_metas', to='data_files.datafile')), + ('reparse_meta', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='reparse_file_metas', to='search_indexes.reparsemeta')), + ], + ), + ] diff --git a/tdrs-backend/tdpservice/data_files/migrations/0015_datafile_reparses.py b/tdrs-backend/tdpservice/data_files/migrations/0015_datafile_reparses.py new file mode 100644 index 000000000..c4cdea583 --- /dev/null +++ b/tdrs-backend/tdpservice/data_files/migrations/0015_datafile_reparses.py @@ -0,0 +1,20 @@ +# Generated by Django 3.2.15 on 2024-10-04 12:17 + +from django.db import migrations, models +from tdpservice.data_files.models import ReparseFileMeta + + +class Migration(migrations.Migration): + + dependencies = [ + ('data_files', '0014_reparsefilemeta'), + ('search_indexes', '0031_alter_tribal_tanf_t4_closure_reason'), + ] + + operations = [ + migrations.AddField( + model_name='datafile', + name='reparses', + field=models.ManyToManyField(help_text='Reparse events this file has been associated with.', related_name='files', through="data_files.ReparseFileMeta", to='search_indexes.ReparseMeta'), + ), + ] diff --git a/tdrs-backend/tdpservice/data_files/migrations/0016_remove_datafile_reparse_meta_models.py b/tdrs-backend/tdpservice/data_files/migrations/0016_remove_datafile_reparse_meta_models.py new file mode 100644 index 000000000..8a8796e92 --- /dev/null +++ b/tdrs-backend/tdpservice/data_files/migrations/0016_remove_datafile_reparse_meta_models.py @@ -0,0 +1,38 @@ +# Generated by 
Django 3.2.15 on 2024-10-04 12:17 + +from django.db import migrations + + +def switch_reparse_meta_through_model(apps, schema_editor): + DataFile=apps.get_model("data_files","DataFile") + ReparseMeta=apps.get_model("search_indexes","ReparseMeta") + OldThru=DataFile.reparse_meta_models.through + ReparseFileMeta=apps.get_model("data_files", "ReparseFileMeta") + + q = OldThru.objects.all() + + print(f'switching {q.count()} through models') + + for m in q: + ReparseFileMeta.objects.create( + data_file_id=m.datafile.pk, + reparse_meta_id=m.reparsemeta.pk + ) + m.delete() + + +class Migration(migrations.Migration): + + dependencies = [ + ('data_files', '0015_datafile_reparses'), + ] + + operations = [ + migrations.RunPython( + switch_reparse_meta_through_model, + ), + migrations.RemoveField( + model_name='datafile', + name='reparse_meta_models', + ), + ] diff --git a/tdrs-backend/tdpservice/data_files/models.py b/tdrs-backend/tdpservice/data_files/models.py index 6fe5355e0..a892f21c9 100644 --- a/tdrs-backend/tdpservice/data_files/models.py +++ b/tdrs-backend/tdpservice/data_files/models.py @@ -79,6 +79,20 @@ class Meta: # separately extension = models.CharField(max_length=8, default="txt") +class ReparseFileMeta(models.Model): + """Meta data model representing a single file parse within a reparse execution.""" + data_file = models.ForeignKey('data_files.DataFile', on_delete=models.CASCADE, related_name='reparse_file_metas') + reparse_meta = models.ForeignKey('search_indexes.ReparseMeta', on_delete=models.CASCADE, related_name='reparse_file_metas') + + finished = models.BooleanField(default=False) + success = models.BooleanField(default=False) + started_at = models.DateTimeField(auto_now_add=False, null=True) # set at beg of parse run + finished_at = models.DateTimeField(auto_now_add=False, null=True) + + # num_records_deleted = models.PositiveIntegerField(default=0) + num_records_created = models.PositiveIntegerField(default=0) + cat_4_errors_generated = models.PositiveIntegerField(default=0) + class DataFile(FileRecord): """Represents a version of a data file.""" @@ -153,10 +167,17 @@ class Meta: null=True ) - reparse_meta_models = models.ManyToManyField("search_indexes.ReparseMeta", - help_text="Reparse events this file has been associated with.", - related_name="datafiles" - ) + # reparse_meta_models = models.ManyToManyField("search_indexes.ReparseMeta", + # help_text="Reparse events this file has been associated with.", + # related_name="datafiles" + # ) + + reparses = models.ManyToManyField( + "search_indexes.ReparseMeta", + through="data_files.ReparseFileMeta", + help_text="Reparse events this file has been associated with.", + related_name="files" + ) @property def prog_type(self): diff --git a/tdrs-backend/tdpservice/data_files/tasks.py b/tdrs-backend/tdpservice/data_files/tasks.py index 16e35de79..4beb1fe67 100644 --- a/tdrs-backend/tdpservice/data_files/tasks.py +++ b/tdrs-backend/tdpservice/data_files/tasks.py @@ -13,7 +13,7 @@ def get_stuck_files(): """Return a queryset containing files in a 'stuck' state.""" - stuck_files = DataFile.objects.annotate(reparse_count=Count('reparse_meta_models')).filter( + stuck_files = DataFile.objects.annotate(reparse_count=Count('reparses')).filter( # non-reparse submissions over an hour old Q( reparse_count=0, @@ -22,9 +22,9 @@ def get_stuck_files(): # reparse submissions past the timeout, where the reparse did not complete Q( reparse_count__gt=0, - reparse_meta_models__timeout_at__lte=timezone.now(), - reparse_meta_models__finished=False, - 
reparse_meta_models__success=False + reparses__timeout_at__lte=timezone.now(), + reparses__finished=False, + reparses__success=False ) ).filter( # where there is NO summary or the summary is in PENDING status diff --git a/tdrs-backend/tdpservice/data_files/test/test_stuck_files.py b/tdrs-backend/tdpservice/data_files/test/test_stuck_files.py index 95f4f8f3a..fb4242834 100644 --- a/tdrs-backend/tdpservice/data_files/test/test_stuck_files.py +++ b/tdrs-backend/tdpservice/data_files/test/test_stuck_files.py @@ -55,7 +55,7 @@ def test_find_pending_submissions__none_stuck(stt_user, stt): df2.save() make_summary(df2, DataFileSummary.Status.ACCEPTED) rpm = make_reparse_meta(True, True) - df2.reparse_meta_models.add(rpm) + df2.reparses.add(rpm) # a pending standard submission, less than an hour old df3 = make_datafile(stt_user, stt, 3) @@ -82,7 +82,7 @@ def test_find_pending_submissions__non_reparse_stuck(stt_user, stt): df2.save() make_summary(df2, DataFileSummary.Status.ACCEPTED) rpm = make_reparse_meta(True, True) - df2.reparse_meta_models.add(rpm) + df2.reparses.add(rpm) stuck_files = get_stuck_files() assert stuck_files.count() == 1 @@ -103,7 +103,7 @@ def test_find_pending_submissions__non_reparse_stuck__no_dfs(stt_user, stt): df2.save() make_summary(df2, DataFileSummary.Status.ACCEPTED) rpm = make_reparse_meta(True, True) - df2.reparse_meta_models.add(rpm) + df2.reparses.add(rpm) stuck_files = get_stuck_files() assert stuck_files.count() == 1 @@ -125,7 +125,7 @@ def test_find_pending_submissions__reparse_stuck(stt_user, stt): df2.save() make_summary(df2, DataFileSummary.Status.PENDING) rpm = make_reparse_meta(False, False) - df2.reparse_meta_models.add(rpm) + df2.reparses.add(rpm) stuck_files = get_stuck_files() assert stuck_files.count() == 1 @@ -146,7 +146,7 @@ def test_find_pending_submissions__reparse_stuck__no_dfs(stt_user, stt): df2.created_at = _time_ago(hours=1) df2.save() rpm = make_reparse_meta(False, False) - df2.reparse_meta_models.add(rpm) + df2.reparses.add(rpm) stuck_files = get_stuck_files() assert stuck_files.count() == 1 @@ -168,7 +168,7 @@ def test_find_pending_submissions__reparse_and_non_reparse_stuck(stt_user, stt): df2.save() make_summary(df2, DataFileSummary.Status.PENDING) rpm = make_reparse_meta(False, False) - df2.reparse_meta_models.add(rpm) + df2.reparses.add(rpm) stuck_files = get_stuck_files() assert stuck_files.count() == 2 @@ -189,7 +189,7 @@ def test_find_pending_submissions__reparse_and_non_reparse_stuck_no_dfs(stt_user df2.created_at = _time_ago(hours=1) df2.save() rpm = make_reparse_meta(False, False) - df2.reparse_meta_models.add(rpm) + df2.reparses.add(rpm) stuck_files = get_stuck_files() assert stuck_files.count() == 2 @@ -208,7 +208,7 @@ def test_find_pending_submissions__old_reparse_stuck__new_not_stuck(stt_user, st # reparse fails the first time rpm1 = make_reparse_meta(False, False) - df1.reparse_meta_models.add(rpm1) + df1.reparses.add(rpm1) stuck_files = get_stuck_files() assert stuck_files.count() == 1 @@ -218,7 +218,7 @@ def test_find_pending_submissions__old_reparse_stuck__new_not_stuck(stt_user, st make_summary(df1, DataFileSummary.Status.ACCEPTED) rpm2 = make_reparse_meta(True, True) - df1.reparse_meta_models.add(rpm2) + df1.reparses.add(rpm2) stuck_files = get_stuck_files() assert stuck_files.count() == 0 @@ -235,7 +235,7 @@ def test_find_pending_submissions__new_reparse_stuck__old_not_stuck(stt_user, st # reparse succeeds rpm1 = make_reparse_meta(True, True) - df1.reparse_meta_models.add(rpm1) + df1.reparses.add(rpm1) # reparse again, 
fails this time dfs1.delete() # reparse deletes the original dfs and creates the new one @@ -245,7 +245,7 @@ def test_find_pending_submissions__new_reparse_stuck__old_not_stuck(stt_user, st ) rpm2 = make_reparse_meta(False, False) - df1.reparse_meta_models.add(rpm2) + df1.reparses.add(rpm2) stuck_files = get_stuck_files() assert stuck_files.count() == 1 diff --git a/tdrs-backend/tdpservice/parsers/parse.py b/tdrs-backend/tdpservice/parsers/parse.py index b2b9f0445..fc2919266 100644 --- a/tdrs-backend/tdpservice/parsers/parse.py +++ b/tdrs-backend/tdpservice/parsers/parse.py @@ -114,8 +114,8 @@ def parse_datafile(datafile, dfs): def update_meta_model(datafile, dfs): """Update appropriate meta models.""" - ReparseMeta.increment_records_created(datafile.reparse_meta_models, dfs.total_number_of_records_created) - ReparseMeta.increment_files_completed(datafile.reparse_meta_models) + ReparseMeta.increment_records_created(datafile.reparses, dfs.total_number_of_records_created) + ReparseMeta.increment_files_completed(datafile.reparses) def bulk_create_records(unsaved_records, line_number, header_count, datafile, dfs, flush=False): """Bulk create passed in records.""" diff --git a/tdrs-backend/tdpservice/scheduling/parser_task.py b/tdrs-backend/tdpservice/scheduling/parser_task.py index 2b1fb3d51..219da9916 100644 --- a/tdrs-backend/tdpservice/scheduling/parser_task.py +++ b/tdrs-backend/tdpservice/scheduling/parser_task.py @@ -54,7 +54,7 @@ def parse(data_file_id, should_send_submission_email=True): f"Encountered Database exception in parser_task.py: \n{e}", "error" ) - ReparseMeta.increment_files_failed(data_file.reparse_meta_models) + ReparseMeta.increment_files_failed(data_file.reparses) except Exception as e: generate_error = make_generate_parser_error(data_file, None) error = generate_error(schema=None, @@ -72,4 +72,4 @@ def parse(data_file_id, should_send_submission_email=True): (f"Uncaught exception while parsing datafile: {data_file.pk}! Please review the logs to " f"see if manual intervention is required. Exception: \n{e}"), "critical") - ReparseMeta.increment_files_failed(data_file.reparse_meta_models) + ReparseMeta.increment_files_failed(data_file.reparses) diff --git a/tdrs-backend/tdpservice/search_indexes/management/commands/clean_and_reparse.py b/tdrs-backend/tdpservice/search_indexes/management/commands/clean_and_reparse.py index d0c7a9934..172bf1a6c 100644 --- a/tdrs-backend/tdpservice/search_indexes/management/commands/clean_and_reparse.py +++ b/tdrs-backend/tdpservice/search_indexes/management/commands/clean_and_reparse.py @@ -182,7 +182,7 @@ def _handle_datafiles(self, files, meta_model, log_context): """Delete, re-save, and reparse selected datafiles.""" for file in files: try: - file.reparse_meta_models.add(meta_model) + file.reparses.add(meta_model) file.save() parser_task.parse.delay(file.pk, should_send_submission_email=False) except DatabaseError as e: diff --git a/tdrs-backend/tdpservice/search_indexes/models/reparse_meta.py b/tdrs-backend/tdpservice/search_indexes/models/reparse_meta.py index ddbf4ce4a..91ff067b1 100644 --- a/tdrs-backend/tdpservice/search_indexes/models/reparse_meta.py +++ b/tdrs-backend/tdpservice/search_indexes/models/reparse_meta.py @@ -88,7 +88,7 @@ def set_reparse_finished(meta_model): meta_model.save() @staticmethod - def increment_files_completed(reparse_meta_models): + def increment_files_completed(reparses): """ Increment the count of files that have completed parsing for the datafile's current/latest reparse model. 
@@ -96,10 +96,10 @@ def increment_files_completed(reparse_meta_models): referrence the same ReparseMeta object that is being queried below. `select_for_update` provides a DB lock on the object and forces other transactions on the object to wait until this one completes. """ - if reparse_meta_models.exists(): + if reparses.exists(): with transaction.atomic(): try: - meta_model = reparse_meta_models.select_for_update().latest("pk") + meta_model = reparses.select_for_update().latest("pk") meta_model.files_completed += 1 if ReparseMeta.file_counts_match(meta_model): ReparseMeta.set_reparse_finished(meta_model) @@ -109,7 +109,7 @@ def increment_files_completed(reparse_meta_models): f"ReparseMeta object with ID: {meta_model.pk}.") @staticmethod - def increment_files_failed(reparse_meta_models): + def increment_files_failed(reparses): """ Increment the count of files that failed parsing for the datafile's current/latest reparse meta model. @@ -117,10 +117,10 @@ def increment_files_failed(reparse_meta_models): referrence the same ReparseMeta object that is being queried below. `select_for_update` provides a DB lock on the object and forces other transactions on the object to wait until this one completes. """ - if reparse_meta_models.exists(): + if reparses.exists(): with transaction.atomic(): try: - meta_model = reparse_meta_models.select_for_update().latest("pk") + meta_model = reparses.select_for_update().latest("pk") meta_model.files_failed += 1 if ReparseMeta.file_counts_match(meta_model): ReparseMeta.set_reparse_finished(meta_model) @@ -130,7 +130,7 @@ def increment_files_failed(reparse_meta_models): f"ReparseMeta object with ID: {meta_model.pk}.") @staticmethod - def increment_records_created(reparse_meta_models, num_created): + def increment_records_created(reparses, num_created): """ Increment the count of records created for the datafile's current/latest reparse meta model. @@ -138,10 +138,10 @@ def increment_records_created(reparse_meta_models, num_created): referrence the same ReparseMeta object that is being queried below. `select_for_update` provides a DB lock on the object and forces other transactions on the object to wait until this one completes. 
""" - if reparse_meta_models.exists(): + if reparses.exists(): with transaction.atomic(): try: - meta_model = reparse_meta_models.select_for_update().latest("pk") + meta_model = reparses.select_for_update().latest("pk") meta_model.num_records_created += num_created meta_model.save() except DatabaseError: diff --git a/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py b/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py index 2c8647cea..118591a47 100644 --- a/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py +++ b/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py @@ -401,16 +401,16 @@ def test_mm_all_files_done(): def test_mm_increment_files_completed(big_file): """Test meta model increment files completed.""" meta_model = ReparseMeta.objects.create(num_files_to_reparse=2, all=True) - big_file.reparse_meta_models.add(meta_model) + big_file.reparses.add(meta_model) big_file.save() - ReparseMeta.increment_files_completed(big_file.reparse_meta_models) + ReparseMeta.increment_files_completed(big_file.reparses) meta_model = ReparseMeta.get_latest() assert meta_model.finished is False assert meta_model.files_completed == 1 assert meta_model.files_failed == 0 - ReparseMeta.increment_files_completed(big_file.reparse_meta_models) + ReparseMeta.increment_files_completed(big_file.reparses) meta_model = ReparseMeta.get_latest() assert meta_model.finished is True assert meta_model.files_completed == 2 @@ -424,16 +424,16 @@ def test_mm_increment_files_completed(big_file): def test_mm_increment_files_failed(big_file): """Test meta model increment files failed.""" meta_model = ReparseMeta.objects.create(num_files_to_reparse=2, all=True) - big_file.reparse_meta_models.add(meta_model) + big_file.reparses.add(meta_model) big_file.save() - ReparseMeta.increment_files_failed(big_file.reparse_meta_models) + ReparseMeta.increment_files_failed(big_file.reparses) meta_model = ReparseMeta.get_latest() assert meta_model.finished is False assert meta_model.files_completed == 0 assert meta_model.files_failed == 1 - ReparseMeta.increment_files_failed(big_file.reparse_meta_models) + ReparseMeta.increment_files_failed(big_file.reparses) meta_model = ReparseMeta.get_latest() assert meta_model.finished is True assert meta_model.files_completed == 0 @@ -447,16 +447,16 @@ def test_mm_increment_files_failed(big_file): def test_mm_increment_files_failed_and_passed(big_file): """Test meta model both increment failed and passed files.""" meta_model = ReparseMeta.objects.create(num_files_to_reparse=2, all=True) - big_file.reparse_meta_models.add(meta_model) + big_file.reparses.add(meta_model) big_file.save() - ReparseMeta.increment_files_completed(big_file.reparse_meta_models) + ReparseMeta.increment_files_completed(big_file.reparses) meta_model = ReparseMeta.get_latest() assert meta_model.finished is False assert meta_model.files_completed == 1 assert meta_model.files_failed == 0 - ReparseMeta.increment_files_failed(big_file.reparse_meta_models) + ReparseMeta.increment_files_failed(big_file.reparses) meta_model = ReparseMeta.get_latest() assert meta_model.finished is True assert meta_model.files_completed == 1 @@ -470,14 +470,14 @@ def test_mm_increment_files_failed_and_passed(big_file): def test_mm_increment_records_created(big_file): """Test meta model increment records created.""" meta_model = ReparseMeta.objects.create(num_files_to_reparse=2, all=True) - big_file.reparse_meta_models.add(meta_model) + big_file.reparses.add(meta_model) big_file.save() - 
ReparseMeta.increment_records_created(big_file.reparse_meta_models, 500) + ReparseMeta.increment_records_created(big_file.reparses, 500) meta_model = ReparseMeta.get_latest() assert meta_model.num_records_created == 500 - ReparseMeta.increment_records_created(big_file.reparse_meta_models, 888) + ReparseMeta.increment_records_created(big_file.reparses, 888) meta_model = ReparseMeta.get_latest() assert meta_model.num_records_created == 1388 From 887334579a7058871101ae2cc17925ce5efaa5a6 Mon Sep 17 00:00:00 2001 From: Jan Timpe Date: Tue, 8 Oct 2024 13:56:11 -0400 Subject: [PATCH 07/19] implement reparse_file_meta --- tdrs-backend/tdpservice/parsers/parse.py | 13 ------- .../tdpservice/scheduling/parser_task.py | 38 +++++++++++++++---- .../management/commands/clean_and_reparse.py | 2 +- 3 files changed, 32 insertions(+), 21 deletions(-) diff --git a/tdrs-backend/tdpservice/parsers/parse.py b/tdrs-backend/tdpservice/parsers/parse.py index fc2919266..47adeedcc 100644 --- a/tdrs-backend/tdpservice/parsers/parse.py +++ b/tdrs-backend/tdpservice/parsers/parse.py @@ -34,7 +34,6 @@ def parse_datafile(datafile, dfs): logger.info(f"Preparser Error: {len(header_errors)} header errors encountered.") errors['header'] = header_errors bulk_create_errors({1: header_errors}, 1, flush=True) - update_meta_model(datafile, dfs) return errors elif header_is_valid and len(header_errors) > 0: logger.info(f"Preparser Warning: {len(header_errors)} header warnings encountered.") @@ -75,7 +74,6 @@ def parse_datafile(datafile, dfs): f"({header['program_type']}) and FIPS Code ({field_values['state_fips']}).",) errors['header'] = [tribe_error] bulk_create_errors({1: [tribe_error]}, 1, flush=True) - update_meta_model(datafile, dfs) return errors # Ensure file section matches upload section @@ -90,7 +88,6 @@ def parse_datafile(datafile, dfs): errors['document'] = [section_error] unsaved_parser_errors = {1: [section_error]} bulk_create_errors(unsaved_parser_errors, 1, flush=True) - update_meta_model(datafile, dfs) return errors rpt_month_year_is_valid, rpt_month_year_error = category1.validate_header_rpt_month_year( @@ -103,7 +100,6 @@ def parse_datafile(datafile, dfs): errors['document'] = [rpt_month_year_error] unsaved_parser_errors = {1: [rpt_month_year_error]} bulk_create_errors(unsaved_parser_errors, 1, flush=True) - update_meta_model(datafile, dfs) return errors line_errors = parse_datafile_lines(datafile, dfs, program_type, section, is_encrypted, case_consistency_validator) @@ -112,11 +108,6 @@ def parse_datafile(datafile, dfs): return errors -def update_meta_model(datafile, dfs): - """Update appropriate meta models.""" - ReparseMeta.increment_records_created(datafile.reparses, dfs.total_number_of_records_created) - ReparseMeta.increment_files_completed(datafile.reparses) - def bulk_create_records(unsaved_records, line_number, header_count, datafile, dfs, flush=False): """Bulk create passed in records.""" batch_size = settings.BULK_CREATE_BATCH_SIZE @@ -385,7 +376,6 @@ def parse_datafile_lines(datafile, dfs, program_type, section, is_encrypted, cas rollback_records(unsaved_records.get_bulk_create_struct(), datafile) rollback_parser_errors(datafile) bulk_create_errors(preparse_error, num_errors, flush=True) - update_meta_model(datafile, dfs) return errors if prev_sum != header_count + trailer_count: @@ -448,7 +438,6 @@ def parse_datafile_lines(datafile, dfs, program_type, section, is_encrypted, cas rollback_parser_errors(datafile) preparse_error = {line_number: [err_obj]} bulk_create_errors(preparse_error, num_errors, 
flush=True) - update_meta_model(datafile, dfs) return errors should_remove = validate_case_consistency(case_consistency_validator) @@ -469,7 +458,6 @@ def parse_datafile_lines(datafile, dfs, program_type, section, is_encrypted, cas logger.error(f"Not all parsed records created for file: {datafile.id}!") rollback_records(unsaved_records.get_bulk_create_struct(), datafile) bulk_create_errors(unsaved_parser_errors, num_errors, flush=True) - update_meta_model(datafile, dfs) return errors # Add any generated cat4 errors to our error data structure & clear our caches errors list @@ -486,7 +474,6 @@ def parse_datafile_lines(datafile, dfs, program_type, section, is_encrypted, cas f"validated {case_consistency_validator.total_cases_validated} of them.") dfs.save() - update_meta_model(datafile, dfs) return errors diff --git a/tdrs-backend/tdpservice/scheduling/parser_task.py b/tdrs-backend/tdpservice/scheduling/parser_task.py index 219da9916..65ea8fe95 100644 --- a/tdrs-backend/tdpservice/scheduling/parser_task.py +++ b/tdrs-backend/tdpservice/scheduling/parser_task.py @@ -2,23 +2,23 @@ from __future__ import absolute_import from celery import shared_task import logging +from django.utils import timezone from django.contrib.auth.models import Group from django.db.utils import DatabaseError from tdpservice.users.models import AccountApprovalStatusChoices, User -from tdpservice.data_files.models import DataFile +from tdpservice.data_files.models import DataFile, ReparseFileMeta from tdpservice.parsers.parse import parse_datafile -from tdpservice.parsers.models import DataFileSummary, ParserErrorCategoryChoices +from tdpservice.parsers.models import DataFileSummary, ParserErrorCategoryChoices, ParserError from tdpservice.parsers.aggregates import case_aggregates_by_month, total_errors_by_month from tdpservice.parsers.util import log_parser_exception, make_generate_parser_error from tdpservice.email.helpers.data_file import send_data_submitted_email -from tdpservice.search_indexes.models.reparse_meta import ReparseMeta logger = logging.getLogger(__name__) @shared_task -def parse(data_file_id, should_send_submission_email=True): +def parse(data_file_id, reparse_id=None): """Send data file for processing.""" # passing the data file FileField across redis was rendering non-serializable failures, doing the below lookup # to avoid those. 
I suppose good practice to not store/serializer large file contents in memory when stored in redis @@ -27,6 +27,12 @@ def parse(data_file_id, should_send_submission_email=True): data_file = DataFile.objects.get(id=data_file_id) logger.info(f"DataFile parsing started for file {data_file.filename}") + file_meta = None + if reparse_id: + file_meta = ReparseFileMeta.objects.get(data_file_id=data_file_id, reparse_meta_id=reparse_id) + file_meta.started_at = timezone.now() + file_meta.save() + dfs = DataFileSummary.objects.create(datafile=data_file, status=DataFileSummary.Status.PENDING) errors = parse_datafile(data_file, dfs) dfs.status = dfs.get_status() @@ -41,7 +47,17 @@ def parse(data_file_id, should_send_submission_email=True): logger.info(f"Parsing finished for file -> {repr(data_file)} with status " f"{dfs.status} and {len(errors)} errors.") - if should_send_submission_email is True: + if reparse_id is not None: + file_meta.num_records_created = dfs.total_number_of_records_created + file_meta.cat_4_errors_generated = ParserError.objects.filter( + file_id=data_file_id, + error_type=ParserErrorCategoryChoices.CASE_CONSISTENCY + ).count() + file_meta.finished = True + file_meta.success = True + file_meta.finished_at = timezone.now() + file_meta.save() + else: recipients = User.objects.filter( stt=data_file.stt, account_approval_status=AccountApprovalStatusChoices.APPROVED, @@ -54,7 +70,11 @@ def parse(data_file_id, should_send_submission_email=True): f"Encountered Database exception in parser_task.py: \n{e}", "error" ) - ReparseMeta.increment_files_failed(data_file.reparses) + if reparse_id: + file_meta.finished = True + file_meta.success = False + file_meta.finished_at = timezone.now() + file_meta.save() except Exception as e: generate_error = make_generate_parser_error(data_file, None) error = generate_error(schema=None, @@ -72,4 +92,8 @@ def parse(data_file_id, should_send_submission_email=True): (f"Uncaught exception while parsing datafile: {data_file.pk}! Please review the logs to " f"see if manual intervention is required. Exception: \n{e}"), "critical") - ReparseMeta.increment_files_failed(data_file.reparses) + if reparse_id: + file_meta.finished = True + file_meta.success = False + file_meta.finished_at = timezone.now() + file_meta.save() diff --git a/tdrs-backend/tdpservice/search_indexes/management/commands/clean_and_reparse.py b/tdrs-backend/tdpservice/search_indexes/management/commands/clean_and_reparse.py index 172bf1a6c..286bcccf8 100644 --- a/tdrs-backend/tdpservice/search_indexes/management/commands/clean_and_reparse.py +++ b/tdrs-backend/tdpservice/search_indexes/management/commands/clean_and_reparse.py @@ -184,7 +184,7 @@ def _handle_datafiles(self, files, meta_model, log_context): try: file.reparses.add(meta_model) file.save() - parser_task.parse.delay(file.pk, should_send_submission_email=False) + parser_task.parse.delay(file.pk, reparse_id=meta_model.pk) except DatabaseError as e: log('Encountered a DatabaseError while re-creating datafiles. The database ' 'and Elastic are INCONSISTENT! 
Restore the DB from the backup as soon as possible!', From 101dd3fe7c61fb645830924a701eca481bcd343b Mon Sep 17 00:00:00 2001 From: Jan Timpe Date: Tue, 8 Oct 2024 13:56:32 -0400 Subject: [PATCH 08/19] remove unused fields on meta model --- tdrs-backend/tdpservice/data_files/models.py | 5 - .../search_indexes/admin/reparse_meta.py | 25 +++- .../migrations/0032_auto_20241008_1745.py | 37 +++++ .../search_indexes/models/reparse_meta.py | 135 ++++++------------ 4 files changed, 100 insertions(+), 102 deletions(-) create mode 100644 tdrs-backend/tdpservice/search_indexes/migrations/0032_auto_20241008_1745.py diff --git a/tdrs-backend/tdpservice/data_files/models.py b/tdrs-backend/tdpservice/data_files/models.py index a892f21c9..b4444a6a2 100644 --- a/tdrs-backend/tdpservice/data_files/models.py +++ b/tdrs-backend/tdpservice/data_files/models.py @@ -167,11 +167,6 @@ class Meta: null=True ) - # reparse_meta_models = models.ManyToManyField("search_indexes.ReparseMeta", - # help_text="Reparse events this file has been associated with.", - # related_name="datafiles" - # ) - reparses = models.ManyToManyField( "search_indexes.ReparseMeta", through="data_files.ReparseFileMeta", diff --git a/tdrs-backend/tdpservice/search_indexes/admin/reparse_meta.py b/tdrs-backend/tdpservice/search_indexes/admin/reparse_meta.py index f030501f8..4b902bc26 100644 --- a/tdrs-backend/tdpservice/search_indexes/admin/reparse_meta.py +++ b/tdrs-backend/tdpservice/search_indexes/admin/reparse_meta.py @@ -8,18 +8,35 @@ class ReparseMetaAdmin(ReadOnlyAdminMixin): inlines = [DataFileInline] + def reparse_is_finished(self, instance): + return instance.is_finished + reparse_is_finished.boolean = True + + def reparse_is_success(self, instance): + return instance.is_success + reparse_is_success.boolean = True + list_display = [ 'id', 'created_at', 'timeout_at', - 'success', - 'finished', + 'reparse_is_finished', + 'reparse_is_success', 'db_backup_location', ] list_filter = [ - 'success', - 'finished', 'fiscal_year', 'fiscal_quarter', ] + + readonly_fields = [ + 'reparse_is_finished', + 'reparse_is_success', + 'finished_at', + 'num_files', + 'num_files_completed', + 'num_files_succeeded', + 'num_files_failed', + 'num_records_created', + ] diff --git a/tdrs-backend/tdpservice/search_indexes/migrations/0032_auto_20241008_1745.py b/tdrs-backend/tdpservice/search_indexes/migrations/0032_auto_20241008_1745.py new file mode 100644 index 000000000..4724f0a3f --- /dev/null +++ b/tdrs-backend/tdpservice/search_indexes/migrations/0032_auto_20241008_1745.py @@ -0,0 +1,37 @@ +# Generated by Django 3.2.15 on 2024-10-08 17:45 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('search_indexes', '0031_alter_tribal_tanf_t4_closure_reason'), + ] + + operations = [ + migrations.RemoveField( + model_name='reparsemeta', + name='files_completed', + ), + migrations.RemoveField( + model_name='reparsemeta', + name='files_failed', + ), + migrations.RemoveField( + model_name='reparsemeta', + name='finished', + ), + migrations.RemoveField( + model_name='reparsemeta', + name='num_files_to_reparse', + ), + migrations.RemoveField( + model_name='reparsemeta', + name='num_records_created', + ), + migrations.RemoveField( + model_name='reparsemeta', + name='success', + ), + ] diff --git a/tdrs-backend/tdpservice/search_indexes/models/reparse_meta.py b/tdrs-backend/tdpservice/search_indexes/models/reparse_meta.py index 91ff067b1..c7c9b2f08 100644 --- a/tdrs-backend/tdpservice/search_indexes/models/reparse_meta.py +++ 
b/tdrs-backend/tdpservice/search_indexes/models/reparse_meta.py @@ -12,7 +12,6 @@ class ReparseMeta(models.Model): """ Meta data model representing a single execution of `clean_and_reparse`. - Because this model is intended to be queried in a distributed and parrallel fashion, all queries should rely on database level locking to ensure race conditions aren't introduced. See `increment_files_reparsed` for an example. """ @@ -25,129 +24,79 @@ class Meta: created_at = models.DateTimeField(auto_now_add=True) timeout_at = models.DateTimeField(auto_now_add=False, null=True) - finished = models.BooleanField(default=False) - success = models.BooleanField(default=False, help_text="All files completed parsing.") - - num_files_to_reparse = models.PositiveIntegerField(default=0) - files_completed = models.PositiveIntegerField(default=0) - files_failed = models.PositiveIntegerField(default=0) - num_records_deleted = models.PositiveIntegerField(default=0) - num_records_created = models.PositiveIntegerField(default=0) - total_num_records_initial = models.PositiveBigIntegerField(default=0) total_num_records_post = models.PositiveBigIntegerField(default=0) db_backup_location = models.CharField(max_length=512) - # Options used to select the files to reparse + # Options used to select the files to reparse (from mgmt cmd only, remove if command deprecated) fiscal_quarter = models.CharField(max_length=2, null=True) fiscal_year = models.PositiveIntegerField(null=True) all = models.BooleanField(default=False) new_indices = models.BooleanField(default=False) delete_old_indices = models.BooleanField(default=False) + @property + def is_finished(self): + return all([r.finished for r in self.reparse_file_metas.all()]) + + @property + def is_success(self): + return all([r.success for r in self.reparse_file_metas.all()]) + + @property + def finished_at(self): + last_parse = self.reparse_file_metas.order_by('-finished_at').first() + return last_parse.finished_at if last_parse else None + + @property + def num_files(self): + return self.reparse_file_metas.count() + + @property + def num_files_completed(self): + return self.reparse_file_metas.filter(finished=True).count() + + @property + def num_files_succeeded(self): + return self.reparse_file_metas.filter(finished=True, success=True).count() + + @property + def num_files_failed(self): + return self.reparse_file_metas.filter(finished=True, success=False).count() + + @property + def num_records_created(self): + return sum([r.num_records_created for r in self.reparse_file_metas.all()]) + + + # remove unused statics or change to utils funcs in own app and/or make new cleanup ticket for future + @staticmethod def file_counts_match(meta_model): """ Check whether the file counts match. - This function assumes the meta_model has been passed in a distributed/thread safe way. If the database row containing this model has not been locked the caller will experience race issues. 
""" print("\n\nINSIDE FILE COUNTS MATCH:") - print(f"{meta_model.num_files_to_reparse }, {meta_model.files_completed}, {meta_model.files_failed}\n\n") - return (meta_model.files_completed == meta_model.num_files_to_reparse or - meta_model.files_completed + meta_model.files_failed == - meta_model.num_files_to_reparse or - meta_model.files_failed == meta_model.num_files_to_reparse) + print(f"{meta_model.num_files }, {meta_model.num_files_completed}, {meta_model.num_files_failed}\n\n") + return (meta_model.num_files_completed == meta_model.num_files or + meta_model.num_files_completed + meta_model.num_files_failed == + meta_model.num_files or meta_model.num_files_failed == meta_model.num_files) @staticmethod def assert_all_files_done(meta_model): """ Check if all files have been parsed with or without exceptions. - This function assumes the meta_model has been passed in a distributed/thread safe way. If the database row containing this model has not been locked the caller will experience race issues. """ - if meta_model.finished and ReparseMeta.file_counts_match(meta_model): + if meta_model.is_finished and ReparseMeta.file_counts_match(meta_model): return True return False - @staticmethod - def set_reparse_finished(meta_model): - """ - Set status/completion fields to appropriate values. - - This function assumes the meta_model has been passed in a distributed/thread safe way. If the database row - containing this model has not been locked the caller will experience race issues. - """ - meta_model.finished = True - meta_model.success = meta_model.files_completed == meta_model.num_files_to_reparse - meta_model.total_num_records_post = count_all_records() - meta_model.save() - - @staticmethod - def increment_files_completed(reparses): - """ - Increment the count of files that have completed parsing for the datafile's current/latest reparse model. - - Because this function can be called in parallel we use `select_for_update` because multiple parse tasks can - referrence the same ReparseMeta object that is being queried below. `select_for_update` provides a DB lock on - the object and forces other transactions on the object to wait until this one completes. - """ - if reparses.exists(): - with transaction.atomic(): - try: - meta_model = reparses.select_for_update().latest("pk") - meta_model.files_completed += 1 - if ReparseMeta.file_counts_match(meta_model): - ReparseMeta.set_reparse_finished(meta_model) - meta_model.save() - except DatabaseError: - logger.exception("Encountered exception while trying to update the `files_reparsed` field on the " - f"ReparseMeta object with ID: {meta_model.pk}.") - - @staticmethod - def increment_files_failed(reparses): - """ - Increment the count of files that failed parsing for the datafile's current/latest reparse meta model. - - Because this function can be called in parallel we use `select_for_update` because multiple parse tasks can - referrence the same ReparseMeta object that is being queried below. `select_for_update` provides a DB lock on - the object and forces other transactions on the object to wait until this one completes. 
- """ - if reparses.exists(): - with transaction.atomic(): - try: - meta_model = reparses.select_for_update().latest("pk") - meta_model.files_failed += 1 - if ReparseMeta.file_counts_match(meta_model): - ReparseMeta.set_reparse_finished(meta_model) - meta_model.save() - except DatabaseError: - logger.exception("Encountered exception while trying to update the `files_failed` field on the " - f"ReparseMeta object with ID: {meta_model.pk}.") - - @staticmethod - def increment_records_created(reparses, num_created): - """ - Increment the count of records created for the datafile's current/latest reparse meta model. - - Because this function can be called in parallel we use `select_for_update` because multiple parse tasks can - referrence the same ReparseMeta object that is being queried below. `select_for_update` provides a DB lock on - the object and forces other transactions on the object to wait until this one completes. - """ - if reparses.exists(): - with transaction.atomic(): - try: - meta_model = reparses.select_for_update().latest("pk") - meta_model.num_records_created += num_created - meta_model.save() - except DatabaseError: - logger.exception("Encountered exception while trying to update the `files_failed` field on the " - f"ReparseMeta object with ID: {meta_model.pk}.") - @staticmethod def get_latest(): """Get the ReparseMeta model with the greatest pk.""" From d497562a48bdd89fe446c1b4529529c1e99f4349 Mon Sep 17 00:00:00 2001 From: Jan Timpe Date: Tue, 8 Oct 2024 14:04:56 -0400 Subject: [PATCH 09/19] rm comment --- tdrs-backend/tdpservice/data_files/models.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tdrs-backend/tdpservice/data_files/models.py b/tdrs-backend/tdpservice/data_files/models.py index b4444a6a2..e2cc293de 100644 --- a/tdrs-backend/tdpservice/data_files/models.py +++ b/tdrs-backend/tdpservice/data_files/models.py @@ -89,7 +89,6 @@ class ReparseFileMeta(models.Model): started_at = models.DateTimeField(auto_now_add=False, null=True) # set at beg of parse run finished_at = models.DateTimeField(auto_now_add=False, null=True) - # num_records_deleted = models.PositiveIntegerField(default=0) num_records_created = models.PositiveIntegerField(default=0) cat_4_errors_generated = models.PositiveIntegerField(default=0) From 45e0306de166bc729f543017bb4356a288389de1 Mon Sep 17 00:00:00 2001 From: Jan Timpe Date: Tue, 8 Oct 2024 16:07:14 -0400 Subject: [PATCH 10/19] fix tests --- tdrs-backend/tdpservice/data_files/tasks.py | 4 +- .../data_files/test/test_stuck_files.py | 50 ++++--- .../tdpservice/parsers/test/factories.py | 5 - .../management/commands/clean_and_reparse.py | 3 +- .../management/commands/tdp_search_index.py | 2 +- .../search_indexes/test/test_reparse.py | 125 ++++++++---------- .../tdpservice/users/test/test_permissions.py | 3 + 7 files changed, 87 insertions(+), 105 deletions(-) diff --git a/tdrs-backend/tdpservice/data_files/tasks.py b/tdrs-backend/tdpservice/data_files/tasks.py index 4beb1fe67..0ea5446af 100644 --- a/tdrs-backend/tdpservice/data_files/tasks.py +++ b/tdrs-backend/tdpservice/data_files/tasks.py @@ -23,8 +23,8 @@ def get_stuck_files(): Q( reparse_count__gt=0, reparses__timeout_at__lte=timezone.now(), - reparses__finished=False, - reparses__success=False + reparse_file_metas__finished=False, + reparse_file_metas__success=False ) ).filter( # where there is NO summary or the summary is in PENDING status diff --git a/tdrs-backend/tdpservice/data_files/test/test_stuck_files.py b/tdrs-backend/tdpservice/data_files/test/test_stuck_files.py index 
fb4242834..10a480ec4 100644 --- a/tdrs-backend/tdpservice/data_files/test/test_stuck_files.py +++ b/tdrs-backend/tdpservice/data_files/test/test_stuck_files.py @@ -31,12 +31,10 @@ def make_summary(datafile, status): ) -def make_reparse_meta(finished, success): +def make_reparse_meta(): """Create a test reparse meta model.""" return ReparseMetaFactory.create( - timeout_at=_time_ago(hours=1), - finished=finished, - success=success + timeout_at=_time_ago(hours=1) ) @@ -54,8 +52,8 @@ def test_find_pending_submissions__none_stuck(stt_user, stt): df2.created_at = _time_ago(hours=1) df2.save() make_summary(df2, DataFileSummary.Status.ACCEPTED) - rpm = make_reparse_meta(True, True) - df2.reparses.add(rpm) + rpm = make_reparse_meta() + df2.reparses.add(rpm, through_defaults={'finished': True, 'success': True}) # a pending standard submission, less than an hour old df3 = make_datafile(stt_user, stt, 3) @@ -81,8 +79,8 @@ def test_find_pending_submissions__non_reparse_stuck(stt_user, stt): df2.created_at = _time_ago(hours=1) df2.save() make_summary(df2, DataFileSummary.Status.ACCEPTED) - rpm = make_reparse_meta(True, True) - df2.reparses.add(rpm) + rpm = make_reparse_meta() + df2.reparses.add(rpm, through_defaults={'finished': True, 'success': True}) stuck_files = get_stuck_files() assert stuck_files.count() == 1 @@ -102,8 +100,8 @@ def test_find_pending_submissions__non_reparse_stuck__no_dfs(stt_user, stt): df2.created_at = _time_ago(hours=1) df2.save() make_summary(df2, DataFileSummary.Status.ACCEPTED) - rpm = make_reparse_meta(True, True) - df2.reparses.add(rpm) + rpm = make_reparse_meta() + df2.reparses.add(rpm, through_defaults={'finished': True, 'success': True}) stuck_files = get_stuck_files() assert stuck_files.count() == 1 @@ -124,8 +122,8 @@ def test_find_pending_submissions__reparse_stuck(stt_user, stt): df2.created_at = _time_ago(hours=1) df2.save() make_summary(df2, DataFileSummary.Status.PENDING) - rpm = make_reparse_meta(False, False) - df2.reparses.add(rpm) + rpm = make_reparse_meta() + df2.reparses.add(rpm, through_defaults={'finished': False, 'success': False}) stuck_files = get_stuck_files() assert stuck_files.count() == 1 @@ -145,8 +143,8 @@ def test_find_pending_submissions__reparse_stuck__no_dfs(stt_user, stt): df2 = make_datafile(stt_user, stt, 2) df2.created_at = _time_ago(hours=1) df2.save() - rpm = make_reparse_meta(False, False) - df2.reparses.add(rpm) + rpm = make_reparse_meta() + df2.reparses.add(rpm, through_defaults={'finished': False, 'success': False}) stuck_files = get_stuck_files() assert stuck_files.count() == 1 @@ -167,8 +165,8 @@ def test_find_pending_submissions__reparse_and_non_reparse_stuck(stt_user, stt): df2.created_at = _time_ago(hours=1) df2.save() make_summary(df2, DataFileSummary.Status.PENDING) - rpm = make_reparse_meta(False, False) - df2.reparses.add(rpm) + rpm = make_reparse_meta() + df2.reparses.add(rpm, through_defaults={'finished': False, 'success': False}) stuck_files = get_stuck_files() assert stuck_files.count() == 2 @@ -188,8 +186,8 @@ def test_find_pending_submissions__reparse_and_non_reparse_stuck_no_dfs(stt_user df2 = make_datafile(stt_user, stt, 2) df2.created_at = _time_ago(hours=1) df2.save() - rpm = make_reparse_meta(False, False) - df2.reparses.add(rpm) + rpm = make_reparse_meta() + df2.reparses.add(rpm, through_defaults={'finished': False, 'success': False}) stuck_files = get_stuck_files() assert stuck_files.count() == 2 @@ -207,8 +205,8 @@ def test_find_pending_submissions__old_reparse_stuck__new_not_stuck(stt_user, st dfs1 = 
make_summary(df1, DataFileSummary.Status.PENDING) # reparse fails the first time - rpm1 = make_reparse_meta(False, False) - df1.reparses.add(rpm1) + rpm1 = make_reparse_meta() + df1.reparses.add(rpm1, through_defaults={'finished': False, 'success': False}) stuck_files = get_stuck_files() assert stuck_files.count() == 1 @@ -217,8 +215,8 @@ def test_find_pending_submissions__old_reparse_stuck__new_not_stuck(stt_user, st dfs1.delete() # reparse deletes the original dfs and creates the new one make_summary(df1, DataFileSummary.Status.ACCEPTED) - rpm2 = make_reparse_meta(True, True) - df1.reparses.add(rpm2) + rpm2 = make_reparse_meta() + df1.reparses.add(rpm2, through_defaults={'finished': True, 'success': True}) stuck_files = get_stuck_files() assert stuck_files.count() == 0 @@ -234,8 +232,8 @@ def test_find_pending_submissions__new_reparse_stuck__old_not_stuck(stt_user, st dfs1 = make_summary(df1, DataFileSummary.Status.REJECTED) # reparse succeeds - rpm1 = make_reparse_meta(True, True) - df1.reparses.add(rpm1) + rpm1 = make_reparse_meta() + df1.reparses.add(rpm1, through_defaults={'finished': True, 'success': True}) # reparse again, fails this time dfs1.delete() # reparse deletes the original dfs and creates the new one @@ -244,8 +242,8 @@ def test_find_pending_submissions__new_reparse_stuck__old_not_stuck(stt_user, st status=DataFileSummary.Status.PENDING, ) - rpm2 = make_reparse_meta(False, False) - df1.reparses.add(rpm2) + rpm2 = make_reparse_meta() + df1.reparses.add(rpm2, through_defaults={'finished': False, 'success': False}) stuck_files = get_stuck_files() assert stuck_files.count() == 1 diff --git a/tdrs-backend/tdpservice/parsers/test/factories.py b/tdrs-backend/tdpservice/parsers/test/factories.py index c0f50e85b..5b952d02d 100644 --- a/tdrs-backend/tdpservice/parsers/test/factories.py +++ b/tdrs-backend/tdpservice/parsers/test/factories.py @@ -17,11 +17,6 @@ class Meta: model = "search_indexes.ReparseMeta" timeout_at = timezone.now() - finished = False - success = False - num_files_to_reparse = 1 - files_completed = 1 - files_failed = 0 class ParsingFileFactory(factory.django.DjangoModelFactory): diff --git a/tdrs-backend/tdpservice/search_indexes/management/commands/clean_and_reparse.py b/tdrs-backend/tdpservice/search_indexes/management/commands/clean_and_reparse.py index 286bcccf8..48d4cf3fe 100644 --- a/tdrs-backend/tdpservice/search_indexes/management/commands/clean_and_reparse.py +++ b/tdrs-backend/tdpservice/search_indexes/management/commands/clean_and_reparse.py @@ -341,8 +341,7 @@ def handle(self, *args, **options): fiscal_year=fiscal_year, all=reparse_all, new_indices=new_indices, - delete_old_indices=new_indices, - num_files_to_reparse=num_files) + delete_old_indices=new_indices) # Backup the Postgres DB backup_file_name += f"_rpv{meta_model.pk}.pg" diff --git a/tdrs-backend/tdpservice/search_indexes/management/commands/tdp_search_index.py b/tdrs-backend/tdpservice/search_indexes/management/commands/tdp_search_index.py index a531ae558..c14a302a1 100644 --- a/tdrs-backend/tdpservice/search_indexes/management/commands/tdp_search_index.py +++ b/tdrs-backend/tdpservice/search_indexes/management/commands/tdp_search_index.py @@ -31,7 +31,7 @@ def __get_log_context(self): def __get_index_suffix(self): meta_model = ReparseMeta.get_latest() - if meta_model is not None and not meta_model.finished: + if meta_model is not None and not meta_model.is_finished: return f"_rpv{meta_model.pk}" fmt = "%Y-%m-%d_%H.%M.%S" return f"_{datetime.now().strftime(fmt)}" diff --git 
a/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py b/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py index 118591a47..45e6d631b 100644 --- a/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py +++ b/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py @@ -6,6 +6,7 @@ from tdpservice.search_indexes.management.commands import clean_and_reparse from tdpservice.search_indexes.models.reparse_meta import ReparseMeta from tdpservice.users.models import User +from tdpservice.data_files.models import ReparseFileMeta from django.contrib.admin.models import LogEntry, ADDITION from django.db.utils import DatabaseError @@ -265,7 +266,7 @@ def test_reparse_dunce(): assert ReparseMeta.objects.count() == 0 @pytest.mark.django_db -def test_reparse_sequential(log_context): +def test_reparse_sequential(log_context, big_file): """Test reparse _assert_sequential_execution.""" cmd = clean_and_reparse.Command() assert True is cmd._assert_sequential_execution(log_context) @@ -278,6 +279,7 @@ def test_reparse_sequential(log_context): "safely execute reparse, please fix manually." ) + big_file.reparses.add(meta) meta.timeout_at = timezone.now() + timedelta(seconds=100) meta.save() assert False is cmd._assert_sequential_execution(log_context) @@ -287,6 +289,7 @@ def test_reparse_sequential(log_context): meta.timeout_at = timezone.now() meta.save() + assert True is cmd._assert_sequential_execution(log_context) timeout_entry = LogEntry.objects.latest('pk') assert timeout_entry.change_message == ("Previous reparse has exceeded the timeout. Allowing " @@ -308,7 +311,7 @@ def test_reparse_quarter_and_year(mocker, dfs, cat4_edge_case_file, big_file, sm cmd.handle(**opts) latest = ReparseMeta.objects.select_for_update().latest("pk") - assert latest.num_files_to_reparse == 1 + assert latest.num_files == 1 assert latest.num_records_deleted == 3073 @pytest.mark.django_db() @@ -327,7 +330,7 @@ def test_reparse_quarter(mocker, dfs, cat4_edge_case_file, big_file, small_ssp_s cmd.handle(**opts) latest = ReparseMeta.objects.select_for_update().latest("pk") - assert latest.num_files_to_reparse == 4 + assert latest.num_files == 4 assert latest.num_records_deleted == 3104 @pytest.mark.django_db() @@ -346,7 +349,7 @@ def test_reparse_year(mocker, dfs, cat4_edge_case_file, big_file, small_ssp_sect cmd.handle(**opts) latest = ReparseMeta.objects.select_for_update().latest("pk") - assert latest.num_files_to_reparse == 2 + assert latest.num_files == 2 assert latest.num_records_deleted == 27 @pytest.mark.django_db() @@ -365,7 +368,7 @@ def test_reparse_all(mocker, dfs, cat4_edge_case_file, big_file, small_ssp_secti cmd.handle(**opts) latest = ReparseMeta.objects.select_for_update().latest("pk") - assert latest.num_files_to_reparse == 4 + assert latest.num_files == 4 assert latest.num_records_deleted == 3104 @pytest.mark.django_db() @@ -387,97 +390,85 @@ def test_reparse_no_files(mocker): "Quarter: Q1-4. 
Nothing to do.") @pytest.mark.django_db() -def test_mm_all_files_done(): +def test_mm_all_files_done(big_file): """Test meta model all files done.""" meta_model = ReparseMeta.objects.create() + big_file.reparses.add(meta_model) assert ReparseMeta.assert_all_files_done(meta_model) is False - meta_model.finished = True - meta_model.files_completed = 1 - meta_model.num_files_to_reparse = 1 + fm = ReparseFileMeta.objects.get(data_file_id=big_file.pk, reparse_meta_id=meta_model.pk) + fm.finished = True + fm.save() assert ReparseMeta.assert_all_files_done(meta_model) is True @pytest.mark.django_db() -def test_mm_increment_files_completed(big_file): +def test_mm_files_completed(big_file): """Test meta model increment files completed.""" - meta_model = ReparseMeta.objects.create(num_files_to_reparse=2, all=True) + meta_model = ReparseMeta.objects.create(all=True) big_file.reparses.add(meta_model) big_file.save() - ReparseMeta.increment_files_completed(big_file.reparses) meta_model = ReparseMeta.get_latest() - assert meta_model.finished is False - assert meta_model.files_completed == 1 - assert meta_model.files_failed == 0 + assert meta_model.is_finished is False + assert meta_model.num_files == 1 + assert meta_model.num_files_completed == 0 + assert meta_model.num_files_failed == 0 + assert ReparseMeta.assert_all_files_done(meta_model) is False - ReparseMeta.increment_files_completed(big_file.reparses) + fm = ReparseFileMeta.objects.get(data_file_id=big_file.pk, reparse_meta_id=meta_model.pk) + fm.finished = True + fm.success = True + fm.save() meta_model = ReparseMeta.get_latest() - assert meta_model.finished is True - assert meta_model.files_completed == 2 - assert meta_model.files_failed == 0 + assert meta_model.is_finished is True + assert meta_model.num_files == 1 + assert meta_model.num_files_completed == 1 + assert meta_model.num_files_failed == 0 - assert meta_model.success is True + assert meta_model.is_success is True assert ReparseMeta.assert_all_files_done(meta_model) is True @pytest.mark.django_db() -def test_mm_increment_files_failed(big_file): +def test_mm_files_failed(big_file): """Test meta model increment files failed.""" - meta_model = ReparseMeta.objects.create(num_files_to_reparse=2, all=True) - big_file.reparses.add(meta_model) - big_file.save() - - ReparseMeta.increment_files_failed(big_file.reparses) - meta_model = ReparseMeta.get_latest() - assert meta_model.finished is False - assert meta_model.files_completed == 0 - assert meta_model.files_failed == 1 - - ReparseMeta.increment_files_failed(big_file.reparses) - meta_model = ReparseMeta.get_latest() - assert meta_model.finished is True - assert meta_model.files_completed == 0 - assert meta_model.files_failed == 2 - - assert meta_model.success is False - - assert ReparseMeta.assert_all_files_done(meta_model) is True - -@pytest.mark.django_db() -def test_mm_increment_files_failed_and_passed(big_file): - """Test meta model both increment failed and passed files.""" - meta_model = ReparseMeta.objects.create(num_files_to_reparse=2, all=True) + meta_model = ReparseMeta.objects.create(all=True) big_file.reparses.add(meta_model) big_file.save() - ReparseMeta.increment_files_completed(big_file.reparses) meta_model = ReparseMeta.get_latest() - assert meta_model.finished is False - assert meta_model.files_completed == 1 - assert meta_model.files_failed == 0 + assert meta_model.is_finished is False + assert meta_model.num_files_completed == 0 + assert meta_model.num_files_failed == 0 + assert 
ReparseMeta.assert_all_files_done(meta_model) is False - ReparseMeta.increment_files_failed(big_file.reparses) + fm = ReparseFileMeta.objects.get(data_file_id=big_file.pk, reparse_meta_id=meta_model.pk) + fm.finished = True + fm.save() meta_model = ReparseMeta.get_latest() - assert meta_model.finished is True - assert meta_model.files_completed == 1 - assert meta_model.files_failed == 1 + assert meta_model.is_finished is True + assert meta_model.num_files_completed == 1 + assert meta_model.num_files_failed == 1 - assert meta_model.success is False + assert meta_model.is_success is False assert ReparseMeta.assert_all_files_done(meta_model) is True @pytest.mark.django_db() def test_mm_increment_records_created(big_file): """Test meta model increment records created.""" - meta_model = ReparseMeta.objects.create(num_files_to_reparse=2, all=True) + meta_model = ReparseMeta.objects.create(all=True) big_file.reparses.add(meta_model) big_file.save() - ReparseMeta.increment_records_created(big_file.reparses, 500) meta_model = ReparseMeta.get_latest() - assert meta_model.num_records_created == 500 + assert meta_model.num_records_created == 0 - ReparseMeta.increment_records_created(big_file.reparses, 888) + fm = ReparseFileMeta.objects.get(data_file_id=big_file.pk, reparse_meta_id=meta_model.pk) + fm.finished = True + fm.success = True + fm.num_records_created = 1388 + fm.save() meta_model = ReparseMeta.get_latest() assert meta_model.num_records_created == 1388 @@ -492,18 +483,14 @@ def test_mm_get_latest(): assert ReparseMeta.get_latest() != meta1 @pytest.mark.django_db() -def test_mm_file_counts_match(): +def test_mm_file_counts_match(big_file): """Test meta model file counts match.""" - meta_model = ReparseMeta.objects.create(num_files_to_reparse=2) + meta_model = ReparseMeta.objects.create() + big_file.reparses.add(meta_model) + big_file.save() assert ReparseMeta.file_counts_match(meta_model) is False - meta_model.files_completed = 2 - assert ReparseMeta.file_counts_match(meta_model) is True - - meta_model.files_completed = 0 - meta_model.files_failed = 2 - assert ReparseMeta.file_counts_match(meta_model) is True - - meta_model.files_completed = 1 - meta_model.files_failed = 1 + fm = ReparseFileMeta.objects.get(data_file_id=big_file.pk, reparse_meta_id=meta_model.pk) + fm.finished = True + fm.save() assert ReparseMeta.file_counts_match(meta_model) is True diff --git a/tdrs-backend/tdpservice/users/test/test_permissions.py b/tdrs-backend/tdpservice/users/test/test_permissions.py index ae53b3cda..f1b3847ad 100644 --- a/tdrs-backend/tdpservice/users/test/test_permissions.py +++ b/tdrs-backend/tdpservice/users/test/test_permissions.py @@ -159,6 +159,9 @@ def test_ofa_system_admin_permissions(ofa_system_admin): 'search_indexes.add_reparsemeta', 'search_indexes.view_reparsemeta', 'search_indexes.change_reparsemeta', + 'data_files.add_reparsefilemeta', + 'data_files.view_reparsefilemeta', + 'data_files.change_reparsefilemeta', } group_permissions = ofa_system_admin.get_group_permissions() assert group_permissions == expected_permissions From ae86249c0847b2dbbb9d0c816c997ec3ff86b490 Mon Sep 17 00:00:00 2001 From: Jan Timpe Date: Tue, 8 Oct 2024 16:33:23 -0400 Subject: [PATCH 11/19] lint --- tdrs-backend/tdpservice/data_files/models.py | 9 +++++++-- tdrs-backend/tdpservice/parsers/parse.py | 2 -- .../search_indexes/admin/reparse_meta.py | 2 ++ .../search_indexes/models/reparse_meta.py | 16 ++++++++++++---- 4 files changed, 21 insertions(+), 8 deletions(-) diff --git 
a/tdrs-backend/tdpservice/data_files/models.py b/tdrs-backend/tdpservice/data_files/models.py index e2cc293de..1ba6bfb35 100644 --- a/tdrs-backend/tdpservice/data_files/models.py +++ b/tdrs-backend/tdpservice/data_files/models.py @@ -81,12 +81,17 @@ class Meta: class ReparseFileMeta(models.Model): """Meta data model representing a single file parse within a reparse execution.""" + data_file = models.ForeignKey('data_files.DataFile', on_delete=models.CASCADE, related_name='reparse_file_metas') - reparse_meta = models.ForeignKey('search_indexes.ReparseMeta', on_delete=models.CASCADE, related_name='reparse_file_metas') + reparse_meta = models.ForeignKey( + 'search_indexes.ReparseMeta', + on_delete=models.CASCADE, + related_name='reparse_file_metas' + ) finished = models.BooleanField(default=False) success = models.BooleanField(default=False) - started_at = models.DateTimeField(auto_now_add=False, null=True) # set at beg of parse run + started_at = models.DateTimeField(auto_now_add=False, null=True) finished_at = models.DateTimeField(auto_now_add=False, null=True) num_records_created = models.PositiveIntegerField(default=0) diff --git a/tdrs-backend/tdpservice/parsers/parse.py b/tdrs-backend/tdpservice/parsers/parse.py index 47adeedcc..187787745 100644 --- a/tdrs-backend/tdpservice/parsers/parse.py +++ b/tdrs-backend/tdpservice/parsers/parse.py @@ -13,7 +13,6 @@ from tdpservice.parsers.schema_defs.utils import get_section_reference, get_program_model from tdpservice.parsers.case_consistency_validator import CaseConsistencyValidator from tdpservice.parsers.util import log_parser_exception -from tdpservice.search_indexes.models.reparse_meta import ReparseMeta logger = logging.getLogger(__name__) @@ -474,7 +473,6 @@ def parse_datafile_lines(datafile, dfs, program_type, section, is_encrypted, cas f"validated {case_consistency_validator.total_cases_validated} of them.") dfs.save() - return errors diff --git a/tdrs-backend/tdpservice/search_indexes/admin/reparse_meta.py b/tdrs-backend/tdpservice/search_indexes/admin/reparse_meta.py index 4b902bc26..98095de08 100644 --- a/tdrs-backend/tdpservice/search_indexes/admin/reparse_meta.py +++ b/tdrs-backend/tdpservice/search_indexes/admin/reparse_meta.py @@ -9,10 +9,12 @@ class ReparseMetaAdmin(ReadOnlyAdminMixin): inlines = [DataFileInline] def reparse_is_finished(self, instance): + """Overload instance property for ui checkboxes.""" return instance.is_finished reparse_is_finished.boolean = True def reparse_is_success(self, instance): + """Overload instance property for ui checkboxes.""" return instance.is_success reparse_is_success.boolean = True diff --git a/tdrs-backend/tdpservice/search_indexes/models/reparse_meta.py b/tdrs-backend/tdpservice/search_indexes/models/reparse_meta.py index c7c9b2f08..ee882e8b8 100644 --- a/tdrs-backend/tdpservice/search_indexes/models/reparse_meta.py +++ b/tdrs-backend/tdpservice/search_indexes/models/reparse_meta.py @@ -1,9 +1,7 @@ """Meta data model for tracking reparsed files.""" -from django.db import models, transaction -from django.db.utils import DatabaseError +from django.db import models from django.db.models import Max -from tdpservice.search_indexes.util import count_all_records import logging logger = logging.getLogger(__name__) @@ -12,6 +10,7 @@ class ReparseMeta(models.Model): """ Meta data model representing a single execution of `clean_and_reparse`. 
+ Because this model is intended to be queried in a distributed and parrallel fashion, all queries should rely on database level locking to ensure race conditions aren't introduced. See `increment_files_reparsed` for an example. """ @@ -39,44 +38,52 @@ class Meta: @property def is_finished(self): + """Return True if all associated ReparseFileMeta objects are finished.""" return all([r.finished for r in self.reparse_file_metas.all()]) @property def is_success(self): + """Return True if all associated ReparseFileMeta objects are successful.""" return all([r.success for r in self.reparse_file_metas.all()]) @property def finished_at(self): + """Return the finished_at timestamp of the last ReparseFileMeta object.""" last_parse = self.reparse_file_metas.order_by('-finished_at').first() return last_parse.finished_at if last_parse else None @property def num_files(self): + """Return the number of associated ReparseFileMeta objects.""" return self.reparse_file_metas.count() @property def num_files_completed(self): + """Return the number of completed ReparseFileMeta objects.""" return self.reparse_file_metas.filter(finished=True).count() @property def num_files_succeeded(self): + """Return the number of successful ReparseFileMeta objects.""" return self.reparse_file_metas.filter(finished=True, success=True).count() @property def num_files_failed(self): + """Return the number of failed ReparseFileMeta objects.""" return self.reparse_file_metas.filter(finished=True, success=False).count() @property def num_records_created(self): + """Return the sum of records created for all associated ReparseFileMeta objects.""" return sum([r.num_records_created for r in self.reparse_file_metas.all()]) - # remove unused statics or change to utils funcs in own app and/or make new cleanup ticket for future @staticmethod def file_counts_match(meta_model): """ Check whether the file counts match. + This function assumes the meta_model has been passed in a distributed/thread safe way. If the database row containing this model has not been locked the caller will experience race issues. """ @@ -90,6 +97,7 @@ def file_counts_match(meta_model): def assert_all_files_done(meta_model): """ Check if all files have been parsed with or without exceptions. + This function assumes the meta_model has been passed in a distributed/thread safe way. If the database row containing this model has not been locked the caller will experience race issues. 
""" From 6f453594bd13be03ec09c27ad02c58bab0385cff Mon Sep 17 00:00:00 2001 From: Jan Timpe Date: Thu, 10 Oct 2024 12:36:56 -0400 Subject: [PATCH 12/19] de-duplicate error handling --- .../tdpservice/scheduling/parser_task.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/tdrs-backend/tdpservice/scheduling/parser_task.py b/tdrs-backend/tdpservice/scheduling/parser_task.py index 65ea8fe95..e78abad44 100644 --- a/tdrs-backend/tdpservice/scheduling/parser_task.py +++ b/tdrs-backend/tdpservice/scheduling/parser_task.py @@ -17,6 +17,14 @@ logger = logging.getLogger(__name__) +def set_reparse_file_meta_model_failed_state(file_meta): + """Set ReparseFileMeta fields to indicate a parse failure.""" + file_meta.finished = True + file_meta.success = False + file_meta.finished_at = timezone.now() + file_meta.save() + + @shared_task def parse(data_file_id, reparse_id=None): """Send data file for processing.""" @@ -71,10 +79,7 @@ def parse(data_file_id, reparse_id=None): "error" ) if reparse_id: - file_meta.finished = True - file_meta.success = False - file_meta.finished_at = timezone.now() - file_meta.save() + set_reparse_file_meta_model_failed_state(file_meta) except Exception as e: generate_error = make_generate_parser_error(data_file, None) error = generate_error(schema=None, @@ -93,7 +98,4 @@ def parse(data_file_id, reparse_id=None): f"see if manual intervention is required. Exception: \n{e}"), "critical") if reparse_id: - file_meta.finished = True - file_meta.success = False - file_meta.finished_at = timezone.now() - file_meta.save() + set_reparse_file_meta_model_failed_state(file_meta) From 18690e886814a6b50dbbaf52cc38e186c1d9a1d1 Mon Sep 17 00:00:00 2001 From: Jan Timpe Date: Fri, 11 Oct 2024 12:10:10 -0400 Subject: [PATCH 13/19] erronious docstring --- tdrs-backend/tdpservice/search_indexes/admin/reparse_meta.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tdrs-backend/tdpservice/search_indexes/admin/reparse_meta.py b/tdrs-backend/tdpservice/search_indexes/admin/reparse_meta.py index 98095de08..4ea731475 100644 --- a/tdrs-backend/tdpservice/search_indexes/admin/reparse_meta.py +++ b/tdrs-backend/tdpservice/search_indexes/admin/reparse_meta.py @@ -1,4 +1,4 @@ -"""ModelAdmin classes for parsed SSP data files.""" +"""ModelAdmin class for the ReparseMeta model.""" from .mixins import ReadOnlyAdminMixin from tdpservice.data_files.admin.admin import DataFileInline From aed94c4ecaf0a86a2f6855eae8ce440aee171143 Mon Sep 17 00:00:00 2001 From: Jan Timpe Date: Fri, 11 Oct 2024 12:17:41 -0400 Subject: [PATCH 14/19] ensure is_finished and is_success are False if no associated files --- .../search_indexes/models/reparse_meta.py | 8 +++++-- .../search_indexes/test/test_reparse.py | 22 +++++++++++++++++++ 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/tdrs-backend/tdpservice/search_indexes/models/reparse_meta.py b/tdrs-backend/tdpservice/search_indexes/models/reparse_meta.py index ee882e8b8..e5d43441c 100644 --- a/tdrs-backend/tdpservice/search_indexes/models/reparse_meta.py +++ b/tdrs-backend/tdpservice/search_indexes/models/reparse_meta.py @@ -39,12 +39,16 @@ class Meta: @property def is_finished(self): """Return True if all associated ReparseFileMeta objects are finished.""" - return all([r.finished for r in self.reparse_file_metas.all()]) + if self.num_files > 0: + return all([r.finished for r in self.reparse_file_metas.all()]) + return False @property def is_success(self): """Return True if all associated ReparseFileMeta objects 
are successful.""" - return all([r.success for r in self.reparse_file_metas.all()]) + if self.is_finished: + return all([r.success for r in self.reparse_file_metas.all()]) + return False @property def finished_at(self): diff --git a/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py b/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py index 45e6d631b..afe90c859 100644 --- a/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py +++ b/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py @@ -494,3 +494,25 @@ def test_mm_file_counts_match(big_file): fm.finished = True fm.save() assert ReparseMeta.file_counts_match(meta_model) is True + +@pytest.mark.django_db() +def test_reparse_finished_success_false_before_file_queue(big_file): + meta_model = ReparseMeta.objects.create() + assert meta_model.is_finished is False + assert meta_model.is_success is False + + big_file.reparses.add(meta_model) + big_file.save() + assert meta_model.is_finished is False + assert meta_model.is_success is False + + fm = ReparseFileMeta.objects.get(data_file_id=big_file.pk, reparse_meta_id=meta_model.pk) + fm.finished = True + fm.save() + assert meta_model.is_finished is True + assert meta_model.is_success is False + + fm.success = True + fm.save() + assert meta_model.is_finished is True + assert meta_model.is_success is True From 51981aa7f3a7bb1626367dd2a1d00ac478794fc0 Mon Sep 17 00:00:00 2001 From: Eric Lipe <125676261+elipe17@users.noreply.github.com> Date: Fri, 11 Oct 2024 13:54:00 -0400 Subject: [PATCH 15/19] Additional DF Filters (#3204) * - Add extra filters - remove erroneous code block * - Updated order of filters - Removed program type filter since it is redundant * - remove filter * - remove id from filters --------- Co-authored-by: raftmsohani <97037188+raftmsohani@users.noreply.github.com> Co-authored-by: Alex P. 
<63075587+ADPennington@users.noreply.github.com> Co-authored-by: Andrew <84722778+andrew-jameson@users.noreply.github.com> --- .../tdpservice/data_files/admin/admin.py | 11 +++++----- .../tdpservice/data_files/admin/filters.py | 21 ------------------- tdrs-backend/tdpservice/data_files/models.py | 2 -- 3 files changed, 6 insertions(+), 28 deletions(-) diff --git a/tdrs-backend/tdpservice/data_files/admin/admin.py b/tdrs-backend/tdpservice/data_files/admin/admin.py index 27c9b8868..51ddfd1d9 100644 --- a/tdrs-backend/tdpservice/data_files/admin/admin.py +++ b/tdrs-backend/tdpservice/data_files/admin/admin.py @@ -4,7 +4,7 @@ # from tdpservice.core.filters import custom_filter_title from tdpservice.data_files.models import DataFile, LegacyFileTransfer from tdpservice.parsers.models import DataFileSummary, ParserError -from tdpservice.data_files.admin.filters import DataFileSummaryPrgTypeFilter, LatestReparseEvent, VersionFilter +from tdpservice.data_files.admin.filters import LatestReparseEvent, VersionFilter from django.conf import settings from django.utils.html import format_html from datetime import datetime, timedelta, timezone @@ -113,14 +113,15 @@ def queryset(self, request, queryset): ] list_filter = [ + 'stt', + 'year', 'quarter', 'section', - 'stt', + 'summary__status', + 'stt__type', + 'stt__region', 'user', - 'year', SubmissionDateFilter, - 'summary__status', - DataFileSummaryPrgTypeFilter, LatestReparseEvent, VersionFilter, ] diff --git a/tdrs-backend/tdpservice/data_files/admin/filters.py b/tdrs-backend/tdpservice/data_files/admin/filters.py index 0f991d882..1429ecd20 100644 --- a/tdrs-backend/tdpservice/data_files/admin/filters.py +++ b/tdrs-backend/tdpservice/data_files/admin/filters.py @@ -4,27 +4,6 @@ from tdpservice.search_indexes.models.reparse_meta import ReparseMeta from tdpservice.core.filters import MostRecentVersionFilter -class DataFileSummaryPrgTypeFilter(admin.SimpleListFilter): - """Admin class filter for Program Type on datafile model.""" - - title = 'Program Type' - parameter_name = 'program_type' - - def lookups(self, request, model_admin): - """Return a list of tuples.""" - return [ - ('TAN', 'TAN'), - ('SSP', 'SSP'), - ] - - def queryset(self, request, queryset): - """Return a queryset.""" - if self.value(): - query_set_ids = [df.id for df in queryset if df.prog_type == self.value()] - return queryset.filter(id__in=query_set_ids) - else: - return queryset - class LatestReparseEvent(admin.SimpleListFilter): """Filter class to filter files based on the latest reparse event.""" diff --git a/tdrs-backend/tdpservice/data_files/models.py b/tdrs-backend/tdpservice/data_files/models.py index 6fe5355e0..66d245c87 100644 --- a/tdrs-backend/tdpservice/data_files/models.py +++ b/tdrs-backend/tdpservice/data_files/models.py @@ -164,8 +164,6 @@ def prog_type(self): # e.g., 'SSP Closed Case Data' if self.section.startswith('SSP'): return 'SSP' - elif self.section.startswith('Tribal'): - return 'TAN' # problematic, do we need to infer tribal entirely from tribe/fips code? 
else: return 'TAN' From 5b823bccbf4bda9ca0a14dac99d7cabd59bd526f Mon Sep 17 00:00:00 2001 From: Jan Timpe Date: Wed, 16 Oct 2024 10:22:17 -0400 Subject: [PATCH 16/19] lint err --- tdrs-backend/tdpservice/search_indexes/test/test_reparse.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py b/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py index afe90c859..54d49aedb 100644 --- a/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py +++ b/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py @@ -497,6 +497,7 @@ def test_mm_file_counts_match(big_file): @pytest.mark.django_db() def test_reparse_finished_success_false_before_file_queue(big_file): + """Test is_finished and is_success are False if no files added.""" meta_model = ReparseMeta.objects.create() assert meta_model.is_finished is False assert meta_model.is_success is False From 3e524bbdc15a7cd1cb7b4486dd1a14479660c54b Mon Sep 17 00:00:00 2001 From: Jan Timpe Date: Wed, 16 Oct 2024 13:03:14 -0400 Subject: [PATCH 17/19] re-enable total_num_records_post --- tdrs-backend/tdpservice/scheduling/parser_task.py | 2 ++ .../tdpservice/search_indexes/models/reparse_meta.py | 8 ++++++++ 2 files changed, 10 insertions(+) diff --git a/tdrs-backend/tdpservice/scheduling/parser_task.py b/tdrs-backend/tdpservice/scheduling/parser_task.py index e78abad44..06d0f7b21 100644 --- a/tdrs-backend/tdpservice/scheduling/parser_task.py +++ b/tdrs-backend/tdpservice/scheduling/parser_task.py @@ -12,6 +12,7 @@ from tdpservice.parsers.aggregates import case_aggregates_by_month, total_errors_by_month from tdpservice.parsers.util import log_parser_exception, make_generate_parser_error from tdpservice.email.helpers.data_file import send_data_submitted_email +from tdpservice.search_indexes.models.reparse_meta import ReparseMeta logger = logging.getLogger(__name__) @@ -65,6 +66,7 @@ def parse(data_file_id, reparse_id=None): file_meta.success = True file_meta.finished_at = timezone.now() file_meta.save() + ReparseMeta.set_total_num_records_post(ReparseMeta.objects.get(pk=reparse_id)) else: recipients = User.objects.filter( stt=data_file.stt, diff --git a/tdrs-backend/tdpservice/search_indexes/models/reparse_meta.py b/tdrs-backend/tdpservice/search_indexes/models/reparse_meta.py index e5d43441c..a12d7b5b8 100644 --- a/tdrs-backend/tdpservice/search_indexes/models/reparse_meta.py +++ b/tdrs-backend/tdpservice/search_indexes/models/reparse_meta.py @@ -2,6 +2,7 @@ from django.db import models from django.db.models import Max +from tdpservice.search_indexes.util import count_all_records import logging logger = logging.getLogger(__name__) @@ -116,3 +117,10 @@ def get_latest(): if max_pk.get("pk__max", None) is None: return None return ReparseMeta.objects.get(pk=max_pk["pk__max"]) + + @staticmethod + def set_total_num_records_post(meta_model): + """Update the total_num_records_post field once reparse has completed.""" + if meta_model.is_finished: + meta_model.total_num_records_post = count_all_records() + meta_model.save() From 34fb345a89e967e790d6fc05b431b70926d94f8d Mon Sep 17 00:00:00 2001 From: raftmsohani <97037188+raftmsohani@users.noreply.github.com> Date: Wed, 16 Oct 2024 15:55:34 -0400 Subject: [PATCH 18/19] 3060 active user session (#3182) * no change * lok into token * Added a custom session handler instead of signed_sessions * Update common.py * Update README.md * added a new settings * 3060 linting * 3060 linting * 3060 remove unused params * 3060 uncommented SIGNED_COOKIE_EXPIRES * 
3060 update markdown
* disable session expire at browser close
* remove unused overrides
* corrected the timeouts

---------

Co-authored-by: Alex P. <63075587+ADPennington@users.noreply.github.com>
---
 Taskfile.yml                                  |  2 +-
 tdrs-backend/docs/session-management.md       |  2 +-
 .../tdpservice/core/custom_session_engine.py  | 34 +++++++++++++++++++
 .../tdpservice/data_files/admin/admin.py      |  1 -
 tdrs-backend/tdpservice/settings/common.py    |  7 ++--
 5 files changed, 40 insertions(+), 6 deletions(-)
 create mode 100644 tdrs-backend/tdpservice/core/custom_session_engine.py

diff --git a/Taskfile.yml b/Taskfile.yml
index 8f1731fe9..d5c0c8951 100644
--- a/Taskfile.yml
+++ b/Taskfile.yml
@@ -5,7 +5,7 @@ tasks:
   upload-kibana-objs:
     desc: Upload dashboards to Kibana server
     cmds:
-      - curl -X POST localhost:5601/api/saved_objects/_import -H "kbn-xsrf: true" --form file=@tdrs-backend/tdpservice/search_indexes/kibana_saved_objs.ndjson
+      - 'curl -X POST localhost:5601/api/saved_objects/_import -H "kbn-xsrf: true" --form file=@tdrs-backend/tdpservice/search_indexes/kibana_saved_objs.ndjson'

   create-network:
     desc: Create the external network
diff --git a/tdrs-backend/docs/session-management.md b/tdrs-backend/docs/session-management.md
index e4f0c1831..6c079efe8 100644
--- a/tdrs-backend/docs/session-management.md
+++ b/tdrs-backend/docs/session-management.md
@@ -11,7 +11,7 @@ When the user logs in, they will receive an HttpOnly cookie with no `Expires=` s
 SESSION_EXPIRE_AT_BROWSER_CLOSE=True
 ```

-The cookie itself contains a `sessionid` reference to a Django-managed session. The session expiration is set to the same expiration of the login.gov-provided jwt, **15 minutes**.
+The cookie itself contains a `sessionid` reference to a Django-managed session. By default, the session expiration is set to the same expiration of the login.gov-provided jwt, **15 minutes**. Since `sessionid` is signed when created, it is not possible to update the expiry without decoding and recreating the signature, so the session expires even though the cookie is extended with every request. To overcome this shortcoming, a longer expiry is assigned to `sessionid` using a new variable, `SIGNED_COOKIE_EXPIRES`, in common settings.

 This is managed in `tdrs-backend/tdpservice/settings/common.py` with the following setting:
 ```python
diff --git a/tdrs-backend/tdpservice/core/custom_session_engine.py b/tdrs-backend/tdpservice/core/custom_session_engine.py
new file mode 100644
index 000000000..70f397a52
--- /dev/null
+++ b/tdrs-backend/tdpservice/core/custom_session_engine.py
@@ -0,0 +1,34 @@
+"""Custom session engine for TDP."""
+
+from django.contrib.sessions.backends import signed_cookies
+from django.core import signing
+import datetime
+from django.conf import settings
+
+class SessionStore(signed_cookies.SessionStore):
+    """Custom session engine for TDP."""
+
+    def __init__(self, session_key=None):
+        """Initialize the custom session engine."""
+        super().__init__(session_key)
+
+    def load(self):
+        """Load the session data from the database."""
+        """
+        Load the data from the key itself instead of fetching from some
+        external data store. Opposite of _get_session_key(), raise BadSignature
+        if signature fails.
+ """ + + try: + return signing.loads( + self.session_key, + serializer=self.serializer, + # This doesn't handle non-default expiry dates, see #19201 + max_age=datetime.timedelta(seconds=settings.SIGNED_COOKIE_EXPIRES), + salt="django.contrib.sessions.backends.signed_cookies", + ) + except Exception: + # BadSignature, ValueError, or unpickling exceptions. If any of + # these happen, reset the session. + return {} diff --git a/tdrs-backend/tdpservice/data_files/admin/admin.py b/tdrs-backend/tdpservice/data_files/admin/admin.py index 51ddfd1d9..4c9fce07a 100644 --- a/tdrs-backend/tdpservice/data_files/admin/admin.py +++ b/tdrs-backend/tdpservice/data_files/admin/admin.py @@ -1,7 +1,6 @@ """Admin class for DataFile objects.""" from django.contrib import admin from tdpservice.core.utils import ReadOnlyAdminMixin -# from tdpservice.core.filters import custom_filter_title from tdpservice.data_files.models import DataFile, LegacyFileTransfer from tdpservice.parsers.models import DataFileSummary, ParserError from tdpservice.data_files.admin.filters import LatestReparseEvent, VersionFilter diff --git a/tdrs-backend/tdpservice/settings/common.py b/tdrs-backend/tdpservice/settings/common.py index ba936b545..6f4e35353 100644 --- a/tdrs-backend/tdpservice/settings/common.py +++ b/tdrs-backend/tdpservice/settings/common.py @@ -281,10 +281,11 @@ class Common(Configuration): ) # Sessions - SESSION_ENGINE = "django.contrib.sessions.backends.signed_cookies" + SESSION_ENGINE = "tdpservice.core.custom_session_engine" + SIGNED_COOKIE_EXPIRES = 60 * 60 * 12 # 12 hours SESSION_COOKIE_HTTPONLY = True - SESSION_EXPIRE_AT_BROWSER_CLOSE = True - SESSION_COOKIE_AGE = 15 * 60 # 15 minutes + SESSION_SAVE_EVERY_REQUEST = True + SESSION_COOKIE_AGE = 60 * 30 # 30 minutes # The CSRF token Cookie holds no security benefits when confined to HttpOnly. # Setting this to false to allow the frontend to include it in the header # of API POST calls to prevent false negative authorization errors. From 27927766b1f57a85fc2a2ac049b35b582ca3247e Mon Sep 17 00:00:00 2001 From: Andrew <84722778+andrew-jameson@users.noreply.github.com> Date: Wed, 16 Oct 2024 16:26:06 -0400 Subject: [PATCH 19/19] Hotfix/make erd docker login (#3226) * Trying docker login step prereq * hijack workflow * more workflows * Volume not updating, fix per Eric for missing png file * Fail job if png not found * Omit scheduling for coverage run. * Removing branch-specific carve-outs in pipeline --------- Co-authored-by: andrew-jameson --- .circleci/build-and-test/workflows.yml | 6 ++++++ .circleci/util/jobs.yml | 5 +++++ tdrs-backend/docker-compose.yml | 2 +- tdrs-backend/setup.cfg | 2 ++ 4 files changed, 14 insertions(+), 1 deletion(-) diff --git a/.circleci/build-and-test/workflows.yml b/.circleci/build-and-test/workflows.yml index b822f1cdc..99d7c4fff 100644 --- a/.circleci/build-and-test/workflows.yml +++ b/.circleci/build-and-test/workflows.yml @@ -49,6 +49,12 @@ - /^release.*/ requires: - secrets-check + - make_erd: # from ../util folder + filters: + branches: + only: + - develop + - master build-and-test-backend: when: << pipeline.parameters.build_and_test_backend >> diff --git a/.circleci/util/jobs.yml b/.circleci/util/jobs.yml index 3cd1bfe12..4752ad5a0 100644 --- a/.circleci/util/jobs.yml +++ b/.circleci/util/jobs.yml @@ -4,6 +4,7 @@ steps: - checkout - docker-compose-check + - docker-login - run: name: Run graph_models command: | @@ -13,5 +14,9 @@ fi docker-compose run --rm web bash -c \ "./manage.py graph_models -a -g -o tdp_erd.png" + if [[ ! 
-f tdp_erd.png ]]; then + echo "Entity Relationship Diagram not found." + exit 1 + fi - store_artifacts: path: tdrs-backend/tdp_erd.png diff --git a/tdrs-backend/docker-compose.yml b/tdrs-backend/docker-compose.yml index 3330ae493..7ab823d3e 100644 --- a/tdrs-backend/docker-compose.yml +++ b/tdrs-backend/docker-compose.yml @@ -178,7 +178,7 @@ services: - ELASTICSEARCH_LOG_INDEX_SLOW_LEVEL volumes: - .:/tdpapp - - logs:/tdpapp + - logs:/tmp image: tdp-backend build: . command: > diff --git a/tdrs-backend/setup.cfg b/tdrs-backend/setup.cfg index ab064a186..9ce169907 100644 --- a/tdrs-backend/setup.cfg +++ b/tdrs-backend/setup.cfg @@ -6,6 +6,7 @@ omit = tdpservice/settings/production.py tdpservice/settings/staging.py tdpservice/wsgi.py + tdpservice/scheduling/* *test* *migrations* @@ -18,6 +19,7 @@ omit = tdpservice/settings/production.py tdpservice/settings/staging.py tdpservice/wsgi.py + tdpservice/scheduling/* *test* *migrations*
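
The custom session engine introduced in PATCH 18 ultimately defers to `django.core.signing.loads` with a `max_age` derived from `SIGNED_COOKIE_EXPIRES`, falling back to an empty session when the payload is stale or tampered with. The following standalone sketch is not part of the patch series; the helper name `load_session_payload` and the throwaway `SECRET_KEY` are illustrative assumptions used only to show that behaviour in isolation.

```python
# Standalone sketch: demonstrates the max_age check that the custom session
# engine's load() relies on. Assumes Django is installed; SECRET_KEY below is
# a dummy value for illustration only.
import time

from django.conf import settings
from django.core import signing

if not settings.configured:
    settings.configure(SECRET_KEY="illustration-only-secret")

SALT = "django.contrib.sessions.backends.signed_cookies"


def load_session_payload(cookie_value, max_age_seconds):
    """Return the session dict if the signed cookie is fresh, else an empty dict."""
    try:
        return signing.loads(cookie_value, salt=SALT, max_age=max_age_seconds)
    except signing.BadSignature:
        # SignatureExpired is a subclass of BadSignature, so stale and forged
        # cookies both fall back to an empty session, mirroring SessionStore.load().
        return {}


if __name__ == "__main__":
    cookie = signing.dumps({"user_id": 42}, salt=SALT)
    print(load_session_payload(cookie, max_age_seconds=60 * 60 * 12))  # {'user_id': 42}
    time.sleep(2)
    print(load_session_payload(cookie, max_age_seconds=1))  # {} -> treated as expired
```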