DB Backup Bug #2853

Merged
merged 46 commits into from Feb 29, 2024
Changes from 39 commits
Commits (46)
d8a6034
- pointing directly to the pg client directory
elipe17 Feb 16, 2024
6b0a1f0
- Add lots of logging
elipe17 Feb 16, 2024
9a94eb1
- fix lint
elipe17 Feb 16, 2024
9e670bd
- naming crontabs for easy understanding
elipe17 Feb 16, 2024
fe653b0
- Removing log entries
elipe17 Feb 16, 2024
d7b3040
- testing old method
elipe17 Feb 17, 2024
8cfdaed
Merge branch 'develop' into 2852-db-backup
elipe17 Feb 20, 2024
2097d8d
- installing postgres client 15
elipe17 Feb 20, 2024
fdfd31d
Merge branch '2852-db-backup' of https://github.com/raft-tech/TANF-ap…
elipe17 Feb 20, 2024
687c8d2
- print paths to see whats up
elipe17 Feb 20, 2024
27ff2db
- fix lint
elipe17 Feb 20, 2024
76c4b9d
- remove all traces of postgres before installing new postgres
elipe17 Feb 20, 2024
a2a073b
- disabling tests for speed
elipe17 Feb 20, 2024
e6a84a8
Revert "- remove all traces of postgres before installing new postgres"
elipe17 Feb 20, 2024
64c6907
Revert "- fix lint"
elipe17 Feb 20, 2024
5a2153a
Revert "- installing postgres client 15"
elipe17 Feb 20, 2024
1ce4d81
Revert "Revert "- fix lint""
elipe17 Feb 20, 2024
3b73f8d
- Add correct client to apt.yml
elipe17 Feb 20, 2024
30c3e51
- making tests even shorter
elipe17 Feb 20, 2024
72ad936
- trying clietn V14
elipe17 Feb 20, 2024
a2f94d3
- removing from apt and installing manually
elipe17 Feb 20, 2024
d65d6ec
Revert "- removing from apt and installing manually"
elipe17 Feb 20, 2024
2d41164
- revert
elipe17 Feb 20, 2024
68b9f8c
- Version 12 in apt.yml
elipe17 Feb 20, 2024
9faa526
- escaping quotes
elipe17 Feb 20, 2024
ce68563
Merge branch 'develop' into 2852-db-backup
elipe17 Feb 20, 2024
b606933
- forcing db name
elipe17 Feb 20, 2024
7257144
Merge branch '2852-db-backup' of https://github.com/raft-tech/TANF-ap…
elipe17 Feb 20, 2024
73a92a8
Revert "- forcing db name"
elipe17 Feb 20, 2024
447a774
- logging
elipe17 Feb 21, 2024
b3f0245
- more logging
elipe17 Feb 21, 2024
4600056
- Cleanup debug code
elipe17 Feb 21, 2024
2aeeac3
- Fix lint
elipe17 Feb 21, 2024
5a31397
Merge branch 'develop' into 2852-db-backup
elipe17 Feb 21, 2024
fcccf1a
- Adding back client search if hardcoded path doesn't exist
elipe17 Feb 22, 2024
70d2bdf
- fix syntax error
elipe17 Feb 22, 2024
9c69716
- fix lint
elipe17 Feb 22, 2024
d597f4f
- remove extra slash
elipe17 Feb 22, 2024
50a641d
Merge branch 'develop' into 2852-db-backup
ADPennington Feb 23, 2024
57fd384
- Adding log entries to backup task
elipe17 Feb 26, 2024
806569b
- Moving DB task to it's own file
elipe17 Feb 26, 2024
9521b3b
- fix lint
elipe17 Feb 26, 2024
83dcd4d
Merge branch 'develop' of https://github.com/raft-tech/TANF-app into …
elipe17 Feb 26, 2024
edd06b8
- Seperate out email tasks
elipe17 Feb 28, 2024
2f6d998
- update tests
elipe17 Feb 28, 2024
d1fd6e2
Merge branch 'develop' into 2852-db-backup
andrew-jameson Feb 28, 2024
75 changes: 51 additions & 24 deletions tdrs-backend/tdpservice/scheduling/db_backup.py
@@ -26,18 +26,26 @@ def get_system_values():
sys_values['SPACE'] = json.loads(OS_ENV['VCAP_APPLICATION'])['space_name']

# Postgres client pg_dump directory
pgdump_search = subprocess.Popen(["find", "/", "-iname", "pg_dump"],
stderr=subprocess.DEVNULL, stdout=subprocess.PIPE)
pgdump_search.wait()
pg_dump_paths, pgdump_search_error = pgdump_search.communicate()
pg_dump_paths = pg_dump_paths.decode("utf-8").split('\n')
if pg_dump_paths[0] == '':
raise Exception("Postgres client is not found")

for _ in pg_dump_paths:
if 'pg_dump' in str(_) and 'postgresql' in str(_):
sys_values['POSTGRES_CLIENT'] = _[:_.find('pg_dump')]
print("Found PG client here: {}".format(_))
sys_values['POSTGRES_CLIENT_DIR'] = "/home/vcap/deps/0/apt/usr/lib/postgresql/12/bin/"

# If the client directory and binaries don't exist, we need to find them.
if not (os.path.exists(sys_values['POSTGRES_CLIENT_DIR']) and
os.path.isfile(f"{sys_values['POSTGRES_CLIENT_DIR']}pg_dump")):
logger.warning("Couldn't find postgres client binaries at the hardcoded path: "
f"{sys_values['POSTGRES_CLIENT_DIR']}. Searching OS for client directory.")
pgdump_search = subprocess.Popen(["find", "/", "-iname", "pg_dump"],
stderr=subprocess.DEVNULL, stdout=subprocess.PIPE)
pgdump_search.wait()
pg_dump_paths, pgdump_search_error = pgdump_search.communicate()
pg_dump_paths = pg_dump_paths.decode("utf-8").split('\n')
if pg_dump_paths[0] == '':
raise Exception("Postgres client is not found")

for _ in pg_dump_paths:
if 'pg_dump' in str(_) and 'postgresql' in str(_):
sys_values['POSTGRES_CLIENT'] = _[:_.find('pg_dump')]

logger.info(f"Using postgres client at: {sys_values['POSTGRES_CLIENT_DIR']}")

sys_values['S3_ENV_VARS'] = json.loads(OS_ENV['VCAP_SERVICES'])['s3']
sys_values['S3_CREDENTIALS'] = sys_values['S3_ENV_VARS'][0]['credentials']
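For reference only (not part of this diff): the same "prefer the hardcoded directory, otherwise search" idea can be sketched with `shutil.which` instead of spawning `find /`. The default path below simply mirrors the one hardcoded above; the helper name is illustrative.

```python
import os
import shutil

# Mirrors the directory hardcoded in get_system_values(); adjust for other stacks.
DEFAULT_PG_BIN = "/home/vcap/deps/0/apt/usr/lib/postgresql/12/bin/"

def find_pg_client_dir(default_dir: str = DEFAULT_PG_BIN) -> str:
    """Return a directory containing pg_dump, preferring the hardcoded path."""
    if os.path.isfile(os.path.join(default_dir, "pg_dump")):
        return default_dir
    # Fall back to whatever pg_dump is already on PATH, if any.
    found = shutil.which("pg_dump")
    if found is None:
        raise RuntimeError("Postgres client (pg_dump) not found")
    return os.path.dirname(found) + os.sep
```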
@@ -82,12 +90,16 @@ def backup_database(file_name,
pg_dump -F c --no-acl --no-owner -f backup.pg postgresql://${USERNAME}:${PASSWORD}@${HOST}:${PORT}/${NAME}
"""
try:
os.system(postgres_client + "pg_dump -Fc --no-acl -f " + file_name + " -d " + database_uri)
print("Wrote pg dumpfile to {}".format(file_name))
cmd = postgres_client + "pg_dump -Fc --no-acl -f " + file_name + " -d " + database_uri
logger.info(f"Executing backup command: {cmd}")
os.system(cmd)
logger.info("Wrote pg dumpfile to {}".format(file_name))
file_size = os.path.getsize(file_name)
logger.info(f"Pg dumpfile size in bytes: {file_size}.")
return True
except Exception as e:
print(e)
return False
logger.error(f"Caught Exception while backing up database. Exception: {e}")
raise e


def restore_database(file_name, postgres_client, database_uri):
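The backup above shells out with `os.system`, which only returns an exit status. A minimal alternative sketch (not the PR's code) using `subprocess.run` with the same pg_dump flags, so a failed dump raises instead of passing silently:

```python
import subprocess

def pg_dump_backup(postgres_client: str, file_name: str, database_uri: str) -> None:
    """Run pg_dump in custom format; raises CalledProcessError if pg_dump fails."""
    cmd = [
        postgres_client + "pg_dump",
        "-Fc", "--no-acl",
        "-f", file_name,
        "-d", database_uri,
    ]
    # check=True converts a non-zero exit code into an exception.
    subprocess.run(cmd, check=True)
```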
@@ -100,10 +112,12 @@ def restore_database(file_name, postgres_client, database_uri):
DATABASE_DB_NAME] = get_database_credentials(database_uri)
os.environ['PGPASSWORD'] = DATABASE_PASSWORD
try:
logger.info("Begining database creation.")
os.system(postgres_client + "createdb " + "-U " + DATABASE_USERNAME + " -h " + DATABASE_HOST + " -T template0 "
+ DATABASE_DB_NAME)
logger.info("Completed database creation.")
except Exception as e:
print(e)
logger.error(f"Caught exception while creating the database. Exception: {e}.")
return False

# write .pgpass
@@ -112,8 +126,10 @@ def restore_database(file_name, postgres_client, database_uri):
os.environ['PGPASSFILE'] = '/home/vcap/.pgpass'
os.system('chmod 0600 /home/vcap/.pgpass')

logger.info("Begining database restoration.")
os.system(postgres_client + "pg_restore" + " -p " + DATABASE_PORT + " -h " +
DATABASE_HOST + " -U " + DATABASE_USERNAME + " -d " + DATABASE_DB_NAME + " " + file_name)
logger.info("Completed database restoration.")
return True
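The write of `/home/vcap/.pgpass` itself is collapsed in this view. For reference, libpq expects one `hostname:port:database:username:password` entry per line and ignores the file unless its mode is 0600 or stricter; a sketch under those assumptions (the path matches the one used above, the helper name is illustrative):

```python
import os
import stat

def write_pgpass(host, port, db_name, user, password, path="/home/vcap/.pgpass"):
    """Write a libpq password file so createdb/pg_restore don't prompt for a password."""
    # One entry per line: hostname:port:database:username:password
    with open(path, "w") as f:
        f.write(f"{host}:{port}:{db_name}:{user}:{password}\n")
    os.chmod(path, stat.S_IRUSR | stat.S_IWUSR)  # 0600, required by libpq
    os.environ["PGPASSFILE"] = path
```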


@@ -129,10 +145,12 @@ def upload_file(file_name, bucket, sys_values, object_name=None, region='us-gov-
if object_name is None:
object_name = os.path.basename(file_name)

logger.info(f"Uploading {file_name} to S3.")
s3_client = boto3.client('s3', region_name=sys_values['S3_REGION'])

s3_client.upload_file(file_name, bucket, object_name)
print("Uploaded {} to S3:{}{}".format(file_name, bucket, object_name))
response = s3_client.upload_file(file_name, bucket, object_name)
logger.info(f"S3 upload response: {response}")
Collaborator

@elipe17 should the response here be None if it's successful? Asking because that's what I'm observing so far.

   2024-02-23T12:25:01.07-0500 [APP/PROC/WEB/0] ERR 2024-02-23 17:25:01,070 INFO db_backup.py::upload_file:L152 :  S3 upload response: None
   2024-02-23T12:25:01.07-0500 [APP/PROC/WEB/0] ERR [2024-02-23 17:25:01,070: INFO/ForkPoolWorker-1] S3 upload response: None
   2024-02-23T12:25:01.07-0500 [APP/PROC/WEB/0] ERR 2024-02-23 17:25:01,071 INFO db_backup.py::upload_file:L153 :  Uploaded /tmp/backup.pg to s3://...

Should return either True or False: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/s3-uploading-files.html. Would like to investigate more.

Author
@elipe17 elipe17 Feb 26, 2024

I looked at the source code for our version of the library and it looks like it doesn't return anything. That is a feature added in a more recent version. I will remove the logging of the response since it does not exist for our version of boto3.
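For context on the thread above: in the boto3 version discussed here, `client.upload_file` returns `None`, so success is signalled by the absence of an exception rather than by a return value. A hedged sketch of that pattern, following the linked boto3 guide (the wrapper name and logging are illustrative, not this PR's code):

```python
import logging

import boto3
from botocore.exceptions import ClientError

logger = logging.getLogger(__name__)

def upload_backup(file_name, bucket, object_name, region):
    """Upload a file to S3; returns True on success, False if the upload raised."""
    s3_client = boto3.client("s3", region_name=region)
    try:
        # upload_file returns None; failures surface as exceptions.
        s3_client.upload_file(file_name, bucket, object_name)
    except ClientError as e:
        logger.error(f"S3 upload failed: {e}")
        return False
    logger.info(f"Uploaded {file_name} to s3://{bucket}/{object_name}")
    return True
```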

logger.info("Uploaded {} to s3://{}/{}.".format(file_name, bucket, object_name))
return True


@@ -150,9 +168,11 @@ def download_file(bucket,
"""
if object_name is None:
object_name = os.path.basename(file_name)
logger.info("Begining download for backup file.")
s3 = boto3.client('s3', region_name=region)
s3.download_file(bucket, object_name, file_name)
print("Downloaded s3 file {}{} to {}.".format(bucket, object_name, file_name))
response = s3.download_file(bucket, object_name, file_name)
logger.info(f"Response from s3 download: {response}.")
logger.info("Downloaded s3 file {}/{} to {}.".format(bucket, object_name, file_name))


def list_s3_files(sys_values):
@@ -212,7 +232,7 @@ def main(argv, sys_values):
if arg_to_backup:
# back up database
backup_database(file_name=arg_file,
postgres_client=sys_values['POSTGRES_CLIENT'],
postgres_client=sys_values['POSTGRES_CLIENT_DIR'],
database_uri=arg_database)

# upload backup file
@@ -221,6 +241,7 @@
sys_values=sys_values,
region=sys_values['S3_REGION'],
object_name="backup"+arg_file)
logger.info(f"Deleting {arg_file} from local storage.")
os.system('rm ' + arg_file)

elif arg_to_restore:
@@ -232,9 +253,10 @@

# restore database
restore_database(file_name=arg_file,
postgres_client=sys_values['POSTGRES_CLIENT'],
postgres_client=sys_values['POSTGRES_CLIENT_DIR'],
database_uri=arg_database)

logger.info(f"Deleting {arg_file} from local storage.")
os.system('rm ' + arg_file)


@@ -243,7 +265,12 @@ def run_backup(arg):
if settings.USE_LOCALSTACK is True:
logger.info("Won't backup locally")
else:
main([arg], sys_values=get_system_values())
try:
main([arg], sys_values=get_system_values())
except Exception as e:
logger.error(f"Caught Exception in run_backup. Exception: {e}.")
return False
return True


if __name__ == '__main__':
9 changes: 7 additions & 2 deletions tdrs-backend/tdpservice/scheduling/tasks.py
@@ -20,8 +20,13 @@ def postgres_backup(*args):
"""Run nightly postgres backup."""
arg = ''.join(args)
logger.debug("postgres_backup::run_backup() run with arg: " + arg)
run_backup(arg)
return True
logger.info("Begining database backup.")
result = run_backup(arg)
if result:
logger.info("Finished database backup.")
else:
logger.error("Failed to complete database backup.")
return result

@shared_task
def check_for_accounts_needing_deactivation_warning():
4 changes: 2 additions & 2 deletions tdrs-backend/tdpservice/settings/common.py
@@ -452,15 +452,15 @@ class Common(Configuration):
CELERY_TIMEZONE = 'UTC'

CELERY_BEAT_SCHEDULE = {
'name': {
'Database Backup': {
'task': 'tdpservice.scheduling.tasks.postgres_backup',
'schedule': crontab(minute='0', hour='4'), # Runs at midnight EST
'args': "-b",
'options': {
'expires': 15.0,
},
},
'name': {
'Account Deactivation Warning': {
Review comment: overriding the celery_beat_schedule entry with the same name was committed in November 2022 (#2204).

'task': 'tdpservice.scheduling.tasks.check_for_accounts_needing_deactivation_warning',
'schedule': crontab(day_of_week='*', hour='13', minute='0'), # Every day at 1pm UTC (9am EST)

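Following up on the review note above about #2204: a Python dict literal silently keeps only the last value for a repeated key, so two beat entries both keyed 'name' collapse into one and the earlier schedule is lost. A minimal illustration (entries trimmed to just the task path):

```python
CELERY_BEAT_SCHEDULE = {
    'name': {'task': 'tdpservice.scheduling.tasks.postgres_backup'},
    'name': {'task': 'tdpservice.scheduling.tasks.check_for_accounts_needing_deactivation_warning'},
}

print(len(CELERY_BEAT_SCHEDULE))  # 1 -- only one entry survives
print(CELERY_BEAT_SCHEDULE['name']['task'])
# tdpservice.scheduling.tasks.check_for_accounts_needing_deactivation_warning
```

Giving each entry a distinct key ('Database Backup', 'Account Deactivation Warning'), as this diff does, keeps both schedules registered.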
1 change: 1 addition & 0 deletions terraform/dev/main.tf
@@ -52,6 +52,7 @@ resource "cloudfoundry_service_instance" "database" {
name = "tdp-db-dev"
space = data.cloudfoundry_space.space.id
service_plan = data.cloudfoundry_service.rds.service_plans["micro-psql"]
json_params = "{\"version\": \"12\"}"
recursive_delete = true
}

1 change: 1 addition & 0 deletions terraform/production/main.tf
@@ -51,6 +51,7 @@ resource "cloudfoundry_service_instance" "database" {
name = "tdp-db-prod"
space = data.cloudfoundry_space.space.id
service_plan = data.cloudfoundry_service.rds.service_plans["medium-psql"]
json_params = "{\"version\": \"12\"}"
recursive_delete = true
}

1 change: 1 addition & 0 deletions terraform/staging/main.tf
@@ -51,6 +51,7 @@ resource "cloudfoundry_service_instance" "database" {
name = "tdp-db-staging"
space = data.cloudfoundry_space.space.id
service_plan = data.cloudfoundry_service.rds.service_plans["micro-psql"]
json_params = "{\"version\": \"12\"}"
recursive_delete = true
}
