Skip to content

Commit

Permalink
DB Backup Bug (#2853)
Browse files Browse the repository at this point in the history
* - pointing directly to the pg client directory

* - Add lots of logging

* - fix lint

* - naming crontabs for easy understanding

* - Removing log entries

* - testing old method

* - installing postgres client 15

* - print paths to see what's up

* - fix lint

* - remove all traces of postgres before installing new postgres

* - disabling tests for speed

* Revert "- remove all traces of postgres before installing new postgres"

This reverts commit 76c4b9d.

* Revert "- fix lint"

This reverts commit 27ff2db.

* Revert "- installing postgres client 15"

This reverts commit 2097d8d.

* Revert "Revert "- fix lint""

This reverts commit 64c6907.

* - Add correct client to apt.yml

* - making tests even shorter

* - trying client V14

* - removing from apt and installing manually

* Revert "- removing from apt and installing manually"

This reverts commit a2f94d3.

* - revert

* - Version 12 in apt.yml
- Tell terraform to specify db version

* - escaping quotes

* - forcing db name

* Revert "- forcing db name"

This reverts commit b606933.

* - logging
- every 5 min

* - more logging

* - Cleanup debug code
- update tf environments to force version 12 of pg server

* - Fix lint

* - Adding back client search if hardcoded path doesn't exist

* - fix syntax error

* - fix lint

* - remove extra slash

* - Adding log entries to backup task

* - Moving DB task to its own file

* - fix lint

* - Separate out email tasks
- update crontabs

* - update tests

---------

Co-authored-by: Alex P <[email protected]>
Co-authored-by: Andrew <[email protected]>
  • Loading branch information
3 people authored Feb 29, 2024
1 parent c8d0934 commit d559d4f
Show file tree
Hide file tree
Showing 8 changed files with 263 additions and 111 deletions.
71 changes: 71 additions & 0 deletions tdrs-backend/tdpservice/email/tasks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
"""Shared celery email tasks for beat."""

from __future__ import absolute_import
from tdpservice.users.models import User, AccountApprovalStatusChoices
from django.contrib.auth.models import Group
from django.conf import settings
from django.urls import reverse
from django.utils import timezone
from celery import shared_task
from datetime import datetime, timedelta
import logging
from tdpservice.email.helpers.account_access_requests import send_num_access_requests_email
from tdpservice.email.helpers.account_deactivation_warning import send_deactivation_warning_email


logger = logging.getLogger(__name__)


@shared_task
def check_for_accounts_needing_deactivation_warning():
    """Send deactivation warning emails for the 10-, 3-, and 1-day thresholds.

    For each threshold, the users returned by ``users_to_deactivate`` are
    passed to ``send_deactivation_warning_email`` together with the threshold,
    but only when that group is non-empty.
    """
    # Build every queryset up front, before any email goes out.
    warning_groups = [
        (days_left, users_to_deactivate(days_left)) for days_left in (10, 3, 1)
    ]

    for days_left, users in warning_groups:
        if users:
            send_deactivation_warning_email(users, days_left)

def users_to_deactivate(days):
    """Return approved users whose last login falls in a one-day inactivity window.

    :param days: number of days subtracted from the 180-day mark to find the
        start of the window
    :return: queryset of approved users whose ``last_login`` is between
        ``180 - days`` and ``180 - days + 1`` days ago (both bounds inclusive)
    """
    inactive_days = 180 - days
    # Read the clock once so both window bounds are measured from the same
    # instant; two separate readings make the window slightly under one day.
    now = datetime.now(tz=timezone.utc)
    return User.objects.filter(
        last_login__lte=now - timedelta(days=inactive_days),
        last_login__gte=now - timedelta(days=inactive_days + 1),
        account_approval_status=AccountApprovalStatusChoices.APPROVED,
    )

def get_ofa_admin_user_emails():
    """Return the distinct email addresses of OFA Admin and OFA System Admin users."""
    admin_groups = Group.objects.filter(name__in=('OFA Admin', 'OFA System Admin'))
    admin_users = User.objects.filter(groups__in=admin_groups)
    return admin_users.values_list('email', flat=True).distinct()

def get_num_access_requests():
    """Count the users whose account approval status is ACCESS_REQUEST."""
    pending_requests = User.objects.filter(
        account_approval_status=AccountApprovalStatusChoices.ACCESS_REQUEST,
    )
    return pending_requests.count()

@shared_task
def email_admin_num_access_requests():
    """Email the OFA admins the current number of users requesting access.

    The email context carries today's date, the access-request count, and a
    link to the admin user changelist on the frontend base URL.
    """
    recipients = get_ofa_admin_user_emails()
    changelist_path = reverse("admin:users_user_changelist")
    context = dict(
        date=datetime.today(),
        num_requests=get_num_access_requests(),
        admin_user_pg=f'{settings.FRONTEND_BASE_URL}{changelist_path}?o=-2',
    )

    # Arguments stay positional: recipients, plain-text body, subject, context.
    send_num_access_requests_email(
        recipients,
        '',
        'Number of Active Access Requests',
        context,
    )
207 changes: 171 additions & 36 deletions tdrs-backend/tdpservice/scheduling/db_backup.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,15 @@
from django.conf import settings
import boto3
import logging
from tdpservice.users.models import User
from django.contrib.admin.models import ADDITION, ContentType, LogEntry


logger = logging.getLogger(__name__)


OS_ENV = os.environ
content_type = ContentType.objects.get_for_model(LogEntry)

def get_system_values():
"""Return dict of keys and settings to use whether local or deployed."""
Expand All @@ -26,18 +29,26 @@ def get_system_values():
sys_values['SPACE'] = json.loads(OS_ENV['VCAP_APPLICATION'])['space_name']

# Postgres client pg_dump directory
pgdump_search = subprocess.Popen(["find", "/", "-iname", "pg_dump"],
stderr=subprocess.DEVNULL, stdout=subprocess.PIPE)
pgdump_search.wait()
pg_dump_paths, pgdump_search_error = pgdump_search.communicate()
pg_dump_paths = pg_dump_paths.decode("utf-8").split('\n')
if pg_dump_paths[0] == '':
raise Exception("Postgres client is not found")

for _ in pg_dump_paths:
if 'pg_dump' in str(_) and 'postgresql' in str(_):
sys_values['POSTGRES_CLIENT'] = _[:_.find('pg_dump')]
print("Found PG client here: {}".format(_))
sys_values['POSTGRES_CLIENT_DIR'] = "/home/vcap/deps/0/apt/usr/lib/postgresql/12/bin/"

# If the client directory and binaries don't exist, we need to find them.
if not (os.path.exists(sys_values['POSTGRES_CLIENT_DIR']) and
os.path.isfile(f"{sys_values['POSTGRES_CLIENT_DIR']}pg_dump")):
logger.warning("Couldn't find postgres client binaries at the hardcoded path: "
f"{sys_values['POSTGRES_CLIENT_DIR']}. Searching OS for client directory.")
pgdump_search = subprocess.Popen(["find", "/", "-iname", "pg_dump"],
stderr=subprocess.DEVNULL, stdout=subprocess.PIPE)
pgdump_search.wait()
pg_dump_paths, pgdump_search_error = pgdump_search.communicate()
pg_dump_paths = pg_dump_paths.decode("utf-8").split('\n')
if pg_dump_paths[0] == '':
raise Exception("Postgres client is not found")

for _ in pg_dump_paths:
if 'pg_dump' in str(_) and 'postgresql' in str(_):
sys_values['POSTGRES_CLIENT'] = _[:_.find('pg_dump')]

logger.info(f"Using postgres client at: {sys_values['POSTGRES_CLIENT_DIR']}")

sys_values['S3_ENV_VARS'] = json.loads(OS_ENV['VCAP_SERVICES'])['s3']
sys_values['S3_CREDENTIALS'] = sys_values['S3_ENV_VARS'][0]['credentials']
Expand Down Expand Up @@ -73,7 +84,8 @@ def get_system_values():

def backup_database(file_name,
postgres_client,
database_uri):
database_uri,
system_user):
"""Back up postgres database into file.
:param file_name: back up file name
Expand All @@ -82,15 +94,28 @@ def backup_database(file_name,
pg_dump -F c --no-acl --no-owner -f backup.pg postgresql://${USERNAME}:${PASSWORD}@${HOST}:${PORT}/${NAME}
"""
try:
os.system(postgres_client + "pg_dump -Fc --no-acl -f " + file_name + " -d " + database_uri)
print("Wrote pg dumpfile to {}".format(file_name))
cmd = postgres_client + "pg_dump -Fc --no-acl -f " + file_name + " -d " + database_uri
logger.info(f"Executing backup command: {cmd}")
os.system(cmd)
msg = "Successfully executed backup. Wrote pg dumpfile to {}".format(file_name)
logger.info(msg)
LogEntry.objects.log_action(
user_id=system_user.pk,
content_type_id=content_type.pk,
object_id=None,
object_repr="Executed Database Backup",
action_flag=ADDITION,
change_message=msg
)
file_size = os.path.getsize(file_name)
logger.info(f"Pg dumpfile size in bytes: {file_size}.")
return True
except Exception as e:
print(e)
return False
logger.error(f"Caught Exception while backing up database. Exception: {e}")
raise e


def restore_database(file_name, postgres_client, database_uri):
def restore_database(file_name, postgres_client, database_uri, system_user):
"""Restore the database from filename.
:param file_name: database backup filename
Expand All @@ -100,10 +125,23 @@ def restore_database(file_name, postgres_client, database_uri):
DATABASE_DB_NAME] = get_database_credentials(database_uri)
os.environ['PGPASSWORD'] = DATABASE_PASSWORD
try:
os.system(postgres_client + "createdb " + "-U " + DATABASE_USERNAME + " -h " + DATABASE_HOST + " -T template0 "
+ DATABASE_DB_NAME)
logger.info("Begining database creation.")
cmd = (postgres_client + "createdb " + "-U " + DATABASE_USERNAME + " -h " + DATABASE_HOST + " -T template0 "
+ DATABASE_DB_NAME)
logger.info(f"Executing create command: {cmd}")
os.system(cmd)
msg = "Completed database creation."
LogEntry.objects.log_action(
user_id=system_user.pk,
content_type_id=content_type.pk,
object_id=None,
object_repr="Executed Database create",
action_flag=ADDITION,
change_message=msg
)
logger.info(msg)
except Exception as e:
print(e)
logger.error(f"Caught exception while creating the database. Exception: {e}.")
return False

# write .pgpass
Expand All @@ -112,12 +150,25 @@ def restore_database(file_name, postgres_client, database_uri):
os.environ['PGPASSFILE'] = '/home/vcap/.pgpass'
os.system('chmod 0600 /home/vcap/.pgpass')

os.system(postgres_client + "pg_restore" + " -p " + DATABASE_PORT + " -h " +
DATABASE_HOST + " -U " + DATABASE_USERNAME + " -d " + DATABASE_DB_NAME + " " + file_name)
logger.info("Begining database restoration.")
cmd = (postgres_client + "pg_restore" + " -p " + DATABASE_PORT + " -h " +
DATABASE_HOST + " -U " + DATABASE_USERNAME + " -d " + DATABASE_DB_NAME + " " + file_name)
logger.info(f"Executing restore command: {cmd}")
os.system(cmd)
msg = "Completed database restoration."
LogEntry.objects.log_action(
user_id=system_user.pk,
content_type_id=content_type.pk,
object_id=None,
object_repr="Executed Database restore",
action_flag=ADDITION,
change_message=msg
)
logger.info(msg)
return True


def upload_file(file_name, bucket, sys_values, object_name=None, region='us-gov-west-1'):
def upload_file(file_name, bucket, sys_values, system_user, object_name=None, region='us-gov-west-1'):
"""Upload a file to an S3 bucket.
:param file_name: file name being uploaded to s3 bucket
Expand All @@ -129,16 +180,27 @@ def upload_file(file_name, bucket, sys_values, object_name=None, region='us-gov-
if object_name is None:
object_name = os.path.basename(file_name)

logger.info(f"Uploading {file_name} to S3.")
s3_client = boto3.client('s3', region_name=sys_values['S3_REGION'])

s3_client.upload_file(file_name, bucket, object_name)
print("Uploaded {} to S3:{}{}".format(file_name, bucket, object_name))
msg = "Successfully uploaded {} to s3://{}/{}.".format(file_name, bucket, object_name)
LogEntry.objects.log_action(
user_id=system_user.pk,
content_type_id=content_type.pk,
object_id=None,
object_repr="Executed database backup S3 upload",
action_flag=ADDITION,
change_message=msg
)
logger.info(msg)
return True


def download_file(bucket,
file_name,
region,
system_user,
object_name=None,
):
"""Download file from s3 bucket."""
Expand All @@ -150,9 +212,19 @@ def download_file(bucket,
"""
if object_name is None:
object_name = os.path.basename(file_name)
logger.info("Begining download for backup file.")
s3 = boto3.client('s3', region_name=region)
s3.download_file(bucket, object_name, file_name)
print("Downloaded s3 file {}{} to {}.".format(bucket, object_name, file_name))
msg = "Successfully downloaded s3 file {}/{} to {}.".format(bucket, object_name, file_name)
LogEntry.objects.log_action(
user_id=system_user.pk,
content_type_id=content_type.pk,
object_id=None,
object_repr="Executed database backup S3 download",
action_flag=ADDITION,
change_message=msg
)
logger.info(msg)


def list_s3_files(sys_values):
Expand Down Expand Up @@ -187,7 +259,7 @@ def get_database_credentials(database_uri):
return [username, password, host, port, database_name]


def main(argv, sys_values):
def main(argv, sys_values, system_user):
"""Handle commandline args."""
arg_file = "/tmp/backup.pg"
arg_database = sys_values['DATABASE_URI']
Expand All @@ -210,31 +282,75 @@ def main(argv, sys_values):
raise e

if arg_to_backup:
LogEntry.objects.log_action(
user_id=system_user.pk,
content_type_id=content_type.pk,
object_id=None,
object_repr="Begining Database Backup",
action_flag=ADDITION,
change_message="Begining database backup."
)
# back up database
backup_database(file_name=arg_file,
postgres_client=sys_values['POSTGRES_CLIENT'],
database_uri=arg_database)
postgres_client=sys_values['POSTGRES_CLIENT_DIR'],
database_uri=arg_database,
system_user=system_user)

# upload backup file
upload_file(file_name=arg_file,
bucket=sys_values['S3_BUCKET'],
sys_values=sys_values,
system_user=system_user,
region=sys_values['S3_REGION'],
object_name="backup"+arg_file)
object_name="backup"+arg_file,
)

LogEntry.objects.log_action(
user_id=system_user.pk,
content_type_id=content_type.pk,
object_id=None,
object_repr="Finished Database Backup",
action_flag=ADDITION,
change_message="Finished database backup."
)

logger.info(f"Deleting {arg_file} from local storage.")
os.system('rm ' + arg_file)

elif arg_to_restore:
LogEntry.objects.log_action(
user_id=system_user.pk,
content_type_id=content_type.pk,
object_id=None,
object_repr="Begining Database Restore",
action_flag=ADDITION,
change_message="Begining database restore."
)

# download file from s3
download_file(bucket=sys_values['S3_BUCKET'],
file_name=arg_file,
region=sys_values['S3_REGION'],
object_name="backup"+arg_file)
system_user=system_user,
object_name="backup"+arg_file,
)

# restore database
restore_database(file_name=arg_file,
postgres_client=sys_values['POSTGRES_CLIENT'],
database_uri=arg_database)

postgres_client=sys_values['POSTGRES_CLIENT_DIR'],
database_uri=arg_database,
system_user=system_user)

LogEntry.objects.log_action(
user_id=system_user.pk,
content_type_id=content_type.pk,
object_id=None,
object_repr="Finished Database Restore",
action_flag=ADDITION,
change_message="Finished database restore."
)

logger.info(f"Deleting {arg_file} from local storage.")
os.system('rm ' + arg_file)


Expand All @@ -243,8 +359,27 @@ def run_backup(arg):
if settings.USE_LOCALSTACK is True:
logger.info("Won't backup locally")
else:
main([arg], sys_values=get_system_values())
try:
system_user, created = User.objects.get_or_create(username='system')
if created:
logger.debug('Created reserved system user.')
main([arg], sys_values=get_system_values(), system_user=system_user)
except Exception as e:
logger.error(f"Caught Exception in run_backup. Exception: {e}.")
LogEntry.objects.log_action(
user_id=system_user.pk,
content_type_id=content_type.pk,
object_id=None,
object_repr="Exception in run_backup",
action_flag=ADDITION,
change_message=str(e)
)
return False
return True


if __name__ == '__main__':
main(sys.argv[1:], get_system_values())
system_user, created = User.objects.get_or_create(username='system')
if created:
logger.debug('Created reserved system user.')
main(sys.argv[1:], get_system_values(), system_user)
Loading

0 comments on commit d559d4f

Please sign in to comment.