Skip to content

Commit

Permalink
Add execute_sql_rds.sh script to run SQL using RDS Data API (#1847)
Browse files Browse the repository at this point in the history
### Time to review: __5 mins__

## Changes proposed

- Add `execute_sql_rds.sh` script to run SQL using RDS Data API
- Add some sample sql files that it can use

## Context for reviewers

During development, this can make it easier to run queries against the
dev database.

## Additional information

![Screenshot 2024-04-26 at 10 39
33](https://github.com/HHS/simpler-grants-gov/assets/3811269/38fed12d-ccf1-43ac-8dc5-2f896ed8d0d6)

---------

Co-authored-by: Michael Chouinard <[email protected]>
  • Loading branch information
jamesbursa and chouinar authored Oct 18, 2024
1 parent 9a7cf92 commit 1fa4cef
Show file tree
Hide file tree
Showing 6 changed files with 174 additions and 20 deletions.
128 changes: 128 additions & 0 deletions api/bin/execute_sql_rds.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
#!/usr/bin/env bash
#
# Execute some SQL using the RDS Data API.
#
# Examples:
#   ./execute_sql_rds.sh <sql/table_list.sql
#   ./execute_sql_rds.sh --cluster=api-prod --multiple <sql/select_from_foreign_table.sql
#
# When using --multiple, provide one SQL statement per input line.
#

set -o errexit -o pipefail

PROGRAM_NAME=$(basename "$0")
readonly PROGRAM_NAME

# ANSI color escape codes used by print_log / error messages.
readonly CYAN='\033[96m'
readonly GREEN='\033[92m'
readonly RED='\033[01;31m'
readonly END='\033[0m'

# Default RDS cluster identifier; override with --cluster=NAME.
readonly CLUSTER=api-dev

# Default to single-statement mode. Initialized here so later tests of
# "$multiple" are well-defined even when the --multiple flag is never passed.
multiple=""

USAGE="Usage: $PROGRAM_NAME [OPTION]
  --multiple         one SQL statement per input line (otherwise expects a single multi-line statement)
  --cluster=CLUSTER  target RDS cluster (default $CLUSTER)
"

# Orchestrate one run: parse flags, resolve cluster ARNs, make a scratch
# directory, then execute SQL read from stdin (one statement per line in
# --multiple mode, otherwise a single possibly multi-line statement).
main() {
  cluster="$CLUSTER"
  parse_arguments "$@"
  print_log "using cluster $cluster"
  read_cluster_arns
  create_temporary_directory

  # Note that to use jtbl, it needs to be installed directly
  # by the user with pip - if we wanted it to work with our poetry
  # setup we'd have to run many of these commands via poetry
  if ! command -v jtbl >/dev/null 2>&1
  then
    # %b interprets the escape sequences stored in the color variables; the
    # message is never used as a printf format string (SC2059).
    printf "\n%bjtbl command not found%b - please install before running: https://github.com/kellyjonbrazil/jtbl \n\n" "$RED" "$END"
    exit 1
  fi

  # count numbers the per-statement result files written by execute_statement.
  count=1
  if [[ -n "$multiple" ]]
  then
    # IFS= and -r preserve leading whitespace and backslashes in each SQL
    # line; the || clause still processes a final line without a trailing
    # newline.
    while IFS= read -r line || [[ -n "$line" ]]
    do
      execute_statement "$line"
      count=$((count + 1))
    done
  else
    # Whole stdin is one statement.
    execute_statement "$(cat)"
  fi
}


# Parse command-line flags, setting the "multiple" and "cluster" globals.
# Any unrecognized argument prints usage and aborts with status 1.
parse_arguments() {
  local arg
  for arg in "$@"; do
    case "$arg" in
      --multiple)
        print_log "multiple mode enabled (one statement per input line)"
        multiple=1
        ;;
      --cluster=*)
        cluster="${arg#--cluster=}"
        ;;
      *)
        echo "$USAGE"
        exit 1
        ;;
    esac
  done
}


# Look up the cluster ARN and the master-user secret ARN for $cluster and
# store them in the resource_arn / secret_arn globals used by
# execute_statement. Fails fast when no managed master-user secret exists.
read_cluster_arns() {
  resource_arn=$(aws rds describe-db-clusters --db-cluster-identifier="$cluster" \
    --query='DBClusters[0].DBClusterArn' --output=text)
  secret_arn=$(aws rds describe-db-clusters --db-cluster-identifier="$cluster" \
    --query='DBClusters[0].MasterUserSecret.SecretArn' --output=text)
  # "None" is what --output=text prints for a null JMESPath result, e.g. when
  # the cluster's master password is not managed by RDS. Passing that literal
  # on to rds-data would produce a confusing downstream error.
  if [[ -z "$secret_arn" || "$secret_arn" == "None" ]]; then
    printf "%bno master user secret found for cluster %s%b\n" "$RED" "$cluster" "$END" >&2
    exit 1
  fi
  print_log "database resource $resource_arn"
}


# Create a private per-run directory for result files and store its path in
# the tmp_dir global. mktemp -d creates the leaf with mode 700 and a
# non-predictable suffix, so concurrent runs (or a pre-created hostile path
# in /tmp) cannot collide with us.
create_temporary_directory() {
  local parent="/tmp/execute_sql_rds"
  # -m only applies to the leaf being created; if the parent already exists
  # its mode is left alone, matching the original behavior.
  mkdir -m "u=rwx,g=,o=" -p "$parent"
  tmp_dir=$(mktemp -d "$parent/execute_sql_rds.$(date "+%Y-%m-%d_%H:%M:%S").XXXXXX")
  print_log "temporary directory $tmp_dir"
}


# Run one SQL statement ($1) through the RDS Data API and present the result.
# Reads the resource_arn / secret_arn / tmp_dir / count globals. Row-returning
# statements are rendered as a table on stdout and saved as CSV + JSON under
# $tmp_dir; anything else has its raw API response echoed.
execute_statement() {
  print_log "$1"
  result_path="$tmp_dir/raw_result_$count.json"
  json_result_path="$tmp_dir/result_$count.json"
  csv_result_path="$tmp_dir/result_$count.csv"

  # --continue-after-timeout lets long-running statements finish even if the
  # API call itself times out; JSON record formatting gives us the
  # .formattedRecords field post-processed below.
  aws rds-data execute-statement \
    --resource-arn "$resource_arn" \
    --database "app" \
    --secret-arn "$secret_arn" \
    --sql "$1" \
    --continue-after-timeout \
    --format-records-as JSON \
    >"$result_path"

  # Only row-returning statements include formattedRecords in the response.
  if grep -q formattedRecords "$result_path"
  then
    # Print a pretty table to the user
    jq -r .formattedRecords "$result_path" | jtbl --truncate --markdown
    # Pull the results out and write to a CSV + JSON
    jq -r .formattedRecords "$result_path" | jtbl --csv > "$csv_result_path"
    jq -r .formattedRecords "$result_path" > "$json_result_path"
    print_log "----"
    print_log "Output written to $tmp_dir/"
  else
    cat "$result_path"
  fi
}


# Utility functions

# Log a timestamped, colorized message to stdout.
# Arguments: message text; all arguments are joined by single spaces ("$*").
print_log() {
  # %b interprets the escape sequences held in the color variables, while the
  # message itself goes through %s so user data is never treated as a printf
  # format string (SC2059).
  printf "%b%s %b%s: %b%s\n" "$CYAN" "$(date "+%Y-%m-%d %H:%M:%S")" "$GREEN" "$PROGRAM_NAME" "$END" "$*"
}

# Entry point: forward all command-line arguments to main.
main "$@"
8 changes: 7 additions & 1 deletion api/bin/setup_localstack.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import logging

import boto3
import botocore.client
import botocore.exceptions

Expand Down Expand Up @@ -27,7 +28,12 @@ def does_s3_bucket_exist(s3_client: botocore.client.BaseClient, bucket_name: str

def setup_s3() -> None:
s3_config = S3Config()
s3_client = get_s3_client(s3_config)
# This is only used locally - to avoid any accidental running of commands here
# against a real AWS account (ie. you've authed in your local terminal where you're running this)
# we'll override the access keys explicitly.
s3_client = get_s3_client(
s3_config, boto3.Session(aws_access_key_id="NO_CREDS", aws_secret_access_key="NO_CREDS")
)

if s3_config.s3_opportunity_bucket is None:
raise Exception("S3_OPPORTUNITY_BUCKET env var must be set")
Expand Down
18 changes: 18 additions & 0 deletions api/bin/sql/select_from_foreign_table.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
SELECT * FROM legacy.tforecast ORDER BY created_date DESC LIMIT 8;
SELECT * FROM legacy.tforecast_hist ORDER BY created_date DESC LIMIT 8;
SELECT * FROM legacy.tapplicanttypes_forecast ORDER BY created_date DESC LIMIT 8;
SELECT * FROM legacy.tapplicanttypes_forecast_hist ORDER BY created_date DESC LIMIT 8;
SELECT * FROM legacy.tfundactcat_forecast ORDER BY created_date DESC LIMIT 8;
SELECT * FROM legacy.tfundactcat_forecast_hist ORDER BY created_date DESC LIMIT 8;
SELECT * FROM legacy.tfundinstr_forecast ORDER BY created_date DESC LIMIT 8;
SELECT * FROM legacy.tfundinstr_forecast_hist ORDER BY created_date DESC LIMIT 8;
SELECT * FROM legacy.topportunity ORDER BY created_date DESC LIMIT 8;
SELECT * FROM legacy.topportunity_cfda ORDER BY created_date DESC LIMIT 8;
SELECT * FROM legacy.tsynopsis ORDER BY created_date DESC LIMIT 8;
SELECT * FROM legacy.tsynopsis_hist ORDER BY created_date DESC LIMIT 8;
SELECT * FROM legacy.tapplicanttypes_synopsis ORDER BY created_date DESC LIMIT 8;
SELECT * FROM legacy.tapplicanttypes_synopsis_hist ORDER BY created_date DESC LIMIT 8;
SELECT * FROM legacy.tfundactcat_synopsis ORDER BY created_date DESC LIMIT 8;
SELECT * FROM legacy.tfundactcat_synopsis_hist ORDER BY created_date DESC LIMIT 8;
SELECT * FROM legacy.tfundinstr_synopsis ORDER BY created_date DESC LIMIT 8;
SELECT * FROM legacy.tfundinstr_synopsis_hist ORDER BY created_date DESC LIMIT 8;
15 changes: 15 additions & 0 deletions api/bin/sql/table_list.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
-- List all non-system relations (tables, foreign tables, views, materialized
-- views) with their schema, owner, total on-disk size, and the timestamps of
-- the most recent manual and automatic ANALYZE runs.
SELECT n.nspname as "Schema",
c.relname as "Name",
-- Translate the single-character relkind code into a human-readable type.
CASE c.relkind WHEN 'r' THEN 'table' WHEN 'v' THEN 'view' WHEN 'm' THEN 'materialized view' WHEN 'i' THEN 'index' WHEN 'S' THEN 'sequence' WHEN 't' THEN 'TOAST table' WHEN 'f' THEN 'foreign table' WHEN 'p' THEN 'partitioned table' WHEN 'I' THEN 'partitioned index' END as "Type",
pg_catalog.pg_get_userbyid(c.relowner) as "Owner",
pg_catalog.pg_size_pretty(pg_catalog.pg_table_size(c.oid)) as "Size",
pg_stat_get_last_analyze_time(c.oid) AS last_analyze,
pg_stat_get_last_autoanalyze_time(c.oid) AS last_autoanalyze
FROM pg_catalog.pg_class c
LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
LEFT JOIN pg_catalog.pg_am am ON am.oid = c.relam
-- Keep ordinary tables ('r'), foreign tables ('f'), views ('v'), and
-- materialized views ('m') only.
-- NOTE(review): the '' entry matches an empty relkind — presumably carried
-- over from psql's internal \d query for old-server compatibility; confirm.
WHERE c.relkind IN ('r','f','v','m','')
AND n.nspname <> 'pg_catalog'
AND n.nspname !~ '^pg_toast'
AND n.nspname <> 'information_schema'
ORDER BY 1,2;
18 changes: 0 additions & 18 deletions api/local.env
Original file line number Diff line number Diff line change
Expand Up @@ -70,24 +70,6 @@ SEARCH_PORT=9200
SEARCH_USE_SSL=FALSE
SEARCH_VERIFY_CERTS=FALSE

############################
# AWS Defaults
############################
# For these secret access keys, don't
# add them to this file to avoid mistakenly
# committing them. Set these in your shell
# by doing `export AWS_ACCESS_KEY_ID=whatever`
AWS_ACCESS_KEY_ID=DO_NOT_SET_HERE
AWS_SECRET_ACCESS_KEY=DO_NOT_SET_HERE
# These next two are commented out as we
# don't have configuration for individuals
# to use these at the moment and boto3
# tries to use them first before the keys above.
#AWS_SECURITY_TOKEN=DO_NOT_SET_HERE
#AWS_SESSION_TOKEN=DO_NOT_SET_HERE

AWS_DEFAULT_REGION=us-east-1

############################
# Localstack
############################
Expand Down
7 changes: 6 additions & 1 deletion api/src/adapters/aws/s3_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,17 @@ class S3Config(PydanticBaseEnvConfig):
s3_opportunity_bucket: str | None = None


def get_s3_client(s3_config: S3Config | None = None) -> botocore.client.BaseClient:
def get_s3_client(
s3_config: S3Config | None = None, session: boto3.Session | None = None
) -> botocore.client.BaseClient:
if s3_config is None:
s3_config = S3Config()

params = {}
if s3_config.s3_endpoint_url is not None:
params["endpoint_url"] = s3_config.s3_endpoint_url

if session is not None:
return session.client("s3", **params)

return boto3.client("s3", **params)

0 comments on commit 1fa4cef

Please sign in to comment.