diff --git a/api/bin/execute_sql_rds.sh b/api/bin/execute_sql_rds.sh
new file mode 100755
index 000000000..7809465c7
--- /dev/null
+++ b/api/bin/execute_sql_rds.sh
@@ -0,0 +1,128 @@
+#!/usr/bin/env bash
+#
+# Execute some SQL using the RDS Data API.
+# Results are printed as a markdown table and also written as CSV + JSON to a
+# temporary directory under /tmp.
+#
+# Examples:
+#   ./execute_sql_rds.sh --cluster=<cluster-id> <sql/table_list.sql
+#   ./execute_sql_rds.sh --cluster=<cluster-id> --multiple <sql/select_from_foreign_table.sql
+#   echo "SELECT version();" | ./execute_sql_rds.sh --cluster=<cluster-id>
+#
+
+PROGRAM_NAME=$(basename "$0")
+
+CYAN='\033[96m'
+GREEN='\033[92m'
+RED='\033[01;31m'
+END='\033[0m'
+
+# Defaults, may be overridden by command line arguments
+cluster="api-dev"
+multiple=
+
+USAGE="usage: $PROGRAM_NAME [--cluster=<cluster-id>] [--multiple]
+
+Reads SQL statements from standard input and executes them against an RDS
+cluster using the RDS Data API.
+
+options:
+  --cluster=<cluster-id>  RDS cluster identifier (default: $cluster)
+  --multiple              execute each line of input as a separate statement
+"
+
+
+main() {
+  parse_arguments "$@"
+  read_cluster_arns
+  create_temporary_directory
+
+  if ! command -v jtbl >/dev/null 2>&1
+  then
+    printf "\n${RED}jtbl command not found${END} - please install before running: https://github.com/kellyjonbrazil/jtbl \n\n"
+    exit 1
+  fi
+
+  count=1
+  if [ -n "$multiple" ]
+  then
+    while read -r line
+    do
+      execute_statement "$line"
+      count=$((count + 1))
+    done
+  else
+    execute_statement "$(cat)"
+  fi
+}
+
+
+parse_arguments() {
+  for arg in "$@"
+  do
+    if [ "$arg" == "--multiple" ]; then
+      print_log "multiple mode enabled (one statement per input line)"
+      multiple=1
+    elif [[ "$arg" =~ ^--cluster=(.*)$ ]]; then
+      cluster="${BASH_REMATCH[1]}"
+    else
+      echo "$USAGE"
+      exit 1
+    fi
+  done
+}
+
+
+read_cluster_arns() {
+  resource_arn=$(aws rds describe-db-clusters --db-cluster-identifier="$cluster" \
+    --query='DBClusters[0].DBClusterArn' --output=text)
+  secret_arn=$(aws rds describe-db-clusters --db-cluster-identifier="$cluster" \
+    --query='DBClusters[0].MasterUserSecret.SecretArn' --output=text)
+  print_log "database resource $resource_arn"
+}
+
+
+create_temporary_directory() {
+  tmp_dir="/tmp/execute_sql_rds/execute_sql_rds.$(date "+%Y-%m-%d_%H:%M:%S")"
+  mkdir -m "u=rwx,g=,o=" -p "$tmp_dir"
+  print_log "temporary directory $tmp_dir"
+}
+
+
+execute_statement() {
+  print_log "$1"
+  result_path="$tmp_dir/raw_result_$count.json"
+  json_result_path="$tmp_dir/result_$count.json"
+  csv_result_path="$tmp_dir/result_$count.csv"
+
+  aws rds-data execute-statement \
+    --resource-arn "$resource_arn" \
+    --database "app" \
+    --secret-arn "$secret_arn" \
+    --sql "$1" \
+    --continue-after-timeout \
+    --format-records-as JSON \
+    >"$result_path"
+
+  if grep formattedRecords "$result_path" >/dev/null
+  then
+    # Print a pretty table to the user
+    jq -r .formattedRecords "$result_path" | jtbl --truncate --markdown
+    # Pull the results out and write to a CSV + JSON
+    jq -r .formattedRecords "$result_path" | jtbl --csv >"$csv_result_path"
+    jq -r .formattedRecords "$result_path" >"$json_result_path"
+    print_log "----"
+    print_log "Output written to $tmp_dir/"
+  else
+    cat "$result_path"
+  fi
+}
+
+
+# Utility functions
+print_log() {
+  printf "$CYAN%s $GREEN%s: $END%s\\n" "$(date "+%Y-%m-%d %H:%M:%S")" "$PROGRAM_NAME" "$*"
+}
+
+# Entry point
+main "$@"
diff --git a/api/bin/setup_localstack.py b/api/bin/setup_localstack.py
index f9107bf89..2c78861a6 100644
--- a/api/bin/setup_localstack.py
+++ b/api/bin/setup_localstack.py
@@ -1,5 +1,6 @@
 import logging
 
+import boto3
 import botocore.client
 import botocore.exceptions
 
@@ -27,7 +28,12 @@ def does_s3_bucket_exist(s3_client: botocore.client.BaseClient, bucket_name: str
 
 def setup_s3() -> None:
     s3_config = S3Config()
-    s3_client = get_s3_client(s3_config)
+    # This is only used locally. To avoid accidentally running commands here
+    # against a real AWS account (i.e. you've authed in your local terminal
+    # where you're running this), we override the access keys explicitly.
+    s3_client = get_s3_client(
+        s3_config, boto3.Session(aws_access_key_id="NO_CREDS", aws_secret_access_key="NO_CREDS")
+    )
 
     if s3_config.s3_opportunity_bucket is None:
         raise Exception("S3_OPPORTUNITY_BUCKET env var must be set")
diff --git a/api/bin/sql/select_from_foreign_table.sql b/api/bin/sql/select_from_foreign_table.sql
new file mode 100644
index 000000000..6c4f56f24
--- /dev/null
+++ b/api/bin/sql/select_from_foreign_table.sql
@@ -0,0 +1,18 @@
+SELECT * FROM legacy.tforecast ORDER BY created_date DESC LIMIT 8;
+SELECT * FROM legacy.tforecast_hist ORDER BY created_date DESC LIMIT 8;
+SELECT * FROM legacy.tapplicanttypes_forecast ORDER BY created_date DESC LIMIT 8;
+SELECT * FROM legacy.tapplicanttypes_forecast_hist ORDER BY created_date DESC LIMIT 8;
+SELECT * FROM legacy.tfundactcat_forecast ORDER BY created_date DESC LIMIT 8;
+SELECT * FROM legacy.tfundactcat_forecast_hist ORDER BY created_date DESC LIMIT 8;
+SELECT * FROM legacy.tfundinstr_forecast ORDER BY created_date DESC LIMIT 8;
+SELECT * FROM legacy.tfundinstr_forecast_hist ORDER BY created_date DESC LIMIT 8;
+SELECT * FROM legacy.topportunity ORDER BY created_date DESC LIMIT 8;
+SELECT * FROM legacy.topportunity_cfda ORDER BY created_date DESC LIMIT 8;
+SELECT * FROM legacy.tsynopsis ORDER BY created_date DESC LIMIT 8;
+SELECT * FROM legacy.tsynopsis_hist ORDER BY created_date DESC LIMIT 8;
+SELECT * FROM legacy.tapplicanttypes_synopsis ORDER BY created_date DESC LIMIT 8;
+SELECT * FROM legacy.tapplicanttypes_synopsis_hist ORDER BY created_date DESC LIMIT 8;
+SELECT * FROM legacy.tfundactcat_synopsis ORDER BY created_date DESC LIMIT 8;
+SELECT * FROM legacy.tfundactcat_synopsis_hist ORDER BY created_date DESC LIMIT 8;
+SELECT * FROM legacy.tfundinstr_synopsis ORDER BY created_date DESC LIMIT 8;
+SELECT * FROM legacy.tfundinstr_synopsis_hist ORDER BY created_date DESC LIMIT 8;
diff --git a/api/bin/sql/table_list.sql b/api/bin/sql/table_list.sql
new file mode 100644
index 000000000..f365852bd
--- /dev/null
+++ b/api/bin/sql/table_list.sql
@@ -0,0 +1,15 @@
+SELECT n.nspname as "Schema",
+       c.relname as "Name",
+       CASE c.relkind WHEN 'r' THEN 'table' WHEN 'v' THEN 'view' WHEN 'm' THEN 'materialized view' WHEN 'i' THEN 'index' WHEN 'S' THEN 'sequence' WHEN 't' THEN 'TOAST table' WHEN 'f' THEN 'foreign table' WHEN 'p' THEN 'partitioned table' WHEN 'I' THEN 'partitioned index' END as "Type",
+       pg_catalog.pg_get_userbyid(c.relowner) as "Owner",
+       pg_catalog.pg_size_pretty(pg_catalog.pg_table_size(c.oid)) as "Size",
+       pg_stat_get_last_analyze_time(c.oid) AS last_analyze,
+       pg_stat_get_last_autoanalyze_time(c.oid) AS last_autoanalyze
+FROM pg_catalog.pg_class c
+     LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
+     LEFT JOIN pg_catalog.pg_am am ON am.oid = c.relam
+WHERE c.relkind IN ('r','f','v','m','')
+      AND n.nspname <> 'pg_catalog'
+      AND n.nspname !~ '^pg_toast'
+      AND n.nspname <> 'information_schema'
+ORDER BY 1,2;
diff --git a/api/local.env b/api/local.env
index 2bfd4fca1..d6248be61 100644
--- a/api/local.env
+++ b/api/local.env
@@ -70,24 +70,6 @@ SEARCH_PORT=9200
 SEARCH_USE_SSL=FALSE
 SEARCH_VERIFY_CERTS=FALSE
 
-############################
-# AWS Defaults
-############################
-# For these secret access keys, don't
-# add them to this file to avoid mistakenly
-# committing them. Set these in your shell
-# by doing `export AWS_ACCESS_KEY_ID=whatever`
-AWS_ACCESS_KEY_ID=DO_NOT_SET_HERE
-AWS_SECRET_ACCESS_KEY=DO_NOT_SET_HERE
-# These next two are commented out as we
-# don't have configuration for individuals
-# to use these at the moment and boto3
-# tries to use them first before the keys above.
-#AWS_SECURITY_TOKEN=DO_NOT_SET_HERE
-#AWS_SESSION_TOKEN=DO_NOT_SET_HERE
-
-AWS_DEFAULT_REGION=us-east-1
-
 ############################
 # Localstack
 ############################
diff --git a/api/src/adapters/aws/s3_adapter.py b/api/src/adapters/aws/s3_adapter.py
index ad43d20dc..4a5f13fb8 100644
--- a/api/src/adapters/aws/s3_adapter.py
+++ b/api/src/adapters/aws/s3_adapter.py
@@ -19,7 +19,9 @@ class S3Config(PydanticBaseEnvConfig):
     s3_opportunity_bucket: str | None = None
 
 
-def get_s3_client(s3_config: S3Config | None = None) -> botocore.client.BaseClient:
+def get_s3_client(
+    s3_config: S3Config | None = None, session: boto3.Session | None = None
+) -> botocore.client.BaseClient:
     if s3_config is None:
         s3_config = S3Config()
 
@@ -27,4 +29,7 @@ def get_s3_client(s3_config: S3Config | None = None) -> botocore.client.BaseClie
     if s3_config.s3_endpoint_url is not None:
         params["endpoint_url"] = s3_config.s3_endpoint_url
 
+    if session is not None:
+        return session.client("s3", **params)
+
     return boto3.client("s3", **params)
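
A quick end-to-end sketch of the new pieces working together (assumes the AWS CLI, jq, and jtbl are installed and your shell is authed against the intended AWS account; the cluster identifier is a placeholder):

    cd api/bin
    # List all tables with sizes and last-analyze times
    ./execute_sql_rds.sh --cluster=<cluster-id> <sql/table_list.sql
    # Sample the 8 newest rows from each legacy foreign table, one statement per line
    ./execute_sql_rds.sh --cluster=<cluster-id> --multiple <sql/select_from_foreign_table.sql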