Refactor code to use managed_cursor context manager #10

Closed
wants to merge 7 commits into from
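This PR refactors the middleware query helpers to accept a psycopg2 cursor, supplied by a `managed_cursor` context manager, instead of each function opening and closing its own cursor on a raw connection. The context manager's definition is not part of this excerpt; a minimal sketch of what such a helper typically looks like, with the commit/rollback behavior as an assumption:

```python
# Minimal sketch of a managed_cursor context manager. The PR's actual
# definition is not shown in this diff, so the commit/rollback behavior
# here is an assumption.
from contextlib import contextmanager

import psycopg2.extensions


@contextmanager
def managed_cursor(connection: psycopg2.extensions.connection):
    """Yield a cursor, commit on success, roll back on failure, always close."""
    cursor = connection.cursor()
    try:
        yield cursor
        connection.commit()
    except Exception:
        connection.rollback()
        raise
    finally:
        cursor.close()
```

With this in place, a caller opens the cursor once at the boundary and threads it through the helpers, which is the pattern the diff below introduces by swapping every `conn` parameter for a `cursor` parameter.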
2 changes: 1 addition & 1 deletion .github/CODEOWNERS
@@ -1,2 +1,2 @@
/client/ @joshuagraber
* @josh-chamberlain
* @mbodeantor
49 changes: 24 additions & 25 deletions .github/workflows/test_api.yml
@@ -1,25 +1,24 @@
#name: Test API using Pytest
#
#on:
# pull_request:
#
#jobs:
# test_api:
# env:
# SECRET_KEY: ${{ secrets.SECRET_KEY }}
# DEV_DB_CONN_STRING: ${{secrets.DEV_DB_CONN_STRING}}
# name: Test API
# runs-on: ubuntu-latest
# steps:
# - uses: actions/checkout@v4
# - uses: actions/setup-python@v4
# with:
# python-version: '3.11'
# - name: Install dependencies
# run: |
# python -m pip install --upgrade pip
# pip install -r requirements.txt
# python -m spacy download en_core_web_sm
# pip install pytest pytest-cov
# - name: Run tests
# run: pytest tests --doctest-modules --junitxml=junit/test-results.xml --cov=com --cov-report=xml --cov-report=html
name: Test API using Pytest

on:
pull_request:

jobs:
test_api:
env:
SECRET_KEY: ${{ secrets.SECRET_KEY }}
DEV_DB_CONN_STRING: ${{secrets.DEV_DB_CONN_STRING}}
name: Test API
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v4
with:
python-version: '3.11'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
python -m spacy download en_core_web_sm
pip install pytest pytest-cov
pytest tests --doctest-modules --junitxml=junit/test-results.xml --cov=com --cov-report=xml --cov-report=html
4 changes: 1 addition & 3 deletions README.md
@@ -1,6 +1,4 @@
# data-sources-app-v2

Development of the next big iteration of the data sources app according to https://github.com/Police-Data-Accessibility-Project/data-sources-app/issues/248
# data-sources-app

An API and UI for searching, using, and maintaining Data Sources.

46 changes: 32 additions & 14 deletions middleware/archives_queries.py
@@ -1,4 +1,7 @@
from typing import List, Dict, Any, Optional, Tuple

import psycopg2.extensions

from utilities.common import convert_dates_to_strings
from psycopg2.extensions import connection as PgConnection

@@ -11,14 +14,13 @@
]


def archives_get_results(conn: PgConnection) -> list[tuple[Any, ...]]:
def archives_get_results(cursor: psycopg2.extensions.cursor) -> list[tuple[Any, ...]]:
"""
Pulls data sources for the automatic archives script that performs caching

:param conn: A psycopg2 connection object to a PostgreSQL database.
:param cursor: A psycopg2 cursor object to a PostgreSQL database.
:return: A list of dictionaries representing the rows matching the query conditions.
"""
cursor = conn.cursor()
sql_query = """
SELECT
airtable_uid,
@@ -38,17 +40,17 @@ def archives_get_results(conn: PgConnection) -> list[tuple[Any, ...]]:

def archives_get_query(
test_query_results: Optional[List[Dict[str, Any]]] = None,
conn: Optional[PgConnection] = None,
cursor: psycopg2.extensions.cursor = None,
) -> List[Dict[str, Any]]:
"""
Processes the archives get results, either from the database or a provided set of test results, and converts dates to strings.

:param test_query_results: A list of dictionaries representing test query results, if any.
:param conn: A psycopg2 connection object to a PostgreSQL database.
:param cursor: A psycopg2 cursor object to a PostgreSQL database.
:return: A list of dictionaries with the query results after processing and date conversion.
"""
results = (
archives_get_results(conn) if not test_query_results else test_query_results
archives_get_results(cursor) if not test_query_results else test_query_results
)
archives_combined_results = [
dict(zip(ARCHIVES_GET_COLUMNS, result)) for result in results
@@ -61,33 +63,49 @@ def archives_get_query(


def archives_put_broken_as_of_results(
id: str, broken_as_of: str, last_cached: str, conn: PgConnection
id: str, broken_as_of: str, last_cached: str, cursor: psycopg2.extensions.cursor
) -> None:
"""
Updates the data_sources table setting the url_status to 'broken' for a given id.

:param id: The airtable_uid of the data source.
:param broken_as_of: The date when the source was identified as broken.
:param last_cached: The last cached date of the data source.
:param conn: A psycopg2 connection object to a PostgreSQL database.
:param cursor: A psycopg2 cursor object to a PostgreSQL database.
"""
cursor = conn.cursor()
sql_query = "UPDATE data_sources SET url_status = 'broken', broken_source_url_as_of = '{0}', last_cached = '{1}' WHERE airtable_uid = '{2}'"
cursor.execute(sql_query.format(broken_as_of, last_cached, id))
cursor.close()


def archives_put_last_cached_results(
id: str, last_cached: str, conn: PgConnection
id: str, last_cached: str, cursor: psycopg2.extensions.cursor
) -> None:
"""
Updates the last_cached field in the data_sources table for a given id.

:param id: The airtable_uid of the data source.
:param last_cached: The last cached date to be updated.
:param conn: A psycopg2 connection object to a PostgreSQL database.
:param cursor: A psycopg2 cursor object to a PostgreSQL database.
"""
cursor = conn.cursor()
sql_query = "UPDATE data_sources SET last_cached = '{0}' WHERE airtable_uid = '{1}'"
cursor.execute(sql_query.format(last_cached, id))
cursor.close()


def archives_put_query(
id: str = "",
broken_as_of: str = "",
last_cached: str = "",
cursor: Optional[psycopg2.extensions.cursor] = None,
) -> None:
"""
Updates the data_sources table based on the provided parameters, marking sources as broken or updating the last cached date.

:param id: The airtable_uid of the data source.
:param broken_as_of: The date when the source was identified as broken, if applicable.
:param last_cached: The last cached date to be updated.
:param cursor: A psycopg2 cursor object to a PostgreSQL database.
"""
if broken_as_of:
archives_put_broken_as_of_results(id, broken_as_of, last_cached, cursor)
else:
archives_put_last_cached_results(id, last_cached, cursor)
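Under the new convention, a caller of these archive helpers opens one managed cursor and passes it through each call. A hedged usage sketch; the import path for `managed_cursor`, the `airtable_uid` result key, and the example date value are assumptions:

```python
# Hypothetical caller of the refactored archive helpers. The import path
# for managed_cursor, the airtable_uid key, and the date value are
# assumptions, not shown in this diff.
import os

import psycopg2

from middleware.archives_queries import archives_get_query, archives_put_query
from middleware.util import managed_cursor  # assumed location

conn = psycopg2.connect(os.getenv("DEV_DB_CONN_STRING"))
with managed_cursor(conn) as cursor:
    # Fetch the sources due for caching, then record one successful run.
    sources = archives_get_query(cursor=cursor)
    if sources:
        archives_put_query(
            id=sources[0]["airtable_uid"],
            last_cached="2024-01-01",
            cursor=cursor,
        )
```

Because `archives_put_query` receives an empty `broken_as_of`, this call takes the `archives_put_last_cached_results` branch; passing a date for `broken_as_of` would mark the source broken instead.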
105 changes: 60 additions & 45 deletions middleware/data_source_queries.py
@@ -1,4 +1,7 @@
from typing import List, Dict, Any, Optional, Tuple, Union

import psycopg2.extensions

from utilities.common import convert_dates_to_strings, format_arrays
from psycopg2.extensions import connection as PgConnection

@@ -46,8 +49,6 @@
"last_cached",
]

DATA_SOURCES_OUTPUT_COLUMNS = DATA_SOURCES_APPROVED_COLUMNS + ["agency_name"]

AGENCY_APPROVED_COLUMNS = [
"homepage_url",
"count_data_sources",
@@ -74,31 +75,17 @@
"defunct_year",
]

DATA_SOURCES_MAP_COLUMN = [
"data_source_id",
"name",
"agency_id",
"agency_name",
"state_iso",
"municipality",
"county_name",
"record_type",
"lat",
"lng",
]


def data_source_by_id_results(
conn: PgConnection, data_source_id: str
cursor: psycopg2.extensions.cursor, data_source_id: str
) -> Union[tuple[Any, ...], None]:
"""
Fetches a single data source by its ID, including related agency information, from a PostgreSQL database.

:param conn: A psycopg2 connection object to a PostgreSQL database.
:param cursor: A psycopg2 cursor object to a PostgreSQL database.
:param data_source_id: The unique identifier for the data source.
:return: A dictionary containing the data source and its related agency details.
"""
cursor = conn.cursor()

data_source_approved_columns = [
f"data_sources.{approved_column}"
@@ -130,26 +117,25 @@ def data_source_by_id_results(

cursor.execute(sql_query)
result = cursor.fetchone()
cursor.close()

return result


def data_source_by_id_query(
data_source_id: str = "",
test_query_results: Optional[List[Dict[str, Any]]] = None,
conn: Optional[PgConnection] = None,
cursor: psycopg2.extensions.cursor = None,
) -> Dict[str, Any]:
"""
Processes a request to fetch data source details by ID, either from the database or provided test results.

:param data_source_id: The unique identifier for the data source.
:param test_query_results: A list of dictionaries representing test query results, if provided.
:param conn: A psycopg2 connection object to a PostgreSQL database.
:param cursor: A psycopg2 cursor object to a PostgreSQL database.
:return: A dictionary with the data source details after processing.
"""
if conn:
result = data_source_by_id_results(conn, data_source_id)
if cursor:
result = data_source_by_id_results(cursor, data_source_id)
else:
result = test_query_results

@@ -170,14 +156,16 @@ def data_source_by_id_query(
return data_source_details


def get_approved_data_sources(conn: PgConnection) -> list[tuple[Any, ...]]:
def get_approved_data_sources(
cursor: psycopg2.extensions.cursor,
) -> list[tuple[Any, ...]]:
"""
Fetches all approved data sources and their related agency information from a PostgreSQL database.

:param conn: A psycopg2 connection object to a PostgreSQL database.
:param cursor: A psycopg2 cursor object to a PostgreSQL database.
:return: A list of dictionaries, each containing details of a data source and its related agency.
"""
cursor = conn.cursor()

data_source_approved_columns = [
f"data_sources.{approved_column}"
for approved_column in DATA_SOURCES_APPROVED_COLUMNS
@@ -202,16 +190,15 @@ def get_approved_data_sources(conn: PgConnection) -> list[tuple[Any, ...]]:
)
cursor.execute(sql_query)
results = cursor.fetchall()
cursor.close()

return convert_data_source_matches(DATA_SOURCES_OUTPUT_COLUMNS, results)
return results


def needs_identification_data_sources(conn) -> list:
def needs_identification_data_sources(cursor: psycopg2.extensions.cursor) -> list:
"""
Returns a list of data sources that need identification
:param cursor: A psycopg2 cursor object to a PostgreSQL database.
"""
cursor = conn.cursor()
joined_column_names = ", ".join(DATA_SOURCES_APPROVED_COLUMNS)

sql_query = """
@@ -226,16 +213,15 @@ def needs_identification_data_sources(conn) -> list:
)
cursor.execute(sql_query)
results = cursor.fetchall()
cursor.close()

return convert_data_source_matches(DATA_SOURCES_OUTPUT_COLUMNS, results)
return results


def get_data_sources_for_map(conn) -> list:
def get_data_sources_for_map(cursor: psycopg2.extensions.cursor) -> list:
"""
Returns a list of data sources with relevant info for the map
:param cursor: A psycopg2 cursor object to a PostgreSQL database.
"""
cursor = conn.cursor()
sql_query = """
SELECT
data_sources.airtable_uid as data_source_id,
@@ -259,27 +245,56 @@
"""
cursor.execute(sql_query)
results = cursor.fetchall()
cursor.close()

return convert_data_source_matches(DATA_SOURCES_MAP_COLUMN, results)
return results


def convert_data_source_matches(
data_source_output_columns: list[str], results: list[tuple]
) -> dict:
def data_sources_query(
cursor: psycopg2.extensions.cursor = None,
test_query_results: Optional[List[Dict[str, Any]]] = None,
approval_status: str = "approved",
for_map: bool = False,
) -> List[Dict[str, Any]]:
"""
Combine a list of output columns with a list of results,
and produce a list of dictionaries where the keys correspond
to the output columns and the values correspond to the results
:param data_source_output_columns:
:param results:
:return:
Processes and formats a list of approved data sources, with an option to use test query results.

:param approval_status: The approval status of the data sources to query.
:param cursor: A psycopg2 cursor object to a PostgreSQL database.
:param test_query_results: Optional list of test query results to use instead of querying the database.
:return: A list of dictionaries, each formatted with details of a data source and its associated agency.
"""
if for_map:
results = get_data_sources_for_map(cursor)
elif cursor and approval_status == "approved":
results = get_approved_data_sources(cursor)
elif cursor and not for_map:
results = needs_identification_data_sources(cursor)
else:
results = test_query_results

if not for_map:
data_source_output_columns = DATA_SOURCES_APPROVED_COLUMNS + ["agency_name"]
else:
data_source_output_columns = [
"data_source_id",
"name",
"agency_id",
"agency_name",
"state_iso",
"municipality",
"county_name",
"record_type",
"lat",
"lng",
]

data_source_matches = [
dict(zip(data_source_output_columns, result)) for result in results
]
data_source_matches_converted = []

for data_source_match in data_source_matches:
data_source_match = convert_dates_to_strings(data_source_match)
data_source_matches_converted.append(format_arrays(data_source_match))

return data_source_matches_converted
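The new `data_sources_query` function folds the three fetchers into a single entry point: `for_map` selects the map query and its column set, `approval_status == "approved"` selects the approved-sources query, and any other status with a live cursor falls through to the needs-identification query. A sketch of the three call shapes, reusing the connection and `managed_cursor` from the sketch above; the non-approved status string is an assumption:

```python
from middleware.data_source_queries import data_sources_query

with managed_cursor(conn) as cursor:
    # approval_status defaults to "approved", for_map to False.
    approved = data_sources_query(cursor=cursor)
    # Any status other than "approved" routes to the needs-identification
    # branch; the exact string used by the app is an assumption.
    pending = data_sources_query(cursor=cursor, approval_status="pending")
    # for_map=True returns the trimmed map columns (ids, names, lat/lng).
    map_rows = data_sources_query(cursor=cursor, for_map=True)
```

Each branch returns raw tuples that are zipped against the matching output column list and then run through `convert_dates_to_strings` and `format_arrays`.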