Skip to content

Commit

Permalink
Merge pull request #3107 from HHS/OPS-3077/user-script
Browse files Browse the repository at this point in the history
Ops 3077/user script
  • Loading branch information
johndeange authored Nov 21, 2024
2 parents 09df606 + 8547691 commit d6a4587
Show file tree
Hide file tree
Showing 7 changed files with 562 additions and 0 deletions.
18 changes: 18 additions & 0 deletions backend/data_tools/scripts/load_users.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#!/bin/sh
# Load users from a CSV file into the database for a given environment.
#
# Usage: load_users.sh <env> <input_csv>
#   <env>        value forwarded to main.py as --env
#   <input_csv>  value forwarded to main.py as --input-csv
#
# Run this from the directory that contains both .venv/ and data_tools/,
# because every path used below is relative to the current working directory.

# Abort on the first failing command.
# `-o pipefail` is deliberately not set: it is not accepted by every /bin/sh
# (implementations without it, such as older dash, abort with an "illegal
# option" error before doing any work) and this script contains no pipelines
# for the option to act on.
set -e

# Put the current directory first on the module search path. The previous
# value is appended only when it is non-empty, so no trailing empty entry
# is produced when PYTHONPATH was unset.
export PYTHONPATH=".${PYTHONPATH:+:$PYTHONPATH}"

ENV=$1
INPUT_CSV=$2

echo "Activating virtual environment..."
. .venv/bin/activate

echo "ENV is $ENV"
echo "INPUT_CSV is $INPUT_CSV"

echo "Running script..."
python data_tools/src/load_users/main.py \
    --env "${ENV}" \
    --input-csv "${INPUT_CSV}"
Empty file.
76 changes: 76 additions & 0 deletions backend/data_tools/src/load_users/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
import os
import sys
import time

import click
from data_tools.src.azure_utils.utils import get_csv
from data_tools.src.common.db import init_db_from_config, setup_triggers
from data_tools.src.common.utils import get_config, get_or_create_sys_user
from data_tools.src.load_users.utils import transform
from loguru import logger
from sqlalchemy import text
from sqlalchemy.orm import scoped_session, sessionmaker

# Set the timezone to UTC
# TZ is exported first; time.tzset() then makes the time module pick it up.
# NOTE(review): time.tzset() is documented as Unix-only — confirm this script
# is never run on Windows.
os.environ["TZ"] = "UTC"
time.tzset()

# logger configuration
# One layout shared by both sinks added below:
#   timestamp | level (padded to 8) | name:function:line | message
# NOTE(review): the module-level name `format` shadows the built-in `format`.
format = (
"<green>{time:YYYY-MM-DD HH:mm:ss}</green> | "
"<level>{level: <8}</level> | "
"<cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> | "
"<level>{message}</level>"
)
# The same format and the same INFO threshold are registered for stdout and
# for stderr.
# NOTE(review): this looks like every INFO+ record is written to both streams,
# and no pre-existing loguru handler is removed here — confirm the duplicated
# output is intended.
logger.add(sys.stdout, format=format, level="INFO")
logger.add(sys.stderr, format=format, level="INFO")


@click.command()
@click.option("--env", help="The environment to use.")
@click.option("--input-csv", help="The path to the CSV input file.")
def main(
    env: str,
    input_csv: str,
):
    """
    Main entrypoint for the script.
    """
    # NOTE: the docstring above is what click prints for --help, so it is kept
    # short; the flow is described in the comments below.
    logger.debug(f"Environment: {env}")
    logger.debug(f"Input CSV: {input_csv}")

    logger.info("Starting the ETL process.")

    # Resolve the configuration for the requested environment and build the
    # engine from it. The metadata object returned alongside it is not used.
    config = get_config(env)
    engine, _metadata = init_db_from_config(config)

    # Guard clause: nothing below can work without an engine.
    if engine is None:
        logger.error("Failed to initialize the database engine.")
        sys.exit(1)

    # Connectivity probe: run a trivial statement before doing real work.
    with engine.connect() as connection:
        connection.execute(text("SELECT 1"))
        logger.info("Successfully connected to the database.")

    csv_rows = get_csv(input_csv, config)

    logger.info(f"Loaded CSV file from {input_csv}.")

    session_factory = sessionmaker(autocommit=False, autoflush=False, bind=engine)
    scoped = scoped_session(session_factory)

    with scoped() as session:
        system_user = get_or_create_sys_user(session)
        logger.info(f"Retrieved system user {system_user}")

        setup_triggers(session, system_user)

        # transform() signals unusable input with RuntimeError; report it and
        # exit with a non-zero status.
        try:
            transform(csv_rows, session, system_user)
        except RuntimeError as error:
            logger.error(f"Error transforming data: {error}")
            sys.exit(1)

    logger.info("Finished the ETL process.")


if __name__ == "__main__":
    main()
164 changes: 164 additions & 0 deletions backend/data_tools/src/load_users/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
from csv import DictReader
from dataclasses import dataclass
from typing import List, Optional

from loguru import logger
from sqlalchemy import select
from sqlalchemy.orm import Session

from models import Division, OpsEvent, OpsEventStatus, OpsEventType, Role, User, UserStatus


@dataclass
class UserData:
    """
    Dataclass to represent a User data row.

    Field values are normalised in ``__post_init__``:

    * ``EMAIL`` is required and converted to ``str``.
    * ``SYS_USER_ID`` is converted to ``int``; a missing or empty value
      becomes ``None``.
    * ``DIVISION`` and ``STATUS`` are converted to ``str``; a missing
      (``None``) value stays ``None`` instead of becoming the text ``"None"``.
    * ``ROLES`` accepts either a comma-separated string or an iterable of
      names and becomes a list of stripped, non-blank role names (an empty
      list when missing).

    :raises ValueError: If ``EMAIL`` is missing or empty.
    """
    EMAIL: str
    SYS_USER_ID: Optional[int] = None
    DIVISION: Optional[str] = None
    STATUS: Optional[str] = None
    ROLES: Optional[list[str]] = None

    def __post_init__(self):
        if not self.EMAIL:
            raise ValueError("EMAIL is required.")

        self.SYS_USER_ID = int(self.SYS_USER_ID) if self.SYS_USER_ID else None
        self.EMAIL = str(self.EMAIL)

        # str(None) yields the literal text "None", which would then be
        # treated as a real division/status name; keep missing values as None.
        self.DIVISION = None if self.DIVISION is None else str(self.DIVISION)
        self.STATUS = None if self.STATUS is None else str(self.STATUS)

        if not self.ROLES:
            self.ROLES = []
        else:
            # A CSV cell arrives as one comma-separated string; an already
            # split sequence (as the annotation suggests) is accepted as-is.
            raw_roles = self.ROLES.split(",") if isinstance(self.ROLES, str) else self.ROLES
            stripped = (str(role).strip() for role in raw_roles)
            # Drop blanks produced by stray commas such as "admin,".
            self.ROLES = [role for role in stripped if role]


def create_user_data(data: dict) -> UserData:
    """
    Build a UserData instance from a dictionary of field values.

    The dictionary is unpacked into keyword arguments, so its keys must be
    UserData field names.

    :param data: Mapping of UserData field names to their raw values.
    :return: The resulting UserData instance.
    """
    user_data = UserData(**data)
    return user_data

def validate_data(data: UserData) -> bool:
    """
    Check whether a single UserData instance is valid.

    The only rule applied is that EMAIL must not be None.

    :param data: The UserData instance to check.
    :return: True when the instance is valid, False otherwise.
    """
    email_present = data.EMAIL is not None
    return email_present

def validate_all(data: List[UserData]) -> bool:
    """
    Validate a list of UserData instances.

    :param data: The list of UserData instances to validate.
    :return: True if every instance passes ``validate_data`` (an empty list
        is therefore valid), False as soon as one instance fails.
    """
    return all(validate_data(d) for d in data)

def create_models(data: UserData, sys_user: User, session: Session, roles: List[Role], divisions: List[Division]) -> None:
    """
    Upsert one User from a UserData row and record an OpsEvent for it.

    The User is merged into the session and committed, then an OpsEvent
    describing the upsert is added and committed separately.

    :param data: The UserData instance to convert.
    :param sys_user: The system user recorded as ``created_by``.
    :param session: The database session to use.
    :param roles: Available roles; those whose ``name`` is listed in
        ``data.ROLES`` are assigned to the user.
    :param divisions: Available divisions; the one whose ``abbreviation``
        equals ``data.DIVISION`` is assigned, or none if there is no match.
    :raises ValueError: If any argument is missing or empty.
    :raises Exception: Any other error raised while building or persisting the
        models is logged and re-raised unchanged.
    """
    logger.debug(f"Creating models for {data}")

    try:
        if not data or not sys_user or not session or not roles or not divisions:
            raise ValueError(f"Arguments are invalid. {data}, {sys_user}, {session}, {roles}, {divisions}")

        user = User(
            id=data.SYS_USER_ID if data.SYS_USER_ID else None,
            email=data.EMAIL,
            status=UserStatus[data.STATUS],
            created_by=sys_user.id,
        )

        user.roles = [r for r in roles if r.name in data.ROLES]
        division = next((d for d in divisions if d.abbreviation == data.DIVISION), None)
        user.division = division.id if division else None

        # merge() copies the state onto an instance owned by the session and
        # returns that instance; the object passed in is left untouched. The
        # database-assigned id is therefore only available on the result.
        persisted_user = session.merge(user)
        session.commit()

        ops_event = OpsEvent(
            event_type=OpsEventType.CREATE_USER,
            event_status=OpsEventStatus.SUCCESS,
            created_by=sys_user.id,
            event_details={"user_id": persisted_user.id, "message": f"Upserted user {persisted_user.email}"},
        )
        session.add(ops_event)
        session.commit()
    except Exception:
        logger.error(f"Error creating models for {data}")
        # Bare raise keeps the original exception and traceback intact.
        raise


def create_all_models(data: List[UserData], sys_user: User, session: Session, roles: List[Role], divisions: List[Division]) -> None:
    """
    Persist every UserData row by passing each one, in order, to create_models.

    Processing stops at the first row for which create_models raises.

    :param data: The UserData instances to persist.
    :param sys_user: The system user handed to create_models.
    :param session: The database session handed to create_models.
    :param roles: The roles handed to create_models.
    :param divisions: The divisions handed to create_models.
    :return: None
    """
    for user_row in data:
        create_models(user_row, sys_user, session, roles, divisions)


def create_all_user_data(data: List[dict]) -> List[UserData]:
    """
    Turn each dictionary into a UserData instance, preserving input order.

    :param data: The dictionaries to convert, one per row.
    :return: A new list with one UserData instance per input dictionary.
    """
    converted: List[UserData] = []
    for row in data:
        converted.append(create_user_data(row))
    return converted


def transform(data: DictReader, session: Session, sys_user: User) -> None:
    """
    Transform the data from the CSV file and persist the models to the database.

    All roles and divisions are loaded once, every CSV row is converted to a
    UserData instance and validated, and the rows are then persisted one by one.

    :param data: The data from the CSV file.
    :param session: The database session to use.
    :param sys_user: The system user to use.
    :return: None
    :raises RuntimeError: If the session or system user is missing, if there
        are no rows, roles or divisions to work with, or if validation fails.
    """
    # Check the handles before using them: the queries below need the session.
    if not session or not sys_user:
        logger.error("No data to process. Exiting.")
        raise RuntimeError("No data to process.")

    roles = list(session.execute(select(Role)).scalars().all())
    logger.info(f"Retrieved {len(roles)} roles.")

    divisions = list(session.execute(select(Division)).scalars().all())
    logger.info(f"Retrieved {len(divisions)} divisions.")

    # A csv.DictReader is truthy even when the file has no rows, so read the
    # rows first and test the resulting list for emptiness.
    rows = list(data) if data is not None else []

    if not rows or not roles or not divisions:
        logger.error("No data to process. Exiting.")
        raise RuntimeError("No data to process.")

    user_data = create_all_user_data(rows)
    logger.info(f"Created {len(user_data)} UserData instances.")

    if not validate_all(user_data):
        logger.error("Validation failed. Exiting.")
        raise RuntimeError("Validation failed.")

    logger.info("Data validation passed.")

    create_all_models(user_data, sys_user, session, roles, divisions)
    logger.info("Finished loading models.")
30 changes: 30 additions & 0 deletions backend/data_tools/test_csv/users.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
SYS_USER_ID EMAIL DIVISION STATUS ROLES
1 [email protected] CC INACTIVE user
2 [email protected] CC INACTIVE user
3 [email protected] CC INACTIVE user
4 [email protected] DEI ACTIVE user
5 [email protected] OD INACTIVE user
6 [email protected] OD INACTIVE user
7 [email protected] OD INACTIVE user
8 [email protected] DCFD INACTIVE user
9 [email protected] DCFD INACTIVE user
10 [email protected] DCFD INACTIVE user
11 [email protected] DCFD INACTIVE user
12 [email protected] DCFD INACTIVE user
13 [email protected] OD INACTIVE user
14 [email protected] OD ACTIVE admin,USER_ADMIN
15 [email protected] OD ACTIVE user
16 [email protected] OD INACTIVE user
17 [email protected] OD INACTIVE user
18 [email protected] OD ACTIVE admin,USER_ADMIN
19 [email protected] OD ACTIVE user
20 [email protected] OD ACTIVE user
21 [email protected] OD ACTIVE admin
22 [email protected] OD ACTIVE user
23 [email protected] CC ACTIVE division_director
24 [email protected] CC ACTIVE BUDGET_TEAM
25 [email protected] CC ACTIVE user
26 [email protected] DDI ACTIVE division_director
27 [email protected] LOCKED
28 [email protected] ACTIVE admin
29 [email protected] ACTIVE admin,USER_ADMIN
Empty file.
Loading

0 comments on commit d6a4587

Please sign in to comment.