-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #3107 from HHS/OPS-3077/user-script
Ops 3077/user script
- Loading branch information
Showing
7 changed files
with
562 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
#!/bin/sh
# Load users from a CSV file into the database for the given environment.
#
# Usage: <this script> ENV INPUT_CSV
#   ENV        environment name passed to the loader as --env
#   INPUT_CSV  CSV location passed to the loader as --input-csv
#
# Only `set -e` is used: `pipefail` is not understood by every /bin/sh
# (older dash aborts on it), and this script contains no pipelines, so the
# option had no effect where it was accepted.
set -e

if [ "$#" -lt 2 ]; then
    echo "Usage: $0 ENV INPUT_CSV" >&2
    exit 1
fi

# Prepend the current directory; add the ':' separator only when PYTHONPATH
# already has a value.
export PYTHONPATH=".${PYTHONPATH:+:$PYTHONPATH}"

ENV=$1
INPUT_CSV=$2

echo "Activating virtual environment..."
. .venv/bin/activate

echo "ENV is $ENV"
echo "INPUT_CSV is $INPUT_CSV"

echo "Running script..."
python data_tools/src/load_users/main.py \
    --env "${ENV}" \
    --input-csv "${INPUT_CSV}"
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
import os | ||
import sys | ||
import time | ||
|
||
import click | ||
from data_tools.src.azure_utils.utils import get_csv | ||
from data_tools.src.common.db import init_db_from_config, setup_triggers | ||
from data_tools.src.common.utils import get_config, get_or_create_sys_user | ||
from data_tools.src.load_users.utils import transform | ||
from loguru import logger | ||
from sqlalchemy import text | ||
from sqlalchemy.orm import scoped_session, sessionmaker | ||
|
||
# Set the timezone to UTC
os.environ["TZ"] = "UTC"
# tzset() makes the time module re-read TZ (only available on Unix).
time.tzset()

# logger configuration
# Named LOG_FORMAT rather than `format` so the builtin format() is not shadowed.
LOG_FORMAT = (
    "<green>{time:YYYY-MM-DD HH:mm:ss}</green> | "
    "<level>{level: <8}</level> | "
    "<cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> | "
    "<level>{message}</level>"
)
# NOTE(review): both sinks are added on top of whatever handlers loguru
# already has, so INFO messages are emitted to stdout and stderr alike —
# confirm the duplication is intended.
logger.add(sys.stdout, format=LOG_FORMAT, level="INFO")
logger.add(sys.stderr, format=LOG_FORMAT, level="INFO")
|
||
|
||
@click.command()
@click.option("--env", help="The environment to use.")
@click.option("--input-csv", help="The path to the CSV input file.")
def main(
    env: str,
    input_csv: str,
):
    """
    Command-line entrypoint: load users from a CSV file into the database.

    Reads the configuration for ``env``, checks database connectivity,
    fetches the CSV, then runs the transform inside a session. Exits with
    status 1 when the engine cannot be created or the transform raises
    RuntimeError.
    """
    logger.debug(f"Environment: {env}")
    logger.debug(f"Input CSV: {input_csv}")

    logger.info("Starting the ETL process.")

    config = get_config(env)
    engine, _metadata = init_db_from_config(config)

    if engine is None:
        logger.error("Failed to initialize the database engine.")
        sys.exit(1)

    # Cheap round-trip so connection problems surface before any work is done.
    with engine.connect() as connection:
        connection.execute(text("SELECT 1"))
        logger.info("Successfully connected to the database.")

    csv_rows = get_csv(input_csv, config)

    logger.info(f"Loaded CSV file from {input_csv}.")

    session_factory = sessionmaker(autocommit=False, autoflush=False, bind=engine)
    Session = scoped_session(session_factory)

    with Session() as session:
        sys_user = get_or_create_sys_user(session)
        logger.info(f"Retrieved system user {sys_user}")

        setup_triggers(session, sys_user)

        try:
            transform(csv_rows, session, sys_user)
        except RuntimeError as err:
            logger.error(f"Error transforming data: {err}")
            sys.exit(1)

    logger.info("Finished the ETL process.")


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,164 @@ | ||
from csv import DictReader | ||
from dataclasses import dataclass | ||
from typing import List, Optional | ||
|
||
from loguru import logger | ||
from sqlalchemy import select | ||
from sqlalchemy.orm import Session | ||
|
||
from models import Division, OpsEvent, OpsEventStatus, OpsEventType, Role, User, UserStatus | ||
|
||
|
||
@dataclass
class UserData:
    """
    Dataclass to represent a User data row.

    Field names are upper-case so that a row dictionary with matching keys
    can be unpacked directly into the constructor. Raw values are normalised
    in __post_init__.

    :raises ValueError: If EMAIL is missing or empty.
    """
    # Required; converted to str.
    EMAIL: str
    # Converted to int; None when missing or blank.
    SYS_USER_ID: Optional[int] = None
    # Converted to str; None when missing or blank.
    DIVISION: Optional[str] = None
    # Converted to str; None when missing or blank.
    STATUS: Optional[str] = None
    # Accepts a comma-separated string or an iterable of names; always ends
    # up as a list of stripped, non-empty strings.
    ROLES: Optional[list[str]] = None

    def __post_init__(self):
        if not self.EMAIL:
            raise ValueError("EMAIL is required.")

        self.SYS_USER_ID = int(self.SYS_USER_ID) if self.SYS_USER_ID else None
        self.EMAIL = str(self.EMAIL)
        # Keep missing values as None; str(None) would store the text "None".
        self.DIVISION = str(self.DIVISION) if self.DIVISION else None
        self.STATUS = str(self.STATUS) if self.STATUS else None

        if not self.ROLES:
            self.ROLES = []
        else:
            raw_roles = self.ROLES.split(",") if isinstance(self.ROLES, str) else self.ROLES
            stripped = (str(r).strip() for r in raw_roles)
            # Drop blanks produced by trailing or doubled commas.
            self.ROLES = [name for name in stripped if name]
|
||
|
||
def create_user_data(data: dict) -> UserData:
    """
    Build a UserData instance from a single row dictionary.
    :param data: Mapping whose items are passed to UserData as keyword arguments.
    :return: The constructed UserData instance.
    """
    user_data = UserData(**data)
    return user_data
|
||
def validate_data(data: UserData) -> bool:
    """
    Check a single UserData instance.
    :param data: The UserData instance to check.
    :return: True when EMAIL is not None, False otherwise.
    """
    email_present = data.EMAIL is not None
    return email_present
|
||
def validate_all(data: List[UserData]) -> bool:
    """
    Validate a list of UserData instances.
    :param data: The list of UserData instances to validate.
    :return: True if every instance passes validate_data (also True for an
        empty list), False otherwise.
    """
    return all(validate_data(d) for d in data)
|
||
def create_models(data: UserData, sys_user: User, session: Session, roles: List[Role], divisions: List[Division]) -> None:
    """
    Create and persist the User model for one row, then record an OpsEvent for it.
    :param data: The UserData instance to convert.
    :param sys_user: The system user recorded as creator of the user and the event.
    :param session: The database session to use.
    :param roles: Available roles; those whose name appears in data.ROLES are assigned.
    :param divisions: Available divisions; matched by abbreviation against data.DIVISION.
    :raises ValueError: If any argument is missing or empty.
    """
    logger.debug(f"Creating models for {data}")

    try:
        if not data or not sys_user or not session or not roles or not divisions:
            raise ValueError(f"Arguments are invalid. {data}, {sys_user}, {session}, {roles}, {divisions}")

        user = User(
            id=data.SYS_USER_ID or None,
            email=data.EMAIL,
            status=UserStatus[data.STATUS],
            created_by=sys_user.id,
        )

        user.roles = [r for r in roles if r.name in data.ROLES]

        # Role names with no match are otherwise dropped without a trace.
        known_role_names = {r.name for r in roles}
        unknown_role_names = [name for name in data.ROLES if name and name not in known_role_names]
        if unknown_role_names:
            logger.warning(f"Ignoring unknown roles {unknown_role_names} for {data.EMAIL}")

        division = next((d for d in divisions if d.abbreviation == data.DIVISION), None)
        user.division = division.id if division else None

        # merge() returns the session-attached copy and leaves `user` itself
        # untouched, so database-assigned values such as a generated id must
        # be read from the returned instance.
        persisted_user = session.merge(user)
        session.commit()

        ops_event = OpsEvent(
            event_type=OpsEventType.CREATE_USER,
            event_status=OpsEventStatus.SUCCESS,
            created_by=sys_user.id,
            event_details={"user_id": persisted_user.id, "message": f"Upserted user {persisted_user.email}"},
        )
        session.add(ops_event)
        session.commit()
    except Exception:
        logger.error(f"Error creating models for {data}")
        # Bare raise keeps the original traceback intact.
        raise
|
||
|
||
def create_all_models(data: List[UserData], sys_user: User, session: Session, roles: List[Role], divisions: List[Division]) -> None:
    """
    Persist every UserData instance, in order, by delegating to create_models.
    :param data: The list of UserData instances to persist.
    :param sys_user: The system user to use.
    :param session: The database session to use.
    :param roles: The list of roles to use.
    :param divisions: The list of divisions to use.
    :return: None
    """
    shared_args = (sys_user, session, roles, divisions)
    for row in data:
        create_models(row, *shared_args)
|
||
|
||
def create_all_user_data(data: List[dict]) -> List[UserData]:
    """
    Build one UserData instance per row dictionary, preserving order.
    :param data: The row dictionaries to convert.
    :return: The resulting UserData instances.
    """
    return list(map(create_user_data, data))
|
||
|
||
def transform(data: DictReader, session: Session, sys_user: User) -> None:
    """
    Transform the data from the CSV file and persist the models to the database.
    :param data: The data from the CSV file.
    :param session: The database session to use.
    :param sys_user: The system user to use.
    :return: None
    :raises RuntimeError: If an argument is missing, no roles or divisions
        are found, or validation fails.
    """
    # Check the arguments before touching the database, so a missing session
    # is reported as RuntimeError instead of failing inside session.execute.
    if not data or not session or not sys_user:
        logger.error("No data to process. Exiting.")
        raise RuntimeError("No data to process.")

    roles = list(session.execute(select(Role)).scalars().all())
    logger.info(f"Retrieved {len(roles)} roles.")

    divisions = list(session.execute(select(Division)).scalars().all())
    logger.info(f"Retrieved {len(divisions)} divisions.")

    if not roles or not divisions:
        logger.error("No data to process. Exiting.")
        raise RuntimeError("No data to process.")

    user_data = create_all_user_data(list(data))
    logger.info(f"Created {len(user_data)} UserData instances.")

    # A DictReader object is truthy even when it yields no rows, so the
    # guard above cannot detect empty input; make that case visible here.
    if not user_data:
        logger.warning("The input contained no rows; nothing will be loaded.")

    if not validate_all(user_data):
        logger.error("Validation failed. Exiting.")
        raise RuntimeError("Validation failed.")

    logger.info("Data validation passed.")

    create_all_models(user_data, sys_user, session, roles, divisions)
    logger.info("Finished loading models.")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
SYS_USER_ID EMAIL DIVISION STATUS ROLES | ||
1 [email protected] CC INACTIVE user | ||
2 [email protected] CC INACTIVE user | ||
3 [email protected] CC INACTIVE user | ||
4 [email protected] DEI ACTIVE user | ||
5 [email protected] OD INACTIVE user | ||
6 [email protected] OD INACTIVE user | ||
7 [email protected] OD INACTIVE user | ||
8 [email protected] DCFD INACTIVE user | ||
9 [email protected] DCFD INACTIVE user | ||
10 [email protected] DCFD INACTIVE user | ||
11 [email protected] DCFD INACTIVE user | ||
12 [email protected] DCFD INACTIVE user | ||
13 [email protected] OD INACTIVE user | ||
14 [email protected] OD ACTIVE admin,USER_ADMIN | ||
15 [email protected] OD ACTIVE user | ||
16 [email protected] OD INACTIVE user | ||
17 [email protected] OD INACTIVE user | ||
18 [email protected] OD ACTIVE admin,USER_ADMIN | ||
19 [email protected] OD ACTIVE user | ||
20 [email protected] OD ACTIVE user | ||
21 [email protected] OD ACTIVE admin | ||
22 [email protected] OD ACTIVE user | ||
23 [email protected] CC ACTIVE division_director | ||
24 [email protected] CC ACTIVE BUDGET_TEAM | ||
25 [email protected] CC ACTIVE user | ||
26 [email protected] DDI ACTIVE division_director | ||
27 [email protected] LOCKED | ||
28 [email protected] ACTIVE admin | ||
29 [email protected] ACTIVE admin,USER_ADMIN |
Empty file.
Oops, something went wrong.