From a1dfb653ecd51376fdf79c9adf991e8797d6c7d7 Mon Sep 17 00:00:00 2001 From: Mark Saroufim Date: Sun, 15 Dec 2024 19:20:19 -0800 Subject: [PATCH] Ruff everything --- .github/workflows/lint.yml | 26 ++++ .ruff.toml | 11 ++ scripts/flush_db.py | 14 ++- scripts/github-only-bot.py | 114 ------------------ scripts/modal-test.py | 2 +- src/discord-cluster-manager/bot.py | 31 ++--- .../cogs/github_cog.py | 29 +++-- .../cogs/leaderboard_cog.py | 21 ++-- src/discord-cluster-manager/cogs/misc_cog.py | 4 +- src/discord-cluster-manager/cogs/modal_cog.py | 57 ++++----- .../cogs/verify_run_cog.py | 19 +-- src/discord-cluster-manager/consts.py | 3 +- src/discord-cluster-manager/leaderboard_db.py | 15 +-- ...208_01_p3yuR-initial-leaderboard-schema.py | 2 +- src/discord-cluster-manager/modal_runner.py | 17 +-- src/discord-cluster-manager/utils.py | 4 +- 16 files changed, 153 insertions(+), 216 deletions(-) create mode 100644 .github/workflows/lint.yml create mode 100644 .ruff.toml delete mode 100644 scripts/github-only-bot.py diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 0000000..97d7515 --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,26 @@ +name: Lint + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +jobs: + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.10' + + - name: Install dependencies + run: | + pip install ruff + + - name: Run Ruff check + run: | + ruff check . \ No newline at end of file diff --git a/.ruff.toml b/.ruff.toml new file mode 100644 index 0000000..b442c74 --- /dev/null +++ b/.ruff.toml @@ -0,0 +1,11 @@ +line-length = 110 # ideally I want this to be less than 100 but don't wanna test and change the sql stuff +target-version = "py310" +lint.select = [ + "E", # pycodestyle errors + "F", # pyflakes + "I", # isort + "B", # flake8-bugbear + "C", # mccabe + "W", # pycodestyle warnings +] +lint.ignore = [] \ No newline at end of file diff --git a/scripts/flush_db.py b/scripts/flush_db.py index 5e413f7..9073eb7 100644 --- a/scripts/flush_db.py +++ b/scripts/flush_db.py @@ -1,18 +1,20 @@ #!/usr/bin/env python3 +import os + import psycopg2 -from psycopg2 import Error from dotenv import load_dotenv -import os +from psycopg2 import Error + def flush_database(): # Load environment variables load_dotenv() - + DATABASE_URL = os.getenv('DATABASE_URL') - + if DATABASE_URL is None: - print(f"❌ Missing DATABASE_URL environment variable") + print("❌ Missing DATABASE_URL environment variable") return try: @@ -47,4 +49,4 @@ def flush_database(): print("🔌 Database connection closed") if __name__ == "__main__": - flush_database() + flush_database() diff --git a/scripts/github-only-bot.py b/scripts/github-only-bot.py deleted file mode 100644 index 9b48ebb..0000000 --- a/scripts/github-only-bot.py +++ /dev/null @@ -1,114 +0,0 @@ -""" -This is a legacy file and no longer needed but keeping it a round since its simplest example I have of triggering a Github action using a python script -""" - -from dotenv import load_dotenv -from github import Github -import os -import time -from datetime import datetime, timezone -import requests - -# Load environment variables -load_dotenv() - -def trigger_github_action(): - """ - Triggers the GitHub action and returns the latest run ID - """ - gh = Github(os.getenv('GITHUB_TOKEN')) - repo = gh.get_repo(os.getenv('GITHUB_REPO')) - - try: - # Record the time before triggering - trigger_time = datetime.now(timezone.utc) - - # Trigger the workflow - workflow = repo.get_workflow("train_workflow.yml") - success = workflow.create_dispatch("main") - - if success: - # Wait a moment for the run to be created - time.sleep(2) - - # Get runs created after our trigger time - runs = list(workflow.get_runs()) - for run in runs: - if run.created_at.replace(tzinfo=timezone.utc) > trigger_time: - return run.id - - return None - except Exception as e: - print(f"Error: {str(e)}") - return None - -def download_artifact(run_id): - """ - Downloads the training log artifact from the workflow run - """ - gh = Github(os.getenv('GITHUB_TOKEN')) - repo = gh.get_repo(os.getenv('GITHUB_REPO')) - - # Get the specific run - run = repo.get_workflow_run(run_id) - - # Get artifacts from the run - artifacts = run.get_artifacts() - - for artifact in artifacts: - if artifact.name == 'training-logs': - # Download the artifact - url = artifact.archive_download_url - headers = {'Authorization': f'token {os.getenv("GITHUB_TOKEN")}'} - response = requests.get(url, headers=headers) - - if response.status_code == 200: - with open('training.log.zip', 'wb') as f: - f.write(response.content) - - # Read the log file from the zip - import zipfile - with zipfile.ZipFile('training.log.zip') as z: - with z.open('training.log') as f: - logs = f.read().decode('utf-8') - - # Clean up the zip file - os.remove('training.log.zip') - return logs - - return "No training logs found in artifacts" - -def check_workflow_status(run_id): - """ - Monitors the GitHub Action workflow status - """ - gh = Github(os.getenv('GITHUB_TOKEN')) - repo = gh.get_repo(os.getenv('GITHUB_REPO')) - - while True: - run = repo.get_workflow_run(run_id) - - if run.status == "completed": - logs = download_artifact(run_id) - return run.conclusion, logs, run.html_url - - print(f"Workflow still running... Status: {run.status}") - print(f"Live view: {run.html_url}") - time.sleep(30) - -if __name__ == "__main__": - run_id = trigger_github_action() - - if run_id: - print(f"GitHub Action triggered successfully! Run ID: {run_id}") - print("Monitoring progress...") - - # Monitor the workflow - status, logs, url = check_workflow_status(run_id) - - print(f"\nWorkflow completed with status: {status}") - print("\nTraining Logs:") - print(logs) - print(f"\nView the full run at: {url}") - else: - print("Failed to trigger GitHub Action. Please check your configuration.") \ No newline at end of file diff --git a/scripts/modal-test.py b/scripts/modal-test.py index f75019a..366e3f6 100644 --- a/scripts/modal-test.py +++ b/scripts/modal-test.py @@ -44,4 +44,4 @@ async def run_pytorch_script_on_modal(): if __name__ == "__main__": with modal_app.run(): result = run_pytorch_script_on_modal.remote() - print(result) \ No newline at end of file + print(result) diff --git a/src/discord-cluster-manager/bot.py b/src/discord-cluster-manager/bot.py index 1074915..e786c3a 100644 --- a/src/discord-cluster-manager/bot.py +++ b/src/discord-cluster-manager/bot.py @@ -1,27 +1,28 @@ -import discord -from discord import app_commands -from discord.ext import commands import argparse -from utils import setup_logging -from cogs.misc_cog import BotManagerCog from datetime import datetime + +import discord +from cogs.github_cog import GitHubCog +from cogs.leaderboard_cog import LeaderboardCog +from cogs.misc_cog import BotManagerCog +from cogs.modal_cog import ModalCog +from cogs.verify_run_cog import VerifyRunCog from consts import ( - init_environment, - DISCORD_TOKEN, - DISCORD_DEBUG_TOKEN, DISCORD_CLUSTER_STAGING_ID, DISCORD_DEBUG_CLUSTER_STAGING_ID, - POSTGRES_USER, - POSTGRES_PASSWORD, + DISCORD_DEBUG_TOKEN, + DISCORD_TOKEN, + POSTGRES_DATABASE, POSTGRES_HOST, + POSTGRES_PASSWORD, POSTGRES_PORT, - POSTGRES_DATABASE, + POSTGRES_USER, + init_environment, ) -from cogs.modal_cog import ModalCog -from cogs.github_cog import GitHubCog -from cogs.leaderboard_cog import LeaderboardCog +from discord import app_commands +from discord.ext import commands from leaderboard_db import LeaderboardDB -from cogs.verify_run_cog import VerifyRunCog +from utils import setup_logging logger = setup_logging() diff --git a/src/discord-cluster-manager/cogs/github_cog.py b/src/discord-cluster-manager/cogs/github_cog.py index 93f62bd..4fc574e 100644 --- a/src/discord-cluster-manager/cogs/github_cog.py +++ b/src/discord-cluster-manager/cogs/github_cog.py @@ -1,15 +1,16 @@ +import asyncio +import os +import zipfile +from datetime import datetime, timedelta, timezone + import discord +import requests +from consts import GITHUB_REPO, GITHUB_TOKEN, GPUType from discord import app_commands from discord.ext import commands -from datetime import datetime, timezone, timedelta -import asyncio -import requests -import zipfile -import os from github import Github -from utils import setup_logging, get_github_branch_name -from consts import GPUType, GITHUB_TOKEN, GITHUB_REPO -from leaderboard_eval import py_eval, cu_eval +from leaderboard_eval import cu_eval, py_eval +from utils import get_github_branch_name, setup_logging logger = setup_logging() @@ -100,12 +101,13 @@ async def run_github( "Failed to trigger GitHub Action. Please check the configuration." ) + return thread + except Exception as e: logger.error(f"Error processing request: {str(e)}", exc_info=True) - await thread.send(f"Error processing request: {str(e)}") - - finally: - return thread + if thread: + await thread.send(f"Error processing request: {str(e)}") + raise async def trigger_github_action( self, @@ -175,7 +177,8 @@ async def check_workflow_status(self, run_id, thread): if elapsed_time > timeout: try: run.cancel() - # Wait briefly to ensure cancellation is processed and Verify the run was actually cancelled + # Wait briefly to ensure cancellation is processed + # And Verify the run was actually cancelled await asyncio.sleep(5) run = repo.get_workflow_run(run_id) if run.status != "completed": diff --git a/src/discord-cluster-manager/cogs/leaderboard_cog.py b/src/discord-cluster-manager/cogs/leaderboard_cog.py index c431470..2d31c82 100644 --- a/src/discord-cluster-manager/cogs/leaderboard_cog.py +++ b/src/discord-cluster-manager/cogs/leaderboard_cog.py @@ -1,14 +1,12 @@ -import discord -from discord import app_commands -from discord.ext import commands +import random from datetime import datetime -from typing import TYPE_CHECKING +import discord from consts import GitHubGPU, ModalGPU +from discord import app_commands +from discord.ext import commands from utils import extract_score, get_user_from_id -import random - class LeaderboardSubmitCog(app_commands.Group): def __init__( @@ -67,8 +65,6 @@ async def submit_modal( await interaction.response.send_message("❌ Required cogs not found!") return - modal_command = modal_cog.run_modal - # Compute eval or submission score, call runner here. score = random.random() @@ -83,7 +79,10 @@ async def submit_modal( }) await interaction.response.send_message( - f"Ran on Modal. Leaderboard '{leaderboard_name}'. Submission title: {script.filename}. Submission user: {interaction.user.id}. Runtime: {score} ms", + f"Ran on Modal. Leaderboard '{leaderboard_name}'.\n" + + f"Submission title: {script.filename}.\n" + + f"Submission user: {interaction.user.id}.\n" + + f"Runtime: {score} ms", ephemeral=True, ) except ValueError: @@ -270,7 +269,9 @@ async def leaderboard_create( }) await interaction.response.send_message( - f"Leaderboard '{leaderboard_name}'. Reference code: {reference_code}. Submission deadline: {date_value}", + f"Leaderboard '{leaderboard_name}' created.\n" + + f"Reference code: {reference_code}.\n" + + f"Submission deadline: {date_value}", ephemeral=True, ) except ValueError: diff --git a/src/discord-cluster-manager/cogs/misc_cog.py b/src/discord-cluster-manager/cogs/misc_cog.py index 4494a7d..86dd115 100644 --- a/src/discord-cluster-manager/cogs/misc_cog.py +++ b/src/discord-cluster-manager/cogs/misc_cog.py @@ -1,9 +1,9 @@ import discord +import psycopg2 +from consts import DATABASE_URL from discord import app_commands from discord.ext import commands -import psycopg2 from utils import setup_logging -from consts import DATABASE_URL logger = setup_logging() diff --git a/src/discord-cluster-manager/cogs/modal_cog.py b/src/discord-cluster-manager/cogs/modal_cog.py index 44af78e..0e46d90 100644 --- a/src/discord-cluster-manager/cogs/modal_cog.py +++ b/src/discord-cluster-manager/cogs/modal_cog.py @@ -1,8 +1,9 @@ +import time + import discord +import modal from discord import app_commands from discord.ext import commands -import modal -import time from utils import setup_logging logger = setup_logging() @@ -32,32 +33,33 @@ async def run_modal( gpu_type: app_commands.Choice[str], use_followup: bool = False ) -> discord.Thread: - if not script.filename.endswith(".py") and not script.filename.endswith(".cu"): - await interaction.response.send_message( - "Please provide a Python (.py) or CUDA (.cu) file" - ) - return None + thread = None + try: + if not script.filename.endswith(".py") and not script.filename.endswith(".cu"): + await interaction.response.send_message( + "Please provide a Python (.py) or CUDA (.cu) file" + ) + return None - thread = await self.bot.create_thread(interaction, gpu_type.name, "Modal Job") - queue_start_time = time.perf_counter() - message = f"Created thread {thread.mention} for your Modal job" + thread = await self.bot.create_thread(interaction, gpu_type.name, "Modal Job") + queue_start_time = time.perf_counter() + message = f"Created thread {thread.mention} for your Modal job" - if use_followup: - await interaction.followup.send(message) - else: - await interaction.response.send_message(message) + if use_followup: + await interaction.followup.send(message) + else: + await interaction.response.send_message(message) - await thread.send(f"**Processing `{script.filename}` with {gpu_type.name}...**") + await thread.send(f"**Processing `{script.filename}` with {gpu_type.name}...**") - try: script_content = (await script.read()).decode("utf-8") status_msg = await thread.send("**Running on Modal...**\n> ⏳ Waiting for available GPU...") result, execution_time_ms = await self.trigger_modal_run(script_content, script.filename) - + # Update status message to show completion await status_msg.edit(content="**Running on Modal...**\n> ✅ Job completed!") - + queue_end_time = time.perf_counter() queue_time_ms = (queue_end_time - queue_start_time) * 1000 @@ -67,14 +69,15 @@ async def run_modal( await thread.send(f"**Execution time:** {execution_time_ms:.3f} ms\n") await thread.send(f"**Modal execution result:**\n```\n{result}\n```") + return thread + except Exception as e: logger.error(f"Error processing request: {str(e)}", exc_info=True) - # Update status message to show error - await status_msg.edit(content="**Running on Modal...**\n> ❌ Job failed!") - await thread.send(f"**Error:** {str(e)}") - - finally: - return thread + if thread: + # Update status message to show error + await status_msg.edit(content="**Running on Modal...**\n> ❌ Job failed!") + await thread.send(f"**Error:** {str(e)}") + raise async def trigger_modal_run(self, script_content: str, filename: str) -> tuple[str, float]: logger.info("Attempting to trigger Modal run") @@ -82,7 +85,7 @@ async def trigger_modal_run(self, script_content: str, filename: str) -> tuple[s from modal_runner import modal_app try: - print(f"Running {filename} with Modal") + print(f"Running {filename} with Modal") with modal.enable_output(): with modal_app.run(): if filename.endswith(".py"): @@ -93,7 +96,7 @@ async def trigger_modal_run(self, script_content: str, filename: str) -> tuple[s result, execution_time_ms = run_cuda_script.remote(script_content) return result, execution_time_ms - + except Exception as e: logger.error(f"Error in trigger_modal_run: {str(e)}", exc_info=True) - return f"Error: {str(e)}", 0 \ No newline at end of file + return f"Error: {str(e)}", 0 diff --git a/src/discord-cluster-manager/cogs/verify_run_cog.py b/src/discord-cluster-manager/cogs/verify_run_cog.py index 4dfaa44..d3f6eee 100644 --- a/src/discord-cluster-manager/cogs/verify_run_cog.py +++ b/src/discord-cluster-manager/cogs/verify_run_cog.py @@ -1,12 +1,13 @@ -import re import asyncio +import re +from unittest.mock import AsyncMock + import discord +from cogs.github_cog import GitHubCog +from cogs.modal_cog import ModalCog from discord import app_commands from discord.ext import commands -from unittest.mock import AsyncMock from utils import setup_logging -from cogs.modal_cog import ModalCog -from cogs.github_cog import GitHubCog logger = setup_logging() @@ -56,7 +57,7 @@ async def verify_github_run( all_patterns_found = all( any( - re.search(pattern, content, re.DOTALL) != None + re.search(pattern, content, re.DOTALL) is not None for content in message_contents ) for pattern in required_patterns @@ -69,7 +70,7 @@ async def verify_github_run( else: missing_patterns = [ pattern for pattern in required_patterns - if not any(re.search(pattern, content, re.DOTALL) + if not any(re.search(pattern, content, re.DOTALL) for content in message_contents) ] await interaction.followup.send( @@ -98,7 +99,7 @@ async def verify_modal_run( ] all_patterns_found = all( - any(re.search(pattern, content, re.DOTALL) != None + any(re.search(pattern, content, re.DOTALL) is not None for content in message_contents) for pattern in required_patterns ) @@ -141,7 +142,7 @@ async def verify_runs(self, interaction: discord.Interaction): self.verify_github_run(github_cog, nvidia, interaction), self.verify_github_run(github_cog, amd, interaction), self.verify_modal_run(modal_cog, interaction)) - + if all(results): await interaction.followup.send("✅ All runs completed successfully!") else: @@ -151,4 +152,4 @@ async def verify_runs(self, interaction: discord.Interaction): logger.error(f"Error starting verification runs: {e}", exc_info=True) await interaction.followup.send( f"❌ Problem performing verification runs: {str(e)}" - ) \ No newline at end of file + ) diff --git a/src/discord-cluster-manager/consts.py b/src/discord-cluster-manager/consts.py index c8b1324..c1e16ba 100644 --- a/src/discord-cluster-manager/consts.py +++ b/src/discord-cluster-manager/consts.py @@ -1,7 +1,8 @@ -from dotenv import load_dotenv import os from enum import Enum +from dotenv import load_dotenv + def init_environment(): load_dotenv() diff --git a/src/discord-cluster-manager/leaderboard_db.py b/src/discord-cluster-manager/leaderboard_db.py index eadfe5b..2d03bb6 100644 --- a/src/discord-cluster-manager/leaderboard_db.py +++ b/src/discord-cluster-manager/leaderboard_db.py @@ -1,15 +1,16 @@ -import psycopg2 -from psycopg2 import Error from typing import Optional -from utils import LeaderboardItem, SubmissionItem + +import psycopg2 from consts import ( - POSTGRES_USER, - POSTGRES_PASSWORD, + DATABASE_URL, + POSTGRES_DATABASE, POSTGRES_HOST, + POSTGRES_PASSWORD, POSTGRES_PORT, - POSTGRES_DATABASE, - DATABASE_URL, + POSTGRES_USER, ) +from psycopg2 import Error +from utils import LeaderboardItem, SubmissionItem class LeaderboardDB: diff --git a/src/discord-cluster-manager/migrations/20241208_01_p3yuR-initial-leaderboard-schema.py b/src/discord-cluster-manager/migrations/20241208_01_p3yuR-initial-leaderboard-schema.py index 75a13c3..f6e6dd4 100644 --- a/src/discord-cluster-manager/migrations/20241208_01_p3yuR-initial-leaderboard-schema.py +++ b/src/discord-cluster-manager/migrations/20241208_01_p3yuR-initial-leaderboard-schema.py @@ -43,4 +43,4 @@ """), step("CREATE INDEX ON leaderboard.runinfo (submission_id)") -] \ No newline at end of file +] diff --git a/src/discord-cluster-manager/modal_runner.py b/src/discord-cluster-manager/modal_runner.py index 7ee362b..16680bb 100644 --- a/src/discord-cluster-manager/modal_runner.py +++ b/src/discord-cluster-manager/modal_runner.py @@ -1,6 +1,7 @@ -from modal import App, Image -from contextlib import contextmanager import signal +from contextlib import contextmanager + +from modal import App, Image # Create a stub for the Modal app # IMPORTANT: This has to stay in separate file or modal breaks @@ -31,7 +32,7 @@ def timeout_handler(signum, frame): @modal_app.function( - gpu="T4", + gpu="T4", image=Image.debian_slim(python_version="3.10").pip_install(["torch"]) ) def run_pytorch_script(script_content: str, timeout_seconds: int = 300) -> tuple[str, float]: @@ -48,8 +49,8 @@ def run_pytorch_script(script_content: str, timeout_seconds: int = 300) -> tuple NOTE: Modal execution time is not programmatically accessible, so we manually calculate it """ import sys - from io import StringIO import time + from io import StringIO # Capture stdout output = StringIO() @@ -59,7 +60,7 @@ def run_pytorch_script(script_content: str, timeout_seconds: int = 300) -> tuple with timeout(timeout_seconds): # Create a new dictionary for local variables to avoid polluting the global namespace local_vars = {} - + execution_start_time = time.perf_counter() # Execute the script in the isolated namespace @@ -98,11 +99,11 @@ def run_cuda_script(script_content: str, timeout_seconds: int = 600) -> tuple[st NOTE: Modal execution time is not programmatically accessible, so we manually calculate it """ - import sys - from io import StringIO - import subprocess import os + import subprocess + import sys import time + from io import StringIO # Capture stdout output = StringIO() diff --git a/src/discord-cluster-manager/utils.py b/src/discord-cluster-manager/utils.py index 04020f1..ebfbaa1 100644 --- a/src/discord-cluster-manager/utils.py +++ b/src/discord-cluster-manager/utils.py @@ -1,7 +1,7 @@ -import logging -import subprocess import datetime +import logging import re +import subprocess from typing import TypedDict