Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

QoL changes: Add GPU types view, remove dtype and shapes info. No DB or functionality. #52

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 0 additions & 9 deletions .github/workflows/amd_workflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -92,15 +92,6 @@ jobs:
cat training.log # Debug: show output
fi

- name: Extract score
shell: bash
run: |
# Extract only the last occurrence of "score:"
score=$(grep -oP 'score:\s*\d+(\.\d+)?' training.log | tail -n 1 | awk '{print $2}')

echo "Score extracted: $score"
echo "::set-output name=score::$score"

- name: Upload training artifacts
uses: actions/upload-artifact@v4
if: always()
Expand Down
9 changes: 0 additions & 9 deletions .github/workflows/nvidia_workflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -120,15 +120,6 @@ jobs:
cat training.log # Debug: show output
fi

- name: Extract score
shell: bash
run: |
# Extract only the last occurrence of "score:"
score=$(grep -oP 'score:\s*\d+(\.\d+)?' training.log | tail -n 1 | awk '{print $2}')

echo "Score extracted: $score"
echo "::set-output name=score::$score"

- name: Upload training artifacts
uses: actions/upload-artifact@v4
if: always()
Expand Down
9 changes: 3 additions & 6 deletions src/discord-cluster-manager/cogs/github_cog.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import zipfile
import os
from github import Github
from utils import setup_logging, get_github_branch_name
from utils import setup_logging, get_github_branch_name, send_discord_message
from consts import GPUType, GITHUB_TOKEN, GITHUB_REPO
from leaderboard_eval import py_eval, cu_eval

Expand Down Expand Up @@ -36,7 +36,6 @@ async def run_github(
interaction: discord.Interaction,
script: discord.Attachment,
gpu_type: app_commands.Choice[str],
use_followup: bool = False,
reference_script: discord.Attachment = None,
reference_code: str = None,
) -> discord.Thread:
Expand All @@ -49,10 +48,8 @@ async def run_github(
thread = await self.bot.create_thread(interaction, gpu_type.name, "GitHub Job")
message = f"Created thread {thread.mention} for your GitHub job"

if use_followup:
await interaction.followup.send(message)
else:
await interaction.response.send_message(message)
# Send message or append message
await send_discord_message(interaction, message)

await thread.send(f"Processing `{script.filename}` with {gpu_type.name}...")

Expand Down
182 changes: 124 additions & 58 deletions src/discord-cluster-manager/cogs/leaderboard_cog.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,18 @@
import discord
from discord import ui, SelectOption, Interaction
from discord import app_commands
from discord.ext import commands
from discord.app_commands.errors import CommandInvokeError
from datetime import datetime

from typing import TYPE_CHECKING
from consts import GitHubGPU, ModalGPU
from utils import extract_score, get_user_from_id
from utils import extract_score, get_user_from_id, setup_logging, send_discord_message

import random

logger = setup_logging()


class LeaderboardSubmitCog(app_commands.Group):
def __init__(
Expand All @@ -23,8 +27,6 @@ def __init__(
@app_commands.describe(
leaderboard_name="Name of the competition / kernel to optimize",
script="The Python / CUDA script file to run",
dtype="dtype (e.g. FP32, BF16, FP4) that the input and output expects.",
shape="Data input shape as a tuple",
)
# TODO: Modularize this so all the write functionality is in here. Haven't figured
# a good way to do this yet.
Expand All @@ -33,8 +35,6 @@ async def submit(
interaction: discord.Interaction,
leaderboard_name: str,
script: discord.Attachment,
dtype: app_commands.Choice[str] = None,
shape: app_commands.Choice[str] = None,
):
pass

Expand All @@ -53,8 +53,6 @@ async def submit_modal(
leaderboard_name: str,
script: discord.Attachment,
gpu_type: app_commands.Choice[str],
dtype: app_commands.Choice[str] = "fp32",
shape: app_commands.Choice[str] = None,
):
try:
# Read the template file
Expand Down Expand Up @@ -110,8 +108,6 @@ async def submit_github(
leaderboard_name: str,
script: discord.Attachment,
gpu_type: app_commands.Choice[str],
dtype: app_commands.Choice[str] = "fp32",
shape: app_commands.Choice[str] = None,
):
# Read the template file
submission_content = await script.read()
Expand Down Expand Up @@ -148,15 +144,13 @@ async def submit_github(
return

github_command = github_cog.run_github
print(github_command)
try:
github_thread = await github_command.callback(
github_cog,
interaction,
script,
gpu_type,
reference_code=reference_code,
use_followup=True,
)
except discord.errors.NotFound as e:
print(f"Webhook not found: {e}")
Expand All @@ -180,7 +174,11 @@ async def submit_github(
"submission_score": score,
})

user_id = interaction.user.global_name if interaction.user.nick is None else interaction.user.nick
user_id = (
interaction.user.global_name
if interaction.user.nick is None
else interaction.user.nick
)
await interaction.followup.send(
"Successfully ran on GitHub runners!\n"
+ f"Leaderboard '{leaderboard_name}'.\n"
Expand All @@ -195,10 +193,35 @@ async def submit_github(
)


class GPUSelectionView(ui.View):
    """View containing a single multi-select menu of GPU names.

    The chosen values are stored on ``self.selected_gpus`` by
    ``select_callback``. NOTE: that attribute is only assigned inside the
    callback, so it does not exist until a selection has been made.
    """

    def __init__(self, available_gpus: list[str]):
        """Build the select menu.

        Args:
            available_gpus: GPU names to offer; each name is used as both the
                label and the value of its option.
        """
        super().__init__()

        # Between one and all of the offered GPUs may be picked at once.
        gpu_menu = ui.Select(
            placeholder="Select GPUs for this leaderboard...",
            options=[
                SelectOption(label=gpu_name, value=gpu_name)
                for gpu_name in available_gpus
            ],
            min_values=1,
            max_values=len(available_gpus),
        )
        # Route the menu's interaction to this view's handler.
        gpu_menu.callback = self.select_callback
        self.add_item(gpu_menu)

    async def select_callback(self, interaction: Interaction):
        """Record the chosen GPUs, confirm them to the user, and stop the view."""
        # The selected option values are read from the raw interaction payload.
        chosen = interaction.data["values"]
        self.selected_gpus = chosen

        await interaction.response.send_message(
            f"Selected GPUs: {', '.join(self.selected_gpus)}",
            ephemeral=True,
        )

        # Stopping the view lets code awaiting the view's wait() continue.
        self.stop()


class LeaderboardCog(commands.Cog):
def __init__(self, bot):
self.bot: commands.Bot = bot
self.get_leaderboards = bot.leaderboard_group.command(name="get")(
self.get_leaderboards = bot.leaderboard_group.command(name="list")(
self.get_leaderboards
)
self.leaderboard_create = bot.leaderboard_group.command(
Expand Down Expand Up @@ -246,36 +269,71 @@ async def leaderboard_create(
deadline: str,
reference_code: discord.Attachment,
):
# Try parsing with time first
try:
# Try parsing with time first
date_value = datetime.strptime(deadline, "%Y-%m-%d %H:%M")
except ValueError:
try:
date_value = datetime.strptime(deadline, "%Y-%m-%d %H:%M")
except ValueError:
# If that fails, try parsing just the date (will set time to 00:00)
date_value = datetime.strptime(deadline, "%Y-%m-%d")
except ValueError as ve:
logger.error(f"Value Error: {str(ve)}", exc_info=True)
await interaction.response.send_message(
"Invalid date format. Please use YYYY-MM-DD or YYYY-MM-DD HH:MM",
ephemeral=True,
)
return

# Ask the user to select GPUs
view = GPUSelectionView([gpu.name for gpu in GitHubGPU])

await send_discord_message(
interaction,
"Please select GPUs for this leaderboard:",
view=view,
ephemeral=True,
)

# Wait until the user makes a selection
await view.wait()

# Kind of messy, but separate date try/catch
try:
# Read the template file
template_content = await reference_code.read()

with self.bot.leaderboard_db as db:
print(
leaderboard_name,
type(date_value),
type(template_content.decode("utf-8")),
)
db.create_leaderboard({
err = db.create_leaderboard({
"name": leaderboard_name,
"deadline": date_value,
"reference_code": template_content.decode("utf-8"),
"gpu_types": view.selected_gpus,
})

await interaction.response.send_message(
if err:
if "duplicate key" in err:
await interaction.followup.send(
f'Error: Tried to create a leaderboard "{leaderboard_name}" that already exists.',
ephemeral=True,
)
Comment on lines +313 to +317
Copy link
Collaborator

@b9r5 b9r5 Dec 15, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems like, within the create_leaderboard implementation in leaderboard_db.py, we should examine the message in e and sanitize it. We could return a message containing "duplicate key" if we found that string, and we could check for other strings that we've found to occur. I just want to be careful not to leak sensitive information by returning the full string of e.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not 100% sure what you mean by this. Do you mean we should sanitize the "err" string even if we never print it out?

else:
# Handle any other errors
logger.error(f"Error in leaderboard creation: {err}")
await interaction.followup.send(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems like a good idea to log the error in this case, so that we can look at the logs and then add more informative error messages.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah this is fair, I've added logging information for the specific error here, but the Discord bot produces a fixed string.

"Error in leaderboard creation.",
ephemeral=True,
)
return

await interaction.followup.send(
f"Leaderboard '{leaderboard_name}'. Reference code: {reference_code}. Submission deadline: {date_value}",
ephemeral=True,
)
except ValueError:
await interaction.response.send_message(
"Invalid date format. Please use YYYY-MM-DD or YYYY-MM-DD HH:MM",

except Exception as e:
logger.error(f"Error in leaderboard creation: {e}")
# Handle any other errors
await interaction.followup.send(
"Error in leaderboard creation.",
ephemeral=True,
)

Expand All @@ -284,43 +342,51 @@ async def get_leaderboard_submissions(
self,
interaction: discord.Interaction,
leaderboard_name: str,
dtype: app_commands.Choice[str] = "fp32",
):
with self.bot.leaderboard_db as db:
# TODO: query that gets leaderboard id given leaderboard name
leaderboard_id = db.get_leaderboard(leaderboard_name)["id"]
if not leaderboard_id:
try:
with self.bot.leaderboard_db as db:
# TODO: query that gets leaderboard id given leaderboard name
leaderboard_id = db.get_leaderboard(leaderboard_name)["id"]
if not leaderboard_id:
await interaction.response.send_message(
f'Leaderboard "{leaderboard_name}" not found.', ephemeral=True
)
return

submissions = db.get_leaderboard_submissions(leaderboard_name)

if not submissions:
await interaction.response.send_message(
"Leaderboard not found.", ephemeral=True
f'No submissions found for "{leaderboard_name}".', ephemeral=True
)
return

# submissions = db.get_leaderboard_submissions(leaderboard_id) # Add dtype
submissions = db.get_leaderboard_submissions(leaderboard_name) # Add dtype

if not submissions:
await interaction.response.send_message(
"No submissions found.", ephemeral=True
# Create embed
embed = discord.Embed(
title=f'Leaderboard Submissions for "{leaderboard_name}"',
color=discord.Color.blue(),
)
return

# Create embed
embed = discord.Embed(
title=f'Leaderboard Submissions for "{leaderboard_name}"',
color=discord.Color.blue(),
)

for submission in submissions:
user_id = await get_user_from_id(
submission["user_id"], interaction, self.bot
)
print("members", interaction.guild.members)
print(user_id)
for submission in submissions:
user_id = await get_user_from_id(
submission["user_id"], interaction, self.bot
)

embed.add_field(
name=f"{user_id}: {submission['submission_name']}",
value=f"Submission speed: {submission['submission_score']}",
inline=False,
)
embed.add_field(
name=f"{user_id}: {submission['submission_name']}",
value=f"Submission speed: {submission['submission_score']}",
inline=False,
)

await interaction.response.send_message(embed=embed)
await interaction.response.send_message(embed=embed)
except Exception as e:
logger.error(str(e))
if "'NoneType' object is not subscriptable" in str(e):
await interaction.response.send_message(
f"The leaderboard '{leaderboard_name}' doesn't exist.",
ephemeral=True,
)
else:
await interaction.response.send_message(
"An unknown error occurred.", ephemeral=True
)
Loading
Loading