Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

QoL changes: Add GPU types view, remove dtype and shapes info. No DB or functionality. #52

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
190 changes: 134 additions & 56 deletions src/discord-cluster-manager/cogs/leaderboard_cog.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import discord
from discord import ui, SelectOption, Interaction
from discord import app_commands
from discord.ext import commands
from discord.app_commands.errors import CommandInvokeError
from datetime import datetime

from typing import TYPE_CHECKING
Expand All @@ -23,8 +25,6 @@ def __init__(
@app_commands.describe(
leaderboard_name="Name of the competition / kernel to optimize",
script="The Python / CUDA script file to run",
dtype="dtype (e.g. FP32, BF16, FP4) that the input and output expects.",
shape="Data input shape as a tuple",
)
# TODO: Modularize this so all the write functionality is in here. Haven't figured
# a good way to do this yet.
Expand All @@ -33,8 +33,6 @@ async def submit(
interaction: discord.Interaction,
leaderboard_name: str,
script: discord.Attachment,
dtype: app_commands.Choice[str] = None,
shape: app_commands.Choice[str] = None,
):
pass

Expand All @@ -53,8 +51,6 @@ async def submit_modal(
leaderboard_name: str,
script: discord.Attachment,
gpu_type: app_commands.Choice[str],
dtype: app_commands.Choice[str] = "fp32",
shape: app_commands.Choice[str] = None,
):
try:
# Read the template file
Expand Down Expand Up @@ -110,8 +106,6 @@ async def submit_github(
leaderboard_name: str,
script: discord.Attachment,
gpu_type: app_commands.Choice[str],
dtype: app_commands.Choice[str] = "fp32",
shape: app_commands.Choice[str] = None,
):
# Read the template file
submission_content = await script.read()
Expand Down Expand Up @@ -148,7 +142,6 @@ async def submit_github(
return

github_command = github_cog.run_github
print(github_command)
try:
github_thread = await github_command.callback(
github_cog,
Expand Down Expand Up @@ -180,7 +173,11 @@ async def submit_github(
"submission_score": score,
})

user_id = interaction.user.global_name if interaction.user.nick is None else interaction.user.nick
user_id = (
interaction.user.global_name
if interaction.user.nick is None
else interaction.user.nick
)
await interaction.followup.send(
"Successfully ran on GitHub runners!\n"
+ f"Leaderboard '{leaderboard_name}'.\n"
Expand All @@ -195,10 +192,35 @@ async def submit_github(
)


class GPUSelectionView(ui.View):
def __init__(self, available_gpus: list[str]):
super().__init__()

# Add the Select Menu with the list of GPU options
select = ui.Select(
placeholder="Select GPUs for this leaderboard...",
options=[SelectOption(label=gpu, value=gpu) for gpu in available_gpus],
min_values=1, # Minimum number of selections
max_values=len(available_gpus), # Maximum number of selections
)
select.callback = self.select_callback
self.add_item(select)

async def select_callback(self, interaction: Interaction):
# Retrieve the selected options
select = interaction.data["values"]
self.selected_gpus = select
await interaction.response.send_message(
f"Selected GPUs: {', '.join(self.selected_gpus)}",
ephemeral=True,
)
self.stop()


class LeaderboardCog(commands.Cog):
def __init__(self, bot):
self.bot: commands.Bot = bot
self.get_leaderboards = bot.leaderboard_group.command(name="get")(
self.get_leaderboards = bot.leaderboard_group.command(name="list")(
self.get_leaderboards
)
self.leaderboard_create = bot.leaderboard_group.command(
Expand Down Expand Up @@ -251,76 +273,132 @@ async def leaderboard_create(
try:
date_value = datetime.strptime(deadline, "%Y-%m-%d %H:%M")
except ValueError:
# If that fails, try parsing just the date (will set time to 00:00)
date_value = datetime.strptime(deadline, "%Y-%m-%d")
try:
date_value = datetime.strptime(deadline, "%Y-%m-%d")
except ValueError:
raise ValueError(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should do interaction.response.send_message("Invalid date format. Please use...") and then return here. It's a simpler control flow.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated this, there was also a mistake in here regarding bad implementations I forgot to delete. It's been simplified.

"Invalid date format. Please use YYYY-MM-DD or YYYY-MM-DD HH:MM"
)

# Ask the user to select GPUs
view = GPUSelectionView([gpu.name for gpu in GitHubGPU])

await interaction.response.send_message(
Copy link
Collaborator

@b9r5 b9r5 Dec 15, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be interaction.followup.send(...)? If there are going to be more messages in this interaction, then interaction.response.send_message(...) will cause problems.

I think the easiest thing to do in this function is to try to only send one response, and use interrupt.response.send_message(...). And return immediately after the message is sent. Using followup can definitely be confusing.

Copy link
Collaborator Author

@alexzhang13 alexzhang13 Dec 15, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've wrapped this around our send_discord_message util to avoid this issue.

"Please select GPUs for this leaderboard:",
view=view,
ephemeral=True,
)

# Wait until the user makes a selection
await view.wait()

except ValueError as ve:
# Handle invalid date errors
await interaction.response.send_message(
str(ve),
ephemeral=True,
)

# Kind of messy, but separate date try/catch
try:
# Read the template file
template_content = await reference_code.read()

with self.bot.leaderboard_db as db:
print(
leaderboard_name,
type(date_value),
type(template_content.decode("utf-8")),
)
db.create_leaderboard({
err = db.create_leaderboard({
"name": leaderboard_name,
"deadline": date_value,
"reference_code": template_content.decode("utf-8"),
"gpu_types": view.selected_gpus,
})

await interaction.response.send_message(
if err:
print(err)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
print(err)

...or log the error.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed these, this should not have been in the commit. I was debugging at the time, thanks for catching!

if "duplicate key" in err:
await interaction.followup.send(
f'Error: Tried to create a leaderboard "{leaderboard_name}" that already exists.',
ephemeral=True,
)
Comment on lines +313 to +317
Copy link
Collaborator

@b9r5 b9r5 Dec 15, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems like in the within the create_leaderboard implementation in leaderboard_db.py, we should examine the message in e and sanitize it. We could return a message containing "duplicate key" if we found that string, and we could check for other strings that we've found to occur. I just want to be careful not to leak sensitive information by returning the full string of e.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not 100% sure what you mean by this, do you mean we sanitize the "err" string even if we never print it out?

else:
# Handle any other errors
await interaction.followup.send(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems like a good idea to log the error in this case, so that we can look at the logs and then add more informative error messages.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah this is fair, I've added logging information for the specific error here, but the Discord bot produces a fixed string.

"Error in leaderboard creation.",
ephemeral=True,
)
return

await interaction.followup.send(
f"Leaderboard '{leaderboard_name}'. Reference code: {reference_code}. Submission deadline: {date_value}",
ephemeral=True,
)
except ValueError:
await interaction.response.send_message(
"Invalid date format. Please use YYYY-MM-DD or YYYY-MM-DD HH:MM",
ephemeral=True,
)

except Exception as e:
print(str(e))
# Handle specific leaderboard name conflict
if "already exists" in str(e):
await interaction.followup.send(
str(e),
ephemeral=True,
)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How could we get here? Shouldn't this be handled in or near the `"duplicate key" area above?

(I know I'm sounding like a broken record but) we shouldn't send str(e) to the Discord user. We should log a message containing e and send a generic message to the user like "Leaderboard already exists".

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this has to do with why I ended up re-factoring create_leaderboard to return an error string, because it wasn't being captured in the try except. I've removed this, but also made sure to remove the print errors on Discord and add them to a logger instead.

else:
# Handle any other errors
await interaction.followup.send(
"Error in leaderboard creation.",
ephemeral=True,
)

@discord.app_commands.describe(leaderboard_name="Name of the leaderboard")
async def get_leaderboard_submissions(
self,
interaction: discord.Interaction,
leaderboard_name: str,
dtype: app_commands.Choice[str] = "fp32",
):
with self.bot.leaderboard_db as db:
# TODO: query that gets leaderboard id given leaderboard name
leaderboard_id = db.get_leaderboard(leaderboard_name)["id"]
if not leaderboard_id:
try:
with self.bot.leaderboard_db as db:
# TODO: query that gets leaderboard id given leaderboard name
leaderboard_id = db.get_leaderboard(leaderboard_name)["id"]
if not leaderboard_id:
await interaction.response.send_message(
"Leaderboard not found.", ephemeral=True
)
return

submissions = db.get_leaderboard_submissions(leaderboard_name)

if not submissions:
await interaction.response.send_message(
"Leaderboard not found.", ephemeral=True
"No submissions found.", ephemeral=True
)
return

# submissions = db.get_leaderboard_submissions(leaderboard_id) # Add dtype
submissions = db.get_leaderboard_submissions(leaderboard_name) # Add dtype

if not submissions:
await interaction.response.send_message(
"No submissions found.", ephemeral=True
# Create embed
embed = discord.Embed(
title=f'Leaderboard Submissions for "{leaderboard_name}"',
color=discord.Color.blue(),
)
return

# Create embed
embed = discord.Embed(
title=f'Leaderboard Submissions for "{leaderboard_name}"',
color=discord.Color.blue(),
)

for submission in submissions:
user_id = await get_user_from_id(
submission["user_id"], interaction, self.bot
)
print("members", interaction.guild.members)
print(user_id)
for submission in submissions:
user_id = await get_user_from_id(
submission["user_id"], interaction, self.bot
)
print("members", interaction.guild.members)
print(user_id)

embed.add_field(
name=f"{user_id}: {submission['submission_name']}",
value=f"Submission speed: {submission['submission_score']}",
inline=False,
)
embed.add_field(
name=f"{user_id}: {submission['submission_name']}",
value=f"Submission speed: {submission['submission_score']}",
inline=False,
)

await interaction.response.send_message(embed=embed)
await interaction.response.send_message(embed=embed)
except Exception as e:
print(str(e))
if "'NoneType' object is not subscriptable" in str(e):
await interaction.response.send_message(
f"The leaderboard '{leaderboard_name}' doesn't exist.",
ephemeral=True,
)
else:
await interaction.response.send_message(
"An unknown error occurred.", ephemeral=True
)
12 changes: 8 additions & 4 deletions src/discord-cluster-manager/leaderboard_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ def __exit__(self, exc_type, exc_val, exc_tb):
"""Context manager exit"""
self.disconnect()

def create_leaderboard(self, leaderboard: LeaderboardItem):
def create_leaderboard(self, leaderboard: LeaderboardItem) -> Optional[str]:
try:
self.cursor.execute(
"""
Expand All @@ -114,8 +114,10 @@ def create_leaderboard(self, leaderboard: LeaderboardItem):
)
self.connection.commit()
except psycopg2.Error as e:
print(f"Error during leaderboard creation: {e}")
self.connection.rollback() # Ensure rollback if error occurs
return f"Error during leaderboard creation: {e}"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm nervous about returning the value with the stringified exception in it. The reason is that I'm not sure what's going to be in e. I wouldn't expect it to include sensitive information (like the database URL), but it could. What I would suggest is to log the full string (f"Error during leaderboard creation: {e}") but only return "Error during leaderboard creation".

Copy link
Collaborator Author

@alexzhang13 alexzhang13 Dec 15, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point, for now I won't log it (we can change this later) but will remove error message inside.


return None

def create_submission(self, submission: SubmissionItem):
try:
Expand All @@ -139,7 +141,9 @@ def create_submission(self, submission: SubmissionItem):
self.connection.rollback() # Ensure rollback if error occurs

def get_leaderboards(self) -> list[LeaderboardItem]:
self.cursor.execute("SELECT id, name, deadline, reference_code FROM leaderboard.problem")
self.cursor.execute(
"SELECT id, name, deadline, reference_code FROM leaderboard.problem"
)

return [
LeaderboardItem(id=lb[0], name=lb[1], deadline=lb[2], reference_code=lb[3])
Expand All @@ -149,7 +153,7 @@ def get_leaderboards(self) -> list[LeaderboardItem]:
def get_leaderboard(self, leaderboard_name: str) -> int | None:
self.cursor.execute(
"SELECT id, name, deadline, reference_code FROM leaderboard.problem WHERE name = %s",
(leaderboard_name,)
(leaderboard_name,),
)

res = self.cursor.fetchone()
Expand Down
16 changes: 15 additions & 1 deletion src/discord-cluster-manager/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
import subprocess
import datetime
import re
from typing import TypedDict
from typing import TypedDict, List
import discord


def setup_logging():
Expand Down Expand Up @@ -58,6 +59,18 @@ async def get_user_from_id(id, interaction, bot):
return id


def discord_followup_wrapper(interaction: discord.Interaction, msg: str) -> None:
"""
To get around response messages in slash commands that are
called externally, send a message using the followup.
"""
try:
await interaction.response.send_message(msg)

except Exception:
await interaction.followup.send(msg)
Copy link
Collaborator

@b9r5 b9r5 Dec 15, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if this would be better written as the following?

Suggested change
try:
await interaction.response.send_message(msg)
except Exception:
await interaction.followup.send(msg)
if interaction.response.is_done():
await interaction.followup.send(msg)
else:
await interaction.response.send_message(msg)

We do call interaction.response.defer() when we intend to use followup. And that call to defer() ends up making is_done() return True.

What makes me uncomfortable about except Exception is that we don't really know what's gone wrong.

Honestly, I think it would work either way, but I think I prefer my suggestion if it works.

Copy link
Collaborator Author

@alexzhang13 alexzhang13 Dec 15, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've changed this, I think this makes sense. Will verify that this works locally before I push though. Also renamed more generally to send_discord_message

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I also changed the logic for use_followup=True in the followup commit.



def extract_score(score_str: str) -> float:
"""
Extract score from output logs and push to DB (kind of hacky).
Expand All @@ -73,6 +86,7 @@ class LeaderboardItem(TypedDict):
name: str
deadline: datetime.datetime
reference_code: str
gpu_types: List[str]


class SubmissionItem(TypedDict):
Expand Down
Loading