From ad334941f4f98842c7abc325342792ff9e3c5c85 Mon Sep 17 00:00:00 2001 From: John Strunk Date: Tue, 11 Jun 2024 19:12:17 +0000 Subject: [PATCH 1/2] Add script to generate status roll-ups Signed-off-by: John Strunk --- jiraissues.py | 32 ++++++++++- rollup.ipynb | 140 +++++++++++++++++++++++++++++++++++++++++++++++ rollup_status.py | 119 ++++++++++++++++++++++++++++++++++++++++ summarizer.py | 29 +++++++++- 4 files changed, 314 insertions(+), 6 deletions(-) create mode 100644 rollup.ipynb create mode 100755 rollup_status.py diff --git a/jiraissues.py b/jiraissues.py index a4328ff..0614629 100644 --- a/jiraissues.py +++ b/jiraissues.py @@ -126,6 +126,19 @@ def __init__(self, data: dict[str, Any]) -> None: def __str__(self) -> str: return f"{self.display_name} ({self.key})" + def __hash__(self) -> int: + return hash((self.key, self.name, self.display_name, self.timezone)) + + def __eq__(self, other: object) -> bool: + if not isinstance(other, User): + return False + return ( + self.key == other.key + and self.name == other.name + and self.display_name == other.display_name + and self.timezone == other.timezone + ) + class Issue: # pylint: disable=too-many-instance-attributes """ @@ -152,7 +165,18 @@ def __init__(self, client: Jira, issue_key: str) -> None: CF_BLOCKED_REASON, CF_CONTRIBUTORS, "comment", + "assignee", ] + # Need to Handle 403 errors + # DEBUG:urllib3.connectionpool:https://server.com:443 "GET + # /rest/api/2/issue/XXXX-16688?fields=summary,...,comment HTTP/1.1" 403 + # None + # DEBUG:atlassian.rest_client:HTTP: GET + # rest/api/2/issue/XXXX-16688?fields=summary,...,comment -> 403 + # Forbidden + # DEBUG:atlassian.rest_client:HTTP: Response text -> + # {"errorMessages":["You do not have the permission to see the specified + # issue."],"errors":{}} data = check_response( with_retry(lambda: client.issue(issue_key, fields=",".join(fields))) ) @@ -185,14 +209,16 @@ def __init__(self, client: Jira, issue_key: str) -> None: self.blocked = str(blocked_dict.get("value", "False")).lower() in ["true"] self.blocked_reason = str(data["fields"].get(CF_BLOCKED_REASON) or "") self.contributors = { - User(user) for user in data["fields"].get(CF_CONTRIBUTORS, []) + User(user) for user in (data["fields"].get(CF_CONTRIBUTORS) or []) } + self.assignee = ( + User(data["fields"]["assignee"]) if data["fields"]["assignee"] else None + ) _logger.info("Retrieved issue: %s", self) def __str__(self) -> str: - updated = self.updated.strftime("%Y-%m-%d %H:%M:%S") return ( - f"{self.key} ({self.issue_type}) {updated} - " + f"{self.key} ({self.issue_type}) - " + f"{self.summary} ({self.status}/{self.resolution})" ) diff --git a/rollup.ipynb b/rollup.ipynb new file mode 100644 index 0000000..0522842 --- /dev/null +++ b/rollup.ipynb @@ -0,0 +1,140 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Create a JIRA client\n", + "from os import environ\n", + "from atlassian import Jira\n", + "\n", + "jira_api_token = environ.get(\"JIRA_TOKEN\", \"\")\n", + "jira_url = environ.get(\"JIRA_URL\", \"\")\n", + "client = Jira(url=jira_url, token=jira_api_token)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Import our local modules\n", + "from jiraissues import Issue, issue_cache\n", + "from summarizer import summarize_issue, get_chat_model, rollup_contributors" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "epic_to_summarize = \"OCTO-2\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Get the existing summaries from the Jira issues\n", + "child_inputs = []\n", + "epic = issue_cache.get_issue(client, epic_to_summarize)\n", + "for child in epic.children:\n", + " issue = issue_cache.get_issue(client, child.key)\n", + " text = f\"{issue}\\n\"\n", + " text += summarize_issue(issue, max_depth=1)\n", + " child_inputs.append({\"issue\": issue, \"summary\": text})\n", + "\n", + "# Sort the issues by key\n", + "child_inputs.sort(key=lambda x: x[\"issue\"].key)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Generate the individual exec summaries\n", + "import textwrap\n", + "llm = get_chat_model(\"meta-llama/llama-3-70b-instruct\", max_new_tokens=2048)\n", + "for item in child_inputs:\n", + " data = f\"\"\"\\\n", + "{item[\"issue\"]}\n", + "{item[\"summary\"]}\n", + "Contributors: {', '.join(c.display_name for c in item[\"issue\"].contributors)}\"\"\"\n", + " prompt = f\"\"\"\\\n", + "Condense the following technical status update into a short, high-level summary for an engineering leader.\n", + "Focus on the high-level objective, keeping the technical detail to a minimum.\n", + "Where possible, avoid mentioning specific issue IDs.\n", + "\n", + "{data}\n", + "\n", + "Please provide your converted summary with no formatting or bullet points:\n", + "\"\"\"\n", + " summary = llm.invoke(prompt, stop=[\"<|endoftext|>\"])\n", + " item[\"exec_summary\"] = textwrap.fill(summary).strip()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for item in child_inputs:\n", + " issue = item[\"issue\"]\n", + " print(f\"**{issue.key} - {issue.summary}**\")\n", + " print(item[\"exec_summary\"])\n", + " contributors = sorted(rollup_contributors(item[\"issue\"]), key=lambda x: x.display_name.split()[-1])\n", + " if contributors:\n", + " print(f\"Contributors: {', '.join([c.display_name for c in contributors])}\")\n", + " print()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Generate the overall exec summary\n", + "prompt = f\"\"\"\\\n", + "Given the following high-level summaries of our group's work, please provide a short, one-paragraph summary of this initiative for a corporate leader:\n", + "\n", + "{\"\\n\".join([item[\"exec_summary\"] for item in child_inputs])}\n", + "\n", + "Please provide just the summary paragraph, with no header.\n", + "\"\"\"\n", + "paragraph = llm.invoke(prompt, stop=[\"<|endoftext|>\"])\n", + "print(paragraph.strip())\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/rollup_status.py b/rollup_status.py new file mode 100755 index 0000000..1d9859f --- /dev/null +++ b/rollup_status.py @@ -0,0 +1,119 @@ +#! /usr/bin/env python + +"""Roll-up the status of Jira issues into a single document""" + +import argparse +import logging +import os +import textwrap +from dataclasses import dataclass, field + +from atlassian import Jira # type: ignore + +from jiraissues import Issue, User, issue_cache +from summarizer import get_chat_model, rollup_contributors, summarize_issue + +LINK_BASE = "https://issues.redhat.com/browse/" + + +@dataclass +class IssueSummary: + """Summary of an issue""" + + issue: Issue + summary: str = "" + exec_summary: str = "" + contributors: set[User] = field(default_factory=set) + + +def main() -> None: # pylint: disable=too-many-locals + """Main function""" + # pylint: disable=duplicate-code + parser = argparse.ArgumentParser(description="Generate an issue summary roll-up") + parser.add_argument( + "--log-level", + default="WARNING", + choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], + help="Set the logging level", + ) + parser.add_argument("jira_issue_key", type=str, help="JIRA issue key") + + args = parser.parse_args() + logging.basicConfig(level=getattr(logging, str(args.log_level).upper())) + issue_key: str = args.jira_issue_key + + client = Jira(url=os.environ["JIRA_URL"], token=os.environ["JIRA_TOKEN"]) + + # Get the existing summaries from the Jira issues + logging.info("Collecting issue summaries for children of %s", issue_key) + child_inputs: list[IssueSummary] = [] + epic = issue_cache.get_issue(client, issue_key) + for child in epic.children: + issue = issue_cache.get_issue(client, child.key) + text = f"{issue}\n" + text += summarize_issue(issue, max_depth=1) + child_inputs.append( + IssueSummary( + issue=issue, summary=text, contributors=rollup_contributors(issue) + ) + ) + + # Sort the issues by key + child_inputs.sort(key=lambda x: x.issue.key) + + # Generate the individual exec summaries + llm = get_chat_model("meta-llama/llama-3-70b-instruct", max_new_tokens=2048) + for item in child_inputs: + logging.info("Generating an executive summary for %s", item.issue.key) + data = f"""\ +{item.issue} +{item.summary} +Contributors: {', '.join(c.display_name for c in item.contributors)}""" + prompt = f"""\ +Condense the following technical status update into a short, high-level summary for an engineering leader. +Focus on the high-level objective, keeping the technical detail to a minimum. +Where possible, avoid mentioning specific issue IDs. + +{data} + +Please provide just the summary paragraph, with no header. +""" + summary = llm.invoke(prompt, stop=["<|endoftext|>"]).strip() + item.exec_summary = textwrap.fill(summary) + + # Generate the overall exec summary + logging.info("Generating the overall executive summary") + prompt = f"""\ +Given the following high-level summaries of our group's work, please provide a short, one-paragraph summary of this initiative for a corporate leader: + +{"\n".join([item.exec_summary for item in child_inputs])} + +Please provide just the summary paragraph, with no header. +""" + exec_paragraph = textwrap.fill(llm.invoke(prompt, stop=["<|endoftext|>"]).strip()) + + # Generate the overall status update + print(f"# Executive Summary for [{issue_key}]({LINK_BASE}{issue_key})") + print() + print(exec_paragraph) + print() + print("## Individual issue status") + print() + for item in child_inputs: + issue = item.issue + print(f"### [{issue.key}]({LINK_BASE}{issue.key}) - {issue.summary}") + print() + print(item.exec_summary) + print() + contributors = sorted( + item.contributors, key=lambda x: x.display_name.split()[-1] + ) + if contributors: + print( + f"**Contributors:** {', '.join([c.display_name for c in contributors])}" + ) + print() + + +if __name__ == "__main__": + main() diff --git a/summarizer.py b/summarizer.py index adb561a..9a9ba80 100644 --- a/summarizer.py +++ b/summarizer.py @@ -22,6 +22,7 @@ from jiraissues import ( Issue, RelatedIssue, + User, check_response, descendants, get_self, @@ -202,7 +203,7 @@ def summarize_issue( # pylint: disable=too-many-arguments,too-many-branches,too ) _logger.debug("Prompt:\n%s", llm_prompt) - chat = _chat_model() + chat = get_chat_model() summary = chat.invoke(llm_prompt, stop=["<|endoftext|>"]).strip() folded_summary = textwrap.fill(summary, width=_WRAP_COLUMN) if send_updates and is_ok_to_post_summary(issue): @@ -313,7 +314,7 @@ def _genai_client() -> Client: return client -def _chat_model(model_name: str = _MODEL_ID) -> LLM: +def get_chat_model(model_name: str = _MODEL_ID, max_new_tokens=4000) -> LLM: """ Return a chat model to use for summarization. @@ -329,7 +330,7 @@ def _chat_model(model_name: str = _MODEL_ID) -> LLM: client=client, parameters=TextGenerationParameters( decoding_method=DecodingMethod.SAMPLE, - max_new_tokens=4000, + max_new_tokens=max_new_tokens, min_new_tokens=10, temperature=0.5, top_k=50, @@ -496,3 +497,25 @@ def add_summary_label_to_descendants(client: Jira, issue_key: str) -> None: for key in desc: issue = issue_cache.get_issue(client, key) add_summary_label(issue) + + +def rollup_contributors(issue: Issue, include_assignee=True) -> set[User]: + """ + Roll up the set of contributors from the issue and its children. + + Parameters: + - issue: The issue to roll up the contributors from + - include_assignee: Include the issue assignee in the set of + contributors + + Returns: + The set of contributors + """ + contributors = set() + for child in issue.children: + child_issue = issue_cache.get_issue(issue.client, child.key) + contributors.update(rollup_contributors(child_issue)) + contributors.update(issue.contributors) + if include_assignee and issue.assignee is not None: + contributors.add(issue.assignee) + return contributors From 9823eee47e73158a65b84827b91e43ee591b97f6 Mon Sep 17 00:00:00 2001 From: John Strunk Date: Tue, 11 Jun 2024 19:34:17 +0000 Subject: [PATCH 2/2] Directly specify python version for pipx commands Signed-off-by: John Strunk --- .github/workflows/workflow.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/workflow.yaml b/.github/workflows/workflow.yaml index 22b8f7b..539b821 100644 --- a/.github/workflows/workflow.yaml +++ b/.github/workflows/workflow.yaml @@ -44,10 +44,10 @@ jobs: pre-commit| - name: Run pre-commit checks - run: pipx run pre-commit run -a + run: pipx run --python ${{ steps.setup-py.outputs.python-version}} pre-commit run -a - name: Run pre-commit gc - run: pipx run pre-commit gc + run: pipx run --python ${{ steps.setup-py.outputs.python-version}} pre-commit gc tests: name: "Tests" @@ -67,7 +67,7 @@ jobs: python-version: "3.12" - name: Install pipenv - run: pipx install pipenv + run: pipx install --python ${{ steps.setup-py.outputs.python-version}} pipenv - name: Install dependencies run: pipenv install --dev