diff --git a/jiraissues.py b/jiraissues.py index 44ebd86..d3ba05f 100644 --- a/jiraissues.py +++ b/jiraissues.py @@ -1,5 +1,6 @@ """Helper functions for working with Jira issues.""" +import dis import logging from dataclasses import dataclass, field from datetime import datetime @@ -11,10 +12,10 @@ _logger = logging.getLogger(__name__) # Custom field IDs -_CF_EPIC_LINK = "customfield_12311140" # any -_CF_FEATURE_LINK = "customfield_12318341" # issuelinks -_CF_PARENT_LINK = "customfield_12313140" # any -_CF_STATUS_SUMMARY = "customfield_12320841" # string +CF_EPIC_LINK = "customfield_12311140" # any +CF_FEATURE_LINK = "customfield_12318341" # issuelinks +CF_PARENT_LINK = "customfield_12313140" # any +CF_STATUS_SUMMARY = "customfield_12320841" # string @dataclass @@ -100,7 +101,7 @@ def __init__(self, client: Jira, issue_key: str) -> None: "labels", "resolution", "updated", - _CF_STATUS_SUMMARY, + CF_STATUS_SUMMARY, ] data = _check(client.issue(issue_key, fields=",".join(fields))) @@ -120,7 +121,7 @@ def __init__(self, client: Jira, issue_key: str) -> None: else "Unresolved" ) self.updated: datetime = datetime.fromisoformat(data["fields"]["updated"]) - self.status_summary: str = data["fields"].get(_CF_STATUS_SUMMARY) or "" + self.status_summary: str = data["fields"].get(CF_STATUS_SUMMARY) or "" self._changelog: Optional[List[ChangelogEntry]] = None self._comments: Optional[List[Comment]] = None self._related: Optional[List[RelatedIssue]] = None @@ -186,57 +187,69 @@ def comments(self) -> List[Comment]: self._comments = self._fetch_comments() return self._comments - def _fetch_related(self) -> List[RelatedIssue]: + def _fetch_related(self) -> List[RelatedIssue]: # pylint: disable=too-many-branches """Fetch the related issues from the API.""" fields = [ "issuelinks", "subtasks", - _CF_EPIC_LINK, - _CF_PARENT_LINK, - _CF_FEATURE_LINK, + CF_EPIC_LINK, + CF_PARENT_LINK, + CF_FEATURE_LINK, ] + found_issues: set[str] = set() _logger.debug("Retrieving related links for %s", self.key) data = _check(self.client.issue(self.key, fields=",".join(fields))) # Get the related issues related: List[RelatedIssue] = [] for link in data["fields"]["issuelinks"]: - if "inwardIssue" in link: + if "inwardIssue" in link and link["inwardIssue"]["key"] not in found_issues: related.append( RelatedIssue( key=link["inwardIssue"]["key"], how=link["type"]["inward"] ) ) - elif "outwardIssue" in link: + found_issues.add(link["inwardIssue"]["key"]) + elif ( + "outwardIssue" in link + and link["outwardIssue"]["key"] not in found_issues + ): related.append( RelatedIssue( key=link["outwardIssue"]["key"], how=link["type"]["outward"] ) ) + found_issues.add(link["outwardIssue"]["key"]) # Get the sub-tasks for subtask in data["fields"]["subtasks"]: - related.append(RelatedIssue(key=subtask["key"], how=_HOW_SUBTASK)) + if subtask["key"] not in found_issues: + related.append(RelatedIssue(key=subtask["key"], how=_HOW_SUBTASK)) + found_issues.add(subtask["key"]) # Get the parent task(s) and epic links from the custom fields custom_fields = [ - (_CF_EPIC_LINK, "Epic Link"), # Upward link to epic - (_CF_PARENT_LINK, "Parent Link"), + (CF_EPIC_LINK, "Epic Link"), # Upward link to epic + (CF_PARENT_LINK, "Parent Link"), ] for cfield, how in custom_fields: if cfield in data["fields"].keys() and data["fields"][cfield] is not None: - related.append(RelatedIssue(key=data["fields"][cfield], how=how)) + if data["fields"][cfield] not in found_issues: + related.append(RelatedIssue(key=data["fields"][cfield], how=how)) + found_issues.add(data["fields"][cfield]) # The Feature Link has to be handled separately if ( - _CF_FEATURE_LINK in data["fields"].keys() - and data["fields"][_CF_FEATURE_LINK] is not None + CF_FEATURE_LINK in data["fields"].keys() + and data["fields"][CF_FEATURE_LINK] is not None ): - related.append( - RelatedIssue( - key=data["fields"][_CF_FEATURE_LINK]["key"], - how="Feature Link", + if data["fields"][CF_FEATURE_LINK]["key"] not in found_issues: + related.append( + RelatedIssue( + key=data["fields"][CF_FEATURE_LINK]["key"], + how="Feature Link", + ) ) - ) + found_issues.add(data["fields"][CF_FEATURE_LINK]["key"]) # Issues in the epic requires a query since there's no pointer from the epic # issue to it's children. epic_issues returns an error if the issue is not @@ -244,14 +257,18 @@ def _fetch_related(self) -> List[RelatedIssue]: if self.issue_type == "Epic": issues_in_epic = _check(self.client.epic_issues(self.key, fields="key")) for i in issues_in_epic["issues"]: - related.append(RelatedIssue(key=i["key"], how=_HOW_INEPIC)) + if i["key"] not in found_issues: + related.append(RelatedIssue(key=i["key"], how=_HOW_INEPIC)) + found_issues.add(i["key"]) else: # Non-epic issues use the parent link issues_with_parent = _check( self.client.jql(f"'Parent Link' = '{self.key}'", limit=50, fields="key") ) for i in issues_with_parent["issues"]: - related.append(RelatedIssue(key=i["key"], how=_HOW_INPARENT)) + if i["key"] not in found_issues: + related.append(RelatedIssue(key=i["key"], how=_HOW_INPARENT)) + found_issues.add(i["key"]) return related @@ -356,7 +373,7 @@ def update_status_summary(self, contents: str) -> None: - contents: The new description to set. """ _logger.info("Sending updated status summary for %s to server", self.key) - fields = {_CF_STATUS_SUMMARY: contents} + fields = {CF_STATUS_SUMMARY: contents} self.client.update_issue_field(self.key, fields) # type: ignore self.status_summary = contents issue_cache.remove(self.key) # Invalidate any cached copy diff --git a/summarizer.py b/summarizer.py index cba3efd..e36cdbc 100644 --- a/summarizer.py +++ b/summarizer.py @@ -5,7 +5,8 @@ import os import textwrap from datetime import UTC, datetime -from typing import List, Tuple +from optparse import Option +from typing import List, Optional, Tuple from atlassian import Jira # type: ignore from genai import Client, Credentials @@ -13,7 +14,8 @@ from genai.schema import DecodingMethod, TextGenerationParameters from langchain_core.language_models import LLM -from jiraissues import Issue, Myself, RelatedIssue, issue_cache +import text_wrapper +from jiraissues import CF_STATUS_SUMMARY, Issue, Myself, RelatedIssue, issue_cache _logger = logging.getLogger(__name__) @@ -36,6 +38,17 @@ # The default column width to wrap text to. _WRAP_COLUMN = 78 +_wrapper = text_wrapper.TextWrapper(SUMMARY_START_MARKER, SUMMARY_END_MARKER) + +_self: Optional[Myself] = None + + +def self(client: Jira) -> Myself: + global _self + if _self is None: + _self = Myself(client) + return _self + # pylint: disable=too-many-locals def summarize_issue( @@ -67,7 +80,7 @@ def summarize_issue( # return what's there if not regenerate and is_summary_current(issue): _logger.debug("Summary for %s is current, using that.", issue.key) - return get_aisummary(issue.description) + return _wrapper.get(issue.status_summary) or "" # if we have not reached max-depth, summarize the child issues for inclusion in this summary child_summaries: List[Tuple[RelatedIssue, str]] = [] @@ -132,7 +145,7 @@ def summarize_issue( Status/Resolution: {issue.status}/{issue.resolution} === Description === -{strip_aisummary(issue.description)} +{issue.description} === Comments === {comment_block.getvalue()} @@ -147,12 +160,12 @@ def summarize_issue( * Include an overview of any significant discussions or decisions, with their reasoning and outcome. * Highlight any recent updates or changes that effect the completion of the issue. * Use only the information below to create your summary. +* Include only the text of your summary in the response with no formatting. +* Limit your summary to 100 words or less. ``` {full_description} ``` - -Here is a short summary in less than 100 words: """ _logger.info("Summarizing %s via LLM", issue.key) @@ -160,65 +173,13 @@ def summarize_issue( chat = _chat_model() summary = chat.invoke(llm_prompt, stop=["<|endoftext|>"]).strip() + folded_summary = textwrap.fill(summary, width=_WRAP_COLUMN) if send_updates and is_ok_to_post_summary(issue): # Replace any existing AI summary w/ the updated one - new_description = ( - strip_aisummary(issue.description) + "\n\n" + wrap_aisummary(summary) + issue.update_status_summary( + _wrapper.upsert(issue.status_summary, folded_summary) ) - issue.update_status_summary(new_description) - - return textwrap.fill(summary, width=_WRAP_COLUMN) - - -def wrap_aisummary(text: str, width: int = _WRAP_COLUMN) -> str: - """ - Wrap the AI summary in markers so it can be stripped later, and wrap the - text to the specified width so that it is easier to read. - - Parameters: - - text: The text to wrap. - - width: The width to wrap the text to. - - Returns: - The wrapped text. - """ - return f"{SUMMARY_START_MARKER}\n{textwrap.fill(text, width=width)}\n{SUMMARY_END_MARKER}" - - -def strip_aisummary(text: str) -> str: - """ - Remove the AI summary from a block of text. This removes the summary by - finding the start and end markers, and removing all the text beween. - - Parameters: - - text: The text to strip. - - Returns: - The text with the summary removed. - """ - start = text.find(SUMMARY_START_MARKER) - end = text.find(SUMMARY_END_MARKER) - if start == -1 or end == -1: - return text - return text[:start] + text[end + len(SUMMARY_END_MARKER) :] - - -def get_aisummary(text: str) -> str: - """ - Extract the AI summary from a block of text. This extracts the summary by - finding the start and end markers, and returning the text beween. - - Parameters: - - text: The text to extract the summary from. - - Returns: - The extracted summary. - """ - start = text.find(SUMMARY_START_MARKER) - end = text.find(SUMMARY_END_MARKER) - if start == -1 or end == -1: - return "" - return text[start + len(SUMMARY_START_MARKER) : end].strip() + return folded_summary def summary_last_updated(issue: Issue) -> datetime: @@ -235,11 +196,11 @@ def summary_last_updated(issue: Issue) -> datetime: # The summary is never in the initial creation of the issue, therefore, # there will be a record of it in the changelog. - if issue.last_change is None or SUMMARY_START_MARKER not in issue.description: + if issue.last_change is None or SUMMARY_START_MARKER not in issue.status_summary: return last_update for change in issue.changelog: - if change.author == Myself(issue.client).display_name and "description" in [ + if change.author == self(issue.client).display_name and "Status Summary" in [ chg.field for chg in change.changes ]: last_update = max(last_update, change.created) @@ -261,14 +222,34 @@ def is_summary_current(issue: Issue) -> bool: True if the summary is current, False otherwise """ if SUMMARY_ALLOWED_LABEL not in issue.labels: - return True # We're not allowed to summarize it, so it's always current + _logger.debug( + "is_summary_current: no - Issue %s is not allowed to have a summary", + issue.key, + ) + return False # We're not allowed to summarize it, so it's never current last_update = summary_last_updated(issue) + if issue.updated > last_update: + # It's been changed since we last updated the summary + _logger.debug( + "is_summary_current: no - Issue %s has been updated more recently than summary %s > %s", + issue.key, + issue.updated.isoformat(), + last_update.isoformat(), + ) + return False for child in issue.children: child_issue = issue_cache.get_issue(issue.client, child.key) if child_issue.updated > last_update: + # A child issue has been updated since we last updated the summary + _logger.debug( + "is_summary_current: no - Issue %s has more recently updated child %s", + issue.key, + child_issue.key, + ) return False - return issue.updated == last_update + _logger.debug("is_summary_current: yes - Issue %s is current", issue.key) + return True def is_ok_to_post_summary(issue: Issue) -> bool: @@ -343,7 +324,7 @@ def get_issues_to_summarize( """ # The time format for the query needs to be in the local timezone of the # user, so we need to convert - user_zi = Myself(client).tzinfo + user_zi = self(client).tzinfo since_string = since.astimezone(user_zi).strftime("%Y-%m-%d %H:%M") updated_issues = client.jql( f"labels = '{SUMMARY_ALLOWED_LABEL}' and updated >= '{since_string}' ORDER BY updated DESC",