From 612e5350b8fb00c565e432e31f84aea9f73b944e Mon Sep 17 00:00:00 2001 From: John Strunk Date: Thu, 23 May 2024 15:19:53 +0000 Subject: [PATCH] Fix TZ issues, autofetch comments Signed-off-by: John Strunk --- bot.py | 31 +++++++++++++++++++++++-------- jiraissues.py | 21 ++++++++++++++++----- summarizer.py | 23 ++++++++++++++++++----- 3 files changed, 57 insertions(+), 18 deletions(-) diff --git a/bot.py b/bot.py index 4742d81..6b27042 100755 --- a/bot.py +++ b/bot.py @@ -15,7 +15,7 @@ from summarizer import get_issues_to_summarize, summarize_issue -def main(): +def main() -> None: # pylint: disable=too-many-locals,too-many-statements """Main function for the bot.""" # pylint: disable=duplicate-code parser = argparse.ArgumentParser(description="Summarizer bot") @@ -32,6 +32,13 @@ def main(): choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], help="Set the logging level", ) + parser.add_argument( + "-l", + "--limit", + type=int, + default=150, + help="Maximum number of issues to summarize in each iteration", + ) parser.add_argument( "-m", "--modified-since", @@ -58,19 +65,22 @@ def main(): max_depth = args.max_depth send_updates = not args.no_update delay: int = args.seconds + limit: int = args.limit since = datetime.fromisoformat(args.modified_since).astimezone(UTC) jira = Jira(url=os.environ["JIRA_URL"], token=os.environ["JIRA_TOKEN"]) most_recent_modification = since while True: - start_time = datetime.now() + start_time = datetime.now(UTC) logging.info("Starting iteration at %s", start_time.isoformat()) issue_keys: list[str] = [] successful = False while not successful: try: - issue_keys = get_issues_to_summarize(jira, since) + (issue_keys, most_recent_modification) = get_issues_to_summarize( + jira, since, limit + ) successful = True except requests.exceptions.HTTPError as error: logging.error( @@ -82,19 +92,24 @@ def main(): except requests.exceptions.ReadTimeout as error: logging.error("ReadTimeout exception: %s", error, exc_info=True) time.sleep(5) + + if len(issue_keys) < limit - 5: + # We retrieved all the modified issues, so we can advance farther + # and avoid re-fetching old issues + most_recent_modification = start_time + logging.info("Got updates through %s", most_recent_modification.isoformat()) + for issue_key in issue_keys: successful = False while not successful: try: - issue_start_time = datetime.now() + issue_start_time = datetime.now(UTC) issue = issue_cache.get_issue(jira, issue_key) summary = summarize_issue( issue, max_depth=max_depth, send_updates=send_updates ) - elapsed = datetime.now() - issue_start_time + elapsed = datetime.now(UTC) - issue_start_time print(f"Summarized {issue_key} ({elapsed}s):\n{summary}\n") - if issue.updated > most_recent_modification: - most_recent_modification = issue.updated successful = True except requests.exceptions.HTTPError as error: logging.error( @@ -108,7 +123,7 @@ def main(): time.sleep(5) since = most_recent_modification # Only update if we succeeded logging.info("Cache stats: %s", issue_cache) - now = datetime.now() + now = datetime.now(UTC) elapsed = now - start_time print(f"Iteration elapsed time: {elapsed}") sleep_time = max(delay - elapsed.total_seconds(), 0) diff --git a/jiraissues.py b/jiraissues.py index 7201eed..3895a16 100644 --- a/jiraissues.py +++ b/jiraissues.py @@ -3,7 +3,7 @@ import logging import queue from dataclasses import dataclass, field -from datetime import datetime +from datetime import UTC, datetime from time import sleep from typing import Any, List, Optional, Set from zoneinfo import ZoneInfo @@ -106,6 +106,7 @@ def __init__(self, client: Jira, issue_key: str) -> None: "resolution", "updated", CF_STATUS_SUMMARY, + "comment", ] data = check_response(client.issue(issue_key, fields=",".join(fields))) @@ -124,15 +125,22 @@ def __init__(self, client: Jira, issue_key: str) -> None: if data["fields"]["resolution"] else "Unresolved" ) + # The "last updated" time is provided w/ TZ info self.updated: datetime = datetime.fromisoformat(data["fields"]["updated"]) self.status_summary: str = data["fields"].get(CF_STATUS_SUMMARY) or "" self._changelog: Optional[List[ChangelogEntry]] = None self._comments: Optional[List[Comment]] = None + # Go ahead and parse the comments to avoid an extra API call + self._comments = self._parse_comment_data(data["fields"]["comment"]["comments"]) self._related: Optional[List[RelatedIssue]] = None - _logger.info("Retrieved issue: %s - %s", self.key, self.summary) + _logger.info("Retrieved issue: %s", self) def __str__(self) -> str: - return f"{self.key}: {self.summary} ({self.status}/{self.resolution})" + updated = self.updated.strftime("%Y-%m-%d %H:%M:%S") + return ( + f"{self.key} ({self.issue_type}) {updated} - " + + f"{self.summary} ({self.status}/{self.resolution})" + ) def _fetch_changelog(self) -> List[ChangelogEntry]: """Fetch the changelog from the API.""" @@ -175,6 +183,9 @@ def _fetch_comments(self) -> List[Comment]: comments = check_response(self.client.issue(self.key, fields="comment"))[ "fields" ]["comment"]["comments"] + return self._parse_comment_data(comments) + + def _parse_comment_data(self, comments: List[dict[str, Any]]) -> List[Comment]: items: List[Comment] = [] for comment in comments: items.append( @@ -400,13 +411,13 @@ def update_labels(self, new_labels: Set[str]) -> None: issue_cache.remove(self.key) # Invalidate any cached copy -_last_call_time = datetime.now() +_last_call_time = datetime.now(UTC) def _rate_limit() -> None: """Rate limit the API calls to avoid hitting the rate limit of the Jira server""" global _last_call_time # pylint: disable=global-statement - now = datetime.now() + now = datetime.now(UTC) delta = now - _last_call_time required_delay = MIN_CALL_DELAY - delta.total_seconds() if required_delay > 0: diff --git a/summarizer.py b/summarizer.py index 6bd4c6f..b6aad99 100644 --- a/summarizer.py +++ b/summarizer.py @@ -322,8 +322,10 @@ def _chat_model(model_name: str = _MODEL_ID) -> LLM: def get_issues_to_summarize( - client: Jira, since: datetime = datetime.fromisoformat("2020-01-01") -) -> List[str]: + client: Jira, + since: datetime = datetime.fromisoformat("2020-01-01"), + limit: int = 25, +) -> tuple[List[str], datetime]: """ Get a list of issues to summarize. @@ -333,6 +335,7 @@ def get_issues_to_summarize( Parameters: - client: The Jira client to use - since: Only return issues updated after this time + - limit: The maximum number of issues to return Returns: A list of issue keys @@ -344,22 +347,27 @@ def get_issues_to_summarize( updated_issues = check_response( client.jql( f"labels = '{SUMMARY_ALLOWED_LABEL}' and updated >= '{since_string}' ORDER BY updated ASC", # pylint: disable=line-too-long - limit=100, + limit=limit, fields="key,updated", ) ) keys: List[str] = [issue["key"] for issue in updated_issues["issues"]] # Filter out any issues that are not in the allowed projects filtered_keys = [] + most_recent = since issue_cache.clear() # Clear the cache to ensure we have the latest data for key in keys: issue = issue_cache.get_issue(client, key) if is_ok_to_post_summary(issue): filtered_keys.append(key) + most_recent = max(most_recent, issue.updated) keys = filtered_keys _logger.info( - "Issues updated since %s: (%d) %s", since_string, len(keys), ", ".join(keys) + "Issues updated since %s: (%d) %s", + since.isoformat(), + len(keys), + ", ".join(keys), ) # Given the updated issues, we also need to propagate the summaries up the @@ -380,7 +388,12 @@ def get_issues_to_summarize( # Sort the keys by level so that we summarize the children before the # parents, making the updated summaries available to the parents. keys = sorted(set(all_keys), key=lambda x: issue_cache.get_issue(client, x).level) - return keys + _logger.info( + "Total keys: %d, most recent modification: %s", + len(keys), + most_recent.isoformat(), + ) + return (keys, most_recent) def count_tokens(text: Union[str, list[str]]) -> int: