diff --git a/bot.py b/bot.py
index 448f237..6b27042 100755
--- a/bot.py
+++ b/bot.py
@@ -6,7 +6,7 @@
 import logging
 import os
 import time
-from datetime import datetime
+from datetime import UTC, datetime
 
 import requests
 from atlassian import Jira  # type: ignore
@@ -15,7 +15,7 @@
 from summarizer import get_issues_to_summarize, summarize_issue
 
 
-def main():
+def main() -> None:  # pylint: disable=too-many-locals,too-many-statements
     """Main function for the bot."""
     # pylint: disable=duplicate-code
     parser = argparse.ArgumentParser(description="Summarizer bot")
@@ -32,6 +32,13 @@
         choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
         help="Set the logging level",
     )
+    parser.add_argument(
+        "-l",
+        "--limit",
+        type=int,
+        default=150,
+        help="Maximum number of issues to summarize in each iteration",
+    )
     parser.add_argument(
         "-m",
         "--modified-since",
@@ -57,33 +64,71 @@
     logging.basicConfig(level=getattr(logging, args.log_level))
     max_depth = args.max_depth
     send_updates = not args.no_update
-    delay = args.seconds
-    since = datetime.fromisoformat(args.modified_since)
+    delay: int = args.seconds
+    limit: int = args.limit
+    since = datetime.fromisoformat(args.modified_since).astimezone(UTC)
 
     jira = Jira(url=os.environ["JIRA_URL"], token=os.environ["JIRA_TOKEN"])
 
+    most_recent_modification = since
     while True:
-        start_time = datetime.now()
+        start_time = datetime.now(UTC)
         logging.info("Starting iteration at %s", start_time.isoformat())
-        try:
-            issue_keys = get_issues_to_summarize(jira, since)
-            for issue_key in issue_keys:
-                issue_start_time = datetime.now()
-                issue = issue_cache.get_issue(jira, issue_key)
-                summary = summarize_issue(
-                    issue, max_depth=max_depth, send_updates=send_updates
-                )
-                elapsed = datetime.now() - issue_start_time
-                print(f"Summarized {issue_key} ({elapsed}s):\n{summary}\n")
-            since = start_time  # Only update if we succeeded
-        except requests.exceptions.HTTPError as error:
-            logging.error("HTTPError exception: %s", error.response.reason)
-        except requests.exceptions.ReadTimeout as error:
-            logging.error("ReadTimeout exception: %s", error, exc_info=True)
+        issue_keys: list[str] = []
+        successful = False
+        while not successful:
+            try:
+                (issue_keys, most_recent_modification) = get_issues_to_summarize(
+                    jira, since, limit
+                )
+                successful = True
+            except requests.exceptions.HTTPError as error:
+                logging.error(
+                    "HTTPError exception (%s): %s",
+                    error.request.url,
+                    error.response.reason,
+                )
+                time.sleep(5)
+            except requests.exceptions.ReadTimeout as error:
+                logging.error("ReadTimeout exception: %s", error, exc_info=True)
+                time.sleep(5)
+
+        if len(issue_keys) < limit - 5:
+            # We retrieved all the modified issues, so we can advance further
+            # and avoid re-fetching old issues
+            most_recent_modification = start_time
+        logging.info("Got updates through %s", most_recent_modification.isoformat())
+
+        for issue_key in issue_keys:
+            successful = False
+            while not successful:
+                try:
+                    issue_start_time = datetime.now(UTC)
+                    issue = issue_cache.get_issue(jira, issue_key)
+                    summary = summarize_issue(
+                        issue, max_depth=max_depth, send_updates=send_updates
+                    )
+                    elapsed = datetime.now(UTC) - issue_start_time
+                    print(f"Summarized {issue_key} ({elapsed}s):\n{summary}\n")
+                    successful = True
+                except requests.exceptions.HTTPError as error:
+                    logging.error(
+                        "HTTPError exception (%s): %s",
+                        error.request.url,
+                        error.response.reason,
+                    )
+                    time.sleep(5)
+                except requests.exceptions.ReadTimeout as error:
+                    logging.error("ReadTimeout exception: %s", error, exc_info=True)
+                    time.sleep(5)
+        since = most_recent_modification  # Only update if we succeeded
         logging.info("Cache stats: %s", issue_cache)
-        print(f"Iteration elapsed time: {datetime.now() - start_time}")
-        print(f"Sleeping for {delay} seconds...")
-        time.sleep(delay)
+        now = datetime.now(UTC)
+        elapsed = now - start_time
+        print(f"Iteration elapsed time: {elapsed}")
+        sleep_time = max(delay - elapsed.total_seconds(), 0)
+        print(f"Sleeping for {sleep_time} seconds...")
+        time.sleep(sleep_time)
 
 
 if __name__ == "__main__":
diff --git a/jiraissues.py b/jiraissues.py
index 7201eed..3895a16 100644
--- a/jiraissues.py
+++ b/jiraissues.py
@@ -3,7 +3,7 @@
 import logging
 import queue
 from dataclasses import dataclass, field
-from datetime import datetime
+from datetime import UTC, datetime
 from time import sleep
 from typing import Any, List, Optional, Set
 from zoneinfo import ZoneInfo
@@ -106,6 +106,7 @@ def __init__(self, client: Jira, issue_key: str) -> None:
             "resolution",
             "updated",
             CF_STATUS_SUMMARY,
+            "comment",
         ]
 
         data = check_response(client.issue(issue_key, fields=",".join(fields)))
@@ -124,15 +125,22 @@
             if data["fields"]["resolution"]
             else "Unresolved"
         )
+        # The "last updated" time is provided w/ TZ info
         self.updated: datetime = datetime.fromisoformat(data["fields"]["updated"])
         self.status_summary: str = data["fields"].get(CF_STATUS_SUMMARY) or ""
         self._changelog: Optional[List[ChangelogEntry]] = None
         self._comments: Optional[List[Comment]] = None
+        # Go ahead and parse the comments to avoid an extra API call
+        self._comments = self._parse_comment_data(data["fields"]["comment"]["comments"])
         self._related: Optional[List[RelatedIssue]] = None
-        _logger.info("Retrieved issue: %s - %s", self.key, self.summary)
+        _logger.info("Retrieved issue: %s", self)
 
     def __str__(self) -> str:
-        return f"{self.key}: {self.summary} ({self.status}/{self.resolution})"
+        updated = self.updated.strftime("%Y-%m-%d %H:%M:%S")
+        return (
+            f"{self.key} ({self.issue_type}) {updated} - "
+            + f"{self.summary} ({self.status}/{self.resolution})"
+        )
 
     def _fetch_changelog(self) -> List[ChangelogEntry]:
         """Fetch the changelog from the API."""
@@ -175,6 +183,9 @@
         comments = check_response(self.client.issue(self.key, fields="comment"))[
             "fields"
         ]["comment"]["comments"]
+        return self._parse_comment_data(comments)
+
+    def _parse_comment_data(self, comments: List[dict[str, Any]]) -> List[Comment]:
         items: List[Comment] = []
         for comment in comments:
             items.append(
@@ -400,13 +411,13 @@
             issue_cache.remove(self.key)  # Invalidate any cached copy
 
 
-_last_call_time = datetime.now()
+_last_call_time = datetime.now(UTC)
 
 
 def _rate_limit() -> None:
     """Rate limit the API calls to avoid hitting the rate limit of the Jira server"""
     global _last_call_time  # pylint: disable=global-statement
-    now = datetime.now()
+    now = datetime.now(UTC)
     delta = now - _last_call_time
     required_delay = MIN_CALL_DELAY - delta.total_seconds()
     if required_delay > 0:
diff --git a/summarizer.py b/summarizer.py
index 0e74757..b6aad99 100644
--- a/summarizer.py
+++ b/summarizer.py
@@ -81,13 +81,13 @@
       A string containing the summary
     """
 
-    _logger.info("Summarizing %s...", issue.key)
     # If the current summary is up-to-date and we're not asked to regenerate it,
     # return what's there
     if not regenerate and is_summary_current(issue):
-        _logger.debug("Summary for %s is current, using that.", issue.key)
+        _logger.info("Summarizing (using current): %s", issue)
         return _wrapper.get(issue.status_summary) or ""
 
+    _logger.info("Summarizing: %s", issue)
     # if we have not reached max-depth, summarize the child issues for inclusion in this summary
     child_summaries: List[Tuple[RelatedIssue, str]] = []
     for child in issue.children:
@@ -322,8 +322,10 @@ def _chat_model(model_name: str = _MODEL_ID) -> LLM:
 
 
 def get_issues_to_summarize(
-    client: Jira, since: datetime = datetime.fromisoformat("2020-01-01")
-) -> List[str]:
+    client: Jira,
+    since: datetime = datetime.fromisoformat("2020-01-01"),
+    limit: int = 25,
+) -> tuple[List[str], datetime]:
     """
     Get a list of issues to summarize.
 
@@ -333,6 +335,7 @@
     Parameters:
       - client: The Jira client to use
      - since: Only return issues updated after this time
+      - limit: The maximum number of issues to return
 
     Returns:
-      A list of issue keys
+      A tuple of (the list of issue keys, the most recent update time)
@@ -343,22 +346,29 @@
     since_string = since.astimezone(user_zi).strftime("%Y-%m-%d %H:%M")
     updated_issues = check_response(
         client.jql(
-            f"labels = '{SUMMARY_ALLOWED_LABEL}' and updated >= '{since_string}' ORDER BY updated DESC",  # pylint: disable=line-too-long
-            limit=50,
+            f"labels = '{SUMMARY_ALLOWED_LABEL}' and updated >= '{since_string}' ORDER BY updated ASC",  # pylint: disable=line-too-long
+            limit=limit,
             fields="key,updated",
         )
     )
 
     keys: List[str] = [issue["key"] for issue in updated_issues["issues"]]
     # Filter out any issues that are not in the allowed projects
     filtered_keys = []
+    most_recent = since
     issue_cache.clear()  # Clear the cache to ensure we have the latest data
     for key in keys:
         issue = issue_cache.get_issue(client, key)
         if is_ok_to_post_summary(issue):
             filtered_keys.append(key)
+            most_recent = max(most_recent, issue.updated)
     keys = filtered_keys
-    _logger.info("Issues updated since %s: %s", since_string, ", ".join(keys))
+    _logger.info(
+        "Issues updated since %s: (%d) %s",
+        since.isoformat(),
+        len(keys),
+        ", ".join(keys),
+    )
 
     # Given the updated issues, we also need to propagate the summaries up the
     # hierarchy. We first need to add the parent issues of all the updated
@@ -378,7 +388,12 @@
     # Sort the keys by level so that we summarize the children before the
     # parents, making the updated summaries available to the parents.
     keys = sorted(set(all_keys), key=lambda x: issue_cache.get_issue(client, x).level)
-    return keys
+    _logger.info(
+        "Total keys: %d, most recent modification: %s",
+        len(keys),
+        most_recent.isoformat(),
+    )
+    return (keys, most_recent)
 
 
 def count_tokens(text: Union[str, list[str]]) -> int: