Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bot fixes #82

Merged
merged 2 commits into from
May 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 67 additions & 22 deletions bot.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import logging
import os
import time
from datetime import datetime
from datetime import UTC, datetime

import requests
from atlassian import Jira # type: ignore
Expand All @@ -15,7 +15,7 @@
from summarizer import get_issues_to_summarize, summarize_issue


def main():
def main() -> None: # pylint: disable=too-many-locals,too-many-statements
"""Main function for the bot."""
# pylint: disable=duplicate-code
parser = argparse.ArgumentParser(description="Summarizer bot")
Expand All @@ -32,6 +32,13 @@ def main():
choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
help="Set the logging level",
)
parser.add_argument(
"-l",
"--limit",
type=int,
default=150,
help="Maximum number of issues to summarize in each iteration",
)
parser.add_argument(
"-m",
"--modified-since",
Expand All @@ -57,33 +64,71 @@ def main():
logging.basicConfig(level=getattr(logging, args.log_level))
max_depth = args.max_depth
send_updates = not args.no_update
delay = args.seconds
since = datetime.fromisoformat(args.modified_since)
delay: int = args.seconds
limit: int = args.limit
since = datetime.fromisoformat(args.modified_since).astimezone(UTC)

jira = Jira(url=os.environ["JIRA_URL"], token=os.environ["JIRA_TOKEN"])

most_recent_modification = since
while True:
start_time = datetime.now()
start_time = datetime.now(UTC)
logging.info("Starting iteration at %s", start_time.isoformat())
try:
issue_keys = get_issues_to_summarize(jira, since)
for issue_key in issue_keys:
issue_start_time = datetime.now()
issue = issue_cache.get_issue(jira, issue_key)
summary = summarize_issue(
issue, max_depth=max_depth, send_updates=send_updates
issue_keys: list[str] = []
successful = False
while not successful:
try:
(issue_keys, most_recent_modification) = get_issues_to_summarize(
jira, since, limit
)
elapsed = datetime.now() - issue_start_time
print(f"Summarized {issue_key} ({elapsed}s):\n{summary}\n")
since = start_time # Only update if we succeeded
except requests.exceptions.HTTPError as error:
logging.error("HTTPError exception: %s", error.response.reason)
except requests.exceptions.ReadTimeout as error:
logging.error("ReadTimeout exception: %s", error, exc_info=True)
successful = True
except requests.exceptions.HTTPError as error:
logging.error(
"HTTPError exception (%s): %s",
error.request.url,
error.response.reason,
)
time.sleep(5)
except requests.exceptions.ReadTimeout as error:
logging.error("ReadTimeout exception: %s", error, exc_info=True)
time.sleep(5)

if len(issue_keys) < limit - 5:
# We retrieved all the modified issues, so we can advance farther
# and avoid re-fetching old issues
most_recent_modification = start_time
logging.info("Got updates through %s", most_recent_modification.isoformat())

for issue_key in issue_keys:
successful = False
while not successful:
try:
issue_start_time = datetime.now(UTC)
issue = issue_cache.get_issue(jira, issue_key)
summary = summarize_issue(
issue, max_depth=max_depth, send_updates=send_updates
)
elapsed = datetime.now(UTC) - issue_start_time
print(f"Summarized {issue_key} ({elapsed}s):\n{summary}\n")
successful = True
except requests.exceptions.HTTPError as error:
logging.error(
"HTTPError exception (%s): %s",
error.request.url,
error.response.reason,
)
time.sleep(5)
except requests.exceptions.ReadTimeout as error:
logging.error("ReadTimeout exception: %s", error, exc_info=True)
time.sleep(5)
since = most_recent_modification # Only update if we succeeded
logging.info("Cache stats: %s", issue_cache)
print(f"Iteration elapsed time: {datetime.now() - start_time}")
print(f"Sleeping for {delay} seconds...")
time.sleep(delay)
now = datetime.now(UTC)
elapsed = now - start_time
print(f"Iteration elapsed time: {elapsed}")
sleep_time = max(delay - elapsed.total_seconds(), 0)
print(f"Sleeping for {sleep_time} seconds...")
time.sleep(sleep_time)


if __name__ == "__main__":
Expand Down
21 changes: 16 additions & 5 deletions jiraissues.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import logging
import queue
from dataclasses import dataclass, field
from datetime import datetime
from datetime import UTC, datetime
from time import sleep
from typing import Any, List, Optional, Set
from zoneinfo import ZoneInfo
Expand Down Expand Up @@ -106,6 +106,7 @@ def __init__(self, client: Jira, issue_key: str) -> None:
"resolution",
"updated",
CF_STATUS_SUMMARY,
"comment",
]
data = check_response(client.issue(issue_key, fields=",".join(fields)))

Expand All @@ -124,15 +125,22 @@ def __init__(self, client: Jira, issue_key: str) -> None:
if data["fields"]["resolution"]
else "Unresolved"
)
# The "last updated" time is provided w/ TZ info
self.updated: datetime = datetime.fromisoformat(data["fields"]["updated"])
self.status_summary: str = data["fields"].get(CF_STATUS_SUMMARY) or ""
self._changelog: Optional[List[ChangelogEntry]] = None
self._comments: Optional[List[Comment]] = None
# Go ahead and parse the comments to avoid an extra API call
self._comments = self._parse_comment_data(data["fields"]["comment"]["comments"])
self._related: Optional[List[RelatedIssue]] = None
_logger.info("Retrieved issue: %s - %s", self.key, self.summary)
_logger.info("Retrieved issue: %s", self)

def __str__(self) -> str:
return f"{self.key}: {self.summary} ({self.status}/{self.resolution})"
updated = self.updated.strftime("%Y-%m-%d %H:%M:%S")
return (
f"{self.key} ({self.issue_type}) {updated} - "
+ f"{self.summary} ({self.status}/{self.resolution})"
)

def _fetch_changelog(self) -> List[ChangelogEntry]:
"""Fetch the changelog from the API."""
Expand Down Expand Up @@ -175,6 +183,9 @@ def _fetch_comments(self) -> List[Comment]:
comments = check_response(self.client.issue(self.key, fields="comment"))[
"fields"
]["comment"]["comments"]
return self._parse_comment_data(comments)

def _parse_comment_data(self, comments: List[dict[str, Any]]) -> List[Comment]:
items: List[Comment] = []
for comment in comments:
items.append(
Expand Down Expand Up @@ -400,13 +411,13 @@ def update_labels(self, new_labels: Set[str]) -> None:
issue_cache.remove(self.key) # Invalidate any cached copy


_last_call_time = datetime.now()
_last_call_time = datetime.now(UTC)


def _rate_limit() -> None:
"""Rate limit the API calls to avoid hitting the rate limit of the Jira server"""
global _last_call_time # pylint: disable=global-statement
now = datetime.now()
now = datetime.now(UTC)
delta = now - _last_call_time
required_delay = MIN_CALL_DELAY - delta.total_seconds()
if required_delay > 0:
Expand Down
31 changes: 23 additions & 8 deletions summarizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,13 +81,13 @@ def summarize_issue(
A string containing the summary
"""

_logger.info("Summarizing %s...", issue.key)
# If the current summary is up-to-date and we're not asked to regenerate it,
# return what's there
if not regenerate and is_summary_current(issue):
_logger.debug("Summary for %s is current, using that.", issue.key)
_logger.info("Summarizing (using current): %s", issue)
return _wrapper.get(issue.status_summary) or ""

_logger.info("Summarizing: %s", issue)
# if we have not reached max-depth, summarize the child issues for inclusion in this summary
child_summaries: List[Tuple[RelatedIssue, str]] = []
for child in issue.children:
Expand Down Expand Up @@ -322,8 +322,10 @@ def _chat_model(model_name: str = _MODEL_ID) -> LLM:


def get_issues_to_summarize(
client: Jira, since: datetime = datetime.fromisoformat("2020-01-01")
) -> List[str]:
client: Jira,
since: datetime = datetime.fromisoformat("2020-01-01"),
limit: int = 25,
) -> tuple[List[str], datetime]:
"""
Get a list of issues to summarize.

Expand All @@ -333,6 +335,7 @@ def get_issues_to_summarize(
Parameters:
- client: The Jira client to use
- since: Only return issues updated after this time
- limit: The maximum number of issues to return

Returns:
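A tuple of (issue keys to summarize, ordered so children come before their parents; the most recent update time among the matching issues, or `since` if none matched)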
Expand All @@ -343,22 +346,29 @@ def get_issues_to_summarize(
since_string = since.astimezone(user_zi).strftime("%Y-%m-%d %H:%M")
updated_issues = check_response(
client.jql(
f"labels = '{SUMMARY_ALLOWED_LABEL}' and updated >= '{since_string}' ORDER BY updated DESC", # pylint: disable=line-too-long
limit=50,
f"labels = '{SUMMARY_ALLOWED_LABEL}' and updated >= '{since_string}' ORDER BY updated ASC", # pylint: disable=line-too-long
limit=limit,
fields="key,updated",
)
)
keys: List[str] = [issue["key"] for issue in updated_issues["issues"]]
# Filter out any issues that are not in the allowed projects
filtered_keys = []
most_recent = since
issue_cache.clear() # Clear the cache to ensure we have the latest data
for key in keys:
issue = issue_cache.get_issue(client, key)
if is_ok_to_post_summary(issue):
filtered_keys.append(key)
most_recent = max(most_recent, issue.updated)
keys = filtered_keys

_logger.info("Issues updated since %s: %s", since_string, ", ".join(keys))
_logger.info(
"Issues updated since %s: (%d) %s",
since.isoformat(),
len(keys),
", ".join(keys),
)

# Given the updated issues, we also need to propagate the summaries up the
# hierarchy. We first need to add the parent issues of all the updated
Expand All @@ -378,7 +388,12 @@ def get_issues_to_summarize(
# Sort the keys by level so that we summarize the children before the
# parents, making the updated summaries available to the parents.
keys = sorted(set(all_keys), key=lambda x: issue_cache.get_issue(client, x).level)
return keys
_logger.info(
"Total keys: %d, most recent modification: %s",
len(keys),
most_recent.isoformat(),
)
return (keys, most_recent)


def count_tokens(text: Union[str, list[str]]) -> int:
Expand Down
Loading