Skip to content

Commit

Permalink
Fix timezone handling issues; automatically fetch comments with the issue
Browse files Browse the repository at this point in the history
Signed-off-by: John Strunk <[email protected]>
  • Loading branch information
JohnStrunk committed May 23, 2024
1 parent 60a25a7 commit 612e535
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 18 deletions.
31 changes: 23 additions & 8 deletions bot.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from summarizer import get_issues_to_summarize, summarize_issue


def main():
def main() -> None: # pylint: disable=too-many-locals,too-many-statements
"""Main function for the bot."""
# pylint: disable=duplicate-code
parser = argparse.ArgumentParser(description="Summarizer bot")
Expand All @@ -32,6 +32,13 @@ def main():
choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
help="Set the logging level",
)
parser.add_argument(
"-l",
"--limit",
type=int,
default=150,
help="Maximum number of issues to summarize in each iteration",
)
parser.add_argument(
"-m",
"--modified-since",
Expand All @@ -58,19 +65,22 @@ def main():
max_depth = args.max_depth
send_updates = not args.no_update
delay: int = args.seconds
limit: int = args.limit
since = datetime.fromisoformat(args.modified_since).astimezone(UTC)

jira = Jira(url=os.environ["JIRA_URL"], token=os.environ["JIRA_TOKEN"])

most_recent_modification = since
while True:
start_time = datetime.now()
start_time = datetime.now(UTC)
logging.info("Starting iteration at %s", start_time.isoformat())
issue_keys: list[str] = []
successful = False
while not successful:
try:
issue_keys = get_issues_to_summarize(jira, since)
(issue_keys, most_recent_modification) = get_issues_to_summarize(
jira, since, limit
)
successful = True
except requests.exceptions.HTTPError as error:
logging.error(
Expand All @@ -82,19 +92,24 @@ def main():
except requests.exceptions.ReadTimeout as error:
logging.error("ReadTimeout exception: %s", error, exc_info=True)
time.sleep(5)

if len(issue_keys) < limit - 5:
# We retrieved all the modified issues, so we can advance farther
# and avoid re-fetching old issues
most_recent_modification = start_time
logging.info("Got updates through %s", most_recent_modification.isoformat())

for issue_key in issue_keys:
successful = False
while not successful:
try:
issue_start_time = datetime.now()
issue_start_time = datetime.now(UTC)
issue = issue_cache.get_issue(jira, issue_key)
summary = summarize_issue(
issue, max_depth=max_depth, send_updates=send_updates
)
elapsed = datetime.now() - issue_start_time
elapsed = datetime.now(UTC) - issue_start_time
print(f"Summarized {issue_key} ({elapsed}s):\n{summary}\n")
if issue.updated > most_recent_modification:
most_recent_modification = issue.updated
successful = True
except requests.exceptions.HTTPError as error:
logging.error(
Expand All @@ -108,7 +123,7 @@ def main():
time.sleep(5)
since = most_recent_modification # Only update if we succeeded
logging.info("Cache stats: %s", issue_cache)
now = datetime.now()
now = datetime.now(UTC)
elapsed = now - start_time
print(f"Iteration elapsed time: {elapsed}")
sleep_time = max(delay - elapsed.total_seconds(), 0)
Expand Down
21 changes: 16 additions & 5 deletions jiraissues.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import logging
import queue
from dataclasses import dataclass, field
from datetime import datetime
from datetime import UTC, datetime
from time import sleep
from typing import Any, List, Optional, Set
from zoneinfo import ZoneInfo
Expand Down Expand Up @@ -106,6 +106,7 @@ def __init__(self, client: Jira, issue_key: str) -> None:
"resolution",
"updated",
CF_STATUS_SUMMARY,
"comment",
]
data = check_response(client.issue(issue_key, fields=",".join(fields)))

Expand All @@ -124,15 +125,22 @@ def __init__(self, client: Jira, issue_key: str) -> None:
if data["fields"]["resolution"]
else "Unresolved"
)
# The "last updated" time is provided w/ TZ info
self.updated: datetime = datetime.fromisoformat(data["fields"]["updated"])
self.status_summary: str = data["fields"].get(CF_STATUS_SUMMARY) or ""
self._changelog: Optional[List[ChangelogEntry]] = None
self._comments: Optional[List[Comment]] = None
# Go ahead and parse the comments to avoid an extra API call
self._comments = self._parse_comment_data(data["fields"]["comment"]["comments"])
self._related: Optional[List[RelatedIssue]] = None
_logger.info("Retrieved issue: %s - %s", self.key, self.summary)
_logger.info("Retrieved issue: %s", self)

def __str__(self) -> str:
    """Return a one-line, human-readable summary of the issue.

    Format: "<key> (<type>) <updated> - <summary> (<status>/<resolution>)",
    where <updated> is the issue's last-modified time rendered as
    "YYYY-MM-DD HH:MM:SS".
    """
    # self.updated is parsed with TZ info (see __init__); strftime drops the
    # offset here purely for display compactness.
    updated = self.updated.strftime("%Y-%m-%d %H:%M:%S")
    return (
        f"{self.key} ({self.issue_type}) {updated} - "
        + f"{self.summary} ({self.status}/{self.resolution})"
    )

def _fetch_changelog(self) -> List[ChangelogEntry]:
"""Fetch the changelog from the API."""
Expand Down Expand Up @@ -175,6 +183,9 @@ def _fetch_comments(self) -> List[Comment]:
comments = check_response(self.client.issue(self.key, fields="comment"))[
"fields"
]["comment"]["comments"]
return self._parse_comment_data(comments)

def _parse_comment_data(self, comments: List[dict[str, Any]]) -> List[Comment]:
items: List[Comment] = []
for comment in comments:
items.append(
Expand Down Expand Up @@ -400,13 +411,13 @@ def update_labels(self, new_labels: Set[str]) -> None:
issue_cache.remove(self.key) # Invalidate any cached copy


# Timezone-aware (UTC) timestamp of the most recent Jira API call, consulted
# and updated by _rate_limit() to enforce a minimum delay between calls.
# Must be aware: _rate_limit() subtracts it from datetime.now(UTC), and
# mixing naive and aware datetimes raises TypeError.
_last_call_time = datetime.now(UTC)


def _rate_limit() -> None:
"""Rate limit the API calls to avoid hitting the rate limit of the Jira server"""
global _last_call_time # pylint: disable=global-statement
now = datetime.now()
now = datetime.now(UTC)
delta = now - _last_call_time
required_delay = MIN_CALL_DELAY - delta.total_seconds()
if required_delay > 0:
Expand Down
23 changes: 18 additions & 5 deletions summarizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,8 +322,10 @@ def _chat_model(model_name: str = _MODEL_ID) -> LLM:


def get_issues_to_summarize(
client: Jira, since: datetime = datetime.fromisoformat("2020-01-01")
) -> List[str]:
client: Jira,
since: datetime = datetime.fromisoformat("2020-01-01"),
limit: int = 25,
) -> tuple[List[str], datetime]:
"""
Get a list of issues to summarize.
Expand All @@ -333,6 +335,7 @@ def get_issues_to_summarize(
Parameters:
- client: The Jira client to use
- since: Only return issues updated after this time
- limit: The maximum number of issues to return
Returns:
A list of issue keys
Expand All @@ -344,22 +347,27 @@ def get_issues_to_summarize(
updated_issues = check_response(
client.jql(
f"labels = '{SUMMARY_ALLOWED_LABEL}' and updated >= '{since_string}' ORDER BY updated ASC", # pylint: disable=line-too-long
limit=100,
limit=limit,
fields="key,updated",
)
)
keys: List[str] = [issue["key"] for issue in updated_issues["issues"]]
# Filter out any issues that are not in the allowed projects
filtered_keys = []
most_recent = since
issue_cache.clear() # Clear the cache to ensure we have the latest data
for key in keys:
issue = issue_cache.get_issue(client, key)
if is_ok_to_post_summary(issue):
filtered_keys.append(key)
most_recent = max(most_recent, issue.updated)
keys = filtered_keys

_logger.info(
"Issues updated since %s: (%d) %s", since_string, len(keys), ", ".join(keys)
"Issues updated since %s: (%d) %s",
since.isoformat(),
len(keys),
", ".join(keys),
)

# Given the updated issues, we also need to propagate the summaries up the
Expand All @@ -380,7 +388,12 @@ def get_issues_to_summarize(
# Sort the keys by level so that we summarize the children before the
# parents, making the updated summaries available to the parents.
keys = sorted(set(all_keys), key=lambda x: issue_cache.get_issue(client, x).level)
return keys
_logger.info(
"Total keys: %d, most recent modification: %s",
len(keys),
most_recent.isoformat(),
)
return (keys, most_recent)


def count_tokens(text: Union[str, list[str]]) -> int:
Expand Down

0 comments on commit 612e535

Please sign in to comment.