From 6b289da11da8cc93b51a1ef244add184a8af0565 Mon Sep 17 00:00:00 2001
From: John Strunk <jstrunk@redhat.com>
Date: Thu, 27 Jun 2024 20:47:47 +0000
Subject: [PATCH] Change rollup to post summaries to Confluence

- Add cfhelper module for working with Confluence API
- Add concept of active issues & contributors

Signed-off-by: John Strunk <jstrunk@redhat.com>
---
 cfhelper.py      | 248 +++++++++++++++++++++++++++++++++++++++++++++++
 jiraissues.py    |  18 +++-
 rollup_status.py | 177 ++++++++++++++++++++++++++-------
 summarizer.py    |  88 +++++++++++++++--
 4 files changed, 485 insertions(+), 46 deletions(-)
 create mode 100644 cfhelper.py
diff --git a/cfhelper.py b/cfhelper.py
new file mode 100644
index 0000000..425912a
--- /dev/null
+++ b/cfhelper.py
@@ -0,0 +1,248 @@
+"""
+Helper functions for working with the Confluence API
+
+See the Confluence Storage Format documentation for more information on the tags
+that are supported for building content:
+https://confluence.atlassian.com/doc/confluence-storage-format-790796544.html
+"""
+
+import logging
+import xml.etree.ElementTree as ET
+from typing import Optional
+
+_logger = logging.getLogger(__name__)
+
+# Atlassian Confluence uses XML to store content, but the XML that is retrieved
+# isn't a well-formed document. To properly parse, the below header and footer
+# are necessary (e.g., _CONFLUENCE_HEADER + content + _CONFLUENCE_FOOTER). The
+# XML declaration must be the very first characters, so the header is unindented.
+# References:
+#     - https://jira.atlassian.com/browse/CONFCLOUD-60739
+#     - https://confluence.atlassian.com/doc/confluence-storage-format-790796544.html
+_CONFLUENCE_HEADER = """\
+<?xml version='1.0'?>
+<xml xmlns:atlassian-content="http://atlassian.com/content"
+     xmlns:ac="http://atlassian.com/content"
+     xmlns:ri="http://atlassian.com/resource/identifier"
+     xmlns:atlassian-template="http://atlassian.com/template"
+     xmlns:at="http://atlassian.com/template">
+"""
+_CONFLUENCE_FOOTER = "</xml>"
+
+# The namespaces used in the Confluence XML also need to be registered w/ ET
+ET.register_namespace("ac", "http://atlassian.com/content")
+ET.register_namespace("ri", "http://atlassian.com/resource/identifier")
+ET.register_namespace("at", "http://atlassian.com/template")
+
+
+class CFElement(ET.Element):
+    """
+    XML Element with some convenience methods
+
+    This class extends the standard xml.etree.ElementTree.Element class with:
+    - A constructor that takes optional content
+    - An add() method to append content to the element
+    - An unwrap() method to return the XML content as a string without the tag
+    """
+
+    def __init__(
+        self,
+        tag,
+        attrib: Optional[dict[str, str]] = None,
+        content: Optional[str | ET.Element] = None,
+        **extra
+    ):
+        """
+        Create a new XML element with optional content
+
+        Parameters:
+            - tag: The tag name
+            - attrib: Dictionary of attributes
+            - content: Optional initial content (text or a subelement)
+
+        Examples:
+            Create an element with content:
+            >>> e = CFElement("p", content="Hello, world!")
+            >>> print(ET.tostring(e, encoding="unicode"))
+            <p>Hello, world!</p>
+
+            Create an element without content:
+            >>> e = CFElement("br")
+            >>> print(ET.tostring(e, encoding="unicode"))
+            <br />
+        """
+        super().__init__(tag, attrib or {}, **extra)
+        if content is not None:
+            self.add(content)
+
+    def add(self, content: int | str | ET.Element) -> "CFElement":
+        """
+        Add content to the end of the Element
+
+        The content can be a string or another Element. If the content is a
+        string, it will be added as text content to the end of the element,
+        after any existing text and subelements. If the content is an Element,
+        it will be added as the last subelement.
+
+        Parameters:
+            - content: The content to add
+
+        Returns:
+            The Element itself, to allow chaining
+
+        Example:
+            >>> e = CFElement("p")
+            >>> _ = e.add("Hello, ")
+            >>> _ = e.add(CFElement("b", content="world"))
+            >>> _ = e.add("!")
+            >>> print(ET.tostring(e, encoding="unicode"))
+            <p>Hello, <b>world</b>!</p>
+
+            # The same example using method chaining
+            >>> e = CFElement("p").add("Hello, ").add(CFElement("b", content="world")).add("!")
+            >>> print(ET.tostring(e, encoding="unicode"))
+            <p>Hello, <b>world</b>!</p>
+        """
+        if isinstance(content, int):
+            content = str(content)
+        if isinstance(content, str):
+            if len(self) > 0:
+                # Text following a subelement is stored in that subelement's tail
+                self[-1].tail = (self[-1].tail or "") + content
+            else:
+                # Append instead of overwriting, so consecutive text adds accumulate
+                self.text = (self.text or "") + content
+        else:
+            self.append(content)
+        return self
+
+    def unwrap(self, encoding="unicode") -> str:
+        """
+        Return the serialized subelements of the Element (including their tail
+        text) as a string, omitting the tag of the Element itself.
+
+        Returns:
+            The XML content as a string
+
+        Example:
+            >>> root = CFElement("root")
+            >>> _ = root.add(CFElement("p", content="Hello, world!"))
+            >>> print(ET.tostring(root, encoding="unicode"))
+            <root><p>Hello, world!</p></root>
+            >>> print(root.unwrap())  # without the <root> tag
+            <p>Hello, world!</p>
+        """
+        return "".join(ET.tostring(e, encoding=encoding) for e in self)
+
+
+def anchor(title: str, url: str) -> CFElement:
+    """
+    Build an `<a>` element that links to a URL
+
+    Parameters:
+        - title: The visible text of the link
+        - url: The link target, stored in the href attribute
+
+    Returns:
+        A CFElement for the anchor
+
+    Example:
+        >>> e = anchor("Google", "https://www.google.com")
+        >>> print(ET.tostring(e, encoding="unicode"))
+        <a href="https://www.google.com">Google</a>
+    """
+    return CFElement("a", attrib={"href": url}, content=title)
+
+
+def list_to_li(items: list[str | ET.Element], ordered=False) -> CFElement:
+    """
+    Build an ordered or unordered list Element from a sequence of items
+
+    Parameters:
+        - items: The strings or Elements to place in the list, one per `<li>`
+        - ordered: True produces an `<ol>`, False (the default) a `<ul>`
+
+    Returns:
+        A CFElement holding the list and its `<li>` children
+
+    Examples:
+        >>> e = list_to_li(["One", "Two", "Three"])
+        >>> print(ET.tostring(e, encoding="unicode"))
+        <ul><li>One</li><li>Two</li><li>Three</li></ul>
+
+        >>> e = list_to_li(["One", CFElement("b", content="Two"), "Three"], ordered=True)
+        >>> print(ET.tostring(e, encoding="unicode"))
+        <ol><li>One</li><li><b>Two</b></li><li>Three</li></ol>
+    """
+    container = CFElement("ol" if ordered else "ul")
+    # Wrap every item, string or Element alike, in its own <li>
+    for entry in items:
+        container.add(CFElement("li", content=entry))
+    return container
+
+
+def jiralink(issue_key: str) -> CFElement:
+    # pylint: disable=line-too-long
+    """
+    Build the Confluence "jira" macro that links to a single Jira issue.
+
+    Confluence renders this structured macro as a Jira issue link; it appears
+    to render as an inline element.
+
+    The macro attributes and every parameter except "key" are fixed values
+    (server name and ID, macro ID, column lists); only the issue key varies
+    between calls.
+
+    Parameters:
+        - issue_key: The Jira issue key
+
+    Returns:
+        A CFElement representing a Jira issue link
+
+    Example:
+        >>> e = jiralink("ABC-123")
+        >>> print(ET.tostring(e, encoding="unicode"))
+        <ac:structured-macro ac:name="jira" ac:schema-version="1" ac:macro-id="9245001e-9ae4-4e0f-b383-dd3952c98ae0"><ac:parameter ac:name="server">Red Hat Issue Tracker</ac:parameter><ac:parameter ac:name="columnIds">issuekey,summary,issuetype,created,updated,duedate,assignee,reporter,priority,status,resolution</ac:parameter><ac:parameter ac:name="columns">key,summary,type,created,updated,due,assignee,reporter,priority,status,resolution</ac:parameter><ac:parameter ac:name="serverId">6a7247df-aeb5-31ba-bf94-111b6698af21</ac:parameter><ac:parameter ac:name="key">ABC-123</ac:parameter></ac:structured-macro>
+    """
+    # Storage-format markup that this function reproduces; the example below is
+    # for issue OCTO-2 and is shown wrapped in a paragraph:
+    #   <p>
+    #     <ac:structured-macro ac:name="jira" ac:schema-version="1" ac:macro-id="9245001e-9ae4-4e0f-b383-dd3952c98ae0">
+    #       <ac:parameter ac:name="server">Red Hat Issue Tracker</ac:parameter>
+    #       <ac:parameter ac:name="columnIds">issuekey,summary,issuetype,created,updated,duedate,assignee,reporter,priority,status,resolution</ac:parameter>
+    #       <ac:parameter ac:name="columns">key,summary,type,created,updated,due,assignee,reporter,priority,status,resolution</ac:parameter>
+    #       <ac:parameter ac:name="serverId">6a7247df-aeb5-31ba-bf94-111b6698af21</ac:parameter>
+    #       <ac:parameter ac:name="key">OCTO-2</ac:parameter>
+    #     </ac:structured-macro>
+    #   </p>
+
+    # Attributes of the enclosing <ac:structured-macro> element
+    macro_attrs = {
+        "ac:name": "jira",
+        "ac:schema-version": "1",
+        "ac:macro-id": "9245001e-9ae4-4e0f-b383-dd3952c98ae0",
+    }
+
+    # (name, value) pairs; each becomes one <ac:parameter> child, in this order.
+    # The two column lists are long literals, hence the pylint disable above.
+    parameters = [
+        ("server", "Red Hat Issue Tracker"),
+        (
+            "columnIds",
+            "issuekey,summary,issuetype,created,updated,duedate,assignee,reporter,priority,status,resolution",
+        ),
+        (
+            "columns",
+            "key,summary,type,created,updated,due,assignee,reporter,priority,status,resolution",
+        ),
+        ("serverId", "6a7247df-aeb5-31ba-bf94-111b6698af21"),
+        # The only value that varies between calls
+        ("key", issue_key),
+    ]
+
+    macro = CFElement("ac:structured-macro", macro_attrs)
+    for param_name, param_value in parameters:
+        macro.add(
+            CFElement("ac:parameter", {"ac:name": param_name}, content=param_value)
+        )
+    return macro
diff --git a/jiraissues.py b/jiraissues.py
index 23181a3..f5d14a5 100644
--- a/jiraissues.py
+++ b/jiraissues.py
@@ -226,6 +226,16 @@ def __str__(self) -> str:
             + f"{self.summary} ({self.status}/{self.resolution})"
         )
 
+    def __lt__(self, other: "Issue") -> bool:
+        # Issue keys consist of a prefix and a number such as ABCD-1234. Sort by
+        # the prefix as a string, then by the number as an integer. Split on the
+        # last "-" only, so a prefix that itself contains "-" still unpacks.
+        self_prefix, self_number = self.key.rsplit("-", 1)
+        other_prefix, other_number = other.key.rsplit("-", 1)
+        if self_prefix != other_prefix:
+            return self_prefix < other_prefix
+        return int(self_number) < int(other_number)
+
     @measure_function
     def _fetch_changelog(self) -> List[ChangelogEntry]:
         """Fetch the changelog from the API."""
@@ -490,7 +500,9 @@ def update_status_summary(self, contents: str) -> None:
         """
         _logger.info("Sending updated status summary for %s to server", self.key)
         fields = {CF_STATUS_SUMMARY: contents}
-        with_retry(lambda: self.client.update_issue_field(self.key, fields))  # type: ignore
+        with_retry(
+            lambda: self.client.update_issue_field(self.key, fields)
+        )  # type: ignore
         self.status_summary = contents
         issue_cache.remove(self.key)  # Invalidate any cached copy
 
@@ -503,7 +515,9 @@ def update_labels(self, new_labels: Set[str]) -> None:
         """
         _logger.info("Sending updated labels for %s to server", self.key)
         fields = {"labels": list(new_labels)}
-        with_retry(lambda: self.client.update_issue_field(self.key, fields))  # type: ignore
+        with_retry(
+            lambda: self.client.update_issue_field(self.key, fields)
+        )  # type: ignore
         self.labels = new_labels
         issue_cache.remove(self.key)  # Invalidate any cached copy
 
diff --git a/rollup_status.py b/rollup_status.py
index 5f9e2f6..ed09795 100755
--- a/rollup_status.py
+++ b/rollup_status.py
@@ -8,13 +8,15 @@
 import textwrap
 from dataclasses import dataclass, field
 
-from atlassian import Jira  # type: ignore
+from atlassian import Confluence, Jira  # type: ignore
 
+from cfhelper import CFElement, jiralink
 from jiraissues import Issue, User, issue_cache
 from simplestats import Timer
-from summarizer import get_chat_model, rollup_contributors, summarize_issue
+from summarizer import get_chat_model, is_active, rollup_contributors, summarize_issue
 
 LINK_BASE = "https://issues.redhat.com/browse/"
+CONFLUENCE_SPACE = "OCTOET"
 
 
 @dataclass
@@ -25,6 +27,81 @@ class IssueSummary:
     summary: str = ""
     exec_summary: str = ""
     contributors: set[User] = field(default_factory=set)
+    active_contributors: set[User] = field(default_factory=set)
+
+
+def lookup_page(cclient: Confluence, title_or_id: str) -> int:
+    """
+    Resolve a page title or numeric ID to a Confluence page ID
+
+    Parameters:
+        - cclient: The Confluence client
+        - title_or_id: The page title, or the page ID as a string
+
+    Returns:
+        The page ID; raises ValueError if no page has the given title
+    """
+    try:
+        numeric_id: int | None = int(title_or_id)
+    except ValueError:
+        numeric_id = None  # not a number, so treat it as a page title
+    if numeric_id is not None:
+        return numeric_id
+    found = cclient.get_page_id(CONFLUENCE_SPACE, title_or_id)
+    if found is not None:
+        return int(found)
+    logging.error("Unable to find page %s", title_or_id)
+    raise ValueError(f"Unable to find page {title_or_id}")
+
+
+def element_contrib_count(header: str, contributors: set[User]) -> CFElement:
+    """
+    Build a paragraph showing how many distinct contributors there are
+
+    Parameters:
+        - header: The label placed in bold before the count
+        - contributors: The set of contributors
+
+    Returns:
+        A CFElement holding the `<p>` paragraph
+    """
+    # The incoming set only guarantees unique Jira User objects. Counting
+    # distinct display names instead means that several users sharing one
+    # display name (i.e. the same person w/ multiple accounts) are counted
+    # once.
+    unique_names = {person.display_name for person in contributors}
+    # Layout: bold header, a single space, then the count
+    paragraph = CFElement("p")
+    paragraph.add(CFElement("strong", content=header))
+    paragraph.add(" ")
+    paragraph.add(len(unique_names))
+    return paragraph
+
+
+def element_contrib_list(header: str, contributors: set[User]) -> CFElement:
+    """
+    Build a paragraph listing the distinct contributors by name
+
+    Parameters:
+        - header: The label placed in bold, followed by the name count
+        - contributors: The set of contributors
+
+    Returns:
+        A CFElement holding the `<p>` paragraph
+    """
+    # The incoming set only guarantees unique Jira User objects. Collapsing
+    # to distinct display names means that several users sharing one display
+    # name (i.e. the same person w/ multiple accounts) are listed and counted
+    # once.
+    names = {person.display_name for person in contributors}
+    # Order by the last whitespace-separated word, i.e. the last name
+    ordered = sorted(names, key=lambda name: name.split()[-1])
+    label = CFElement("strong", content=header + f" ({len(names)}):")
+    paragraph = CFElement("p")
+    paragraph.add(label)
+    paragraph.add(" ")
+    paragraph.add(", ".join(ordered))
+    return paragraph
 
 
 def main() -> None:  # pylint: disable=too-many-locals,too-many-statements
@@ -37,27 +114,50 @@ def main() -> None:  # pylint: disable=too-many-locals,too-many-statements
         choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
         help="Set the logging level",
     )
+    parser.add_argument(
+        "--inactive-days",
+        type=int,
+        default=14,
+        help="Number of days before an issue is considered inactive",
+    )
+    parser.add_argument(
+        "-p",
+        "--parent",
+        type=str,
+        required=True,
+        help="Title or ID of the parent page",
+    )
     parser.add_argument("jira_issue_key", type=str, help="JIRA issue key")
 
     args = parser.parse_args()
     logging.basicConfig(level=getattr(logging, str(args.log_level).upper()))
     issue_key: str = args.jira_issue_key
+    inactive_days: int = args.inactive_days
 
-    client = Jira(url=os.environ["JIRA_URL"], token=os.environ["JIRA_TOKEN"])
+    jclient = Jira(url=os.environ["JIRA_URL"], token=os.environ["JIRA_TOKEN"])
+    cclient = Confluence(
+        os.environ["CONFLUENCE_URL"], token=os.environ["CONFLUENCE_TOKEN"]
+    )
 
     # Get the existing summaries from the Jira issues
     stime = Timer("Collect")
     stime.start()
     logging.info("Collecting issue summaries for children of %s", issue_key)
     child_inputs: list[IssueSummary] = []
-    epic = issue_cache.get_issue(client, issue_key)
+    epic = issue_cache.get_issue(jclient, issue_key)
     for child in epic.children:
-        issue = issue_cache.get_issue(client, child.key)
+        issue = issue_cache.get_issue(jclient, child.key)
+        if not is_active(issue, inactive_days, True):
+            logging.info("Skipping inactive issue %s", issue.key)
+            continue
         text = f"{issue}\n"
         text += summarize_issue(issue, max_depth=1)
         child_inputs.append(
             IssueSummary(
-                issue=issue, summary=text, contributors=rollup_contributors(issue)
+                issue=issue,
+                summary=text,
+                contributors=rollup_contributors(issue),
+                active_contributors=rollup_contributors(issue, True, inactive_days),
             )
         )
     stime.stop()
@@ -72,7 +172,7 @@ def main() -> None:  # pylint: disable=too-many-locals,too-many-statements
         data = f"""\
 {item.issue}
 {item.summary}
-Contributors: {', '.join(c.display_name for c in item.contributors)}"""
+"""
         prompt = f"""\
 Condense the following technical status update into a short, high-level summary for an engineering leader.
 Focus on the high-level objective, keeping the technical detail to a minimum.
@@ -96,39 +196,42 @@ def main() -> None:  # pylint: disable=too-many-locals,too-many-statements
 """
     exec_paragraph = textwrap.fill(llm.invoke(prompt, stop=["<|endoftext|>"]).strip())
 
-    # Determine contributors to the whole top-level issue, sorted by last name.
-    # The rollup function returns a set, ensuring uniqueness of User, but here
-    # we convert to the display name, ensuring uniqueness of display name,
-    # handling the case of multiple users with the same display name
-    all_contributor_names = {c.display_name for c in rollup_contributors(epic)}
-    all_contributor_names_sorted = sorted(
-        all_contributor_names, key=lambda x: x.split()[-1]
-    )
-
     # Generate the overall status update
-    print(f"# Executive Summary for [{issue_key}]({LINK_BASE}{issue_key})")
-    print()
-    print(exec_paragraph)
-    print()
-    print(f"**Resource count:** {len(all_contributor_names_sorted)}")
-    print()
-    print(f"**Contributors:** {', '.join(all_contributor_names_sorted)}")
-    print()
-    print("## Individual issue status")
-    print()
-    for item in child_inputs:
+    parent_page_id = lookup_page(cclient, args.parent)
+
+    page_title = f"Initiative status: {epic.key} - {epic.summary}"
+
+    # Root element for the page; tag doesn't matter as it will be stripped off later
+    page = CFElement("root")
+
+    # Top of the page; overall executive summary and initiative contributors
+    page.add(CFElement("h1", content="Executive Summary"))
+    page.add(CFElement("p", content=jiralink(epic.key)))
+    page.add(CFElement("p", content=exec_paragraph))
+    contributors = rollup_contributors(epic)
+    active_contributors = rollup_contributors(epic, active_days=inactive_days)
+    if active_contributors:
+        page.add(element_contrib_list("Active contributors", active_contributors))
+    if contributors:
+        page.add(element_contrib_list("All contributors", contributors))
+
+    # Individual issue summaries
+    page.add(CFElement("h2", content="Status of individual issues"))
+    sorted_issues = sorted(child_inputs, key=lambda x: x.issue)
+    for item in sorted_issues:
         issue = item.issue
-        print(f"### [{issue.key}]({LINK_BASE}{issue.key}) - {issue.summary}")
-        print()
-        print(item.exec_summary)
-        print()
-        contrib_names = {c.display_name for c in item.contributors}
-        contrib_names_sorted = sorted(contrib_names, key=lambda x: x.split()[-1])
-        if contrib_names_sorted:
-            print(f"**Contributors:** {', '.join(contrib_names_sorted)}")
-            print()
+        page.add(CFElement("h3", content=jiralink(issue.key)))
+        page.add(CFElement("p", content=item.exec_summary))
+        if item.active_contributors:
+            page.add(
+                element_contrib_list("Active contributors", item.active_contributors)
+            )
+        if item.contributors:
+            page.add(element_contrib_list("All contributors", item.contributors))
+
+    cclient.update_or_create(parent_page_id, page_title, page.unwrap())
 
 
 if __name__ == "__main__":
-    with Timer("Total execution"):
+    with Timer(__name__):
         main()
diff --git a/summarizer.py b/summarizer.py
index d56b555..91c0569 100644
--- a/summarizer.py
+++ b/summarizer.py
@@ -4,7 +4,7 @@
 import logging
 import os
 import textwrap
-from datetime import UTC, datetime
+from datetime import UTC, datetime, timedelta
 from typing import Any, List, Optional, Tuple, Union
 
 import genai.exceptions
@@ -593,7 +593,9 @@ def add_summary_label_to_descendants(client: Jira, issue_key: str) -> None:
 
 
 @measure_function
-def rollup_contributors(issue: Issue, include_assignee=True) -> set[User]:
+def rollup_contributors(
+    issue: Issue, include_assignee=True, active_days: int = 0
+) -> set[User]:
     """
     Roll up the set of contributors from the issue and its children.
 
@@ -601,15 +603,87 @@ def rollup_contributors(issue: Issue, include_assignee=True) -> set[User]:
         - issue: The issue to roll up the contributors from
         - include_assignee: Include the issue assignee in the set of
           contributors
+        - active_days: Only include contributors if the issue has been updated
+          within the last `active_days` days. If 0, include contributors from
+          all issues.
 
     Returns:
         The set of contributors
     """
-    contributors = set()
+    contributors: set[User] = set()
     for child in issue.children:
         child_issue = issue_cache.get_issue(issue.client, child.key)
-        contributors.update(rollup_contributors(child_issue))
-    contributors.update(issue.contributors)
-    if include_assignee and issue.assignee is not None:
-        contributors.add(issue.assignee)
+        contributors.update(
+            rollup_contributors(child_issue, include_assignee, active_days)
+        )
+    if active_days == 0 or is_active(issue, active_days):
+        contributors.update(issue.contributors)
+        if include_assignee and issue.assignee is not None:
+            contributors.add(issue.assignee)
     return contributors
+
+
+def is_active(issue: Issue, within_days: int, recursive: bool = False) -> bool:
+    """
+    Determine if an issue is active.
+
+    An issue is considered active if it has been updated in the last
+    `within_days` days or carries the "active" label. Changes to certain fields
+    are ignored when determining the last update time.
+
+    Parameters:
+        - issue: The issue to check
+        - within_days: The number of days to consider as active
+        - recursive: If True, an active descendant also makes the issue active
+
+    Returns:
+        True if the issue is active, False otherwise
+    """
+    excluded_fields = {"Jira Link", "Status Summary", "Test Link", "labels"}
+    cutoff = datetime.now(UTC) - timedelta(days=within_days)  # loop-invariant
+    if "active" in issue.labels:
+        _logger.debug("Issue %s is active: has the 'active' label", issue.key)
+        return True
+    for change in issue.changelog:
+        if change.created > cutoff:
+            if any(chg.field not in excluded_fields for chg in change.changes):
+                _logger.debug(
+                    "Issue %s is active; [%s] changed on %s",
+                    issue.key,
+                    ",".join([chg.field for chg in change.changes]),
+                    change.created,
+                )
+                return True
+    if recursive:
+        for child in issue.children:
+            if is_active(
+                issue_cache.get_issue(issue.client, child.key), within_days, recursive
+            ):
+                _logger.debug(
+                    "Issue %s is active; because %s is active", issue.key, child.key
+                )
+                return True
+    _logger.debug("Issue %s is inactive", issue.key)
+    return False
+
+
+def active_children(issue: Issue, within_days: int, recursive: bool) -> set[Issue]:
+    """
+    Collect the child issues of an issue that are themselves active.
+
+    Parameters:
+        - issue: The issue whose children are examined
+        - within_days: The number of days to consider as active
+        - recursive: If True, also collect from each child's own children
+
+    Returns:
+        The set of active child issues
+    """
+    result: set[Issue] = set()
+    for ref in issue.children:
+        candidate = issue_cache.get_issue(issue.client, ref.key)
+        if is_active(candidate, within_days):
+            result.add(candidate)
+        if recursive:
+            result |= active_children(candidate, within_days, recursive)
+    return result