From 0da67c5f00b20482d9b1b8e8d87c424b8397cf10 Mon Sep 17 00:00:00 2001 From: Chris Josten Date: Thu, 5 Dec 2024 23:06:36 +0100 Subject: [PATCH] Add package changelogs Adds code to load package changelogs from the repository information. All entries are then displayed on each individual app page and the most recent changelog entry is included in the RSS feed. --- chumweb/atom_feed.py | 3 ++ chumweb/package.py | 73 ++++++++++++++++++++----- chumweb/repo_loader.py | 81 +++++++++++++++++----------- chumweb/static_site_gen.py | 5 ++ chumweb/www/views/pages/package.html | 17 ++++++ 5 files changed, 134 insertions(+), 45 deletions(-) diff --git a/chumweb/atom_feed.py b/chumweb/atom_feed.py index db9d9c4..34ea70d 100644 --- a/chumweb/atom_feed.py +++ b/chumweb/atom_feed.py @@ -81,6 +81,9 @@ def _create_pkg_entry(doc: Document, pkg: Package) -> Element: entry.appendChild(entry_link) entry_content_text = f"Package {pkg.name} was updated to version {pkg.version.to_short_str()}" + if len(pkg.changelog_entries) > 0: + entry_content_text += ":\n\n" + pkg.changelog_entries[0].text + entry_content = _create_simple_element(doc, "content", entry_content_text, type="text") entry.appendChild(entry_content) diff --git a/chumweb/package.py b/chumweb/package.py index d4af859..7dee922 100644 --- a/chumweb/package.py +++ b/chumweb/package.py @@ -2,6 +2,7 @@ Data classes for package metadata; this also parses metadata of a single package. 
""" import logging +import re from dataclasses import dataclass, field import enum from datetime import datetime, UTC @@ -16,6 +17,28 @@ logger = logging.getLogger(__name__) + +def _try_get_str(dom_element, name) -> str | None: + """Return content of XML tag with `name` or None""" + try: + return dom_element.getElementsByTagName(name)[0].firstChild.nodeValue + except (IndexError, AttributeError): + return None + + +def _try_get_attribute_tags(dom_element, name, *args: str): + """Return a tuple of the given attributes from an XML tag""" + result = (()) + try: + el = dom_element.getElementsByTagName(name)[0] + + for attr in args: + result += (el.getAttribute(attr),) + + return result + except IndexError: + return tuple([None for _ in args]) + class PackageApplicationCategory(StrEnum): """ Application categories, for their specification(s) and references see entry "Categories:" in @@ -183,6 +206,39 @@ class PackageApplicationType(StrEnum): firmware = enum.auto() +AUTHOR_VERSION_REGEX = re.compile(r"(?P.*) *<(?P.*)>[ -]*(?P.*)") +@dataclass +class ChangelogEntry: + timestamp: datetime + author: str + email: str + version: str + text: str + + @staticmethod + def from_node(pkg_name: str, dom_element) -> List: + entries: List["ChangelogEntry"] = [] + + for entry in dom_element.getElementsByTagName("changelog"): + try: + text: str = entry.firstChild.nodeValue + author_and_version = entry.getAttribute("author") + timestamp = datetime.fromtimestamp(int(entry.getAttribute("date"))) + m = AUTHOR_VERSION_REGEX.fullmatch(author_and_version) + if m: + author, email, version = m.group("author", "email", "version") + else: + author = author_and_version + email = "" + version = "" + entries.append(ChangelogEntry(timestamp, author, email, version, text)) + except Exception as e: + logger.warning(f"Parsing failed for changelog entry from {pkg_name}", exc_info=e) + + # Changelog entries are found from old to new in the XML + entries.reverse() + return entries + @dataclass class 
PackageVersion: epoch: str @@ -236,6 +292,7 @@ class Package: download_url: Dict[str, str] = field(default_factory=dict) checksum_type: Dict[str, str] = field(default_factory=dict) checksum_value: Dict[str, str] = field(default_factory=dict) + changelog_entries: List[ChangelogEntry] = field(default_factory=list) @staticmethod def from_node(dom_element, repo_arch: str): @@ -246,22 +303,10 @@ def from_node(dom_element, repo_arch: str): def try_get_str(name) -> str | None: """Return content of XML tag with `name` or None""" - try: - return dom_element.getElementsByTagName(name)[0].firstChild.nodeValue - except (IndexError, AttributeError): - return None + return _try_get_str(dom_element, name) def try_get_attribute_tags(name, *args: str): - result = (()) - try: - el = dom_element.getElementsByTagName(name)[0] - - for attr in args: - result += (el.getAttribute(attr),) - - return result - except IndexError: - return tuple([None for _ in args]) + return _try_get_attribute_tags(dom_element, name, *args) def try_get_version(): """Parse version""" diff --git a/chumweb/repo_loader.py b/chumweb/repo_loader.py index 11a233b..795942e 100644 --- a/chumweb/repo_loader.py +++ b/chumweb/repo_loader.py @@ -6,11 +6,11 @@ from gzip import GzipFile from os import makedirs from pathlib import Path -from typing import List, TypedDict, NotRequired, Unpack, Tuple, Dict +from typing import List, TypedDict, NotRequired, Unpack, Tuple, Dict, Optional from urllib.parse import urljoin from . 
def save_repo_data(repo_url: str, repo_name: str, out_dir: Path):
    """
    For a given `repo_url`, find and download the `primary` and (when listed)
    `other` metadata files to `out_dir`.

    The locations are looked up in `repodata/repomd.xml`. Returns a dictionary
    that maps the metadata type ("primary", "other") to the path of the
    downloaded file, or None when the repository does not list a `primary` file.
    Raises `requests.HTTPError` when one of the metadata files cannot be fetched.
    """
    files_to_download = ("primary", "other")
    data_urls = {}
    data_paths = {}

    def download_file(url: str, destination: Path):
        """
        Downloads the file at `url` to the given `destination`
        """
        # Stream the body so large metadata files are not held in memory, and
        # check the status first so an error page never ends up in a .gz file
        with requests.get(url, headers=DEFAULT_HEADERS, stream=True) as response:
            response.raise_for_status()
            with open(destination, "wb") as gz_file:
                for chunk in response.iter_content(8096):
                    gz_file.write(chunk)

    makedirs(out_dir, exist_ok=True)
    r = requests.get(urljoin(repo_url, "repodata/repomd.xml"), headers=DEFAULT_HEADERS)
    xml = minidom.parseString(r.content)

    for data_element in xml.getElementsByTagName("data"):
        data_type = data_element.getAttribute("type")
        if data_type not in files_to_download:
            continue
        locations = data_element.getElementsByTagName("location")
        if locations:
            data_urls[data_type] = locations[0].getAttribute("href")

    # Without the primary metadata there are no packages to read at all
    if "primary" not in data_urls:
        return None

    for data_type, data_url in data_urls.items():
        data_path = out_dir.joinpath(f"{repo_name}-{data_type}.xml.gz")
        download_file(urljoin(repo_url, data_url), data_path)
        data_paths[data_type] = data_path

    return data_paths


def read_repo_data(repo_url, repo_info: Dict[str, Path], repo_name: str) -> Dict[str, Package]:
    """
    Read all package data from the metadata files listed in `repo_info`.

    `repo_info["primary"]` must point to a `primary.xml.gz` file. When
    `repo_info` also has an "other" entry, the changelog entries found in that
    file are attached to the packages with the same name. Returns the packages
    keyed by their name.
    """
    pkgs = {}
    with GzipFile(repo_info["primary"]) as gz:
        xml = minidom.parse(gz)
        for xml_pkg in xml.getElementsByTagName("package"):
            pkg = Package.from_node(xml_pkg, repo_name)
            pkgs[pkg.name] = pkg

    other_path = repo_info.get("other")
    if other_path is not None:
        with GzipFile(other_path) as gz:
            xml = minidom.parse(gz)
            for xml_pkg in xml.getElementsByTagName("package"):
                name = xml_pkg.getAttribute("name")
                pkg = pkgs.get(name)
                # The two metadata files may disagree; a changelog for a package
                # that is not in the primary data has nothing to attach to
                if pkg is None:
                    continue
                pkg.changelog_entries = ChangelogEntry.from_node(name, xml_pkg)

    return pkgs


def link_debug_packages(pkgs: Dict[str, Package]) -> None:
    """
    Link debug packages to their corresponding package.

    A package called `<name>-debuginfo` or `<name>-debugsource` is stored on
    the package called `<name>`. Debug packages without such a base package in
    `pkgs` are left unlinked. The packages in `pkgs` are modified in-place.
    """
    for name, pkg in pkgs.items():
        if name.endswith("-debuginfo"):
            base_pkg = pkgs.get(name.removesuffix("-debuginfo"))
            if base_pkg is not None:
                base_pkg.debuginfo_package = pkg
        elif name.endswith("-debugsource"):
            base_pkg = pkgs.get(name.removesuffix("-debugsource"))
            if base_pkg is not None:
                base_pkg.debugsource_package = pkg
def _format_date(value: datetime):
    """
    Template filter that renders only the date part of `value`.

    Delegates to `_format_datetime`, passing "%Y-%m-%d" as `format_str`.
    """
    date_only_format = "%Y-%m-%d"
    return _format_datetime(value, format_str=date_only_format)

{{ pkg.title }}

{% endif %} + {% if pkg.changelog_entries|length > 0 %} +
+

Changelog

+

{{ pkg.changelog_entries[0].version }} ({{ pkg.changelog_entries[0].timestamp | format_date }})

+
{{ pkg.changelog_entries[0].text }}
+ {% if pkg.changelog_entries|length > 1 %} +
+

Older changelogs

+ {% for entry in pkg.changelog_entries[1:] %} +

{{ entry.version }} ({{ entry.timestamp | format_date }})

+
{{ entry.text }}
+ {% endfor %} +
+ {% endif %} +
+ + {% endif %}

App information