Skip to content

Commit

Permalink
Add package changelogs
Browse files Browse the repository at this point in the history
Adds code to load package changelogs from the repository information.
All entries are then displayed on each individual app page and the most
recent changelog entry is included in the RSS feed.
  • Loading branch information
HenkKalkwater committed Dec 5, 2024
1 parent 43378ff commit 0da67c5
Show file tree
Hide file tree
Showing 5 changed files with 134 additions and 45 deletions.
3 changes: 3 additions & 0 deletions chumweb/atom_feed.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,9 @@ def _create_pkg_entry(doc: Document, pkg: Package) -> Element:
entry.appendChild(entry_link)

entry_content_text = f"Package {pkg.name} was updated to version {pkg.version.to_short_str()}"
if len(pkg.changelog_entries) > 0:
entry_content_text += ":\n\n" + pkg.changelog_entries[0].text

entry_content = _create_simple_element(doc, "content", entry_content_text, type="text")
entry.appendChild(entry_content)

Expand Down
73 changes: 59 additions & 14 deletions chumweb/package.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
Data classes for package metadata; this also parses metadata of a single package.
"""
import logging
import re
from dataclasses import dataclass, field
import enum
from datetime import datetime, UTC
Expand All @@ -16,6 +17,28 @@

logger = logging.getLogger(__name__)


def _try_get_str(dom_element, name) -> str | None:
"""Return content of XML tag with `name` or None"""
try:
return dom_element.getElementsByTagName(name)[0].firstChild.nodeValue
except (IndexError, AttributeError):
return None


def _try_get_attribute_tags(dom_element, name, *args: str):
"""Return a tuple of the given attributes from an XML tag"""
result = (())
try:
el = dom_element.getElementsByTagName(name)[0]

for attr in args:
result += (el.getAttribute(attr),)

return result
except IndexError:
return tuple([None for _ in args])

class PackageApplicationCategory(StrEnum):
"""
Application categories, for their specification(s) and references see entry "Categories:" in
Expand Down Expand Up @@ -183,6 +206,39 @@ class PackageApplicationType(StrEnum):
firmware = enum.auto()


AUTHOR_VERSION_REGEX = re.compile(r"(?P<author>.*) *<(?P<email>.*)>[ -]*(?P<version>.*)")
@dataclass
class ChangelogEntry:
timestamp: datetime
author: str
email: str
version: str
text: str

@staticmethod
def from_node(pkg_name: str, dom_element) -> List:
entries: List["ChangelogEntry"] = []

for entry in dom_element.getElementsByTagName("changelog"):
try:
text: str = entry.firstChild.nodeValue
author_and_version = entry.getAttribute("author")
timestamp = datetime.fromtimestamp(int(entry.getAttribute("date")))
m = AUTHOR_VERSION_REGEX.fullmatch(author_and_version)
if m:
author, email, version = m.group("author", "email", "version")
else:
author = author_and_version
email = ""
version = ""
entries.append(ChangelogEntry(timestamp, author, email, version, text))
except Exception as e:
logger.warning(f"Parsing failed for changelog entry from {pkg_name}", exc_info=e)

# Changelog entries are found from old to new in the XML
entries.reverse()
return entries

@dataclass
class PackageVersion:
epoch: str
Expand Down Expand Up @@ -236,6 +292,7 @@ class Package:
download_url: Dict[str, str] = field(default_factory=dict)
checksum_type: Dict[str, str] = field(default_factory=dict)
checksum_value: Dict[str, str] = field(default_factory=dict)
changelog_entries: List[ChangelogEntry] = field(default_factory=list)

@staticmethod
def from_node(dom_element, repo_arch: str):
Expand All @@ -246,22 +303,10 @@ def from_node(dom_element, repo_arch: str):

def try_get_str(name) -> str | None:
"""Return content of XML tag with `name` or None"""
try:
return dom_element.getElementsByTagName(name)[0].firstChild.nodeValue
except (IndexError, AttributeError):
return None
return _try_get_str(dom_element, name)

def try_get_attribute_tags(name, *args: str):
result = (())
try:
el = dom_element.getElementsByTagName(name)[0]

for attr in args:
result += (el.getAttribute(attr),)

return result
except IndexError:
return tuple([None for _ in args])
return _try_get_attribute_tags(dom_element, name, *args)

def try_get_version():
"""Parse version"""
Expand Down
81 changes: 50 additions & 31 deletions chumweb/repo_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@
from gzip import GzipFile
from os import makedirs
from pathlib import Path
from typing import List, TypedDict, NotRequired, Unpack, Tuple, Dict
from typing import List, TypedDict, NotRequired, Unpack, Tuple, Dict, Optional
from urllib.parse import urljoin

from . import CONFIG
from .package import Package
from .package import Package, ChangelogEntry

import requests
import xml.dom.minidom as minidom
Expand Down Expand Up @@ -101,8 +101,9 @@ def load_repo(obs_url: str, obs_project: str, obs_auth: Tuple[str, str], repo_ur
begin_step("Listing repos")
repos = filter_newest_repos(list_obs_project_repos(obs_url, obs_project, obs_auth))

data_paths: List[Dict[str, Path]]
if "data_path" in kwargs:
data_paths = [Path(kwargs["data_path"], f"{repo}.xml.gz") for repo in repos]
data_paths = [{'primary': Path(kwargs["data_path"], f"{repo}.xml.gz")} for repo in repos]
else:
begin_step("Downloading repos")
data_paths = [save_repo_data(urljoin(repo_url, repo_name + "/"), repo_name, out_dir) for repo_name in repos]
Expand All @@ -123,7 +124,7 @@ def load_repo(obs_url: str, obs_project: str, obs_auth: Tuple[str, str], repo_ur

begin_step("Combining repos")
for arch, pkg_list in all_pkgs.items():
for pkg in pkg_list:
for pkg in pkg_list.values():
if pkg.name in all_pkg_list:
all_pkg_list[pkg.name].merge_arch(pkg)
else:
Expand All @@ -142,65 +143,83 @@ def save_repo_data(repo_url: str, repo_name: str, out_dir: Path):
"""
For a given `repo_url`, find and download the `primary.xml.gz` file to `out_dir`.
"""

files_to_download = ["primary", "other"]
data_urls = {}
data_paths = {}

def download_file(url: str, destination: Path):
"""
Downloads the file at `url` to the given `destination`
"""
with open(destination, "wb") as gzFile:
r = requests.get(url, headers=DEFAULT_HEADERS)

for chunk in r.iter_content(8096):
gzFile.write(chunk)

makedirs(out_dir, exist_ok=True)
r = requests.get(urljoin(repo_url, "repodata/repomd.xml"), headers=DEFAULT_HEADERS)
xml = minidom.parseString(r.content)

data_elements = xml.getElementsByTagName("data")

primary_url: str | None = None
for dataElement in data_elements:
if dataElement.getAttribute("type") == "primary":
data_type = dataElement.getAttribute("type")
if data_type in files_to_download:
locations = dataElement.getElementsByTagName("location")
if len(locations) > 0:
primary_url = locations[0].getAttribute("href")
break
data_urls[data_type] = locations[0].getAttribute("href")

if not primary_url:
if "primary" not in data_urls:
return

primary_url = urljoin(repo_url, primary_url)
primary_gz_path = out_dir.joinpath(f"{repo_name}.xml.gz")

with open(primary_gz_path, "wb") as primaryGzFile:
r = requests.get(primary_url, headers=DEFAULT_HEADERS)
for (data_type, data_url) in data_urls.items():
data_url = urljoin(repo_url, data_url)
data_path = out_dir.joinpath(f"{repo_name}-{data_type}.xml.gz")
download_file(data_url, data_path)
data_paths[data_type] = data_path

for chunk in r.iter_content(8096):
primaryGzFile.write(chunk)
return data_paths

return primary_gz_path


def read_repo_data(repo_url, repo_info: Path, repo_name: str) -> List[Package]:
def read_repo_data(repo_url, repo_info: Dict[str, Path], repo_name: str) -> Dict[str, Package]:
"""
Read all package data from a `primary.xml.gz` file.
"""
pkgs = []
with GzipFile(repo_info) as gz:
pkgs = {}
with GzipFile(repo_info["primary"]) as gz:
xml = minidom.parse(gz)
for xmlPkg in xml.getElementsByTagName("package"):
pkgs.append(Package.from_node(xmlPkg, repo_name))
pkg = Package.from_node(xmlPkg, repo_name)
pkgs[pkg.name] = pkg

if "other" in repo_info:
with GzipFile(repo_info["other"]) as gz:
xml = minidom.parse(gz)
for xmlPkg in xml.getElementsByTagName("package"):
name = xmlPkg.getAttribute("name")
entries = ChangelogEntry.from_node(name, xmlPkg)
pkgs[name].changelog_entries = entries

return pkgs


def link_debug_packages(pkgs: List[Package]) -> None:
def link_debug_packages(pkgs: Dict[str, Package]) -> None:
"""
Link debug packages to their corresponding package.
This method modifies given `pkgs` in-place.
"""

list.sort(pkgs, key=lambda p: p.name)
last_pkg = None

for pkg in pkgs:
if pkg.name.endswith("-debuginfo"):
last_pkg.debuginfo_package = pkg
for (name, pkg) in pkgs.items():
if name.endswith("-debuginfo"):
base_name = name.removesuffix("-debuginfo")
pkgs[base_name].debuginfo_package = pkg
pass
elif pkg.name.endswith("-debugsource"):
last_pkg.debugsource_package = pkg
else:
last_pkg = pkg
base_name = name.removesuffix("-debugsource")
pkgs[base_name].debugsource_package = pkg


def load_remote_descriptions(pkgs: List[Package], step: StepHandle):
Expand Down
5 changes: 5 additions & 0 deletions chumweb/static_site_gen.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,7 @@ def create_package_page(pkg: Package):
env.filters["paragraphise"] = _paragraphise_filter
env.filters["fallback_icon"] = _fallback_icon_filter
env.filters["format_datetime"] = _format_datetime
env.filters["format_date"] = _format_date
env.filters["to_public_url"] = _to_absolute_url_filter

step_progress(sitegen_step, "Generating static pages", 2, total_sitegen_steps)
Expand Down Expand Up @@ -412,6 +413,10 @@ def _format_datetime(value: datetime, format_str=None):
return value.strftime("%Y-%m-%d %H:%M:%S")


def _format_date(value: datetime):
    """
    Template filter that renders `value` through `_format_datetime` with the date-only pattern `%Y-%m-%d`.
    """
    date_only_pattern = "%Y-%m-%d"
    return _format_datetime(value, format_str=date_only_pattern)


def _to_absolute_url_filter(path: str) -> str:
"""
Resolves a path to an absolute URL based on the public URL in the configuration.
Expand Down
17 changes: 17 additions & 0 deletions chumweb/www/views/pages/package.html
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,23 @@ <h1 class="pkg-title">{{ pkg.title }} </h1>
</details>
{% endif %}
</section>
{% if pkg.changelog_entries|length > 0 %}
<section>
<header><h2>Changelog</h2></header>
<h3>{{ pkg.changelog_entries[0].version }} ({{ pkg.changelog_entries[0].timestamp | format_date }})</h3>
<pre>{{ pkg.changelog_entries[0].text }}</pre>
{% if pkg.changelog_entries|length > 1 %}
<details>
<summary><h3 style="display: inline-block; margin: 0.5em 0;">Older changelogs</h3></summary>
{% for entry in pkg.changelog_entries[1:] %}
<h4>{{ entry.version }} ({{ entry.timestamp | format_date }})</h4>
<pre>{{ entry.text }}</pre>
{% endfor %}
</details>
{% endif %}
</section>

{% endif %}
<section>
<header><h2>App information</h2></header>
<dl>
Expand Down

0 comments on commit 0da67c5

Please sign in to comment.