Generate Atom feeds for updated apps

Adds /pkgs/updates.atom and /apps/updates.atom for updated applications. This is a basic implementation. In the future, the entries could contain the application changelogs for example. Some refractoring had to be done as well related to generating download URLs, to avoid code duplication.
sailfishos-chum · Dec 23, 2023 · d70de9b · d70de9b
1 parent f806e44
commit d70de9b
Show file tree

Hide file tree

Showing 8 changed files with 249 additions and 21 deletions.
diff --git a/chumweb/atom_feed.py b/chumweb/atom_feed.py
@@ -0,0 +1,176 @@
+"""
+This package contains methods for writing Atom feeds
+"""
+from datetime import datetime
+from typing import List, Optional, Iterable
+from xml.dom.minidom import Document, Element
+
+from chumweb import CONFIG
+from chumweb.package import Package
+
+# Reuse the namespace that the primary.xml.gz file uses
+REPO_NS = "http://linux.duke.edu/metadata/common"
+
+
+def create_atom_feed(public_url: str, title: str, updated: datetime) -> Document:
+    """
+    Creates a basic Atom feed, with no entries
+
+    https://validator.w3.org/feed/docs/atom.html
+
+    :return: The created feed as an XML Document
+    """
+    doc = Document()
+    feed = doc.createElementNS("http://www.w3.org/2005/Atom", "feed")
+    feed.setAttribute("xmlns", "http://www.w3.org/2005/Atom")
+    feed.setAttribute("xmlns:repo", "http://linux.duke.edu/metadata/common")
+    doc.appendChild(feed)
+
+    el_id = _create_simple_element(doc, "id", public_url)
+    feed.appendChild(el_id)
+
+    el_title = _create_simple_element(doc, "title", title)
+    feed.appendChild(el_title)
+
+    el_updated = _create_simple_element(doc, "updated", updated.isoformat())
+    feed.appendChild(el_updated)
+
+    el_icon = _create_simple_element(doc, "icon", CONFIG.public_url + "static/img/sailfishos-chum.png")
+    feed.appendChild(el_icon)
+
+    feed.appendChild(_create_link_el(doc, public_url, rel="self"))
+
+    return doc
+
+
+def create_package_atom_feed(pkgs: List[Package], public_url: str, title: str) -> Document:
+    """
+    Creates a Atom feed with packages
+    :return: An XML Document representing the feed
+    """
+    doc = create_atom_feed(public_url, title, pkgs[0].updated)
+    feed = doc.getElementsByTagName("feed")[0]
+
+    for pkg in pkgs:
+        feed.appendChild(_create_pkg_entry(doc, pkg))
+
+    return doc
+
+
+def _create_pkg_entry(doc: Document, pkg: Package) -> Element:
+    """
+    Create a single entry for a package in an Atom feed
+    :param doc: The document where the elements should be created in
+    :param pkg: The package to create the entry for
+    :return: An element representing the package
+    """
+    entry = doc.createElement("entry")
+
+    entry_id = _create_simple_element(doc, "id", CONFIG.public_url + pkg.web_url())
+    entry.appendChild(entry_id)
+
+    entry_updated = _create_simple_element(doc, "updated", pkg.updated.isoformat())
+    entry.appendChild(entry_updated)
+
+    entry_title = _create_simple_element(doc, "title", pkg.title)
+    entry.appendChild(entry_title)
+
+    entry_link = _create_link_el(doc, CONFIG.public_url + pkg.web_url())
+    entry.appendChild(entry_link)
+
+    entry_content_text = f"Package {pkg.name} was updated to version {pkg.version.to_short_str()}"
+    entry_content = _create_simple_element(doc, "content", entry_content_text, type="text")
+    entry.appendChild(entry_content)
+
+    # Add author names
+    author_names = []
+
+    if pkg.packager_name:
+        author_names += [pkg.packager_name]
+    if pkg.developer_name:
+        author_names += [pkg.developer_name]
+
+    for name in author_names:
+        entry_author = _create_simple_element(doc, "author")
+        entry_author_name = _create_simple_element(doc, "name", name)
+        entry_author.appendChild(entry_author_name)
+        entry.appendChild(entry_author)
+
+    # Add categories
+    for category in pkg.categories:
+        entry_category = _create_simple_element(doc, "category", term=category)
+        entry.appendChild(entry_category)
+
+    # Add download links for RPM files
+    for arch in pkg.archs:
+        download_size = pkg.download_size[arch]
+        entry_rpm_link = _create_link_el(doc, pkg.get_download_url(arch), rel="enclosure",
+                                         length=download_size,
+                                         title=f"{pkg.name}-{pkg.version.to_full_str()}-{arch}.rpm",
+                                         type="application/x-rpm")
+        entry.appendChild(entry_rpm_link)
+
+    # Add chum-related metadata
+    for arch in pkg.archs:
+        pkg_el = _create_simple_element(doc, "repo:package", ns=REPO_NS, type="rpm")
+
+        entry_chum_name = _create_simple_element(doc, "repo:name", pkg.name, REPO_NS)
+        pkg_el.appendChild(entry_chum_name)
+
+        entry_chum_arch = _create_simple_element(doc, "repo:arch", arch, ns=REPO_NS)
+        pkg_el.appendChild(entry_chum_arch)
+
+        entry_chum_version = _create_simple_element(doc, "repo:version", ns=REPO_NS,
+                                                    epoch=pkg.version.epoch, ver=pkg.version.ver, rel=pkg.version.rel)
+        pkg_el.appendChild(entry_chum_version)
+
+        entry_chum_summary = _create_simple_element(doc, "repo:summary", pkg.summary, ns=REPO_NS)
+        pkg_el.appendChild(entry_chum_summary)
+
+        entry_chum_description = _create_simple_element(doc, "repo:description", pkg.description, ns=REPO_NS)
+        pkg_el.appendChild(entry_chum_description)
+
+        entry_chum_url = _create_simple_element(doc, "repo:url", pkg.url, ns=REPO_NS)
+        pkg_el.appendChild(entry_chum_url)
+
+        entry.appendChild(pkg_el)
+
+    return entry
+
+
+def _create_simple_element(doc: Document, tag_name: str, content: Optional[str | Element | Iterable[Element]] = None,
+                           ns: Optional[str] = None, **attrs) -> Element:
+    """
+    Creates a XML tag with the given tag name, children and attributes
+    :param tag_name: The name of the tag
+    :param content: The content of the tag
+    :param attrs: The attributes to set on the tag
+    :return: The created tag
+    """
+    if ns:
+        el = doc.createElementNS(ns, tag_name)
+    else:
+        el = doc.createElement(tag_name)
+
+    if content is None:
+        # Okay, do nothing
+        pass
+    elif type(content) is str:
+        el.appendChild(doc.createTextNode(content))
+    elif type(content) is Element:
+        el.appendChild(content)
+    elif type(content) is Iterable[Element]:
+        for child in content:
+            el.appendChild(child)
+    else:
+        assert False, "Unsupported content type: " + str(type(content))
+
+    for key, value in attrs.items():
+        el.setAttribute(key, value)
+
+    return el
+
+
+def _create_link_el(doc: Document, href: str, **kwargs):
+    kwargs["href"] = href
+    return _create_simple_element(doc, "link", **kwargs)
diff --git a/chumweb/config.py b/chumweb/config.py
@@ -48,8 +48,12 @@ class Config:
     repo_data_dir: str | None = None
     user_agent: str = "chumweb/1.0"
     source_code_url: str = ""
+    # The amount of featured apps to show on the home page
     featured_apps_count = 10
+    # The amount of updated apps to show in the sidebar
     updated_apps_count = 6
+    # The amount of updated apps to put in the Atom feed
+    feed_updated_apps_count = 20
 
 
 def init_config() -> Config:

diff --git a/chumweb/package.py b/chumweb/package.py
@@ -1,17 +1,20 @@
 """
 Data classes for package metadata. It is also responsible for parsing the metadate of a single package
 """
+import logging
 from dataclasses import dataclass, field
 import enum
-from datetime import datetime
+from datetime import datetime, UTC
 from enum import StrEnum
 from types import NoneType
-from typing import List, Dict, Self, Set
+from typing import List, Dict, Self, Set, Optional
 
 from markupsafe import Markup
 
+from . import CONFIG
 from .remote_image import RemoteImage
 
+logger = logging.getLogger(__name__)
 
 class PackageApplicationCategory(StrEnum):
     """
@@ -97,6 +100,7 @@ class Package:
     debug_yaml_errors: List[Exception] = field(default_factory=list)
     updated: datetime | None = field(default_factory=lambda: datetime.fromtimestamp(0))
 
+    repos: Set[str] = field(default_factory=set)
     archs: Set[str] = field(default_factory=set)
     download_size: Dict[str, int] = field(default_factory=dict)
     install_size: Dict[str, int] = field(default_factory=dict)
@@ -105,7 +109,7 @@ class Package:
     checksum_value: Dict[str, str] = field(default_factory=dict)
 
     @staticmethod
-    def from_node(dom_element):
+    def from_node(dom_element, repo_arch: str):
         """
         Creates a Package class instance from a `<package>` XML node `dom_element` as found in the primary.xml
         metadata in RPM repositories.
@@ -210,13 +214,14 @@ def parse_description(description: str, name: str):
         arch = try_get_str("arch")
 
         p = Package(try_get_str("name"))
+        p.repos.add(repo_arch)
         p.archs.add(arch)
         p.summary = try_get_str("summary")
         p.version = try_get_version()
         p.url = try_get_str("url")
         p.title = name_to_title(p.name)
         p.licence = try_get_str("rpm:license")
-        p.updated = datetime.fromtimestamp(float(try_get_attribute_tags("time", "file")[0]))
+        p.updated = datetime.fromtimestamp(float(try_get_attribute_tags("time", "file")[0]), UTC)
 
         p.download_size[arch], p.install_size[arch] = try_get_attribute_tags("size", "package", "installed")
         p.download_url[arch] = try_get_attribute_tags("location", "href")[0]
@@ -239,6 +244,7 @@ def merge_arch(self, other_pkg: Self):
         Adds the architecture-specific information from another package to this package
         """
         for arch in other_pkg.archs:
+            self.repos = self.repos.union(other_pkg.repos)
             self.download_size[arch] = other_pkg.download_size[arch]
             self.install_size[arch] = other_pkg.install_size[arch]
             self.download_url[arch] = other_pkg.download_url[arch]
@@ -266,6 +272,24 @@ def web_url(self):
         else:
             return f"pkgs/{self.name}/"
 
+    def get_download_url(self, arch: str) -> Optional[str]:
+        # noarch does not have a dedicated repository, use the first available arch I suppose
+        # This may be an idea in the category "not smart"
+        if arch == "noarch":
+            repo = next(self.repos.__iter__())
+        else:
+            for repo in self.repos:
+                repo_arch = repo.split("_")[1]
+                if repo_arch == arch:
+                    break
+            else:
+                logger.warning(f"No repo found for architecture {arch} (package: {self.name})")
+                #assert False, f"No repo found for architecture {arch} (package: {self.name})"
+                return None
+
+        return f"{CONFIG.repo_url_prefix}{repo}/" + self.download_url[arch]
+
+
     def caused_requests(self):
         return type(self.markdown_url) == str
 

diff --git a/chumweb/repo_loader.py b/chumweb/repo_loader.py
@@ -115,7 +115,7 @@ def load_repo(obs_url: str, obs_project: str, obs_auth: Tuple[str, str], repo_ur
     for data_path, repo_name in zip(data_paths, repos):
         step_progress(parse_step, repo_name, i, repo_count)
         arch = repo_name.split("_")[1]
-        all_pkgs[arch] = read_repo_data(urljoin(repo_url, repo_name), data_path)
+        all_pkgs[arch] = read_repo_data(urljoin(repo_url, repo_name), data_path, repo_name)
         link_debug_packages(all_pkgs[arch])
         i += 1
 
@@ -171,15 +171,15 @@ def save_repo_data(repo_url: str, repo_name: str, out_dir: Path):
     return primary_gz_path
 
 
-def read_repo_data(repo_url, repo_info: Path) -> List[Package]:
+def read_repo_data(repo_url, repo_info: Path, repo_name: str) -> List[Package]:
     """
     Reads all package data from a `primary.xml.gz` file
     """
     pkgs = []
     with GzipFile(repo_info) as gz:
         xml = minidom.parse(gz)
         for xmlPkg in xml.getElementsByTagName("package"):
-            pkgs.append(Package.from_node(xmlPkg))
+            pkgs.append(Package.from_node(xmlPkg, repo_name))
 
     return pkgs