Skip to content

Commit

Permalink
Generate Atom feeds for updated apps
Browse files Browse the repository at this point in the history
Adds /pkgs/updates.atom and /apps/updates.atom for updated applications.
This is a basic implementation. In the future, the entries could contain
the application changelogs for example.

Some refactoring related to generating download URLs had to be done as
well, to avoid code duplication.
  • Loading branch information
HenkKalkwater committed Dec 23, 2023
1 parent f806e44 commit d70de9b
Show file tree
Hide file tree
Showing 8 changed files with 249 additions and 21 deletions.
176 changes: 176 additions & 0 deletions chumweb/atom_feed.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
"""
This module contains functions for writing Atom feeds
"""
from datetime import datetime
from typing import List, Optional, Iterable
from xml.dom.minidom import Document, Element

from chumweb import CONFIG
from chumweb.package import Package

# Reuse the namespace that the primary.xml.gz file uses
REPO_NS = "http://linux.duke.edu/metadata/common"


def create_atom_feed(public_url: str, title: str, updated: datetime) -> Document:
    """
    Creates a basic Atom feed, with no entries
    https://validator.w3.org/feed/docs/atom.html

    :param public_url: The URL of the feed itself; used as the feed id and as the target of its "self" link
    :param title: The title of the feed
    :param updated: The moment the feed was last updated; written to the feed in ISO 8601 format
    :return: The created feed as an XML Document
    """
    atom_ns = "http://www.w3.org/2005/Atom"

    doc = Document()
    feed = doc.createElementNS(atom_ns, "feed")
    # The namespace declarations are written as plain attributes, so that they end up in the serialized XML
    feed.setAttribute("xmlns", atom_ns)
    # Use the module constant, so the declared prefix always matches the namespace of the repo:* elements
    feed.setAttribute("xmlns:repo", REPO_NS)
    doc.appendChild(feed)

    feed.appendChild(_create_simple_element(doc, "id", public_url))
    feed.appendChild(_create_simple_element(doc, "title", title))
    feed.appendChild(_create_simple_element(doc, "updated", updated.isoformat()))
    feed.appendChild(_create_simple_element(doc, "icon", CONFIG.public_url + "static/img/sailfishos-chum.png"))
    feed.appendChild(_create_link_el(doc, public_url, rel="self"))

    return doc


def create_package_atom_feed(pkgs: List[Package], public_url: str, title: str) -> Document:
    """
    Creates an Atom feed with one entry for each of the given packages

    The "updated" timestamp of the feed is taken from the first package in the list
    (NOTE(review): this presumably expects the list to be sorted newest first - confirm against the caller).
    When the list is empty, the current time in UTC is used instead, so an empty but valid feed is produced.

    :param pkgs: The packages to create entries for, in the order they should appear in the feed
    :param public_url: The URL of the feed itself
    :param title: The title of the feed
    :return: An XML Document representing the feed
    """
    if pkgs:
        feed_updated = pkgs[0].updated
    else:
        # Local import: only this fallback needs it, and it keeps the function self-contained
        from datetime import timezone
        feed_updated = datetime.now(timezone.utc)

    doc = create_atom_feed(public_url, title, feed_updated)
    # The <feed> element is the root element of the document created above
    feed = doc.documentElement

    for pkg in pkgs:
        feed.appendChild(_create_pkg_entry(doc, pkg))

    return doc


def _create_pkg_entry(doc: Document, pkg: Package) -> Element:
    """
    Create a single entry for a package in an Atom feed

    The entry contains, in this order: the id, updated timestamp, title, a link to the package page,
    a short text content, the authors, the categories, an enclosure link for each architecture that has
    a download URL and a repo:package element with repository metadata for each architecture.

    :param doc: The document where the elements should be created in
    :param pkg: The package to create the entry for
    :return: An element representing the package
    """
    # The web page of the package doubles as the entry id and as the target of the entry link
    pkg_page_url = CONFIG.public_url + pkg.web_url()

    entry = doc.createElement("entry")
    entry.appendChild(_create_simple_element(doc, "id", pkg_page_url))
    entry.appendChild(_create_simple_element(doc, "updated", pkg.updated.isoformat()))
    entry.appendChild(_create_simple_element(doc, "title", pkg.title))
    entry.appendChild(_create_link_el(doc, pkg_page_url))

    entry_content_text = f"Package {pkg.name} was updated to version {pkg.version.to_short_str()}"
    entry.appendChild(_create_simple_element(doc, "content", entry_content_text, type="text"))

    # Add author names, skipping the ones that are not set
    for name in (pkg.packager_name, pkg.developer_name):
        if not name:
            continue
        entry_author = _create_simple_element(doc, "author")
        entry_author.appendChild(_create_simple_element(doc, "name", name))
        entry.appendChild(entry_author)

    # Add categories
    for category in pkg.categories:
        entry.appendChild(_create_simple_element(doc, "category", term=category))

    # Add download links for RPM files
    for arch in pkg.archs:
        download_url = pkg.get_download_url(arch)
        if download_url is None:
            # get_download_url returns None when it cannot find a repository for this architecture.
            # An enclosure link without a target is useless to feed readers, so leave it out.
            continue
        entry_rpm_link = _create_link_el(doc, download_url, rel="enclosure",
                                         length=pkg.download_size[arch],
                                         title=f"{pkg.name}-{pkg.version.to_full_str()}-{arch}.rpm",
                                         type="application/x-rpm")
        entry.appendChild(entry_rpm_link)

    # Add chum-related metadata
    for arch in pkg.archs:
        pkg_el = _create_simple_element(doc, "repo:package", ns=REPO_NS, type="rpm")

        pkg_el.appendChild(_create_simple_element(doc, "repo:name", pkg.name, REPO_NS))
        pkg_el.appendChild(_create_simple_element(doc, "repo:arch", arch, ns=REPO_NS))
        pkg_el.appendChild(_create_simple_element(doc, "repo:version", ns=REPO_NS,
                                                  epoch=pkg.version.epoch, ver=pkg.version.ver,
                                                  rel=pkg.version.rel))
        pkg_el.appendChild(_create_simple_element(doc, "repo:summary", pkg.summary, ns=REPO_NS))
        pkg_el.appendChild(_create_simple_element(doc, "repo:description", pkg.description, ns=REPO_NS))
        pkg_el.appendChild(_create_simple_element(doc, "repo:url", pkg.url, ns=REPO_NS))

        entry.appendChild(pkg_el)

    return entry


def _create_simple_element(doc: Document, tag_name: str, content: Optional[str | Element | Iterable[Element]] = None,
ns: Optional[str] = None, **attrs) -> Element:
"""
Creates a XML tag with the given tag name, children and attributes
:param tag_name: The name of the tag
:param content: The content of the tag
:param attrs: The attributes to set on the tag
:return: The created tag
"""
if ns:
el = doc.createElementNS(ns, tag_name)
else:
el = doc.createElement(tag_name)

if content is None:
# Okay, do nothing
pass
elif type(content) is str:
el.appendChild(doc.createTextNode(content))
elif type(content) is Element:
el.appendChild(content)
elif type(content) is Iterable[Element]:
for child in content:
el.appendChild(child)
else:
assert False, "Unsupported content type: " + str(type(content))

for key, value in attrs.items():
el.setAttribute(key, value)

return el


def _create_link_el(doc: Document, href: str, **kwargs):
    """
    Creates a <link> tag that points to the given location
    :param doc: The document where the element should be created in
    :param href: The value of the href attribute of the link
    :param kwargs: Any other attributes to set on the link, such as rel, type or title
    :return: The created link tag
    """
    # href goes last, after the attributes given by the caller
    link_attrs = {**kwargs, "href": href}
    return _create_simple_element(doc, "link", **link_attrs)
4 changes: 4 additions & 0 deletions chumweb/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,12 @@ class Config:
repo_data_dir: str | None = None
user_agent: str = "chumweb/1.0"
source_code_url: str = ""
# The number of featured apps to show on the home page
featured_apps_count = 10
# The number of updated apps to show in the sidebar
updated_apps_count = 6
# The number of updated apps to put in the Atom feed
feed_updated_apps_count = 20


def init_config() -> Config:
Expand Down
32 changes: 28 additions & 4 deletions chumweb/package.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,20 @@
"""
Data classes for package metadata. It is also responsible for parsing the metadata of a single package
"""
import logging
from dataclasses import dataclass, field
import enum
from datetime import datetime
from datetime import datetime, UTC
from enum import StrEnum
from types import NoneType
from typing import List, Dict, Self, Set
from typing import List, Dict, Self, Set, Optional

from markupsafe import Markup

from . import CONFIG
from .remote_image import RemoteImage

logger = logging.getLogger(__name__)

class PackageApplicationCategory(StrEnum):
"""
Expand Down Expand Up @@ -97,6 +100,7 @@ class Package:
debug_yaml_errors: List[Exception] = field(default_factory=list)
updated: datetime | None = field(default_factory=lambda: datetime.fromtimestamp(0))

repos: Set[str] = field(default_factory=set)
archs: Set[str] = field(default_factory=set)
download_size: Dict[str, int] = field(default_factory=dict)
install_size: Dict[str, int] = field(default_factory=dict)
Expand All @@ -105,7 +109,7 @@ class Package:
checksum_value: Dict[str, str] = field(default_factory=dict)

@staticmethod
def from_node(dom_element):
def from_node(dom_element, repo_arch: str):
"""
Creates a Package class instance from a `<package>` XML node `dom_element` as found in the primary.xml
metadata in RPM repositories.
Expand Down Expand Up @@ -210,13 +214,14 @@ def parse_description(description: str, name: str):
arch = try_get_str("arch")

p = Package(try_get_str("name"))
p.repos.add(repo_arch)
p.archs.add(arch)
p.summary = try_get_str("summary")
p.version = try_get_version()
p.url = try_get_str("url")
p.title = name_to_title(p.name)
p.licence = try_get_str("rpm:license")
p.updated = datetime.fromtimestamp(float(try_get_attribute_tags("time", "file")[0]))
p.updated = datetime.fromtimestamp(float(try_get_attribute_tags("time", "file")[0]), UTC)

p.download_size[arch], p.install_size[arch] = try_get_attribute_tags("size", "package", "installed")
p.download_url[arch] = try_get_attribute_tags("location", "href")[0]
Expand All @@ -239,6 +244,7 @@ def merge_arch(self, other_pkg: Self):
Adds the architecture-specific information from another package to this package
"""
for arch in other_pkg.archs:
self.repos = self.repos.union(other_pkg.repos)
self.download_size[arch] = other_pkg.download_size[arch]
self.install_size[arch] = other_pkg.install_size[arch]
self.download_url[arch] = other_pkg.download_url[arch]
Expand Down Expand Up @@ -266,6 +272,24 @@ def web_url(self):
else:
return f"pkgs/{self.name}/"

def get_download_url(self, arch: str) -> Optional[str]:
    """
    Builds the URL where the RPM file of this package for the given architecture can be downloaded

    The URL consists of CONFIG.repo_url_prefix, the name of a repository and the download location that
    is stored for the architecture. This assumes download_url has an entry for arch.

    :param arch: The architecture to get the download URL for, "noarch" included
    :return: The download URL, or None (after logging a warning) if no suitable repository is known
    """
    if arch == "noarch":
        # noarch does not have a dedicated repository, so use one of the repositories the package was
        # found in. Taking the alphabetically first one keeps the URL the same between runs
        repo = min(self.repos, default=None)
    else:
        # The part between the first and second underscore of the repository name is compared with the
        # architecture. The slice keeps names without an underscore from raising an IndexError
        repo = next((name for name in self.repos if name.split("_")[1:2] == [arch]), None)

    if repo is None:
        logger.warning("No repo found for architecture %s (package: %s)", arch, self.name)
        return None

    return f"{CONFIG.repo_url_prefix}{repo}/" + self.download_url[arch]


def caused_requests(self) -> bool:
    """
    Tells whether markdown_url of this package holds a string (subclasses of str included)

    NOTE(review): judging by the name, a set markdown_url presumably means that HTTP requests were
    made for this package - confirm against the callers.

    :return: True if markdown_url is a string, False otherwise
    """
    return isinstance(self.markdown_url, str)

Expand Down
6 changes: 3 additions & 3 deletions chumweb/repo_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ def load_repo(obs_url: str, obs_project: str, obs_auth: Tuple[str, str], repo_ur
for data_path, repo_name in zip(data_paths, repos):
step_progress(parse_step, repo_name, i, repo_count)
arch = repo_name.split("_")[1]
all_pkgs[arch] = read_repo_data(urljoin(repo_url, repo_name), data_path)
all_pkgs[arch] = read_repo_data(urljoin(repo_url, repo_name), data_path, repo_name)
link_debug_packages(all_pkgs[arch])
i += 1

Expand Down Expand Up @@ -171,15 +171,15 @@ def save_repo_data(repo_url: str, repo_name: str, out_dir: Path):
return primary_gz_path


def read_repo_data(repo_url, repo_info: Path) -> List[Package]:
def read_repo_data(repo_url, repo_info: Path, repo_name: str) -> List[Package]:
"""
Reads all package data from a `primary.xml.gz` file
"""
pkgs = []
with GzipFile(repo_info) as gz:
xml = minidom.parse(gz)
for xmlPkg in xml.getElementsByTagName("package"):
pkgs.append(Package.from_node(xmlPkg))
pkgs.append(Package.from_node(xmlPkg, repo_name))

return pkgs

Expand Down
Loading

0 comments on commit d70de9b

Please sign in to comment.