-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Updated ArchiveConfig to use single language (we'll only allow single-lang ZIMs for now) - New zimfarm module for ZF API communication - New endpoint /{project_id}/archives/{archive_id}/request to request a ZIM to be created by zimfarm. - generates collection.json based on files in project - uploads collection.json to S3 - calls zimfarm to create a dedicated, manual, schedule (passing a webhook url) - calls zimfarm to request a task for this schedule - calls zimfarm to delete schedule - records ZF task_id and status change in DB - New email sending capability via Mailgun API - DB Archive Model has new completed_on property - New endpoint /{project_id}/archives/{archive_id}/hook for the zimfarm to inform about status changes - records update in DB - sends notification emails with Templates - Lots of new configuration points (via environs) - Reorganized constants by feature - Reorganized some utils functions into utils modules - Updated alembic post-write hooks to use ruff instead of former isort
- Loading branch information
Showing
18 changed files
with
859 additions
and
143 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
from uuid import UUID | ||
|
||
from sqlalchemy import select | ||
|
||
from api.database import Session as DBSession | ||
from api.database.models import File, Project | ||
|
||
|
||
def get_file_by_id(file_id: UUID) -> File:
    """Fetch the File row whose id equals ``file_id``.

    The instance is expunged from the session before being returned.

    Raises:
        ValueError: when no File matches ``file_id``.
    """
    query = select(File).where(File.id == file_id)
    with DBSession.begin() as session:
        found = session.execute(query).scalar()
        if not found:
            raise ValueError(f"File not found: {file_id}")
        # detach the instance so it can be handed out of the session block
        session.expunge(found)
        return found
|
||
|
||
def get_project_by_id(project_id: UUID) -> Project:
    """Fetch the Project row whose id equals ``project_id``.

    The instance is expunged from the session before being returned.

    Raises:
        ValueError: when no Project matches ``project_id``.
    """
    query = select(Project).where(Project.id == project_id)
    with DBSession.begin() as session:
        found = session.execute(query).scalar()
        if not found:
            raise ValueError(f"Project not found: {project_id}")
        # detach the instance so it can be handed out of the session block
        session.expunge(found)
        return found
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
from collections.abc import Iterable | ||
from pathlib import Path | ||
from typing import Any | ||
|
||
import humanfriendly | ||
import requests | ||
from jinja2 import Environment, FileSystemLoader, select_autoescape | ||
from werkzeug.datastructures import MultiDict | ||
|
||
from api.constants import constants, logger | ||
from api.database.models import Archive | ||
|
||
# Jinja environment loading templates from the "templates" directory,
# with autoescaping selected for the "html" and "txt" extensions.
jinja_env = Environment(
    autoescape=select_autoescape(["html", "txt"]),
    loader=FileSystemLoader("templates"),
)
# Custom template filters:
#  - short_id: first 5 characters of the value's string form
#  - format_size: human-readable size using binary multiples
jinja_env.filters.update(
    {
        "short_id": lambda value: str(value)[:5],
        "format_size": lambda value: humanfriendly.format_size(value, binary=True),
    }
)
|
||
|
||
def _as_address_list(addresses: Iterable[str] | str | None) -> list[str]:
    """Normalize a recipient argument into a (possibly empty) list of addresses."""
    if addresses is None:
        return []
    if isinstance(addresses, str):
        # a str is itself Iterable: list("a@b.c") would yield single characters
        return [addresses]
    return list(addresses)


def send_email_via_mailgun(
    to: Iterable[str] | str,
    subject: str,
    contents: str,
    cc: Iterable[str] | None = None,
    bcc: Iterable[str] | None = None,
    attachments: Iterable[Path] | None = None,
) -> str:
    """Send an HTML email through the Mailgun HTTP API.

    Args:
        to: a single recipient address or an iterable of addresses.
        subject: subject line of the message.
        contents: HTML body of the message.
        cc: optional carbon-copy addresses.
        bcc: optional blind-carbon-copy addresses.
        attachments: optional paths of files to attach.

    Returns:
        The ``id`` field of Mailgun's JSON response when present, otherwise
        the raw response text. An empty string is returned when Mailgun is
        not configured or when no response could be obtained at all.

    Failures are logged and never raised to the caller (best-effort delivery).
    """
    if not constants.mailgun_api_url or not constants.mailgun_api_key:
        logger.warning(f"Mailgun not configured, ignoring email request to: {to!s}")
        return ""

    data = MultiDict(
        [
            ("from", constants.mailgun_from),
            ("subject", subject),
            ("html", contents),
            ("to", _as_address_list(to)),
            ("cc", _as_address_list(cc)),
            ("bcc", _as_address_list(bcc)),
        ]
    )

    # stays None if the request never produced a response (network error,
    # unreadable attachment, ...) so that we don't touch an unbound name below
    resp = None
    try:
        # read_bytes() opens and closes each attachment, leaving no dangling handle
        files = [
            ("attachment", (fpath.name, fpath.read_bytes()))
            for fpath in attachments or []
        ]
        resp = requests.post(
            url=f"{constants.mailgun_api_url}/messages",
            auth=("api", constants.mailgun_api_key),
            data=data,
            files=files,
            timeout=constants.mailgun_request_timeout_sec,
        )
        resp.raise_for_status()
    except Exception as exc:
        logger.error(f"Failed to send mailgun notif: {exc}")
        logger.exception(exc)

    if resp is None:
        return ""
    try:
        return resp.json().get("id") or resp.text
    except ValueError:
        # body is not JSON (typically an error page): hand back the raw text
        return resp.text
|
||
|
||
def get_context(task: dict[str, Any], archive: Archive):
    """Build the Jinja context dict used for email notifications.

    Exposes the configured public and download URLs together with the
    given ``task`` and ``archive``.
    """
    return dict(
        base_url=constants.public_url,
        download_url=constants.download_url,
        task=task,
        archive=archive,
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
import hashlib | ||
from collections.abc import Iterator | ||
from pathlib import Path | ||
from typing import BinaryIO | ||
from uuid import UUID | ||
|
||
from api.constants import constants | ||
from api.database import get_local_fpath_for | ||
|
||
|
||
def calculate_file_size(file: BinaryIO) -> int:
    """Return the number of bytes in ``file``, summed chunk by chunk."""
    return sum(len(chunk) for chunk in read_file_in_chunks(file))
|
||
|
||
def read_file_in_chunks(
    reader: BinaryIO, chunk_size=constants.chunk_size
) -> Iterator[bytes]:
    """Yield the content of ``reader`` in successive chunks.

    Each chunk is at most ``chunk_size`` long (defaults to
    ``constants.chunk_size``). Once the reader is exhausted it is rewound
    to its start so it can be read again.
    """
    while chunk := reader.read(chunk_size):
        yield chunk
    # only reached when iteration ran to completion
    reader.seek(0)
|
||
|
||
def save_file(file: BinaryIO, file_name: str, project_id: UUID) -> Path:
    """Write ``file`` to its local path for the project and return that path.

    The destination comes from ``get_local_fpath_for``; when a file already
    exists there, nothing is written and the existing path is returned.
    """
    destination = get_local_fpath_for(file_name, project_id)
    if destination.is_file():
        return destination

    with open(destination, "wb") as writer:
        writer.writelines(read_file_in_chunks(file))
    return destination
|
||
|
||
def generate_file_hash(file: BinaryIO) -> str:
    """Return the hex-encoded sha256 digest of ``file``.

    The content is fed to the hasher chunk by chunk rather than read at once.
    """
    sha256 = hashlib.sha256()
    feed = sha256.update
    for piece in read_file_in_chunks(file):
        feed(piece)
    return sha256.hexdigest()
|
||
|
||
def normalize_filename(filename: str) -> str:
    """Return a filesystem (ext4, apfs, hfs+, ntfs, exfat) and S3 compliant filename.

    - ``/`` is replaced with ``__`` as it would otherwise have a meaning
      (path separator).
    - Other prohibited characters (mostly for Windows context) are removed:
      ``\\ : * ? " < > |`` and control characters 1 to 31.
    - The result is truncated to 255 UTF-8 bytes (the ext4/exfat limit; S3
      allows 1KiB), never leaving a partial multi-byte character at the end.
    """
    normalized = str(filename).replace("/", "__")

    # single pass removal of every prohibited character
    prohibited = '\\:*?"<>|' + "".join(chr(idx) for idx in range(1, 32))
    normalized = normalized.translate(str.maketrans("", "", prohibited))

    # cutting at a byte boundary can split a multi-byte character:
    # errors="ignore" drops the dangling bytes instead of raising
    return normalized.encode("utf-8")[:255].decode("utf-8", errors="ignore")
Oops, something went wrong.