Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improved download_attachments_from_page: added filename filter for si… #1476

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
123 changes: 89 additions & 34 deletions atlassian/confluence.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,25 @@
# coding=utf-8
import io
import json
import logging
import os
import time
import json
import re
from requests import HTTPError
import requests
from deprecated import deprecated
import time

from bs4 import BeautifulSoup
from deprecated import deprecated
import requests
from requests import HTTPError

from atlassian import utils
from .errors import ApiError, ApiNotFoundError, ApiPermissionError, ApiValueError, ApiConflictError, ApiNotAcceptable
from .errors import (
ApiConflictError,
ApiError,
ApiNotAcceptable,
ApiNotFoundError,
ApiPermissionError,
ApiValueError,
)
from .rest_client import AtlassianRestAPI

log = logging.getLogger(__name__)
Expand Down Expand Up @@ -1390,40 +1400,85 @@ def attach_file(
comment=comment,
)

def download_attachments_from_page(self, page_id, path=None, start=0, limit=50, filename=None, to_memory=False):
    """
    Download attachments from a Confluence page, either to disk or into memory.

    :param page_id: ID of the page whose attachments are fetched.
    :param path: OPTIONAL: directory where attachments are saved. If None, the current
                 working directory is used. Not used when ``to_memory`` is True.
    :param start: OPTIONAL: start point of the attachment collection. Default: 0.
                  Not passed to the lookup when ``filename`` is given.
    :param limit: OPTIONAL: maximum number of attachments to fetch. Default: 50.
                  Not passed to the lookup when ``filename`` is given.
    :param filename: OPTIONAL: name of a single attachment to download. When given,
                     only attachments returned for that filename are fetched.
    :param to_memory: OPTIONAL: if True, nothing is written to disk and the contents
                      are returned as ``io.BytesIO`` objects. Default: False.
    :return:
        - a ``str`` message when the lookup returns no attachments;
        - if ``to_memory`` is True: ``{name: io.BytesIO}``, keyed by attachment title
          (or attachment id when the title is empty); attachments sharing a name
          overwrite each other, the last one wins;
        - otherwise: ``{"attachments_downloaded": int, "path": str}``.
    :raises NotADirectoryError: if a component of ``path`` is not a directory.
    :raises PermissionError: if a file cannot be written because of permissions.
    :raises FileNotFoundError: propagated unchanged from ``open`` (e.g. missing directory).

    Errors raised by the attachment lookup or by ``response.raise_for_status()``
    propagate unchanged, so callers keep the original exception type and traceback.
    """
    if not to_memory and path is None:
        path = os.getcwd()

    if filename:
        attachments = self.get_attachments_from_content(page_id=page_id, filename=filename)["results"]
        if not attachments:
            return f"No attachment with filename '{filename}' found on the page."
    else:
        attachments = self.get_attachments_from_content(page_id=page_id, start=start, limit=limit)["results"]
        if not attachments:
            return "No attachments found on the page."

    downloaded_files = {}
    for attachment in attachments:
        # Fall back to the attachment id when the title is empty.
        file_name = attachment["title"] or attachment["id"]
        download_link = self.url + attachment["_links"]["download"]

        response = self._session.get(download_link)
        # Fail loudly instead of saving an error page as the attachment body.
        response.raise_for_status()

        if to_memory:
            downloaded_files[file_name] = io.BytesIO(response.content)
            continue

        # The title comes from the server: keep only its last path component so
        # that a name such as "../x" cannot escape the target directory.
        safe_name = os.path.basename(str(file_name)) or str(attachment["id"])
        file_path = os.path.join(path, safe_name)
        try:
            with open(file_path, "wb") as file_obj:
                file_obj.write(response.content)
        except NotADirectoryError as err:
            raise NotADirectoryError("Verify if directory path is correct and/or if directory exists") from err
        except PermissionError as err:
            raise PermissionError(f"Permission denied when trying to save files to '{path}'.") from err

    if to_memory:
        return downloaded_files
    return {"attachments_downloaded": len(attachments), "path": path}

def delete_attachment(self, page_id, filename, version=None):
"""
Expand Down
Loading