def get_space_export(self, space_key: str, export_type: str) -> str:
    """
    Export a Confluence space to a file of the specified type.
    (!) This method was developed for Confluence Cloud and may not work with Confluence on-prem.
    (!) This is an experimental method that does not trigger an officially supported REST endpoint.
        It may break if Atlassian changes the space export front-end logic.
    (!) The export status is polled every 30 seconds with no upper time limit; the call blocks
        until the status response reports the export as complete.

    :param space_key: The key of the space to export.
    :param export_type: The type of export to perform. Valid values are: 'html', 'csv', 'xml', 'pdf'.
    :return: The URL to download the exported file.
    :raises ApiError: if export_type is not one of the valid values, if the atl_token or the
        poll URL cannot be read from the returned pages, or if the status check fails.
    """

    # Per-format settings for the export form: (page holding the atl_token, exportType, contentOption).
    # PDF is not listed because it goes through a separate flow (see below).
    export_forms = {
        "csv": ("spaces/exportspacecsv.action", "TYPE_CSV", "all"),
        "html": ("spaces/exportspacehtml.action", "TYPE_HTML", "visibleOnly"),
        "xml": ("spaces/exportspacexml.action", "TYPE_XML", "all"),
    }

    def get_atl_request(url: str):
        # Nested function used to get the atl_token used for XSRF protection.
        # This is only applicable to html/csv/xml space exports.
        try:
            response = self.get(url, advanced_mode=True)
            parsed_html = BeautifulSoup(response.text, "html.parser")
            return parsed_html.find("input", {"name": "atl_token"}).get("value")
        except Exception as e:
            raise ApiError("Problems with getting the atl_token for get_space_export method :", reason=e)

    # Checks if space_key parameter is valid and if api_token has relevant permissions to space
    self.get_space(space_key=space_key, expand="permissions")

    try:
        log.info(
            "Initiated experimental get_space_export method for export type: %s from Confluence space: %s",
            export_type,
            space_key,
        )
        if export_type == "pdf":
            url = "spaces/flyingpdf/doflyingpdf.action?key=" + space_key
            log.info("Initiate PDF space export from space " + str(space_key))
            return self.get_pdf_download_url_for_confluence_cloud(url)
        if export_type not in export_forms:
            raise ValueError("Invalid export_type parameter value. Valid values are: 'html/csv/xml/pdf'")

        token_page, type_code, content_option = export_forms[export_type]
        form_data = {
            "atl_token": get_atl_request(f"{token_page}?key={space_key}"),
            "exportType": type_code,
            "contentOption": content_option,
            "includeComments": "true",
            "confirm": "Export",
        }
        url = self.url_joiner(url=self.url, path=f"spaces/doexportspace.action?key={space_key}")

        # Sending a POST request that triggers the space export.
        response = self.session.post(url, headers=self.form_token_headers, data=form_data)
        parsed_html = BeautifulSoup(response.text, "html.parser")
        # Getting the poll URL to get the export progress status
        try:
            poll_url = parsed_html.find("meta", {"name": "ajs-pollURI"}).get("content")
        except Exception as e:
            raise ApiError("Problems with getting the poll_url for get_space_export method :", reason=e)

        # Poll until the export is reported complete; there is intentionally no exit condition
        # other than completion or an error.
        while True:
            try:
                progress_response = self.get(poll_url)
                log.info("Space %s export status: %s", space_key, progress_response["message"])
                if progress_response["complete"]:
                    parsed_html = BeautifulSoup(progress_response["message"], "html.parser")
                    download_url = parsed_html.find("a", {"class": "space-export-download-path"}).get("href")
                    if self.url in download_url:
                        return download_url
                    combined_url = self.url + download_url
                    # Ensure only one /wiki is included in the path
                    if combined_url.count("/wiki") > 1:
                        combined_url = combined_url.replace("/wiki/wiki", "/wiki")
                    return combined_url
                time.sleep(30)
            except Exception as e:
                raise ApiError(
                    "Encountered error during space export status check from space " + space_key, reason=e
                )
    except ApiError:
        # Already carries a specific message and reason; do not bury it under the generic one below.
        raise
    except Exception as e:
        raise ApiError("Encountered error during space export from space " + space_key, reason=e)
from atlassian import Confluence

# This example triggers a space export and prints the link to the exported file.
# Provide `space_key` and `export_type` (html/pdf/xml/csv) as arguments.

# It was tested on Confluence Cloud and might not work properly with Confluence on-prem.
# (!) This is an experimental method that should be considered a workaround for the missing space export REST endpoint.
# (!) The method might break if Atlassian implements changes to their space export front-end logic.

# The while loop inside the method does not have an exit condition; it will run until the space export is completed.
# It is possible that the space export progress might get stuck. It is up to the library user to handle this scenario.

# init the Confluence object
host = ""
username = ""
password = ""
confluence = Confluence(
    url=host,
    username=username,
    password=password,
)
space_key = "TEST"

# Method returns the link to the space export file.
# It is up to the library user to handle the file download action.
download_url = confluence.get_space_export(space_key=space_key, export_type="html")
print(download_url)