
Commit

[Confluence] new method added confluence_get_tables_from_page + requirements.txt
gkowalc committed Dec 12, 2023
1 parent 3e1ef05 commit 760717f
Showing 5 changed files with 64 additions and 0 deletions.
41 changes: 41 additions & 0 deletions atlassian/confluence.py
@@ -7,6 +7,7 @@
from requests import HTTPError
import requests
from deprecated import deprecated
from bs4 import BeautifulSoup
from atlassian import utils
from .errors import ApiError, ApiNotFoundError, ApiPermissionError, ApiValueError, ApiConflictError, ApiNotAcceptable
from .rest_client import AtlassianRestAPI
@@ -356,6 +357,46 @@ def get_page_by_id(self, page_id, expand=None, status=None, version=None):

return response

def get_tables_from_page(self, page_id):
    """
    Fetches the HTML tables added to a Confluence page.
    :param page_id: integer confluence page_id
    :return: JSON string with page_id, number_of_tables_in_page and tables_content,
        a list of lists representing the scraped tables
    """
    try:
        page_content = self.get_page_by_id(page_id, expand="body.storage")["body"]["storage"]["value"]

        if page_content:
            tables_raw = [
                [[cell.text for cell in row("th") + row("td")] for row in table("tr")]
                for table in BeautifulSoup(page_content, features="lxml")("table")
            ]
            if len(tables_raw) > 0:
                return json.dumps(
                    {
                        "page_id": page_id,
                        "number_of_tables_in_page": len(tables_raw),
                        "tables_content": tables_raw,
                    }
                )
            else:
                return {"page_id": page_id, "message": "No tables found on page"}
        else:
            return {"page_id": page_id, "message": "Page content is empty"}
    except HTTPError as e:
        if e.response.status_code == 404:
            # Raise ApiError as the documented reason is ambiguous
            log.error("Couldn't retrieve tables from page %s", page_id)
            raise ApiError(
                "There is no content with the given pageid, pageid params is not an integer "
                "or the calling user does not have permission to view the page",
                reason=e,
            )
    except Exception as e:
        log.error("Error occurred while scraping tables from page %s: %s", page_id, e)

def get_page_labels(self, page_id, prefix=None, start=None, limit=None):
"""
Returns the list of labels on a piece of Content.
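Note that the new method returns a JSON string when tables are found, and a plain dict when the page has no tables or an empty body, so callers need to decode the string before iterating over rows. Below is a minimal consumption sketch; the URL, credentials and page id are placeholders, not values from this commit.

import json

from atlassian import Confluence

# Placeholder connection details; substitute your own instance, user and API token.
confluence = Confluence(
    url="https://example.atlassian.net",
    username="user@example.com",
    password="<api_token>",
)

result = confluence.get_tables_from_page(12345)

if isinstance(result, str):
    # Tables were found: the method returns a JSON string, so decode it before iterating.
    data = json.loads(result)
    for index, table in enumerate(data["tables_content"], start=1):
        header, *rows = table
        print("Table %d: columns=%s, data rows=%d" % (index, header, len(rows)))
else:
    # No tables found or the page body was empty: the method returns a plain dict.
    print(result)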
3 changes: 3 additions & 0 deletions docs/confluence.rst
@@ -152,6 +152,9 @@ Page actions
# Add comment into page
confluence.add_comment(page_id, text)
# Fetch tables from Confluence page
confluence.get_tables_from_page(page_id)

Template actions
----------------

17 changes: 17 additions & 0 deletions examples/confluence/confluence_get_tables_from_page.py
@@ -0,0 +1,17 @@
from atlassian import Confluence
import logging

confluence = Confluence(
    url="<instance_url>",
    username="<user_email>",
    password="<api_key>",
)
logging.basicConfig(level=logging.INFO)

# page_id is the id of the page you want to get the tables from.
page_id = 393464

result = confluence.get_tables_from_page(page_id)
print(result)
# Let's say the page has two tables, each with 3 columns, a header row and 2 data rows.
# The method should then return the following output: {"page_id": 393464, "number_of_tables_in_page": 2, "tables_content": [[["header1", "header2", "header3"], ["h1r1", "h2r1", "h3r1"], ["h1r2", "h2r2", "h3r2"]], [["table2 header1", "table2 header2", "table2 header3"], ["h1r1", "h2r1", "h3r1"], ["h1r2", "h2r2", "h3r2"]]]}
# tables_content is a list of lists of lists. Each nested list represents a table. Each nested list inside a table represents a row.
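To persist the scraped rows, the decoded tables_content lists can be fed straight to csv.writer. A small follow-on sketch, reusing result from the example above and assuming at least one table was found (so result is a JSON string):

import csv
import json

decoded = json.loads(result)
for index, table in enumerate(decoded["tables_content"], start=1):
    # One CSV file per scraped table, e.g. page_393464_table_1.csv
    with open("page_{}_table_{}.csv".format(decoded["page_id"], index), "w", newline="") as handle:
        csv.writer(handle).writerows(table)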
2 changes: 2 additions & 0 deletions requirements.txt
@@ -4,3 +4,5 @@ six
oauthlib
requests_oauthlib
requests-kerberos==0.14.0
bs4
lxml
1 change: 1 addition & 0 deletions tox.ini
@@ -11,6 +11,7 @@ deps =
pytest-cov
coverage
requests
bs4
commands =
coverage erase
pytest -v --cov=atlassian --cov-branch --cov-report=xml
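bs4 is added to the test dependencies so the new parsing logic can be exercised without a live Confluence instance. A hypothetical pytest-style test (not part of this commit) that stubs get_page_by_id might look like:

import json
from unittest import mock

from atlassian import Confluence


def test_get_tables_from_page_parses_a_simple_table():
    confluence = Confluence(url="http://localhost:8090")
    # Fake storage-format body containing a single one-column table (header row + data row).
    page = {"body": {"storage": {"value": "<table><tr><th>h1</th></tr><tr><td>v1</td></tr></table>"}}}
    with mock.patch.object(Confluence, "get_page_by_id", return_value=page):
        result = json.loads(confluence.get_tables_from_page(1))
    assert result["number_of_tables_in_page"] == 1
    assert result["tables_content"] == [["h1"], ["v1"]]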
