Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Convert single-item lists in table cells into paragraphs #119

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ The format is based on Keep a Changelog, and this project adheres to Semantic Ve

### Changed

- Convert single-item lists to paragraphs inside of table cells

### Fixed

## [1.40.0] - 2023-12-16
Expand Down
39 changes: 39 additions & 0 deletions bloom_nofos/nofos/nofo.py
Original file line number Diff line number Diff line change
Expand Up @@ -1257,6 +1257,43 @@ def unwrap_empty_elements(soup):
el.unwrap()


def _convert_single_item_lists_to_paragraphs(soup, cell):
    """
    This function mutates the soup!

    Converts a single-item list in a table cell to a paragraph for improved accessibility.

    The cell is only changed when ALL of the following hold:
      - the cell has exactly one child node,
      - that child is a list (`<ul>` or `<ol>`),
      - the list contains exactly one `<li>` at any depth, and
      - that `<li>` is a direct child of the list.

    In that case the list is replaced with a `<p>` tag holding the same child
    nodes as the list item. In every other case the cell is left untouched.

    Args:
        soup: Object used to create the new `<p>` tag (via `soup.new_tag`).
        cell: The table cell element to inspect. It is modified in place.

    Returns:
        None.

    Example:
        Input:
            <td><ul><li><strong>Bold</strong> list item</li></ul></td>

        Output:
            <td><p><strong>Bold</strong> list item</p></td>
    """
    cell_children = list(cell.children)
    if len(cell_children) != 1:
        return

    only_child = cell_children[0]
    if only_child.name not in ("ul", "ol"):
        return

    # Count list items at any depth so that nested lists are never flattened
    # into a paragraph.
    list_items = only_child.find_all("li")
    if len(list_items) != 1:
        return

    li = list_items[0]
    # The single <li> must sit directly under the list. Previously a <li>
    # wrapped in another element passed the count check but was then looked up
    # with recursive=False, which returned None and raised AttributeError.
    if li.parent is not only_child:
        return

    # Move the list item's child nodes (text and inline markup) into a new
    # paragraph. Snapshot the contents first because appending each node to
    # the paragraph removes it from the <li> while we iterate.
    new_paragraph = soup.new_tag("p")
    new_paragraph.extend(list(li.contents))

    # Add the paragraph to the cell, then remove the now-empty list.
    cell.append(new_paragraph)
    only_child.decompose()


def clean_table_cells(soup):
"""
This function mutates the soup!
Expand Down Expand Up @@ -1287,6 +1324,8 @@ def clean_table_cells(soup):
for span in cell.find_all("span"):
span.unwrap()

_convert_single_item_lists_to_paragraphs(soup, cell)


def replace_src_for_inline_images(soup):
"""
Expand Down
71 changes: 70 additions & 1 deletion bloom_nofos/nofos/test_nofo.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def test_no_replacements_needed(self):
self.assertEqual(replace_chars(test_string), test_string)


class TestsCleanTableCells(TestCase):
class TestsCleanTableCellsSpans(TestCase):
def test_remove_span_keep_content(self):
html = "<table><tr><td><span>Content</span> and more <span>content</span></td></tr></table>"
soup = BeautifulSoup(html, "html.parser")
Expand Down Expand Up @@ -155,6 +155,75 @@ def test_multiple_spans_in_cell(self):
self.assertEqual(soup.td.text, "FirstSecond")


class TestsCleanTableCellsLists(TestCase):
    """
    Checks how clean_table_cells treats lists inside table cells.

    A cell whose only content is a list with one item should have that list
    turned into a paragraph; every other arrangement should come back
    unchanged.
    """

    @staticmethod
    def _run_clean(html):
        # Parse the markup, run the cleaner on it, and hand back the
        # serialized result for comparison.
        parsed = BeautifulSoup(html, "html.parser")
        clean_table_cells(parsed)
        return str(parsed)

    # --- cases where the list IS converted to a paragraph ---

    def test_replace_single_item_ul_in_table_cell(self):
        result = self._run_clean(
            "<table><tr><td><ul><li>UL list item 1</li></ul></td></tr></table>"
        )
        self.assertEqual(
            result, "<table><tr><td><p>UL list item 1</p></td></tr></table>"
        )

    def test_replace_single_item_ul_in_table_cell_with_other_tags(self):
        result = self._run_clean(
            '<table><tr><td><ul><li>UL list item 1 with <a href="https://google.com">a link</a> and <strong>bold formatting</strong></li></ul></td></tr></table>'
        )
        self.assertEqual(
            result,
            '<table><tr><td><p>UL list item 1 with <a href="https://google.com">a link</a> and <strong>bold formatting</strong></p></td></tr></table>',
        )

    def test_replace_single_item_ol_in_table_cell(self):
        result = self._run_clean(
            "<table><tr><td><ol><li>OL list item 1</li></ol></td></tr></table>"
        )
        self.assertEqual(
            result, "<table><tr><td><p>OL list item 1</p></td></tr></table>"
        )

    # --- cases where the markup must be left exactly as it was ---

    def test_NO_replace_multiple_item_ul_in_table_cell(self):
        markup = "<table><tr><td><ul><li>UL list item 1</li><li>UL list item 2</li></ul></td></tr></table>"
        self.assertEqual(self._run_clean(markup), markup)

    def test_NO_replace_multiple_item_ol_in_table_cell(self):
        markup = "<table><tr><td><ol><li>OL list item 1</li><li>OL list item 2</li></ol></td></tr></table>"
        self.assertEqual(self._run_clean(markup), markup)

    def test_NO_replace_single_item_ul_in_table_cell_if_element_before(self):
        markup = "<table><tr><td><p>Paragraph 1 before ul</p><ul><li>UL list item 1</li></ul></td></tr></table>"
        self.assertEqual(self._run_clean(markup), markup)

    def test_NO_replace_single_item_ul_in_table_cell_if_element_after(self):
        markup = "<table><tr><td><ul><li>UL list item 1</li></ul><p>Paragraph 1 after ul</p></td></tr></table>"
        self.assertEqual(self._run_clean(markup), markup)

    def test_NO_replace_2_single_item_uls_in_table_cell(self):
        markup = "<table><tr><td><ul><li>UL list 1 item 1</li></ul><ul><li>UL list 2 item 1</li></ul></td></tr></table>"
        self.assertEqual(self._run_clean(markup), markup)

    def test_NO_replace_2_single_item_ols_in_table_cell(self):
        markup = "<table><tr><td><ol><li>OL list 1 item 1</li></ol><ol><li>OL list 2 item 1</li></ol></td></tr></table>"
        self.assertEqual(self._run_clean(markup), markup)

    def test_NO_replace_single_item_ul_and_single_item_ol_in_table_cell(self):
        markup = "<table><tr><td><ul><li>UL list 1 item 1</li></ul><ol><li>OL list 1 item 1</li></ol></td></tr></table>"
        self.assertEqual(self._run_clean(markup), markup)


class TableConvertFirstRowToHeaderRowTests(TestCase):
def setUp(self):
self.caption_text = "Physician Assistant Training Chart"
Expand Down
Loading