Skip to content

Commit

Permalink
Preserve HTML elements that are in the table cell as well
Browse files Browse the repository at this point in the history
Previously, we kept only the text of the list item, so we would lose
any HTML elements inside it (e.g., lists and bold).

Now the list item's HTML content is carried over into the replacement paragraph, so it works as you would expect.
  • Loading branch information
pcraig3 committed Dec 17, 2024
1 parent ce3c598 commit 05b8f93
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 7 deletions.
15 changes: 8 additions & 7 deletions bloom_nofos/nofos/nofo.py
Original file line number Diff line number Diff line change
Expand Up @@ -1265,18 +1265,18 @@ def _convert_single_item_lists_to_paragraphs(soup, cell):
This function checks if a given table cell (`<td>` or `<th>`) contains exactly one child,
which must be a list (`<ul>` or `<ol>`) with only one list item (`<li>`). If such a list
is found, it is replaced with a `<p>` tag containing the same text content as the list item.
is found, it is replaced with a `<p>` tag containing the same HTML content as the list item.
Modifies:
The `cell` object is modified in place. If a matching single-item list is found,
it is replaced with a paragraph (`<p>`).
Example:
Input:
<td><ul><li>List item</li></ul></td>
<td><ul><li><strong>Bold</strong> list item</li></ul></td>
Output:
<td><p>List item</p></td>
<td><p><strong>Bold</strong> list item</p></td>
"""
cell_children = list(cell.children)
if (
Expand All @@ -1285,12 +1285,13 @@ def _convert_single_item_lists_to_paragraphs(soup, cell):
and len(cell_children[0].find_all("li")) == 1
):
# Extract the text content of the single list item
text = cell_children[0].get_text(strip=True)
# Remove the list and replace with a paragraph
cell_children[0].decompose()
li = cell_children[0].find("li", recursive=False)

# Replace with a paragraph, then remove the list
new_paragraph = soup.new_tag("p")
new_paragraph.string = text
new_paragraph.extend(li.contents)
cell.append(new_paragraph)
cell_children[0].decompose()


def clean_table_cells(soup):
Expand Down
9 changes: 9 additions & 0 deletions bloom_nofos/nofos/test_nofo.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,15 @@ def test_replace_single_item_ul_in_table_cell(self):
str(soup), "<table><tr><td><p>UL list item 1</p></td></tr></table>"
)

def test_replace_single_item_ul_in_table_cell_with_other_tags(self):
html = '<table><tr><td><ul><li>UL list item 1 with <a href="https://google.com">a link</a> and <strong>bold formatting</strong></li></ul></td></tr></table>'
soup = BeautifulSoup(html, "html.parser")
clean_table_cells(soup)
self.assertEqual(
str(soup),
'<table><tr><td><p>UL list item 1 with <a href="https://google.com">a link</a> and <strong>bold formatting</strong></p></td></tr></table>',
)

def test_replace_single_item_ol_in_table_cell(self):
html = "<table><tr><td><ol><li>OL list item 1</li></ol></td></tr></table>"
soup = BeautifulSoup(html, "html.parser")
Expand Down

0 comments on commit 05b8f93

Please sign in to comment.