Skip to content

Commit

Permalink
修复单元格占多行又占多列导出报错的问题。issues:导出表格标注报错 PFCCLab#113
Browse files Browse the repository at this point in the history
  • Loading branch information
BotAndyGao committed Nov 26, 2024
1 parent 9e7ae19 commit 8cc4ab6
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 9 deletions.
1 change: 0 additions & 1 deletion PPOCRLabel.py
Original file line number Diff line number Diff line change
Expand Up @@ -3181,7 +3181,6 @@ def exportJSON(self):
"""
export PPLabel and CSV to JSON (PubTabNet)
"""
- import pandas as pd

# automatically save annotations
self.saveFilestate()
Expand Down
18 changes: 10 additions & 8 deletions libs/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,14 +232,16 @@ def convert_token(html_list):
elif col == "td":
token_list.extend(["<td>", "</td>"])
else:
- token_list.append("<td")
- if "colspan" in col:
- _, n = col.split("colspan=")
- token_list.append(' colspan="{}"'.format(int(n)))
- if "rowspan" in col:
- _, n = col.split("rowspan=")
- token_list.append(' rowspan="{}"'.format(int(n)))
- token_list.extend([">", "</td>"])
+ token_list.append("<td") # Start the td tag
+ # Use regex to match "colspan" and "rowspan" attributes and their values
+ colspan_match = re.search(r"colspan=(\d+)", col)
+ rowspan_match = re.search(r"rowspan=(\d+)", col)
+ if colspan_match:
+ token_list.append(f' colspan="{colspan_match.group(1)}"')
+ if rowspan_match:
+ token_list.append(f' rowspan="{rowspan_match.group(1)}"')
+ token_list.append(">") # End the opening td tag
+ token_list.append("</td>") # Close the td tag
token_list.append("</tr>")
token_list.append("</tbody>")

Expand Down

0 comments on commit 8cc4ab6

Please sign in to comment.