修复单元格同时跨多行和多列时导出报错的问题。关联 issue：导出表格标注报错（PFCCLab#113）

BotAndyGao · Nov 26, 2024 · 8cc4ab6 · 8cc4ab6
1 parent 9e7ae19
commit 8cc4ab6
Show file tree

Hide file tree

Showing 2 changed files with 10 additions and 9 deletions.
diff --git a/PPOCRLabel.py b/PPOCRLabel.py
@@ -3181,7 +3181,6 @@ def exportJSON(self):
         """
         export PPLabel and CSV to JSON (PubTabNet)
         """
-        import pandas as pd
 
         # automatically save annotations
         self.saveFilestate()

diff --git a/libs/utils.py b/libs/utils.py
@@ -232,14 +232,16 @@ def convert_token(html_list):
             elif col == "td":
                 token_list.extend(["<td>", "</td>"])
             else:
-                token_list.append("<td")
-                if "colspan" in col:
-                    _, n = col.split("colspan=")
-                    token_list.append(' colspan="{}"'.format(int(n)))
-                if "rowspan" in col:
-                    _, n = col.split("rowspan=")
-                    token_list.append(' rowspan="{}"'.format(int(n)))
-                token_list.extend([">", "</td>"])
+                token_list.append("<td")  # Start the td tag
+                # Use regex to match "colspan" and "rowspan" attributes and their values
+                colspan_match = re.search(r"colspan=(\d+)", col)
+                rowspan_match = re.search(r"rowspan=(\d+)", col)
+                if colspan_match:
+                    token_list.append(f' colspan="{colspan_match.group(1)}"')
+                if rowspan_match:
+                    token_list.append(f' rowspan="{rowspan_match.group(1)}"')
+                token_list.append(">")  # End the opening td tag
+                token_list.append("</td>")  # Close the td tag
         token_list.append("</tr>")
     token_list.append("</tbody>")