style: fixed ruff format errors

huggingface · Dec 21, 2024 · 2ac81d5
1 parent c9f1c2b
commit 2ac81d5
Show file tree

Hide file tree

Showing 4 changed files with 4 additions and 8 deletions.
diff --git a/src/datatrove/pipeline/extractors/base.py b/src/datatrove/pipeline/extractors/base.py
@@ -67,12 +67,10 @@ def run(self, data: DocumentsPipeline, rank: int = 0, world_size: int = 1) -> Do
                     try:
                         doc.text = future.result(timeout=self.timeout)
                     except TimeoutError:
-                        logger.warning(
-                            "⏰ Timeout while cleaning record text. Skipping record.")
+                        logger.warning("⏰ Timeout while cleaning record text. Skipping record.")
                         continue
                     except Exception as e:
-                        logger.warning(
-                            f'❌ Error "{e}" while cleaning record text. Skipping record.')
+                        logger.warning(f'❌ Error "{e}" while cleaning record text. Skipping record.')
                         continue
                 if doc.text:
                     self.stat_update(StatHints.forwarded)

diff --git a/src/datatrove/pipeline/extractors/readabilipy.py b/src/datatrove/pipeline/extractors/readabilipy.py
@@ -45,7 +45,6 @@ def clean_html(self, html: str) -> str:
         result = simple_tree_from_html_string(html)
         return str(result)
 
-
     def extract(self, text: str) -> str:
         """
         Args:

diff --git a/src/datatrove/pipeline/extractors/readability.py b/src/datatrove/pipeline/extractors/readability.py
@@ -50,7 +50,6 @@ def clean_html(self, text: str) -> str:
 
         return doc.summary()
 
-
     def extract(self, text: str) -> str:
         """
         Args:

diff --git a/src/datatrove/pipeline/extractors/trafilatura.py b/src/datatrove/pipeline/extractors/trafilatura.py
@@ -47,8 +47,8 @@ def clean_html(self, html: str) -> str:
 
         from trafilatura import bare_extraction
 
-        html_body = bare_extraction(html, favor_precision=self.favour_precision, **self.kwargs)['body']
-        cleaned_html = ElementTree.tostring(html_body, encoding = "unicode")
+        html_body = bare_extraction(html, favor_precision=self.favour_precision, **self.kwargs)["body"]
+        cleaned_html = ElementTree.tostring(html_body, encoding="unicode")
         return cleaned_html
 
     def extract(self, text: str) -> str:
Original file line number
Diff line change
@@ -45,7 +45,6 @@ def clean_html(self, html: str) -> str:
             result = simple_tree_from_html_string(html)
             return str(result)
         def extract(self, text: str) -> str:
             """
             Args:
@@ Expand Down @@