Skip to content

Commit

Permalink
reformat with black
Browse files Browse the repository at this point in the history
  • Loading branch information
jojortz committed Aug 27, 2024
1 parent b2d1d11 commit 772d242
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 16 deletions.
19 changes: 14 additions & 5 deletions any_parser/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,20 +45,30 @@ def extract(self, file_path):
result = self._request_file_extraction(user_id, file_id)
return result

def parse(self, file_path, parse_type="table", output_format="HTML", prompt="", mode="advanced"):
def parse(
self,
file_path,
parse_type="table",
output_format="HTML",
prompt="",
mode="advanced",
):
parse_type = parse_type.upper()
if parse_type not in ["TABLE"]:
raise ValueError("Invalid parse_type. Currently, only 'table' is supported.")
raise ValueError(
"Invalid parse_type. Currently, only 'table' is supported."
)

output_format = output_format.upper()
if output_format not in ["HTML", "JSON", "CSV"]:
raise ValueError("Invalid output_format. Expected 'HTML', 'JSON', or 'CSV'.")
raise ValueError(
"Invalid output_format. Expected 'HTML', 'JSON', or 'CSV'."
)

user_id, file_id = self._request_and_upload_by_apiKey(file_path)
result = self._request_info_extraction(user_id, file_id)
return run_convert(result, output_format)


def _error_handler(self, response):
if response.status_code == 403:
raise Exception("Invalid API Key")
Expand Down Expand Up @@ -136,4 +146,3 @@ def _request_info_extraction(self, user_id, file_id):
return query_response.json()

self._error_handler(response)

25 changes: 14 additions & 11 deletions any_parser/postprocessors.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import io
import csv


def run_convert(result, output_format):
if output_format == "JSON":
converter = _html_table_string_to_json
Expand All @@ -12,20 +13,21 @@ def run_convert(result, output_format):

return [converter(table) for table in result]


def _html_table_string_to_json(html_string: str):
soup = BeautifulSoup(html_string, 'html.parser')
table = soup.find('table')
soup = BeautifulSoup(html_string, "html.parser")
table = soup.find("table")

if not table:
raise ValueError('No table found in the provided HTML string.')
raise ValueError("No table found in the provided HTML string.")

rows = table.find_all('tr')
headers = [cell.get_text(strip=True) for cell in rows[0].find_all(['th', 'td'])]
rows = table.find_all("tr")
headers = [cell.get_text(strip=True) for cell in rows[0].find_all(["th", "td"])]

result = []

for row in rows[1:]:
cells = row.find_all('td')
cells = row.find_all("td")
row_object = {}

for header, cell in zip(headers, cells):
Expand All @@ -35,19 +37,20 @@ def _html_table_string_to_json(html_string: str):

return result


def _html_table_to_csv(html_string: str) -> str:
    """Convert the first ``<table>`` in *html_string* to CSV text.

    Each ``<tr>`` becomes one CSV row; both header (``<th>``) and data
    (``<td>``) cells are emitted, with surrounding whitespace stripped
    from the cell text. Quoting is handled by ``csv.writer`` with
    ``QUOTE_MINIMAL``, so cell text containing commas stays intact.

    Args:
        html_string: An HTML fragment or document containing a table.

    Returns:
        The table serialized as a CSV string.

    Raises:
        ValueError: If the HTML contains no ``<table>`` element.
    """
    soup = BeautifulSoup(html_string, "html.parser")
    table = soup.find("table")

    if not table:
        raise ValueError("No table found in the provided HTML string.")

    rows = table.find_all("tr")
    # Write into an in-memory buffer so csv handles quoting/escaping.
    output = io.StringIO()
    csv_writer = csv.writer(output, quoting=csv.QUOTE_MINIMAL)

    for row in rows:
        # Include both th and td so the header row is preserved.
        cells = row.find_all(["th", "td"])
        csv_writer.writerow([cell.get_text(strip=True) for cell in cells])

    return output.getvalue()

0 comments on commit 772d242

Please sign in to comment.