diff --git a/dags/hivemind_etl_helpers/src/db/mediawiki/extractor.py b/dags/hivemind_etl_helpers/src/db/mediawiki/extractor.py index 481c77d2..1ed8e9fe 100644 --- a/dags/hivemind_etl_helpers/src/db/mediawiki/extractor.py +++ b/dags/hivemind_etl_helpers/src/db/mediawiki/extractor.py @@ -41,12 +41,5 @@ def extract_from_pages(self, pages: List[str]) -> List[Document]: Returns: List[Document]: A list of Document objects extracted from the specified pages. """ - try: - response = self.wikimedia_reader.load_data(pages=pages, - auto_suggest=False - ) - return response - except Exception as e: - print(f"Failed to extract from pages {pages}: {str(e)}") - return [] - + response = self.wikimedia_reader.load_data(pages=pages, auto_suggest=False) + return response diff --git a/dags/hivemind_etl_helpers/tests/unit/test_mediawiki_extractor.py b/dags/hivemind_etl_helpers/tests/unit/test_mediawiki_extractor.py index ec0054d7..d39e7d1d 100644 --- a/dags/hivemind_etl_helpers/tests/unit/test_mediawiki_extractor.py +++ b/dags/hivemind_etl_helpers/tests/unit/test_mediawiki_extractor.py @@ -31,8 +31,9 @@ def test_extract_from_valid_pages(self): test_pages = ["Python_(programming_language)", "OpenAI"] documents = self.extractor.extract(page_ids=test_pages) self.assertEqual(len(documents), len(mock_response)) - self.mock_reader.load_data.assert_called_once_with(pages=test_pages, - auto_suggest=False) + self.mock_reader.load_data.assert_called_once_with( + pages=test_pages, auto_suggest=False + ) def test_extract_no_pages(self): """ @@ -53,5 +54,6 @@ def test_handle_invalid_page_titles(self): documents = self.extractor.extract(page_ids=invalid_pages) self.assertEqual(len(documents), 0) - self.mock_reader.load_data.assert_called_with(pages=invalid_pages, - auto_suggest=False) + self.mock_reader.load_data.assert_called_with( + pages=invalid_pages, auto_suggest=False + )