diff --git a/dags/hivemind_etl_helpers/src/db/mediawiki/extractor.py b/dags/hivemind_etl_helpers/src/db/mediawiki/extractor.py index d7bcaf5d..1ed8e9fe 100644 --- a/dags/hivemind_etl_helpers/src/db/mediawiki/extractor.py +++ b/dags/hivemind_etl_helpers/src/db/mediawiki/extractor.py @@ -41,5 +41,5 @@ def extract_from_pages(self, pages: List[str]) -> List[Document]: Returns: List[Document]: A list of Document objects extracted from the specified pages. """ - response = self.wikimedia_reader.load_data(pages=pages) + response = self.wikimedia_reader.load_data(pages=pages, auto_suggest=False) return response diff --git a/dags/hivemind_etl_helpers/tests/unit/test_mediawiki_extractor.py b/dags/hivemind_etl_helpers/tests/unit/test_mediawiki_extractor.py index 6badf729..d39e7d1d 100644 --- a/dags/hivemind_etl_helpers/tests/unit/test_mediawiki_extractor.py +++ b/dags/hivemind_etl_helpers/tests/unit/test_mediawiki_extractor.py @@ -31,7 +31,9 @@ def test_extract_from_valid_pages(self): test_pages = ["Python_(programming_language)", "OpenAI"] documents = self.extractor.extract(page_ids=test_pages) self.assertEqual(len(documents), len(mock_response)) - self.mock_reader.load_data.assert_called_once_with(pages=test_pages) + self.mock_reader.load_data.assert_called_once_with( + pages=test_pages, auto_suggest=False + ) def test_extract_no_pages(self): """ @@ -52,4 +54,6 @@ def test_handle_invalid_page_titles(self): documents = self.extractor.extract(page_ids=invalid_pages) self.assertEqual(len(documents), 0) - self.mock_reader.load_data.assert_called_with(pages=invalid_pages) + self.mock_reader.load_data.assert_called_with( + pages=invalid_pages, auto_suggest=False + )