Skip to content

Commit

Permalink
Merge pull request #161 from TogetherCrew/fix/mediawiki-extraction-autosuggest-issues
Browse files Browse the repository at this point in the history

fix: turned off `auto_suggest` in extractor.py, as it was causing issues…
  • Loading branch information
amindadgar authored May 24, 2024
2 parents 148b40e + b01471f commit 7471dbd
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 3 deletions.
2 changes: 1 addition & 1 deletion dags/hivemind_etl_helpers/src/db/mediawiki/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,5 +41,5 @@ def extract_from_pages(self, pages: List[str]) -> List[Document]:
Returns:
List[Document]: A list of Document objects extracted from the specified pages.
"""
- response = self.wikimedia_reader.load_data(pages=pages)
+ response = self.wikimedia_reader.load_data(pages=pages, auto_suggest=False)
return response
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,9 @@ def test_extract_from_valid_pages(self):
test_pages = ["Python_(programming_language)", "OpenAI"]
documents = self.extractor.extract(page_ids=test_pages)
self.assertEqual(len(documents), len(mock_response))
- self.mock_reader.load_data.assert_called_once_with(pages=test_pages)
+ self.mock_reader.load_data.assert_called_once_with(
+ pages=test_pages, auto_suggest=False
+ )

def test_extract_no_pages(self):
"""
Expand All @@ -52,4 +54,6 @@ def test_handle_invalid_page_titles(self):

documents = self.extractor.extract(page_ids=invalid_pages)
self.assertEqual(len(documents), 0)
- self.mock_reader.load_data.assert_called_with(pages=invalid_pages)
+ self.mock_reader.load_data.assert_called_with(
+ pages=invalid_pages, auto_suggest=False
+ )

0 comments on commit 7471dbd

Please sign in to comment.