Skip to content

Commit

Permalink
DATASHADES-320 / add more tests
Browse files Browse the repository at this point in the history
  • Loading branch information
mutantsan committed May 1, 2024
1 parent 3d6ff5a commit f614886
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 15 deletions.
23 changes: 18 additions & 5 deletions ckanext/charts/fetchers.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def make_cache_key(self) -> str:


class URLDataFetcher(DataFetcherStrategy):
SUPPORTED_FORMATS = ["csv", "xlsx", "xml"]
SUPPORTED_FORMATS = ["csv", "xlsx", "xls", "xml"]

def __init__(
self,
Expand All @@ -97,13 +97,18 @@ def fetch_data(self) -> pd.DataFrame:
data = self.make_request()

try:
if self.file_format == "xlsx":
if self.file_format in ("xlsx", "xls"):
df = pd.read_excel(BytesIO(data))
elif self.fetch_data == "xml":
elif self.file_format == "xml":
df = pd.read_xml(BytesIO(data))
else:
df = pd.read_csv(BytesIO(data))
except KeyError as e:
except (
pd.errors.ParserError,
lxml.etree.XMLSyntaxError,
UnicodeDecodeError,
ValueError,
) as e:
raise exception.DataFetchError(
f"An error occurred during fetching data from URL: {e}"
)
Expand Down Expand Up @@ -175,6 +180,7 @@ def fetch_data(self) -> pd.DataFrame:
pd.errors.ParserError,
lxml.etree.XMLSyntaxError,
UnicodeDecodeError,
ValueError,
) as e:
raise exception.DataFetchError(
f"An error occurred during fetching data from file: {e}"
Expand All @@ -193,7 +199,14 @@ def __init__(self, data: dict[str, list[Any]]):
self.data = data

def fetch_data(self) -> pd.DataFrame:
return pd.DataFrame(self.data)
try:
df = pd.DataFrame(self.data)
except ValueError as e:
raise exception.DataFetchError(
f"An error occurred during fetching hardcoded data: {e}"
)

return df

def make_cache_key(self) -> str:
return "not-cached"
Expand Down
51 changes: 41 additions & 10 deletions ckanext/charts/tests/test_fetchers.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,6 @@
import ckanext.charts.fetchers as fetchers
from ckanext.charts.exception import DataFetchError

CSV_DATA = b"col1,col2\n1,2\n3,4\n"


@pytest.mark.ckan_config("ckan.plugins", "datastore")
@pytest.mark.usefixtures("clean_db", "with_plugins")
Expand Down Expand Up @@ -45,18 +43,45 @@ def test_fetch_data_success(self, resource_factory):

@pytest.mark.usefixtures("clean_redis")
class TestURLDataFetcher:
URL = "http://example.com/data.csv"
URL = "http://xxx"

def _get_file_content(self, fmt: str) -> bytes:
    """Return the raw bytes of the ``sample.<fmt>`` fixture file.

    The file is looked up in the ``data`` directory that sits next to
    this test module.
    """
    tests_dir = os.path.dirname(__file__)
    sample_path = os.path.join(tests_dir, "data", f"sample.{fmt}")

    with open(sample_path, mode="rb") as fixture:
        content = fixture.read()

    return content

def test_fetch_data_success(self, requests_mock):
requests_mock.get(self.URL, content=CSV_DATA)
requests_mock.get(self.URL, content=self._get_file_content("csv"))

result = fetchers.URLDataFetcher(self.URL).fetch_data()

assert isinstance(result, pd.DataFrame)
assert len(result) == 2
assert list(result.columns) == ["col1", "col2"]
assert list(result["col1"]) == [1, 3]
assert list(result["col2"]) == [2, 4]
assert len(result) == 10000

def test_fetch_data_success_xml(self, requests_mock):
    """Serve the XML sample from the mocked URL and expect a 36-row frame."""
    xml_payload = self._get_file_content("xml")
    requests_mock.get(self.URL, content=xml_payload)

    fetcher = fetchers.URLDataFetcher(self.URL, file_format="xml")
    frame = fetcher.fetch_data()

    assert isinstance(frame, pd.DataFrame)
    assert len(frame) == 36

def test_fetch_data_success_xlsx(self, requests_mock):
    """Serve the XLSX sample from the mocked URL and expect a 100-row frame."""
    xlsx_payload = self._get_file_content("xlsx")
    requests_mock.get(self.URL, content=xlsx_payload)

    fetcher = fetchers.URLDataFetcher(self.URL, file_format="xlsx")
    frame = fetcher.fetch_data()

    assert isinstance(frame, pd.DataFrame)
    assert len(frame) == 100

def test_fetch_data_success_xlx(self, requests_mock):
    """Serve the ``xls`` sample from the mocked URL and expect a 100-row frame."""
    xls_payload = self._get_file_content("xls")
    requests_mock.get(self.URL, content=xls_payload)

    fetcher = fetchers.URLDataFetcher(self.URL, file_format="xls")
    frame = fetcher.fetch_data()

    assert isinstance(frame, pd.DataFrame)
    assert len(frame) == 100

def test_fetch_data_http_error(self, requests_mock):
requests_mock.get(self.URL, status_code=404)
Expand All @@ -77,7 +102,7 @@ def test_fetch_data_timeout_error(self, requests_mock):
fetchers.URLDataFetcher(self.URL).fetch_data()

def test_hit_cache_redis(self, requests_mock):
requests_mock.get(self.URL, content=CSV_DATA)
requests_mock.get(self.URL, content=self._get_file_content("csv"))

fetcher = fetchers.URLDataFetcher(self.URL)

Expand All @@ -91,7 +116,7 @@ def test_hit_cache_redis(self, requests_mock):

@pytest.mark.usefixtures("clean_disk_cache")
def test_hit_cache_disk(self, requests_mock):
requests_mock.get(self.URL, content=CSV_DATA)
requests_mock.get(self.URL, content=self._get_file_content("csv"))

fetcher = fetchers.URLDataFetcher(self.URL, cache_stragegy="disk")

Expand Down Expand Up @@ -120,6 +145,12 @@ def test_fetch_data(self):
assert len(result) == 4
assert list(result.columns) == ["col1", "col2", "col3"]

def test_malformed_data(self):
    """Columns of unequal length must be reported as a DataFetchError."""
    # "col1" holds one value while "col2" holds two.
    uneven_columns = {"col1": ["1"], "col2": ["a", "b"]}

    with pytest.raises(DataFetchError):
        fetcher = fetchers.HardcodedDataFetcher(uneven_columns)
        fetcher.fetch_data()


@pytest.mark.usefixtures("clean_redis", "clean_disk_cache")
class TestFileSystemDataFetcher:
Expand Down

0 comments on commit f614886

Please sign in to comment.