Skip to content

Commit

Permalink
feat: add sample donor csv repository along with tests
Browse files Browse the repository at this point in the history
  • Loading branch information
SimonKonar committed Jan 9, 2024
1 parent 790612d commit 356bf26
Show file tree
Hide file tree
Showing 2 changed files with 104 additions and 0 deletions.
46 changes: 46 additions & 0 deletions persistence/sample_donor_csv_repository.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
from datetime import datetime
import logging
import os
from typing import List

from model.gender import get_gender_from_abbreviation
from model.sample_donor import SampleDonor
from persistence.sample_donor_repository import SampleDonorRepository
from util.custom_logger import setup_logger
import pandas as pd

# Module-level logging setup, executed once at import time.
# NOTE(review): setup_logger() is a project helper from util.custom_logger;
# presumably it installs the project's logging configuration - confirm there.
setup_logger()
# logging.getLogger() without a name returns the root logger, so records
# emitted from this module are not tagged with the module's name.
logger = logging.getLogger()


class SampleDonorCsvRepository(SampleDonorRepository):
    """Repository that reads sample donors from the CSV files of a directory.

    Every entry directly inside ``records_path`` whose name ends with ``.csv``
    is parsed with pandas. ``donor_parsing_map`` translates the logical donor
    fields ``"id"``, ``"gender"`` and ``"birthDate"`` into the column names
    used by the files.
    """

    def __init__(self, records_path: str, separator: str, donor_parsing_map: dict):
        """Store the configuration; no file is touched until ``get_all`` runs.

        Args:
            records_path: Directory that is scanned for ``.csv`` files.
            separator: Column separator handed to ``pandas.read_csv``.
            donor_parsing_map: Mapping with the keys ``"id"``, ``"gender"``
                and ``"birthDate"`` whose values are CSV column names.
        """
        self._dir_path = records_path
        # Identifiers already yielded. The set lives on the instance, so
        # de-duplication spans all files AND all calls to get_all().
        self._ids: set = set()
        self.separator = separator
        self._donor_parsing_map = donor_parsing_map
        logger.debug(f"Loaded the following donor parsing map {donor_parsing_map}")

    def get_all(self) -> List[SampleDonor]:
        """Lazily yield each donor found in the directory's CSV files.

        Despite the ``List`` annotation this is a generator: files are read
        only while the result is being iterated. A donor identifier is
        yielded at most once per repository instance.
        """
        # The context manager closes the directory iterator even when the
        # consumer stops iterating early.
        with os.scandir(self._dir_path) as dir_entries:
            for dir_entry in dir_entries:
                if dir_entry.name.endswith(".csv"):
                    yield from self.__extract_donor_from_csv_file(dir_entry)

    def __extract_donor_from_csv_file(self, dir_entry: os.DirEntry) -> SampleDonor:
        """Yield the not-yet-seen donors described by the rows of one CSV file.

        Files that pandas cannot parse and rows that cannot be turned into a
        donor are logged and skipped; the remaining rows are still processed.
        """
        try:
            # dtype=str keeps identifiers and years as text; pandas reports
            # missing cells as float NaN, not as None.
            file_content = pd.read_csv(dir_entry, sep=self.separator, dtype=str)
        except (pd.errors.EmptyDataError, pd.errors.ParserError) as error:
            logger.info("%s: %s Skipping...", dir_entry.name, error)
            return

        # Column names are the same for every row, so resolve them once.
        id_column = self._donor_parsing_map.get("id")
        gender_column = self._donor_parsing_map.get("gender")
        birth_year_column = self._donor_parsing_map.get("birthDate")

        for _, row in file_content.iterrows():
            try:
                donor = SampleDonor(row[id_column])
                donor.gender = get_gender_from_abbreviation(row[gender_column])
                year_of_birth = row[birth_year_column]
                # An "is not None" test would never catch a NaN cell.
                if pd.notna(year_of_birth):
                    donor.date_of_birth = datetime.strptime(year_of_birth, '%Y')
            except (KeyError, TypeError, ValueError) as error:
                # KeyError: mapped column missing from the file;
                # TypeError/ValueError: a value that cannot be converted.
                logger.info("%s: %s Skipping...", dir_entry.name, error)
                continue
            if donor.identifier not in self._ids:
                self._ids.add(donor.identifier)
                yield donor

58 changes: 58 additions & 0 deletions test/unit/persistence/test_sample_donor_csv_repository.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import unittest

import pytest
from pyfakefs.fake_filesystem_unittest import patchfs

from model.gender import Gender
from model.sample_donor import SampleDonor
from persistence.sample_donor_csv_repository import SampleDonorCsvRepository
from util.config import PARSING_MAP_CSV


class TestDonorCsvRepo(unittest.TestCase):
    """Unit tests for SampleDonorCsvRepository, run against a fake file system."""

    # Header line of the semicolon-separated sample export used by every test.
    header = "sample_ID;patient_pseudonym;sex;birth_year;date_of_diagnosis;diagnosis;donor_age;sampling_date;sampling_type;storage_temperature;available_number_of_samples \n"

    # One data row: donor "1113", female, born 1939.
    content = "34;1113;f;1939;2100-10-22;M329;49;2007-10-22;serum;-20;1"

    dir_path = "/mock_dir/"

    @pytest.fixture(autouse=True)
    def run_around_tests(self):
        """Create a fresh repository (and thus a fresh seen-ids set) per test."""
        self.donor_repository = SampleDonorCsvRepository(records_path=self.dir_path,
                                                         donor_parsing_map=PARSING_MAP_CSV['donor_map'],
                                                         separator=";")

    @patchfs
    def test_get_all_ok(self, fake_fs):
        """A single well-formed file yields exactly one fully parsed donor."""
        fake_fs.create_file(self.dir_path + "mock_file.csv", contents=self.header + self.content)
        # Materialise the generator so the test cannot pass vacuously when
        # no donor is produced at all.
        donors = list(self.donor_repository.get_all())
        self.assertEqual(1, len(donors))
        donor = donors[0]
        self.assertIsInstance(donor, SampleDonor)
        self.assertEqual("1113", donor.identifier)
        self.assertEqual(Gender.FEMALE, donor.gender)

    @patchfs
    def test_get_all_with_one_wrongly_formatted_file(self, fake_fs):
        """A malformed file is ignored while the valid file is still read."""
        fake_fs.create_file(self.dir_path + "mock_file.csv", contents=self.header + self.content)
        fake_fs.create_file(self.dir_path + "mock_wrong_file.csv", contents="badly_formated_csv")
        donors = list(self.donor_repository.get_all())
        self.assertEqual(1, len(donors))
        self.assertIsInstance(donors[0], SampleDonor)
        self.assertEqual("1113", donors[0].identifier)

    @patchfs
    def test_get_all_does_not_return_duplicate_patients(self, fake_fs):
        """The same donor appearing in two files is returned only once."""
        fake_fs.create_file(self.dir_path + "mock_file.csv", contents=self.header + self.content)
        fake_fs.create_file(self.dir_path + "mock_file_duplicate.csv", contents=self.header + self.content)
        donors = list(self.donor_repository.get_all())
        self.assertEqual(1, len(donors))
        self.assertIsInstance(donors[0], SampleDonor)
        self.assertEqual("1113", donors[0].identifier)

    @patchfs
    def test_get_all_with_empty_repository_throws_no_errors(self, fake_fs):
        """An empty directory yields no donors and raises nothing."""
        fake_fs.create_dir(self.dir_path)
        self.assertEqual([], list(self.donor_repository.get_all()))

0 comments on commit 356bf26

Please sign in to comment.