Skip to content

Commit

Permalink
AREG-120 chore: Refactor scicrunch_citation.py script for improved re…
Browse files Browse the repository at this point in the history
…adability and maintainability
  • Loading branch information
D-GopalKrishna committed May 8, 2024
1 parent a5f590d commit 32b4b6e
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 34 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -28,48 +28,67 @@ def log_error(self, error):
self.stdout.write(self.style.ERROR(error))

def handle(self, *args, **options):
    """Fetch and ingest the citation metric for every curated antibody.

    Reads the ``max_requests_per_second`` option (default 10), builds a
    ``RateLimiter`` with it, and processes the curated antibody ids one by
    one. The loop stops early as soon as ``has_too_many_failures`` reports
    that the failure threshold was crossed; whatever failures were collected
    are then handed to ``log_failed_antibodies``.
    """
    requests_per_second_limit = options.get('max_requests_per_second', 10)
    antibodies_ids = self.get_curated_antibodies()

    api_request_rate_limiter = RateLimiter(max_requests_per_second=requests_per_second_limit)
    total_anticipated_requests = len(antibodies_ids)
    failed_antibody_ids = []
    # process_antibody_ingestion expects a position into antibodies_ids, so
    # iterate over indices; the index is named so it does not shadow the
    # builtin ``id``.
    for index in range(total_anticipated_requests):
        self.process_antibody_ingestion(antibodies_ids, api_request_rate_limiter, failed_antibody_ids, index)
        if self.has_too_many_failures(failed_antibody_ids, total_anticipated_requests):
            break

    self.log_failed_antibodies(failed_antibody_ids)



def process_antibody_ingestion(self, antibodies_ids, api_request_rate_limiter, failed_antibody_ids, id):
    """Fetch and ingest the citation metric for one antibody.

    Args:
        antibodies_ids: Indexable collection of antibody ids.
        api_request_rate_limiter: Object whose ``add_request()`` is called
            once per processed antibody (presumably throttles the API
            calls - confirm against ``RateLimiter``).
        failed_antibody_ids: List mutated in place; the antibody id is
            appended when fetching or ingesting raises.
        id: Position of the antibody to process inside ``antibodies_ids``.

    Any exception raised while fetching or ingesting is logged through
    ``self.log_error`` and recorded as a failure instead of propagating.
    """
    ab_id = antibodies_ids[id]
    try:
        number_of_citations = fetch_scicrunch_citation_metric(
            ab_id, scicrunch_api_key
        )
        # A metric of 0 is still ingested; only None skips ingestion.
        if number_of_citations is not None:
            ingest_scicrunch_citation_metric(ab_id, number_of_citations)
    except (FetchCitationMetricFailed, Exception) as e:
        # Both failure kinds are handled identically: log and remember the id.
        self.log_error(e)
        failed_antibody_ids.append(ab_id)

    # Counted whether or not the fetch succeeded.
    api_request_rate_limiter.add_request()


def get_curated_antibodies(self):
    """Return the ids produced by the module-level ``get_curated_antibodies()``.

    If that call raises, the error is logged through ``self.log_error`` and
    the process exits with status 1.
    """
    try:
        return get_curated_antibodies()
    except Exception as failure:
        self.log_error(f"{failure}. Exiting the script")
        sys.exit(1)


api_request_rate_limiter = RateLimiter(max_requests_per_second=requests_in_one_sec)
total_anticipated_requests = len(antibodies_ids)
failed_requests_for_ab_ids = []
for i in range(len(antibodies_ids)):
ab_id = antibodies_ids[i]
try:
number_of_citations = fetch_scicrunch_citation_metric(
ab_id, scicrunch_api_key
)
if number_of_citations:
ingested = ingest_scicrunch_citation_metric(ab_id, number_of_citations)
except FetchCitationMetricFailed as e:
self.log_error(e)
failed_requests_for_ab_ids.append(ab_id)
except Exception as e:
self.log_error(e)
failed_requests_for_ab_ids.append(ab_id)

api_request_rate_limiter.add_request()

# if more than 1% of the total fails then stop the script
if len(failed_requests_for_ab_ids) / total_anticipated_requests > 0.01:
self.log_error(
"More than 1% of the requests failed. Exiting the script"
)
break # stop the script if more than 1% of the requests fail

if failed_requests_for_ab_ids:
def log_failed_antibodies(self, failed_antibody_ids):
    """Log the antibodies whose processing failed and exit with status 1.

    Does nothing when ``failed_antibody_ids`` is empty. Otherwise logs at
    most the first 10 ids (followed by ``...`` only when more were
    omitted), logs the total number of failures, and calls ``sys.exit(1)``.

    Args:
        failed_antibody_ids: Sequence of antibody ids that failed.
    """
    if not failed_antibody_ids:
        return

    max_listed = 10
    # str() keeps the join working when ids are not strings.
    antibodies_failed = ", ".join(str(ab_id) for ab_id in failed_antibody_ids[:max_listed])
    if len(failed_antibody_ids) > max_listed:
        antibodies_failed += "..."

    self.log_error(
        f"Failed for Antibodies: {antibodies_failed}. Exiting the script"
    )
    self.log_error(f"Total Failed: {len(failed_antibody_ids)}")
    sys.exit(1)

def has_too_many_failures(self, failed_antibody_ids, total_anticipated_requests):
    """Report whether more than 1% of the anticipated requests have failed.

    Args:
        failed_antibody_ids: Collection of antibody ids that failed so far.
        total_anticipated_requests: Total number of requests expected.

    Returns:
        True (after logging an error) when the failure ratio is strictly
        greater than 1%; False otherwise, including when no requests are
        anticipated at all.
    """
    # With nothing to request there is no ratio to compute; avoid dividing by zero.
    if not total_anticipated_requests:
        return False

    # if more than 1% of the total fails then stop the script
    if len(failed_antibody_ids) / total_anticipated_requests > 0.01:
        self.log_error(
            "More than 1% of the requests failed. Exiting the script"
        )
        return True
    return False

Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ def test_scicrunch_citation_django_command(self):
except SystemExit as e:
self.assertEqual(e.code, 1)

# add the antibodies found in Scicrunch website to the DB
for testab in TEST_ANTIBODIES_FOR_SCICRUNCH_CITATION[:4]:
ab = Antibody.objects.create(
ab_id=testab["ab_id"],
Expand All @@ -46,6 +47,7 @@ def test_scicrunch_citation_django_command(self):
testab["expected_citation"]
)

# Add an antibody not present in the Scicrunch website
unknown_id_antibody = TEST_ANTIBODIES_FOR_SCICRUNCH_CITATION[4]
ab2 = Antibody.objects.create(
ab_id=unknown_id_antibody["ab_id"], ## unknown Id [100]
Expand All @@ -56,8 +58,9 @@ def test_scicrunch_citation_django_command(self):

command.handle()

# Antibody with unknown_id should not have any citation - hence it should be 0
a2 = Antibody.objects.get(ab_id=unknown_id_antibody["ab_id"])
self.assertEqual(a2.citation, 0) ## 0 - since it doesn't find any citation for unknown Id 1
self.assertEqual(a2.citation, 0)


def test_rate_limiter(self):
Expand Down

0 comments on commit 32b4b6e

Please sign in to comment.