From cc9260a7165a53cd527bfc9fa9375d2706caf3bd Mon Sep 17 00:00:00 2001 From: Matthew Templeton Date: Mon, 24 Jul 2023 07:45:35 -0400 Subject: [PATCH 1/8] fix: case-insensitive curator comment + fix logger.warning statement modified: adsdocmatch/oracle_util.py --- adsdocmatch/oracle_util.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/adsdocmatch/oracle_util.py b/adsdocmatch/oracle_util.py index 2342494..7a0ed70 100644 --- a/adsdocmatch/oracle_util.py +++ b/adsdocmatch/oracle_util.py @@ -357,8 +357,9 @@ def read_google_sheet(self, input_filename): for index, row in dt.iterrows(): # if curator comment is not in vocabulary; print flag, and drop the row comments = ['update', 'add', 'delete'] + row.curator_comment = row.curator_comment.lower() if row.curator_comment not in comments: - logger.warning('Error: Bad curator comment at', row.source_bib) + logger.warning('Error: Bad curator comment at %s' % row.source_bib) dt.drop(index, inplace=True) # where curator comment is 'update', duplicate row and rewrite actions; From e67e7fb6f9c12be2fb4ea4879517bf7397f54d10 Mon Sep 17 00:00:00 2001 From: Matthew Templeton Date: Mon, 7 Aug 2023 11:02:13 -0400 Subject: [PATCH 2/8] fix: added cleanup_db method to oracle_util.py modified: adsdocmatch/oracle_util.py --- adsdocmatch/oracle_util.py | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/adsdocmatch/oracle_util.py b/adsdocmatch/oracle_util.py index 7a0ed70..bafd91d 100644 --- a/adsdocmatch/oracle_util.py +++ b/adsdocmatch/oracle_util.py @@ -564,3 +564,38 @@ def update_db_sourced_matches(self, input_filename, source): return self.add_to_db(matches) else: return "Unable to get confidence for source %s from oracle."%source + + def cleanup_db(self): + """ + + :param: + :return: + """ + + try: + num_attempts = int(config.get('DOCMATCHPIPELINE_API_ORACLE_SERVICE_ATTEMPTS', 5)) + for i in range(num_attempts): + response = requests.get( + url=config.get('DOCMATCHPIPELINE_API_ORACLE_SERVICE_URL', 'http://localhost') + '/cleanup', + headers={'Content-type': 'application/json', 'Accept': 'text/plain', + 'Authorization': 'Bearer %s' % config.get('DOCMATCHPIPELINE_API_TOKEN', '')}) + status_code = response.status_code + if status_code == 200: + logger.info('Got 200 for status_code at attempt # %d' % (i + 1)) + break + # if got 5xx errors from oracle, per alberto, sleep for five seconds and try again, attempt 3 times + elif status_code in [502, 504]: + logger.info('Got %d status_code from oracle, waiting %d second and attempt again.' % ( + status_code, num_attempts)) + time.sleep(sleep_sec) + # any other error, quit + else: + logger.info('Got %s status_code from a call to oracle, stopping.' % status_code) + break + if status_code == 200: + logger.info('Cleanup command issued to oracle database.') + else: + raise Exception('Unable to issue cleanup command to oracle_service') + except Exception as err: + logger.error("Error from cleanup_db: %s" % err) + return From 0b1c1e3cd515f84f79e2e5ad2dddab2da0feefc9 Mon Sep 17 00:00:00 2001 From: Matthew Templeton Date: Mon, 7 Aug 2023 11:19:46 -0400 Subject: [PATCH 3/8] fix: added cleanup argument to run.py modified: adsdocmatch/oracle_util.py modified: run.py --- adsdocmatch/oracle_util.py | 1 + run.py | 13 +++++++++++++ 2 files changed, 14 insertions(+) diff --git a/adsdocmatch/oracle_util.py b/adsdocmatch/oracle_util.py index bafd91d..e822bfc 100644 --- a/adsdocmatch/oracle_util.py +++ b/adsdocmatch/oracle_util.py @@ -573,6 +573,7 @@ def cleanup_db(self): """ try: + sleep_sec = int(config.get('DOCMATCHPIPELINE_API_ORACLE_SERVICE_SLEEP_SEC', 5)) num_attempts = int(config.get('DOCMATCHPIPELINE_API_ORACLE_SERVICE_ATTEMPTS', 5)) for i in range(num_attempts): response = requests.get( diff --git a/run.py b/run.py index b371138..3baf3d9 100644 --- a/run.py +++ b/run.py @@ -87,6 +87,13 @@ def get_args(): default=False, help="Add tab delimited two column matched bibcodes to oracle with confidence of the source.") + parser.add_argument("-c", + "--cleanup", + dest="cleanup_oracle", + action="store_true", + default=False, + help="Clean up the db, removing tmp bibcodes and lower confidence of multi matches.") + return parser.parse_args() @@ -188,6 +195,12 @@ def main(): else: logger.error("Both parameters are need to add matched bibcodes to oracle: file path (-mf) and the source to apply (-as).") + elif args.cleanup_oracle: + try: + status = OracleUtil().cleanup_db() + logger.info("The cleanup_db command was issued to oracle_service.") + except Exception as err: + logger.error("Error issuing cleanup_db command to oracle_service: %s" % err) else: logger.debug("Nothing to do.") From 825e9a57eb864a2bc55a7f2fec84ab47e24bfb82 Mon Sep 17 00:00:00 2001 From: Matthew Templeton Date: Mon, 7 Aug 2023 13:20:28 -0400 Subject: [PATCH 4/8] added unit test for cleanup_db modified: adsdocmatch/tests/unittests/test_oracle_util.py --- adsdocmatch/tests/unittests/test_oracle_util.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/adsdocmatch/tests/unittests/test_oracle_util.py b/adsdocmatch/tests/unittests/test_oracle_util.py index 00a98a0..00e0355 100644 --- a/adsdocmatch/tests/unittests/test_oracle_util.py +++ b/adsdocmatch/tests/unittests/test_oracle_util.py @@ -570,6 +570,17 @@ def test_query(self): # remove temp files os.remove(tmp_output_filename) + def test_cleanup(self): + """ """ + with mock.patch('requests.get') as mock_oracle_util: + mock_oracle_util.return_value = mock_response = mock.Mock() + mock_response.status_code = 200 + result = self.match_metadata.ORACLE_UTIL.cleanup_db() + self.assertEqual(result, None) + mock_response.status_code = 403 + result = self.match_metadata.ORACLE_UTIL.cleanup_db() + self.assertEqual(result, None) + if __name__ == '__main__': unittest.main() From 0b316024e465c8b4bcdea4841b39605dbe7ca8ae Mon Sep 17 00:00:00 2001 From: Matthew Templeton Date: Mon, 7 Aug 2023 13:52:13 -0400 Subject: [PATCH 5/8] updated oracle_util.cleanup_db return values and tests modified: adsdocmatch/oracle_util.py modified: adsdocmatch/tests/unittests/test_oracle_util.py modified: run.py --- adsdocmatch/oracle_util.py | 10 +++++----- adsdocmatch/tests/unittests/test_oracle_util.py | 5 ++++- run.py | 5 ++++- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/adsdocmatch/oracle_util.py b/adsdocmatch/oracle_util.py index e822bfc..3ca7de9 100644 --- a/adsdocmatch/oracle_util.py +++ b/adsdocmatch/oracle_util.py @@ -578,14 +578,13 @@ def cleanup_db(self): for i in range(num_attempts): response = requests.get( url=config.get('DOCMATCHPIPELINE_API_ORACLE_SERVICE_URL', 'http://localhost') + '/cleanup', - headers={'Content-type': 'application/json', 'Accept': 'text/plain', - 'Authorization': 'Bearer %s' % config.get('DOCMATCHPIPELINE_API_TOKEN', '')}) + headers={'Authorization': 'Bearer %s' % config.get('DOCMATCHPIPELINE_API_TOKEN', '')}) status_code = response.status_code if status_code == 200: logger.info('Got 200 for status_code at attempt # %d' % (i + 1)) break # if got 5xx errors from oracle, per alberto, sleep for five seconds and try again, attempt 3 times - elif status_code in [502, 504]: + elif status_code in [500, 502, 504]: logger.info('Got %d status_code from oracle, waiting %d second and attempt again.' % ( status_code, num_attempts)) time.sleep(sleep_sec) @@ -595,8 +594,9 @@ def cleanup_db(self): break if status_code == 200: logger.info('Cleanup command issued to oracle database.') + return else: - raise Exception('Unable to issue cleanup command to oracle_service') + raise Exception('Unable to issue cleanup command to oracle_service, service returned %s on final try' % status_code) except Exception as err: logger.error("Error from cleanup_db: %s" % err) - return + return "Error from cleanup_db: %s" % err diff --git a/adsdocmatch/tests/unittests/test_oracle_util.py b/adsdocmatch/tests/unittests/test_oracle_util.py index 00e0355..ce3a308 100644 --- a/adsdocmatch/tests/unittests/test_oracle_util.py +++ b/adsdocmatch/tests/unittests/test_oracle_util.py @@ -579,7 +579,10 @@ def test_cleanup(self): self.assertEqual(result, None) mock_response.status_code = 403 result = self.match_metadata.ORACLE_UTIL.cleanup_db() - self.assertEqual(result, None) + self.assertEqual(result, 'Error from cleanup_db: Unable to issue cleanup command to oracle_service, service returned 403 on final try') + mock_response.status_code = 502 + result = self.match_metadata.ORACLE_UTIL.cleanup_db() + self.assertEqual(result, 'Error from cleanup_db: Unable to issue cleanup command to oracle_service, service returned 502 on final try') if __name__ == '__main__': diff --git a/run.py b/run.py index 3baf3d9..1b3c0ac 100644 --- a/run.py +++ b/run.py @@ -198,7 +198,10 @@ def main(): elif args.cleanup_oracle: try: status = OracleUtil().cleanup_db() - logger.info("The cleanup_db command was issued to oracle_service.") + if not status: + logger.info("The cleanup_db command was issued to oracle_service.") + else: + logger.warning("The cleanup_db command failed: %s" % status) except Exception as err: logger.error("Error issuing cleanup_db command to oracle_service: %s" % err) else: From 44b9cf5cc864fc28be0f5b16a89fa9b83d6def48 Mon Sep 17 00:00:00 2001 From: Matthew Templeton Date: Tue, 8 Aug 2023 08:40:06 -0400 Subject: [PATCH 6/8] capture return text from oracle_util.cleanup_db modified: adsdocmatch/oracle_util.py modified: adsdocmatch/tests/unittests/test_oracle_util.py modified: run.py --- adsdocmatch/oracle_util.py | 4 ++-- adsdocmatch/tests/unittests/test_oracle_util.py | 9 ++++++--- run.py | 7 ++++--- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/adsdocmatch/oracle_util.py b/adsdocmatch/oracle_util.py index 3ca7de9..7cb5db7 100644 --- a/adsdocmatch/oracle_util.py +++ b/adsdocmatch/oracle_util.py @@ -594,9 +594,9 @@ def cleanup_db(self): break if status_code == 200: logger.info('Cleanup command issued to oracle database.') - return else: - raise Exception('Unable to issue cleanup command to oracle_service, service returned %s on final try' % status_code) + logger.warning('Unable to issue cleanup command to oracle_service, service returned %s on final try' % status_code) + return response.text except Exception as err: logger.error("Error from cleanup_db: %s" % err) return "Error from cleanup_db: %s" % err diff --git a/adsdocmatch/tests/unittests/test_oracle_util.py b/adsdocmatch/tests/unittests/test_oracle_util.py index ce3a308..b5ac81a 100644 --- a/adsdocmatch/tests/unittests/test_oracle_util.py +++ b/adsdocmatch/tests/unittests/test_oracle_util.py @@ -575,14 +575,17 @@ def test_cleanup(self): with mock.patch('requests.get') as mock_oracle_util: mock_oracle_util.return_value = mock_response = mock.Mock() mock_response.status_code = 200 + mock_response.text = 'This is a message.' result = self.match_metadata.ORACLE_UTIL.cleanup_db() - self.assertEqual(result, None) + self.assertEqual(result, 'This is a message.') mock_response.status_code = 403 + mock_response.text = 'This is a different message.' result = self.match_metadata.ORACLE_UTIL.cleanup_db() - self.assertEqual(result, 'Error from cleanup_db: Unable to issue cleanup command to oracle_service, service returned 403 on final try') + self.assertEqual(result, 'This is a different message.') mock_response.status_code = 502 + mock_response.text = 'Yet another message.' result = self.match_metadata.ORACLE_UTIL.cleanup_db() - self.assertEqual(result, 'Error from cleanup_db: Unable to issue cleanup command to oracle_service, service returned 502 on final try') + self.assertEqual(result, 'Yet another message.') if __name__ == '__main__': diff --git a/run.py b/run.py index 1b3c0ac..11c3706 100644 --- a/run.py +++ b/run.py @@ -198,12 +198,13 @@ def main(): elif args.cleanup_oracle: try: status = OracleUtil().cleanup_db() - if not status: - logger.info("The cleanup_db command was issued to oracle_service.") + if status: + logger.info("The cleanup_db command returned the result: %s" % status) else: - logger.warning("The cleanup_db command failed: %s" % status) + logger.warning("The cleanup_db command did not return a status.") except Exception as err: logger.error("Error issuing cleanup_db command to oracle_service: %s" % err) + else: logger.debug("Nothing to do.") From 5d1edcac80306ec327c390c228f2f3b88ef66780 Mon Sep 17 00:00:00 2001 From: Matthew Templeton Date: Wed, 9 Aug 2023 11:53:23 -0400 Subject: [PATCH 7/8] fix: modify cleanup_db return per GS modified: adsdocmatch/oracle_util.py --- adsdocmatch/oracle_util.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/adsdocmatch/oracle_util.py b/adsdocmatch/oracle_util.py index 7cb5db7..635045f 100644 --- a/adsdocmatch/oracle_util.py +++ b/adsdocmatch/oracle_util.py @@ -581,22 +581,23 @@ def cleanup_db(self): headers={'Authorization': 'Bearer %s' % config.get('DOCMATCHPIPELINE_API_TOKEN', '')}) status_code = response.status_code if status_code == 200: - logger.info('Got 200 for status_code at attempt # %d' % (i + 1)) + logger.info('Got HTTP Status 200 from oracle at attempt # %d' % (i + 1)) break # if got 5xx errors from oracle, per alberto, sleep for five seconds and try again, attempt 3 times elif status_code in [500, 502, 504]: - logger.info('Got %d status_code from oracle, waiting %d second and attempt again.' % ( - status_code, num_attempts)) + logger.info('Got HTTP Status %d from oracle, waiting %d seconds for next attempt...' % (status_code, sleep_sec)) time.sleep(sleep_sec) # any other error, quit else: logger.info('Got %s status_code from a call to oracle, stopping.' % status_code) break + json_text = json.loads(response.text) + details = json_text.get('details', '') if status_code == 200: - logger.info('Cleanup command issued to oracle database.') + logger.info('Cleanup command issued to oracle database, received response: %s' % details) else: - logger.warning('Unable to issue cleanup command to oracle_service, service returned %s on final try' % status_code) - return response.text + logger.warning('Unable to issue cleanup command to oracle_service: on the final try, oracle service returned status code %s with details: %s ' % (status_code, details)) + return details except Exception as err: logger.error("Error from cleanup_db: %s" % err) return "Error from cleanup_db: %s" % err From b153e3e1efbdcaa3eeae0e74089796822efcf9cf Mon Sep 17 00:00:00 2001 From: Matthew Templeton Date: Wed, 9 Aug 2023 12:11:07 -0400 Subject: [PATCH 8/8] Updated unit tests for cleanup_db "details" json modified: adsdocmatch/tests/unittests/test_oracle_util.py --- adsdocmatch/tests/unittests/test_oracle_util.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/adsdocmatch/tests/unittests/test_oracle_util.py b/adsdocmatch/tests/unittests/test_oracle_util.py index b5ac81a..e302a2d 100644 --- a/adsdocmatch/tests/unittests/test_oracle_util.py +++ b/adsdocmatch/tests/unittests/test_oracle_util.py @@ -575,17 +575,17 @@ def test_cleanup(self): with mock.patch('requests.get') as mock_oracle_util: mock_oracle_util.return_value = mock_response = mock.Mock() mock_response.status_code = 200 - mock_response.text = 'This is a message.' + mock_response.text = '{"details": "This is one result."}' result = self.match_metadata.ORACLE_UTIL.cleanup_db() - self.assertEqual(result, 'This is a message.') + self.assertEqual(result, 'This is one result.') mock_response.status_code = 403 - mock_response.text = 'This is a different message.' + mock_response.text = '{"details": "This is another result."}' result = self.match_metadata.ORACLE_UTIL.cleanup_db() - self.assertEqual(result, 'This is a different message.') + self.assertEqual(result, 'This is another result.') mock_response.status_code = 502 - mock_response.text = 'Yet another message.' + mock_response.text = '{"details": "This is yet another result."}' result = self.match_metadata.ORACLE_UTIL.cleanup_db() - self.assertEqual(result, 'Yet another message.') + self.assertEqual(result, 'This is yet another result.') if __name__ == '__main__':