diff --git a/Framework/script/RepoCleaner/README.md b/Framework/script/RepoCleaner/README.md index 748166e829..173ef56349 100644 --- a/Framework/script/RepoCleaner/README.md +++ b/Framework/script/RepoCleaner/README.md @@ -33,10 +33,26 @@ There can be any number of these rules. The order is important as we use the fir The configuration for ccdb-test is described [here](../../../doc/DevelopersTips.md). +## Setup virtual environment for development and test (venv) + +1. cd Framework/script/RepoCleaner +2. python3 -m venv env +3. source env/bin/activate +4. python -m pip install -r requirements.txt +5. python3 -m pip install . +6. You can execute and work. Next time just do "activate" and then you are good to go + ## Unit Tests -`cd QualityControl/Framework/script/RepoCleaner ; python3 -m unittest discover` -and to test only one of them: `python3 -m unittest tests/test_NewProduction.py -k test_2_runs` +``` +cd Framework/script/RepoCleaner +source env/bin/activate + +# Run a test: +python -m unittest tests.test_Ccdb.TestCcdb.test_getObjectsList +``` + +`cd QualityControl/Framework/script/RepoCleaner ; python3 -m unittest discover` In particular there is a test for the `production` rule that is pretty extensive. It hits the ccdb though and it needs the following path to be truncated: ` @@ -75,11 +91,3 @@ Create new version 2. `python3 setup.py sdist bdist_wheel` 3. `python3 -m twine upload --repository pypi dist/*` -## Use venv - -1. cd Framework/script/RepoCleaner -2. python3 -m venv env -3. source env/bin/activate -4. python -m pip install -r requirements.txt -5. python3 -m pip install . -6. You can execute and work. Next time just do "activate" and then you are good to go \ No newline at end of file diff --git a/Framework/script/RepoCleaner/qcrepocleaner/Ccdb.py b/Framework/script/RepoCleaner/qcrepocleaner/Ccdb.py index ead669180a..407cec3416 100644 --- a/Framework/script/RepoCleaner/qcrepocleaner/Ccdb.py +++ b/Framework/script/RepoCleaner/qcrepocleaner/Ccdb.py @@ -26,6 +26,7 @@ def __init__(self, path: str, validFrom, validTo, createdAt, uuid=None, metadata :param uuid: unique id of the object :param validFrom: validity range smaller limit (in ms) :param validTo: validity range bigger limit (in ms) + :param createdAt: creation timestamp of the object ''' self.path = path self.uuid = uuid @@ -72,7 +73,8 @@ def getObjectsList(self, added_since: int = 0, path: str = "", no_wildcard: bool :return A list of strings, each containing a path to an object in the CCDB. 
''' url_for_all_obj = self.url + '/latest/' + path - url_for_all_obj += '/' if no_wildcard else '/.*' + url_for_all_obj += '/' if path else '' + url_for_all_obj += '' if no_wildcard else '.*' logger.debug(f"Ccdb::getObjectsList -> {url_for_all_obj}") headers = {'Accept': 'application/json', 'If-Not-Before':str(added_since)} r = requests.get(url_for_all_obj, headers=headers) diff --git a/Framework/script/RepoCleaner/requirements.txt b/Framework/script/RepoCleaner/requirements.txt new file mode 100644 index 0000000000..7418a2c699 --- /dev/null +++ b/Framework/script/RepoCleaner/requirements.txt @@ -0,0 +1,12 @@ +certifi==2024.2.2 +chardet==5.2.0 +charset-normalizer==3.3.2 +dryable==1.2.0 +idna==3.7 +psutil==6.1.0 +python-consul==1.1.0 +PyYAML==6.0.1 +requests==2.31.0 +responses==0.25.0 +six==1.16.0 +urllib3==2.2.1 diff --git a/Framework/script/RepoCleaner/qcrepocleaner/config-test.yaml b/Framework/script/RepoCleaner/tests/config-test.yaml similarity index 100% rename from Framework/script/RepoCleaner/qcrepocleaner/config-test.yaml rename to Framework/script/RepoCleaner/tests/config-test.yaml diff --git a/Framework/script/RepoCleaner/qcrepocleaner/objectsList.json b/Framework/script/RepoCleaner/tests/objectsList.json similarity index 100% rename from Framework/script/RepoCleaner/qcrepocleaner/objectsList.json rename to Framework/script/RepoCleaner/tests/objectsList.json diff --git a/Framework/script/RepoCleaner/tests/test_1_per_hour.py b/Framework/script/RepoCleaner/tests/test_1_per_hour.py index 0b5d01341e..5d783363b4 100644 --- a/Framework/script/RepoCleaner/tests/test_1_per_hour.py +++ b/Framework/script/RepoCleaner/tests/test_1_per_hour.py @@ -1,26 +1,12 @@ import logging import time import unittest -from datetime import timedelta, date, datetime - -from Ccdb import Ccdb, ObjectVersion -from rules import last_only -import os -import sys -import importlib - -def import_path(path): # needed because o2-qc-repo-cleaner has no suffix - module_name = os.path.basename(path).replace('-', '_') - spec = importlib.util.spec_from_loader( - module_name, - importlib.machinery.SourceFileLoader(module_name, path) - ) - module = importlib.util.module_from_spec(spec) - spec.loader.exec_module(module) - sys.modules[module_name] = module - return module - -one_per_hour = import_path("../qcrepocleaner/rules/1_per_hour.py") +from importlib import import_module +from qcrepocleaner.Ccdb import Ccdb +from tests import test_utils +from tests.test_utils import CCDB_TEST_URL + +one_per_hour = import_module(".1_per_hour", "qcrepocleaner.rules") # file names should not start with a number... class Test1PerHour(unittest.TestCase): """ @@ -35,7 +21,7 @@ class Test1PerHour(unittest.TestCase): one_minute = 60000 def setUp(self): - self.ccdb = Ccdb('http://ccdb-test.cern.ch:8080') + self.ccdb = Ccdb(CCDB_TEST_URL) # ccdb-test but please use IP to avoid DNS alerts self.path = "qc/TST/MO/repo/test" self.run = 124321 self.extra = {} @@ -43,10 +29,10 @@ def setUp(self): def test_1_per_hour(self): """ - 60 versions, 2 minutes apart + 120 versions grace period of 15 minutes - First version is preserved (always). 7 are preserved during the grace period at the end. - One more is preserved after 1 hour. --> 9 preserved + First version is preserved (always). 14 are preserved during the grace period at the end. + One more is preserved after 1 hour. 
--> 16 preserved """ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', datefmt='%d-%b-%y %H:%M:%S') @@ -54,24 +40,26 @@ def test_1_per_hour(self): # Prepare data test_path = self.path + "/test_1_per_hour" - self.prepare_data(test_path, 60, 2) + test_utils.clean_data(self.ccdb, test_path) + test_utils.prepare_data(self.ccdb, test_path, [120], [0], 123) stats = one_per_hour.process(self.ccdb, test_path, 15, 1, self.in_ten_years, self.extra) - self.assertEqual(stats["deleted"], 51) - self.assertEqual(stats["preserved"], 9) + logging.info(stats) + self.assertEqual(stats["deleted"], 104) + self.assertEqual(stats["preserved"], 16) objects_versions = self.ccdb.getVersionsList(test_path) - self.assertEqual(len(objects_versions), 9) + self.assertEqual(len(objects_versions), 16) def test_1_per_hour_period(self): """ - 60 versions, 2 minutes apart + 120 versions no grace period period of acceptance: 1 hour in the middle - We have therefore 30 versions in the acceptance period. + We have therefore 60 versions in the acceptance period. Only 1 of them, the one 1 hour after the first version in the set, will be preserved, the others are deleted. - Thus we have 29 deletion. Everything outside the acceptance period is kept. + Thus we have 59 deletion. Everything outside the acceptance period is kept. """ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', datefmt='%d-%b-%y %H:%M:%S') @@ -79,42 +67,18 @@ def test_1_per_hour_period(self): # Prepare data test_path = self.path + "/test_1_per_hour_period" - self.prepare_data(test_path, 60, 2) + test_utils.clean_data(self.ccdb, test_path) + test_utils.prepare_data(self.ccdb, test_path, [120], [0], 123) current_timestamp = int(time.time() * 1000) - logging.debug(f"{current_timestamp} - {datetime.today()}") - - objects_versions = self.ccdb.getVersionsList(test_path) - created = len(objects_versions) stats = one_per_hour.process(self.ccdb, test_path, 15, current_timestamp-90*60*1000, current_timestamp-30*60*1000, self.extra) - self.assertEqual(stats["deleted"], 29) - self.assertEqual(stats["preserved"], 31) + logging.info(stats) + self.assertEqual(stats["deleted"], 59) + self.assertEqual(stats["preserved"], 61) objects_versions = self.ccdb.getVersionsList(test_path) - self.assertEqual(len(objects_versions), 31) - - - def prepare_data(self, path, number_versions, minutes_between): - """ - Prepare a data set starting `since_minutes` in the past. 
- 1 version per minute - """ - - current_timestamp = int(time.time() * 1000) - data = {'part': 'part'} - run = 1234 - counter = 0 - - for x in range(number_versions+1): - counter = counter + 1 - from_ts = current_timestamp - minutes_between * x * 60 * 1000 - to_ts = current_timestamp - metadata = {'RunNumber': str(run)} - version_info = ObjectVersion(path=path, validFrom=from_ts, validTo=to_ts, metadata=metadata) - self.ccdb.putVersion(version=version_info, data=data) - - logging.debug(f"counter : {counter}") + self.assertEqual(len(objects_versions), 61) if __name__ == '__main__': diff --git a/Framework/script/RepoCleaner/tests/test_1_per_run.py b/Framework/script/RepoCleaner/tests/test_1_per_run.py index c2f6a8e4ec..887de75608 100644 --- a/Framework/script/RepoCleaner/tests/test_1_per_run.py +++ b/Framework/script/RepoCleaner/tests/test_1_per_run.py @@ -1,29 +1,13 @@ import logging import time import unittest -from datetime import timedelta, date, datetime +from importlib import import_module -from Ccdb import Ccdb, ObjectVersion -from rules import last_only -import os -import sys -import importlib - - -def import_path(path): # needed because o2-qc-repo-cleaner has no suffix - module_name = os.path.basename(path).replace('-', '_') - spec = importlib.util.spec_from_loader( - module_name, - importlib.machinery.SourceFileLoader(module_name, path) - ) - module = importlib.util.module_from_spec(spec) - spec.loader.exec_module(module) - sys.modules[module_name] = module - return module - - -one_per_run = import_path("../qcrepocleaner/rules/1_per_run.py") +from qcrepocleaner.Ccdb import Ccdb +from tests import test_utils +from tests.test_utils import CCDB_TEST_URL +one_per_run = import_module(".1_per_run", "qcrepocleaner.rules") # file names should not start with a number... 
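The tests above drop the hand-rolled `import_path`/`SourceFileLoader` helper in favour of `importlib.import_module`, which takes the module name as a string and therefore copes with rule files whose names start with a digit. A minimal sketch of the pattern, assuming the `qcrepocleaner` package is installed in the venv described in the README:

```python
from importlib import import_module

# `import qcrepocleaner.rules.1_per_run` would be a SyntaxError because
# "1_per_run" is not a valid identifier; import_module() only needs a string.
one_per_run = import_module(".1_per_run", "qcrepocleaner.rules")

# The returned module is then used like any other rule module, e.g.
#   stats = one_per_run.process(ccdb, path, delay, from_timestamp, to_timestamp, extra)
```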
class Test1PerRun(unittest.TestCase): """ @@ -38,15 +22,14 @@ class Test1PerRun(unittest.TestCase): one_minute = 60000 def setUp(self): - self.ccdb = Ccdb('http://ccdb-test.cern.ch:8080') + self.ccdb = Ccdb(CCDB_TEST_URL) self.path = "qc/TST/MO/repo/test" self.run = 124321 self.extra = {} def test_1_per_run(self): """ - 60 versions, 1 minute apart - 6 runs + 6 runs of 10 versions, versions 1 minute apart grace period of 15 minutes Preserved: 14 at the end (grace period), 6 for the runs, but 2 are in both sets --> 14+6-2=18 preserved """ @@ -56,7 +39,8 @@ def test_1_per_run(self): # Prepare data test_path = self.path + "/test_1_per_run" - self.prepare_data(test_path, 60) + test_utils.clean_data(self.ccdb, test_path) + test_utils.prepare_data(self.ccdb, test_path, [10, 10, 10, 10, 10, 10], [0, 0, 0, 0, 0, 0], 123) objects_versions = self.ccdb.getVersionsList(test_path) created = len(objects_versions) @@ -71,8 +55,7 @@ def test_1_per_run(self): def test_1_per_run_period(self): """ - 60 versions 1 minute apart - 6 runs + 6 runs of 10 versions each, versions 1 minute apart no grace period acceptance period is only the 38 minutes in the middle preserved: 6 runs + 11 first and 11 last, with an overlap of 2 --> 26 @@ -83,7 +66,8 @@ def test_1_per_run_period(self): # Prepare data test_path = self.path + "/test_1_per_run_period" - self.prepare_data(test_path, 60) + test_utils.clean_data(self.ccdb, test_path) + test_utils.prepare_data(self.ccdb, test_path, [10, 10, 10, 10, 10, 10], [0, 0, 0, 0, 0, 0], 123) current_timestamp = int(time.time() * 1000) stats = one_per_run.process(self.ccdb, test_path, 0, current_timestamp - 49 * 60 * 1000, @@ -94,29 +78,5 @@ def test_1_per_run_period(self): objects_versions = self.ccdb.getVersionsList(test_path) self.assertEqual(len(objects_versions), 26) - def prepare_data(self, path, since_minutes): - """ - Prepare a data set starting `since_minutes` in the past. 
- 1 version per minute, 1 run every 10 versions - """ - - current_timestamp = int(time.time() * 1000) - data = {'part': 'part'} - run = 1234 - counter = 0 - - for x in range(since_minutes + 1): - counter = counter + 1 - from_ts = current_timestamp - x * 60 * 1000 - to_ts = current_timestamp - metadata = {'RunNumber': str(run)} - version_info = ObjectVersion(path=path, validFrom=from_ts, validTo=to_ts, metadata=metadata) - self.ccdb.putVersion(version=version_info, data=data) - if x % 10 == 0: - run = run + 1 - - logging.debug(f"counter : {counter}") - - if __name__ == '__main__': unittest.main() diff --git a/Framework/script/RepoCleaner/tests/test_Ccdb.py b/Framework/script/RepoCleaner/tests/test_Ccdb.py index 7f04f27902..03030a9b0d 100644 --- a/Framework/script/RepoCleaner/tests/test_Ccdb.py +++ b/Framework/script/RepoCleaner/tests/test_Ccdb.py @@ -1,45 +1,48 @@ import logging import unittest -import requests +from typing import List + import responses -from Ccdb import Ccdb, ObjectVersion -from rules import production +from qcrepocleaner.Ccdb import Ccdb, ObjectVersion +from tests.test_utils import CCDB_TEST_URL + class TestCcdb(unittest.TestCase): def setUp(self): - with open('../qcrepocleaner/objectsList.json') as f: # will close() when we leave this block + with open('objectsList.json') as f: # will close() when we leave this block self.content_objectslist = f.read() - with open('../versionsList.json') as f: # will close() when we leave this block + with open('versionsList.json') as f: # will close() when we leave this block self.content_versionslist = f.read() - self.ccdb = Ccdb('http://ccdb-test.cern.ch:8080') + self.ccdb = Ccdb(CCDB_TEST_URL) + logging.getLogger().setLevel(logging.DEBUG) @responses.activate def test_getObjectsList(self): # Prepare mock response - responses.add(responses.GET, 'http://ccdb-test.cern.ch:8080/latest/.*', + responses.add(responses.GET, CCDB_TEST_URL + '/latest/.*', self.content_objectslist, status=200) # get list of objects - objectsList = self.ccdb.getObjectsList() - print(f"{objectsList}") - self.assertEqual(len(objectsList), 3) - self.assertEqual(objectsList[0], 'Test') - self.assertEqual(objectsList[1], 'ITSQcTask/ChipStaveCheck') + objects_list = self.ccdb.getObjectsList() + print(f"{objects_list}") + self.assertEqual(len(objects_list), 3) + self.assertEqual(objects_list[0], 'Test') + self.assertEqual(objects_list[1], 'ITSQcTask/ChipStaveCheck') @responses.activate def test_getVersionsList(self): # Prepare mock response object_path='asdfasdf/example' - responses.add(responses.GET, 'http://ccdb-test.cern.ch:8080/browse/'+object_path, + responses.add(responses.GET, CCDB_TEST_URL + '/browse/'+object_path, self.content_versionslist, status=200) # get versions for object - versionsList: List[ObjectVersion] = self.ccdb.getVersionsList(object_path) - print(f"{versionsList}") - self.assertEqual(len(versionsList), 2) - self.assertEqual(versionsList[0].path, object_path) - self.assertEqual(versionsList[1].path, object_path) - self.assertEqual(versionsList[1].metadata["custom"], "34") + versions_list: List[ObjectVersion] = self.ccdb.getVersionsList(object_path) + print(f"{versions_list}") + self.assertEqual(len(versions_list), 2) + self.assertEqual(versions_list[0].path, object_path) + self.assertEqual(versions_list[1].path, object_path) + self.assertEqual(versions_list[1].metadata["custom"], "34") if __name__ == '__main__': unittest.main() diff --git a/Framework/script/RepoCleaner/tests/test_MultiplePerRun.py 
b/Framework/script/RepoCleaner/tests/test_MultiplePerRun.py index 8a3f53ce0f..7bbb759e7b 100644 --- a/Framework/script/RepoCleaner/tests/test_MultiplePerRun.py +++ b/Framework/script/RepoCleaner/tests/test_MultiplePerRun.py @@ -1,14 +1,13 @@ import logging import time import unittest -from datetime import timedelta, date, datetime -from typing import List -from qcrepocleaner.Ccdb import Ccdb, ObjectVersion +import test_utils +from qcrepocleaner.Ccdb import Ccdb from qcrepocleaner.rules import multiple_per_run -class TestProduction(unittest.TestCase): +class TestMultiplePerRun(unittest.TestCase): """ This test pushes data to the CCDB and then run the Rule Production and then check. It does it for several use cases. @@ -21,7 +20,7 @@ class TestProduction(unittest.TestCase): one_minute = 60000 def setUp(self): - self.ccdb = Ccdb('http://137.138.47.222:8080') + self.ccdb = Ccdb(test_utils.CCDB_TEST_URL) # ccdb-test but please use IP to avoid DNS alerts self.extra = {"interval_between_versions": "90", "migrate_to_EOS": False} self.path = "qc/TST/MO/repo/test" @@ -36,7 +35,8 @@ def test_1_finished_run(self): # Prepare data test_path = self.path + "/test_1_finished_run" - self.prepare_data(test_path, [150], [22*60], 123) + test_utils.clean_data(self.ccdb, test_path) + test_utils.prepare_data(self.ccdb, test_path, [150], [22*60], 123) stats = multiple_per_run.process(self.ccdb, test_path, delay=60*24, from_timestamp=1, to_timestamp=self.in_ten_years, extra_params=self.extra) @@ -56,7 +56,8 @@ def test_2_runs(self): # Prepare data test_path = self.path + "/test_2_runs" - self.prepare_data(test_path, [150, 150], [3*60, 20*60], 123) + test_utils.clean_data(self.ccdb, test_path) + test_utils.prepare_data(self.ccdb, test_path, [150, 150], [3 * 60, 20 * 60], 123) stats = multiple_per_run.process(self.ccdb, test_path, delay=60*24, from_timestamp=1, to_timestamp=self.in_ten_years, extra_params=self.extra) @@ -77,7 +78,8 @@ def test_5_runs(self): # Prepare data test_path = self.path + "/test_5_runs" - self.prepare_data(test_path, [1*60, 2*60, 3*60+10, 4*60, 5*60], + test_utils.clean_data(self.ccdb, test_path) + test_utils.prepare_data(self.ccdb, test_path, [1*60, 2*60, 3*60+10, 4*60, 5*60], [60, 120, 190, 240, 24*60], 123) stats = multiple_per_run.process(self.ccdb, test_path, delay=60*24, from_timestamp=1, @@ -85,11 +87,11 @@ def test_5_runs(self): self.assertEqual(stats["deleted"], 60+120+190+240+300-18) self.assertEqual(stats["preserved"], 18) self.assertEqual(stats["updated"], 0) - + # and now re-run it to make sure we preserve the state stats = multiple_per_run.process(self.ccdb, test_path, delay=60*24, from_timestamp=1, to_timestamp=self.in_ten_years, extra_params=self.extra) - + self.assertEqual(stats["deleted"], 0) self.assertEqual(stats["preserved"], 18) self.assertEqual(stats["updated"], 0) @@ -105,7 +107,8 @@ def test_run_one_object(self): # Prepare data test_path = self.path + "/test_run_one_object" - self.prepare_data(test_path, [1], [25*60], 123) + test_utils.clean_data(self.ccdb, test_path) + test_utils.prepare_data(self.ccdb, test_path, [1], [25*60], 123) stats = multiple_per_run.process(self.ccdb, test_path, delay=60*24, from_timestamp=1, to_timestamp=self.in_ten_years, extra_params=self.extra) @@ -125,7 +128,8 @@ def test_run_two_object(self): # Prepare data test_path = self.path + "/test_run_two_object" - self.prepare_data(test_path, [2], [25*60], 123) + test_utils.clean_data(self.ccdb, test_path) + test_utils.prepare_data(self.ccdb, test_path, [2], [25*60], 123) stats = 
multiple_per_run.process(self.ccdb, test_path, delay=60*24, from_timestamp=1, to_timestamp=self.in_ten_years, extra_params=self.extra) @@ -145,7 +149,8 @@ def test_3_runs_with_period(self): # Prepare data test_path = self.path + "/test_3_runs_with_period" - self.prepare_data(test_path, [30,30, 30], [120,120,25*60], 123) + test_utils.clean_data(self.ccdb, test_path) + test_utils.prepare_data(self.ccdb, test_path, [30,30, 30], [120,120,25*60], 123) current_timestamp = int(time.time() * 1000) stats = multiple_per_run.process(self.ccdb, test_path, delay=60*24, from_timestamp=current_timestamp-29*60*60*1000, @@ -160,49 +165,8 @@ def test_asdf(self): datefmt='%d-%b-%y %H:%M:%S') logging.getLogger().setLevel(int(10)) test_path = self.path + "/asdf" - self.prepare_data(test_path, [70, 70, 70], [6*60, 6*60, 25*60], 55555) - - def prepare_data(self, path, run_durations: List[int], time_till_next_run: List[int], first_run_number: int): - """ - Prepare a data set populated with a number of runs. - run_durations contains the duration of each of these runs in minutes - time_till_next_run is the time between two runs in minutes. - The first element of time_till_next_run is used to separate the first two runs. - Both lists must have the same number of elements. - """ - - if len(run_durations) != len(time_till_next_run): - logging.error(f"run_durations and time_till_next_run must have the same length") - exit(1) - - total_duration = 0 - for a, b in zip(run_durations, time_till_next_run): - total_duration += a + b - logging.info(f"Total duration : {total_duration}") - - current_timestamp = int(time.time() * 1000) - cursor = current_timestamp - total_duration * 60 * 1000 - first_ts = cursor - data = {'part': 'part'} - run = first_run_number - - for run_duration, time_till_next in zip(run_durations, time_till_next_run): - metadata = {'RunNumber': str(run)} - logging.debug(f"cursor: {cursor}") - logging.debug(f"time_till_next: {time_till_next}") - - for i in range(run_duration): - to_ts = cursor + 24 * 60 * 60 * 1000 # a day - metadata2 = {**metadata, 'Created': str(cursor)} - version_info = ObjectVersion(path=path, validFrom=cursor, validTo=to_ts, metadata=metadata2, - createdAt=cursor) - self.ccdb.putVersion(version=version_info, data=data) - cursor += 1 * 60 * 1000 - - run += 1 - cursor += time_till_next * 60 * 1000 - - return first_ts + test_utils.clean_data(self.ccdb, test_path) + test_utils.prepare_data(self.ccdb, test_path, [70, 70, 70], [6*60, 6*60, 25*60], 55555) if __name__ == '__main__': diff --git a/Framework/script/RepoCleaner/tests/test_MultiplePerRun_deleteFirstLast.py b/Framework/script/RepoCleaner/tests/test_MultiplePerRun_deleteFirstLast.py index d636c1e370..3b7780a570 100644 --- a/Framework/script/RepoCleaner/tests/test_MultiplePerRun_deleteFirstLast.py +++ b/Framework/script/RepoCleaner/tests/test_MultiplePerRun_deleteFirstLast.py @@ -1,14 +1,14 @@ import logging import time import unittest -from datetime import timedelta, date, datetime -from typing import List -from qcrepocleaner.Ccdb import Ccdb, ObjectVersion +from qcrepocleaner.Ccdb import Ccdb from qcrepocleaner.rules import multiple_per_run +from tests import test_utils +from tests.test_utils import CCDB_TEST_URL -class TestProduction(unittest.TestCase): +class TestMultiplePerRunDeleteFirstLast(unittest.TestCase): """ This test pushes data to the CCDB and then run the Rule Production and then check. It does it for several use cases. 
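Every test in these rule suites now follows the same shape: wipe the test path on ccdb-test, repopulate it with the shared helpers from `tests/test_utils.py` (shown in full at the end of this diff), run the rule once, and assert on the returned counters. A condensed sketch of that flow, with illustrative durations taken from `test_2_runs`:

```python
import time

from qcrepocleaner.Ccdb import Ccdb
from qcrepocleaner.rules import multiple_per_run
from tests import test_utils

ccdb = Ccdb(test_utils.CCDB_TEST_URL)        # ccdb-test, addressed by IP to avoid DNS alerts
test_path = "qc/TST/MO/repo/test/example"    # hypothetical path used only for this sketch

# Two runs of 150 one-minute versions: a 3-hour gap after the first run,
# and 20 hours between the end of the second run and "now".
test_utils.clean_data(ccdb, test_path)
test_utils.prepare_data(ccdb, test_path, [150, 150], [3 * 60, 20 * 60], 123)

in_ten_years = int(time.time() * 1000) + 10 * 365 * 24 * 60 * 60 * 1000  # stand-in for the class attribute
extra = {"interval_between_versions": "90", "migrate_to_EOS": False}

stats = multiple_per_run.process(ccdb, test_path, delay=60 * 24, from_timestamp=1,
                                 to_timestamp=in_ten_years, extra_params=extra)
print(stats["deleted"], stats["preserved"], stats["updated"])
```

Because `clean_data` deletes whatever a previous (possibly failed) run left behind, the assertions no longer depend on the prior state of the shared test instance.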
@@ -21,7 +21,7 @@ class TestProduction(unittest.TestCase): one_minute = 60000 def setUp(self): - self.ccdb = Ccdb('http://137.138.47.222:8080') + self.ccdb = Ccdb(CCDB_TEST_URL) # ccdb-test but please use IP to avoid DNS alerts self.extra = {"interval_between_versions": "90", "migrate_to_EOS": False, "delete_first_last": True} self.path = "qc/TST/MO/repo/test" @@ -36,7 +36,8 @@ def test_1_finished_run(self): # Prepare data test_path = self.path + "/test_1_finished_run" - self.prepare_data(test_path, [150], [22*60], 123) + test_utils.clean_data(self.ccdb, test_path) + test_utils.prepare_data(self.ccdb, test_path, [150], [22*60], 123) objectsBefore = self.ccdb.getVersionsList(test_path) stats = multiple_per_run.process(self.ccdb, test_path, delay=60*24, from_timestamp=1, @@ -61,7 +62,8 @@ def test_2_runs(self): # Prepare data test_path = self.path + "/test_2_runs" - self.prepare_data(test_path, [150, 150], [3*60, 20*60], 123) + test_utils.clean_data(self.ccdb, test_path) + test_utils.prepare_data(self.ccdb, test_path, [150, 150], [3*60, 20*60], 123) stats = multiple_per_run.process(self.ccdb, test_path, delay=60*24, from_timestamp=1, to_timestamp=self.in_ten_years, extra_params=self.extra) @@ -82,7 +84,8 @@ def test_5_runs(self): # Prepare data test_path = self.path + "/test_5_runs" - self.prepare_data(test_path, [1*60, 2*60, 3*60+10, 4*60, 5*60], + test_utils.clean_data(self.ccdb, test_path) + test_utils.prepare_data(self.ccdb, test_path, [1*60, 2*60, 3*60+10, 4*60, 5*60], [60, 120, 190, 240, 24*60], 123) stats = multiple_per_run.process(self.ccdb, test_path, delay=60*24, from_timestamp=1, @@ -90,11 +93,11 @@ def test_5_runs(self): self.assertEqual(stats["deleted"], 60+120+190+240+300-18) self.assertEqual(stats["preserved"], 18) self.assertEqual(stats["updated"], 0) - + # and now re-run it to make sure we preserve the state stats = multiple_per_run.process(self.ccdb, test_path, delay=60*24, from_timestamp=1, to_timestamp=self.in_ten_years, extra_params=self.extra) - + self.assertEqual(stats["deleted"], 0) self.assertEqual(stats["preserved"], 18) self.assertEqual(stats["updated"], 0) @@ -110,7 +113,8 @@ def test_run_one_object(self): # Prepare data test_path = self.path + "/test_run_one_object" - self.prepare_data(test_path, [1], [25*60], 123) + test_utils.clean_data(self.ccdb, test_path) + test_utils.prepare_data(self.ccdb, test_path, [1], [25*60], 123) stats = multiple_per_run.process(self.ccdb, test_path, delay=60*24, from_timestamp=1, to_timestamp=self.in_ten_years, extra_params=self.extra) @@ -130,7 +134,8 @@ def test_run_two_object(self): # Prepare data test_path = self.path + "/test_run_two_object" - self.prepare_data(test_path, [2], [25*60], 123) + test_utils.clean_data(self.ccdb, test_path) + test_utils.prepare_data(self.ccdb, test_path, [2], [25*60], 123) stats = multiple_per_run.process(self.ccdb, test_path, delay=60*24, from_timestamp=1, to_timestamp=self.in_ten_years, extra_params=self.extra) @@ -150,7 +155,8 @@ def test_3_runs_with_period(self): # Prepare data test_path = self.path + "/test_3_runs_with_period" - self.prepare_data(test_path, [30,30, 30], [120,120,25*60], 123) + test_utils.clean_data(self.ccdb, test_path) + test_utils.prepare_data(self.ccdb, test_path, [30,30, 30], [120,120,25*60], 123) current_timestamp = int(time.time() * 1000) stats = multiple_per_run.process(self.ccdb, test_path, delay=60*24, from_timestamp=current_timestamp-29*60*60*1000, @@ -165,49 +171,8 @@ def test_asdf(self): datefmt='%d-%b-%y %H:%M:%S') logging.getLogger().setLevel(int(10)) test_path = 
self.path + "/asdf" - self.prepare_data(test_path, [70, 70, 70], [6*60, 6*60, 25*60], 55555) - - def prepare_data(self, path, run_durations: List[int], time_till_next_run: List[int], first_run_number: int): - """ - Prepare a data set populated with a number of runs. - run_durations contains the duration of each of these runs in minutes - time_till_next_run is the time between two runs in minutes. - The first element of time_till_next_run is used to separate the first two runs. - Both lists must have the same number of elements. - """ - - if len(run_durations) != len(time_till_next_run): - logging.error(f"run_durations and time_till_next_run must have the same length") - exit(1) - - total_duration = 0 - for a, b in zip(run_durations, time_till_next_run): - total_duration += a + b - logging.info(f"Total duration : {total_duration}") - - current_timestamp = int(time.time() * 1000) - cursor = current_timestamp - total_duration * 60 * 1000 - first_ts = cursor - data = {'part': 'part'} - run = first_run_number - - for run_duration, time_till_next in zip(run_durations, time_till_next_run): - metadata = {'RunNumber': str(run)} - logging.debug(f"cursor: {cursor}") - logging.debug(f"time_till_next: {time_till_next}") - - for i in range(run_duration): - to_ts = cursor + 24 * 60 * 60 * 1000 # a day - metadata2 = {**metadata, 'Created': str(cursor)} - version_info = ObjectVersion(path=path, validFrom=cursor, validTo=to_ts, metadata=metadata2, - createdAt=cursor) - self.ccdb.putVersion(version=version_info, data=data) - cursor += 1 * 60 * 1000 - - run += 1 - cursor += time_till_next * 60 * 1000 - - return first_ts + test_utils.clean_data(self.ccdb, test_path) + test_utils.prepare_data(self.ccdb, test_path, [70, 70, 70], [6*60, 6*60, 25*60], 55555) if __name__ == '__main__': diff --git a/Framework/script/RepoCleaner/tests/test_Production.py b/Framework/script/RepoCleaner/tests/test_Production.py index d0d8163a2a..24aa73ca09 100644 --- a/Framework/script/RepoCleaner/tests/test_Production.py +++ b/Framework/script/RepoCleaner/tests/test_Production.py @@ -1,10 +1,12 @@ import logging import time import unittest -from datetime import timedelta, date, datetime +from datetime import timedelta, datetime -from Ccdb import Ccdb, ObjectVersion -from rules import production +from qcrepocleaner.Ccdb import Ccdb, ObjectVersion +from qcrepocleaner.rules import production +from tests import test_utils +from tests.test_utils import CCDB_TEST_URL class TestProduction(unittest.TestCase): @@ -20,7 +22,7 @@ class TestProduction(unittest.TestCase): one_minute = 60000 def setUp(self): - self.ccdb = Ccdb('http://ccdb-test.cern.ch:8080') + self.ccdb = Ccdb(CCDB_TEST_URL) self.extra = {"delay_first_trimming": "30", "period_btw_versions_first": "10", "delay_final_trimming": "60", "period_btw_versions_final": "60"} self.path = "qc/TST/MO/repo/test" @@ -40,7 +42,8 @@ def test_start_run(self): # Prepare data test_path = self.path + "/test_start_run" - self.prepare_data(test_path, 25, 30, True, 60, False) + test_utils.clean_data(self.ccdb, test_path) + self.prepare_data_for_prod_test(test_path, 25, 30, True, 60, False) production.eor_dict.pop(int(self.run), None) stats = production.process(self.ccdb, test_path, 30, 1, self.in_ten_years, self.extra) @@ -74,7 +77,8 @@ def test_start_run_period(self): # Prepare data test_path = self.path + "/test_start_run_period" - first_ts = self.prepare_data(test_path, 25, 30, True, 60, False) + test_utils.clean_data(self.ccdb, test_path) + first_ts = self.prepare_data_for_prod_test(test_path, 25, 
30, True, 60, False) logging.getLogger().debug(f"first_ts : {first_ts}") # everything outside the period @@ -111,7 +115,8 @@ def test_mid_run(self): # Prepare data test_path = self.path + "/test_mid_run" - self.prepare_data(test_path, 90) + test_utils.clean_data(self.ccdb, test_path) + self.prepare_data_for_prod_test(test_path, 90) production.eor_dict.pop(int(self.run), None) stats = production.process(self.ccdb, test_path, 30, 1, self.in_ten_years, self.extra) @@ -141,7 +146,8 @@ def test_mid_run_period(self): # Prepare data test_path = self.path + "/test_mid_run_period" - first_ts = self.prepare_data(test_path, 90) + test_utils.clean_data(self.ccdb, test_path) + first_ts = self.prepare_data_for_prod_test(test_path, 90) logging.getLogger().debug(f"first_ts : {first_ts}") objects_versions = self.ccdb.getVersionsList(test_path) @@ -171,7 +177,8 @@ def test_run_finished(self): # Prepare data test_path = self.path + "/test_run_finished" - self.prepare_data(test_path, 290, 190, False, 0, True) + test_utils.clean_data(self.ccdb, test_path) + self.prepare_data_for_prod_test(test_path, 290, 190, False, 0, True) production.eor_dict[int(self.run)] = datetime.now() - timedelta(minutes=100) stats = production.process(self.ccdb, test_path, 30, 1, self.in_ten_years, self.extra) @@ -198,7 +205,8 @@ def test_run_finished_period(self): # Prepare data test_path = self.path + "/test_run_finished_period" - first_ts = self.prepare_data(test_path, 290, 190, False, 0, True) + test_utils.clean_data(self.ccdb, test_path) + first_ts = self.prepare_data_for_prod_test(test_path, 290, 190, False, 0, True) logging.getLogger().debug(f"first_ts : {first_ts}") production.eor_dict[int(self.run)] = datetime.now() - timedelta(minutes=100) @@ -214,8 +222,9 @@ def test_run_finished_period(self): self.assertTrue("trim1" not in objects_versions[6].metadata) self.assertTrue("preservation" in objects_versions[6].metadata) - def prepare_data(self, path, minutes_since_sor, duration_first_part=30, skip_first_part=False, - minutes_second_part=60, skip_second_part=False): + + def prepare_data_for_prod_test(self, path, minutes_since_sor, duration_first_part=30, skip_first_part=False, + minutes_second_part=60, skip_second_part=False): """ Prepare a data set starting `minutes_since_sor` in the past. 
The data is layed out in two parts @@ -242,7 +251,7 @@ def prepare_data(self, path, minutes_since_sor, duration_first_part=30, skip_fir if first_ts > from_ts: first_ts = from_ts to_ts = from_ts + 24 * 60 * 60 * 1000 # a day - version_info = ObjectVersion(path=path, validFrom=from_ts, validTo=to_ts, metadata=metadata) + version_info = ObjectVersion(path=path, validFrom=from_ts, createdAt=from_ts, validTo=to_ts, metadata=metadata) self.ccdb.putVersion(version=version_info, data=data) cursor = cursor + duration_first_part * 60 * 1000 @@ -257,7 +266,7 @@ def prepare_data(self, path, minutes_since_sor, duration_first_part=30, skip_fir if first_ts > from_ts: first_ts = from_ts to_ts = from_ts + 24 * 60 * 60 * 1000 # a day - version_info = ObjectVersion(path=path, validFrom=from_ts, validTo=to_ts, metadata=metadata) + version_info = ObjectVersion(path=path, validFrom=from_ts, createdAt=from_ts, validTo=to_ts, metadata=metadata) self.ccdb.putVersion(version=version_info, data=data) return first_ts diff --git a/Framework/script/RepoCleaner/tests/test_last_only.py b/Framework/script/RepoCleaner/tests/test_last_only.py index a8e1228789..5a58fd1a97 100644 --- a/Framework/script/RepoCleaner/tests/test_last_only.py +++ b/Framework/script/RepoCleaner/tests/test_last_only.py @@ -1,12 +1,11 @@ import logging import time import unittest -from datetime import timedelta, date, datetime - -from Ccdb import Ccdb, ObjectVersion -from rules import last_only - +from qcrepocleaner.Ccdb import Ccdb +from qcrepocleaner.rules import last_only +from tests import test_utils +from tests.test_utils import CCDB_TEST_URL class TestLastOnly(unittest.TestCase): @@ -22,7 +21,7 @@ class TestLastOnly(unittest.TestCase): one_minute = 60000 def setUp(self): - self.ccdb = Ccdb('http://ccdb-test.cern.ch:8080') + self.ccdb = Ccdb(CCDB_TEST_URL) # ccdb-test but please use IP to avoid DNS alerts self.extra = {} self.path = "qc/TST/MO/repo/test" self.run = 124321 @@ -30,7 +29,7 @@ def setUp(self): def test_last_only(self): """ - 59 versions + 60 versions grace period of 30 minutes """ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', @@ -39,10 +38,11 @@ def test_last_only(self): # Prepare data test_path = self.path + "/test_last_only" - self.prepare_data(test_path, 60) + test_utils.clean_data(self.ccdb, test_path) + test_utils.prepare_data(self.ccdb, test_path, [60], [0], 123) stats = last_only.process(self.ccdb, test_path, 30, 1, self.in_ten_years, self.extra) - self.assertEqual(stats["deleted"], 30) + self.assertEqual(stats["deleted"], 31) # 31 because between the time we produced the 60 versions and now, there is a shift self.assertEqual(stats["preserved"], 29) objects_versions = self.ccdb.getVersionsList(test_path) @@ -51,7 +51,7 @@ def test_last_only(self): def test_last_only_period(self): """ - 59 versions + 60 versions no grace period only 20 minutes in the middle are in the period """ @@ -61,40 +61,17 @@ def test_last_only_period(self): # Prepare data test_path = self.path + "/test_last_only_period" - self.prepare_data(test_path, 60) + test_utils.clean_data(self.ccdb, test_path) + test_utils.prepare_data(self.ccdb, test_path, [60], [0], 123) current_timestamp = int(time.time() * 1000) - stats = last_only.process(self.ccdb, test_path, 0, current_timestamp-41*60*1000, current_timestamp-19*60*1000, self.extra) - self.assertEqual(stats["deleted"], 19) + stats = last_only.process(self.ccdb, test_path, 0, current_timestamp-40*60*1000, current_timestamp-20*60*1000, self.extra) + 
self.assertEqual(stats["deleted"], 20) self.assertEqual(stats["preserved"], 40) objects_versions = self.ccdb.getVersionsList(test_path) self.assertEqual(len(objects_versions), 40) - def prepare_data(self, path, since_minutes): - """ - Prepare a data set starting `since_minutes` in the past. - 1 version per minute - Each data has a different run number. - """ - - current_timestamp = int(time.time() * 1000) - data = {'part': 'part'} - run = 1234 - counter = 0 - - for x in range(since_minutes): - counter = counter + 1 - from_ts = current_timestamp - x * 60 * 1000 - to_ts = current_timestamp - metadata = {'RunNumber': str(run)} - run = run + 1 - version_info = ObjectVersion(path=path, validFrom=from_ts, validTo=to_ts, metadata=metadata) - self.ccdb.putVersion(version=version_info, data=data) - - logging.debug(f"counter : {counter}" ) - - if __name__ == '__main__': unittest.main() diff --git a/Framework/script/RepoCleaner/tests/test_repoCleaner.py b/Framework/script/RepoCleaner/tests/test_repoCleaner.py index 8c907dd6e1..a3a518630b 100644 --- a/Framework/script/RepoCleaner/tests/test_repoCleaner.py +++ b/Framework/script/RepoCleaner/tests/test_repoCleaner.py @@ -1,11 +1,12 @@ -import unittest -import yaml - import importlib -from importlib.util import spec_from_loader, module_from_spec -from importlib.machinery import SourceFileLoader import os import sys +import unittest +from importlib.machinery import SourceFileLoader +from importlib.util import spec_from_loader + +import yaml + def import_path(path): # needed because o2-qc-repo-cleaner has no suffix module_name = os.path.basename(path).replace('-', '_') @@ -18,11 +19,10 @@ def import_path(path): # needed because o2-qc-repo-cleaner has no suffix sys.modules[module_name] = module return module - repoCleaner = import_path("../qcrepocleaner/o2-qc-repo-cleaner") parseConfig = repoCleaner.parseConfig Rule = repoCleaner.Rule -findMatchingRule = repoCleaner.findMatchingRule +findMatchingRules = repoCleaner.findMatchingRules class TestRepoCleaner(unittest.TestCase): @@ -64,12 +64,12 @@ def test_findMatchingRule(self): rules.append(Rule('task1/obj1', '120', 'policy1')) rules.append(Rule('task1/obj1', '120', 'policy2')) rules.append(Rule('task2/.*', '120', 'policy3')) - self.assertEqual(findMatchingRule(rules, 'task1/obj1').policy, 'policy1') - self.assertNotEqual(findMatchingRule(rules, 'task1/obj1').policy, 'policy2') - self.assertEqual(findMatchingRule(rules, 'task3/obj1'), None) - self.assertEqual(findMatchingRule(rules, 'task2/obj1/obj1').policy, 'policy3') + self.assertEqual(findMatchingRules(rules, 'task1/obj1')[0].policy, 'policy1') + self.assertNotEqual(findMatchingRules(rules, 'task1/obj1')[0].policy, 'policy2') + self.assertEqual(findMatchingRules(rules, 'task3/obj1'), []) + self.assertEqual(findMatchingRules(rules, 'task2/obj1/obj1')[0].policy, 'policy3') rules.append(Rule('.*', '0', 'policyAll')) - self.assertEqual(findMatchingRule(rules, 'task3/obj1').policy, 'policyAll') + self.assertEqual(findMatchingRules(rules, 'task3/obj1')[0].policy, 'policyAll') if __name__ == '__main__': diff --git a/Framework/script/RepoCleaner/tests/test_utils.py b/Framework/script/RepoCleaner/tests/test_utils.py new file mode 100644 index 0000000000..9abaf7b069 --- /dev/null +++ b/Framework/script/RepoCleaner/tests/test_utils.py @@ -0,0 +1,55 @@ +import logging +import time +from typing import List + +from qcrepocleaner.Ccdb import ObjectVersion + +CCDB_TEST_URL = 'http://128.142.249.62:8080' + +def clean_data(ccdb, path): + versions = 
ccdb.getVersionsList(path) + for v in versions: + ccdb.deleteVersion(v) + + +def prepare_data(ccdb, path, run_durations: List[int], time_till_next_run: List[int], first_run_number: int): + """ + Prepare a data set populated with a number of runs. + run_durations contains the duration of each of these runs in minutes + time_till_next_run is the time between two runs in minutes. + The first element of time_till_next_run is used to separate the first two runs. + Both lists must have the same number of elements. + """ + + if len(run_durations) != len(time_till_next_run): + logging.error(f"run_durations and time_till_next_run must have the same length") + exit(1) + + total_duration = 0 + for a, b in zip(run_durations, time_till_next_run): + total_duration += a + b + logging.info(f"Total duration : {total_duration}") + + current_timestamp = int(time.time() * 1000) + cursor = current_timestamp - total_duration * 60 * 1000 + first_ts = cursor + data = {'part': 'part'} + run = first_run_number + + for run_duration, time_till_next in zip(run_durations, time_till_next_run): + metadata = {'RunNumber': str(run)} + logging.debug(f"cursor: {cursor}") + logging.debug(f"time_till_next: {time_till_next}") + + for i in range(run_duration): + to_ts = cursor + 24 * 60 * 60 * 1000 # a day + metadata2 = {**metadata, 'Created': str(cursor)} + version_info = ObjectVersion(path=path, validFrom=cursor, validTo=to_ts, metadata=metadata2, + createdAt=cursor) + ccdb.putVersion(version=version_info, data=data) + cursor += 1 * 60 * 1000 + + run += 1 + cursor += time_till_next * 60 * 1000 + + return first_ts diff --git a/Framework/script/RepoCleaner/versionsList.json b/Framework/script/RepoCleaner/tests/versionsList.json similarity index 95% rename from Framework/script/RepoCleaner/versionsList.json rename to Framework/script/RepoCleaner/tests/versionsList.json index ae3e9a2aae..f5f0a8887a 100644 --- a/Framework/script/RepoCleaner/versionsList.json +++ b/Framework/script/RepoCleaner/tests/versionsList.json @@ -3,6 +3,7 @@ { "id": "0c576bb0-7304-11e9-8d02-200114580202", "validFrom": "1557479683554", + "Created": "1557479683554", "validUntil": "1872839683554", "initialValidity": "1872839683554", "createTime": "1557479683563", @@ -20,6 +21,7 @@ { "id": "06fb1e80-72f7-11e9-8d02-200114580202", "validFrom": "1557474091106", + "Created": "1557474091106", "validUntil": "1557479683553", "initialValidity": "1872834091106", "createTime": "1557474091112",
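The `Created` entries added to `tests/versionsList.json` mirror what the code now does: every version pushed by `prepare_data` carries an explicit creation timestamp (`createdAt` plus a `Created` metadata field) alongside its validity interval. A short sketch of that construction, with illustrative path and values:

```python
import time

from qcrepocleaner.Ccdb import Ccdb, ObjectVersion
from tests.test_utils import CCDB_TEST_URL

now = int(time.time() * 1000)                         # CCDB timestamps are in milliseconds
metadata = {'RunNumber': '123', 'Created': str(now)}

version = ObjectVersion(path="qc/TST/MO/repo/test/example",   # illustrative path
                        validFrom=now,
                        validTo=now + 24 * 60 * 60 * 1000,    # valid for one day
                        createdAt=now,
                        metadata=metadata)

Ccdb(CCDB_TEST_URL).putVersion(version=version, data={'part': 'part'})
```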