diff --git a/dispatcher/backend/src/common/constants.py b/dispatcher/backend/src/common/constants.py index 43d6c5e9..bc362b40 100644 --- a/dispatcher/backend/src/common/constants.py +++ b/dispatcher/backend/src/common/constants.py @@ -97,3 +97,6 @@ REQ_TIMEOUT_NOTIFICATIONS = int(os.getenv("REQ_TIMEOUT_NOTIFICATIONS", 5)) REQ_TIMEOUT_CMS = int(os.getenv("REQ_TIMEOUT_CMS", 10)) REQ_TIMEOUT_GHCR = int(os.getenv("REQ_TIMEOUT_GHCR", 10)) + +# OFFLINERS +ZIMIT_USE_RELAXED_SCHEMA = bool(os.getenv("ZIMIT_USE_RELAXED_SCHEMA")) diff --git a/dispatcher/backend/src/common/schemas/models.py b/dispatcher/backend/src/common/schemas/models.py index bec68c17..61e4e64c 100644 --- a/dispatcher/backend/src/common/schemas/models.py +++ b/dispatcher/backend/src/common/schemas/models.py @@ -2,6 +2,7 @@ from marshmallow import Schema, fields, pre_load, validate, validates_schema +from common import constants from common.enum import DockerImageName, Offliner, Platform from common.schemas import SerializableSchema, String from common.schemas.fields import ( @@ -33,6 +34,7 @@ WikihowFlagsSchema, YoutubeFlagsSchema, ZimitFlagsSchema, + ZimitFlagsSchemaRelaxed, ) @@ -84,7 +86,9 @@ def get_offliner_schema(offliner): Offliner.nautilus: NautilusFlagsSchema, Offliner.ted: TedFlagsSchema, Offliner.openedx: OpenedxFlagsSchema, - Offliner.zimit: ZimitFlagsSchema, + Offliner.zimit: ZimitFlagsSchemaRelaxed + if constants.ZIMIT_USE_RELAXED_SCHEMA + else ZimitFlagsSchema, Offliner.kolibri: KolibriFlagsSchema, Offliner.wikihow: WikihowFlagsSchema, Offliner.ifixit: IFixitFlagsSchema, diff --git a/dispatcher/backend/src/common/schemas/offliners/__init__.py b/dispatcher/backend/src/common/schemas/offliners/__init__.py index c0748dd7..252b1e70 100644 --- a/dispatcher/backend/src/common/schemas/offliners/__init__.py +++ b/dispatcher/backend/src/common/schemas/offliners/__init__.py @@ -10,7 +10,7 @@ from common.schemas.offliners.ted import TedFlagsSchema from common.schemas.offliners.wikihow import WikihowFlagsSchema from common.schemas.offliners.youtube import YoutubeFlagsSchema -from common.schemas.offliners.zimit import ZimitFlagsSchema +from common.schemas.offliners.zimit import ZimitFlagsSchema, ZimitFlagsSchemaRelaxed __all__ = ( "FreeCodeCampFlagsSchema", @@ -25,6 +25,7 @@ "WikihowFlagsSchema", "YoutubeFlagsSchema", "ZimitFlagsSchema", + "ZimitFlagsSchemaRelaxed", ) diff --git a/dispatcher/backend/src/common/schemas/offliners/zimit.py b/dispatcher/backend/src/common/schemas/offliners/zimit.py index 54556250..ce63d38a 100644 --- a/dispatcher/backend/src/common/schemas/offliners/zimit.py +++ b/dispatcher/backend/src/common/schemas/offliners/zimit.py @@ -468,3 +468,19 @@ class Meta: data_key="adminEmail", required=False, ) + + +class ZimitFlagsSchemaRelaxed(ZimitFlagsSchema): + """A Zimit flags schema with relaxed constraints on validation + + For now, only zim_file name is not checked anymore. Typically used for youzim.it + """ + + zim_file = String( + metadata={ + "label": "ZIM filename", + "description": "ZIM file name (based on --name if not provided). " + "Make sure to end with _{period}.zim", + }, + data_key="zim-file", + ) diff --git a/dispatcher/backend/src/routes/schedules/schedule.py b/dispatcher/backend/src/routes/schedules/schedule.py index 1502a89d..02900714 100644 --- a/dispatcher/backend/src/routes/schedules/schedule.py +++ b/dispatcher/backend/src/routes/schedules/schedule.py @@ -125,6 +125,10 @@ def post(self, session: so.Session, token: AccessToken.Payload): try: document = ScheduleSchema().load(request.get_json()) + flags_schema = ScheduleConfigSchema.get_offliner_schema( + document["config"]["task_name"] + ) + flags_schema().load(document["config"]["flags"]) except ValidationError as e: raise InvalidRequestJSON(e.messages) diff --git a/dispatcher/backend/src/tests/integration/routes/schedules/test_freecodecamp.py b/dispatcher/backend/src/tests/integration/routes/schedules/test_freecodecamp.py index 11ceebfa..0533dc93 100644 --- a/dispatcher/backend/src/tests/integration/routes/schedules/test_freecodecamp.py +++ b/dispatcher/backend/src/tests/integration/routes/schedules/test_freecodecamp.py @@ -1,9 +1,9 @@ class TestFreeCodeCamp: - def test_create_freecodecamp_schedule( + def test_create_freecodecamp_schedule_ok( self, client, access_token, garbage_collector ): schedule = { - "name": "fcc_javascript_test", + "name": "fcc_javascript_test_ok", "category": "freecodecamp", "enabled": False, "tags": [], @@ -14,7 +14,13 @@ def test_create_freecodecamp_schedule( "image": {"name": "openzim/freecodecamp", "tag": "1.0.0"}, "monitor": False, "platform": None, - "flags": {}, + "flags": { + "course": ("somecourse"), + "language": "eng", + "name": "acourse", + "title": "a title", + "description": "a description", + }, "resources": {"cpu": 3, "memory": 1024, "disk": 0}, }, "periodicity": "quarterly", @@ -24,9 +30,10 @@ def test_create_freecodecamp_schedule( response = client.post( url, json=schedule, headers={"Authorization": access_token} ) - assert response.status_code == 201 response_data = response.get_json() - garbage_collector.add_schedule_id(response_data["_id"]) + if "_id" in response_data: + garbage_collector.add_schedule_id(response_data["_id"]) + assert response.status_code == 201 patch_data = { "enabled": True, @@ -50,3 +57,48 @@ def test_create_freecodecamp_schedule( url, json=patch_data, headers={"Authorization": access_token} ) assert response.status_code == 204 + + def test_create_freecodecamp_schedule_ko( + self, client, access_token, garbage_collector + ): + schedule = { + "name": "fcc_javascript_test_ko", + "category": "freecodecamp", + "enabled": False, + "tags": [], + "language": {"code": "fr", "name_en": "French", "name_native": "Français"}, + "config": { + "task_name": "freecodecamp", + "warehouse_path": "/freecodecamp", + "image": {"name": "openzim/freecodecamp", "tag": "1.0.0"}, + "monitor": False, + "platform": None, + "flags": { + "course": ("somecourse"), + "language": "eng", + "name": "acourse", + "title": "a title", + "description": ( + "a description which is way way way way way way way way way " + "way way way way way way way way way too long" + ), + }, + "resources": {"cpu": 3, "memory": 1024, "disk": 0}, + }, + "periodicity": "quarterly", + } + + url = "/schedules/" + response = client.post( + url, json=schedule, headers={"Authorization": access_token} + ) + response_data = response.get_json() + if "_id" in response_data: + garbage_collector.add_schedule_id(response_data["_id"]) + assert response.status_code == 400 + assert "error_description" in response_data + assert "description" in response_data["error_description"] + assert ( + "Longer than maximum length 80." + in response_data["error_description"]["description"] + ) diff --git a/dispatcher/backend/src/tests/integration/routes/schedules/test_schedule.py b/dispatcher/backend/src/tests/integration/routes/schedules/test_schedule.py index 70249d32..c5c03315 100644 --- a/dispatcher/backend/src/tests/integration/routes/schedules/test_schedule.py +++ b/dispatcher/backend/src/tests/integration/routes/schedules/test_schedule.py @@ -265,7 +265,10 @@ def test_create_schedule_missing_keys(self, client, access_token, key): "flags": {}, "image": {"name": "openzim/phet", "tag": "latest"}, "monitor": False, + "platform": None, + "resources": {"cpu": 3, "memory": 1024, "disk": 0}, }, + "periodicity": "quarterly", } del schedule[key] @@ -273,7 +276,14 @@ def test_create_schedule_missing_keys(self, client, access_token, key): response = client.post( url, json=schedule, headers={"Authorization": access_token} ) + response_data = response.get_json() assert response.status_code == 400 + assert "error_description" in response_data + assert key in response_data["error_description"] + assert ( + "Missing data for required field." + in response_data["error_description"][key] + ) @pytest.mark.parametrize("key", ["warehouse_path", "flags", "image"]) def test_create_schedule_missing_config_keys(self, client, access_token, key): @@ -293,7 +303,10 @@ def test_create_schedule_missing_config_keys(self, client, access_token, key): "flags": {}, "image": {"name": "openzim/phet", "tag": "latest"}, "monitor": False, + "platform": None, + "resources": {"cpu": 3, "memory": 1024, "disk": 0}, }, + "periodicity": "quarterly", } del schedule["config"][key] @@ -301,7 +314,51 @@ def test_create_schedule_missing_config_keys(self, client, access_token, key): response = client.post( url, json=schedule, headers={"Authorization": access_token} ) + response_data = response.get_json() assert response.status_code == 400 + assert "error_description" in response_data + assert "config" in response_data["error_description"] + assert key in response_data["error_description"]["config"] + assert ( + "Missing data for required field." + in response_data["error_description"]["config"][key] + ) + + def test_create_schedule_flags_ko(self, client, access_token): + schedule = { + "name": "ifixit flags ko", + "category": "ifixit", + "enabled": False, + "tags": [], + "language": { + "code": "en", + "name_en": "English", + "name_native": "English", + }, + "config": { + "task_name": "ifixit", + "warehouse_path": "/ifixit", + "flags": {}, + "image": {"name": "openzim/ifixit", "tag": "latest"}, + "monitor": False, + "platform": "ifixit", + "resources": {"cpu": 3, "memory": 1024, "disk": 0}, + }, + "periodicity": "quarterly", + } + + url = "/schedules/" + response = client.post( + url, json=schedule, headers={"Authorization": access_token} + ) + response_data = response.get_json() + assert response.status_code == 400 + assert "error_description" in response_data + assert "language" in response_data["error_description"] + assert ( + "Missing data for required field." + in response_data["error_description"]["language"] + ) def test_image_names(self, client, schedule, access_token): url = "/schedules/{}/image-names".format(schedule["name"]) diff --git a/dispatcher/backend/src/tests/integration/routes/schedules/test_zimit.py b/dispatcher/backend/src/tests/integration/routes/schedules/test_zimit.py new file mode 100644 index 00000000..a705cdbc --- /dev/null +++ b/dispatcher/backend/src/tests/integration/routes/schedules/test_zimit.py @@ -0,0 +1,107 @@ +from collections import namedtuple +from typing import List + +import pytest + +from common import constants + + +def update_dict(dict: dict, key_path: str, new_value: any): + """Update a nested key value in a dictionary + + E.g if key_path is 'key1.subkey2', then dict['key1']['subkey2'] will be set""" + + # Split the key path into individual keys + keys = key_path.split(".") + + # Initialize a reference to the nested dictionary + current_dict = dict + + # Navigate through the nested structure + for key in keys[:-1]: + current_dict = current_dict[key] + + # Update the value using the last key + current_dict[keys[-1]] = new_value + + +class TestZimit: + mod = namedtuple("Modification", ["key_path", "new_value"]) + + @pytest.mark.parametrize( + "modifications, relaxed_schema, succeeds", + [ + ( + [mod(key_path="name", new_value="zimit_test_good_name_not_relaxed")], + False, + True, + ), + ( + [mod(key_path="name", new_value="zimit_test_good_name_relaxed")], + True, + True, + ), + ( + [ + mod(key_path="name", new_value="zimit_test_bad_name_not_relaxed"), + mod(key_path="config.flags.zim-file", new_value="bad_name"), + ], + False, + False, + ), + ( + [ + mod(key_path="name", new_value="zimit_test_bad_name_relaxed"), + mod(key_path="config.flags.zim-file", new_value="bad_name"), + ], + True, + True, + ), + ], + ) + def test_create_zimit_schedule_generic( + self, + client, + access_token, + garbage_collector, + modifications: List[mod], + relaxed_schema: bool, + succeeds: bool, + ): + constants.ZIMIT_USE_RELAXED_SCHEMA = relaxed_schema + schedule = { + "name": "zimit_test_ok", + "category": "other", + "enabled": False, + "tags": [], + "language": {"code": "fr", "name_en": "French", "name_native": "Français"}, + "config": { + "task_name": "zimit", + "warehouse_path": "/other", + "image": {"name": "openzim/zimit", "tag": "1.0.0"}, + "monitor": False, + "platform": None, + "flags": { + "name": "acme", + "url": "https://www.acme.com", + "zim-file": "acme_en_all_{period}.zim", + }, + "resources": {"cpu": 3, "memory": 1024, "disk": 0}, + }, + "periodicity": "quarterly", + } + for modification in modifications: + update_dict(schedule, modification.key_path, modification.new_value) + url = "/schedules/" + response = client.post( + url, json=schedule, headers={"Authorization": access_token} + ) + response_data = response.get_json() + if "_id" in response_data: + garbage_collector.add_schedule_id(response_data["_id"]) + if succeeds: + assert response.status_code == 201 + else: + assert response.status_code == 400 + assert "error_description" in response_data + assert "zim-file" in response_data["error_description"]