From 6f48b31729d10a9a30aee2c24b2feff92d61e227 Mon Sep 17 00:00:00 2001 From: Matt Bertrand Date: Mon, 29 Jul 2024 16:38:19 -0400 Subject: [PATCH 1/2] Assign mitxonline certificate type from api values --- learning_resources/etl/mitxonline.py | 30 +++++++++++++++++---- learning_resources/etl/mitxonline_test.py | 32 ++++++++++++++--------- test_json/mitxonline_courses.json | 12 +++++++++ test_json/mitxonline_programs.json | 27 ++++++++++++------- 4 files changed, 75 insertions(+), 26 deletions(-) diff --git a/learning_resources/etl/mitxonline.py b/learning_resources/etl/mitxonline.py index 5edbcc3a8e..73c6fb5de8 100644 --- a/learning_resources/etl/mitxonline.py +++ b/learning_resources/etl/mitxonline.py @@ -59,6 +59,24 @@ def _parse_datetime(value): return parse(value).replace(tzinfo=UTC) if value else None +def parse_certificate_type(certification_type: str) -> str: + """ + Parse the certification type + + Args: + certification_type(str): the certification type + + Returns: + str: the parsed certification type + """ + cert_map = { + "micromasters credential": CertificationType.micromasters.name, + "certificate of completion": CertificationType.completion.name, + } + + return cert_map.get(certification_type.lower(), CertificationType.none.name) + + def parse_page_attribute( mitx_json, attribute, @@ -251,7 +269,9 @@ def _transform_course(course): ), # a course is only published if it has a live url and published runs "professional": False, "certification": has_certification, - "certification_type": CertificationType.completion.name + "certification_type": parse_certificate_type( + course.get("certificate_type", CertificationType.none.name) + ) if has_certification else CertificationType.none.name, "image": _transform_image(course), @@ -309,10 +329,10 @@ def transform_programs(programs): "departments": parse_departments(program.get("departments", [])), "platform": PlatformType.mitxonline.name, "professional": False, - "certification": bool(parse_page_attribute(program, "page_url")), - "certification_type": CertificationType.completion.name - if bool(parse_page_attribute(program, "page_url")) - else CertificationType.none.name, + "certification": program.get("certificate_type") is not None, + "certification_type": parse_certificate_type( + program.get("certificate_type", CertificationType.none.name) + ), "topics": transform_topics(program.get("topics", []), OFFERED_BY["code"]), "description": clean_data(parse_page_attribute(program, "description")), "url": parse_page_attribute(program, "page_url", is_url=True), diff --git a/learning_resources/etl/mitxonline_test.py b/learning_resources/etl/mitxonline_test.py index 900aa57f22..3845969803 100644 --- a/learning_resources/etl/mitxonline_test.py +++ b/learning_resources/etl/mitxonline_test.py @@ -23,6 +23,7 @@ _transform_run, extract_courses, extract_programs, + parse_certificate_type, parse_page_attribute, parse_program_prices, transform_courses, @@ -136,9 +137,9 @@ def test_mitxonline_transform_programs( "certification": bool( program_data.get("page", {}).get("page_url", None) is not None ), - "certification_type": CertificationType.completion.name - if program_data.get("page", {}).get("page_url", None) is not None - else CertificationType.none.name, + "certification_type": parse_certificate_type( + program_data["certificate_type"] + ), "image": _transform_image(program_data), "description": clean_data( program_data.get("page", {}).get("description", None) @@ -318,15 +319,9 @@ def test_mitxonline_transform_courses(settings, mock_mitxonline_courses_data): for course_run in course_data["courseruns"] ], ), - "certification_type": CertificationType.completion.name - if parse_certification( - OFFERED_BY["code"], - [ - _transform_run(course_run, course_data) - for course_run in course_data["courseruns"] - ], - ) - else CertificationType.none.name, + "certification_type": parse_certificate_type( + course_data["certificate_type"] + ), "topics": transform_topics(course_data["topics"], OFFERED_BY["code"]), "url": ( urljoin( @@ -488,3 +483,16 @@ def test_parse_prices(current_price, page_price, expected): assert parse_program_prices(program_data) == sorted( [float(price) for price in expected] ) + + +@pytest.mark.parametrize( + ("cert_type", "expected_cert_type"), + [ + ("Certificate of Completion", CertificationType.completion.name), + ("MicroMasters Credential", CertificationType.micromasters.name), + ("Pro Cert", CertificationType.none.name), + ], +) +def test_parse_certificate_type(cert_type, expected_cert_type): + """Test that the certificate type is correctly parsed""" + assert parse_certificate_type(cert_type) == expected_cert_type diff --git a/test_json/mitxonline_courses.json b/test_json/mitxonline_courses.json index 51ed56c859..3764758fdb 100644 --- a/test_json/mitxonline_courses.json +++ b/test_json/mitxonline_courses.json @@ -9,6 +9,7 @@ "readable_id": "course-v1:MITxT+14.100PEx", "next_run_id": null, "departments": [], + "certificate_type": "Certificate of Completion", "page": { "feature_image_src": "/media/original_images/14.100x_xGKvQiK.jpg?v=8fe621cc0d80ebe4e57fd1b240e9f95d8c2b31db", "page_url": "/courses/course-v1:MITxT+14.100PEx/", @@ -75,6 +76,7 @@ "name": "Chemical Engineering" } ], + "certificate_type": "Certificate of Completion", "page": { "feature_image_src": "/media/original_images/4-drkpurple.jpg?v=b6024e4bdf229b618e36f2e942f2ae6f173dd9d3", "page_url": "/courses/course-v1:MITxT+10.50.CH04x/", @@ -128,6 +130,7 @@ "name": "Chemical Engineering" } ], + "certificate_type": "Certificate of Completion", "page": { "feature_image_src": "/media/original_images/7-drkBlue_5uVzT1f.jpg?v=0f9790b2ce8c75e91f5fbf01c5373d36fb75a81e", "page_url": "/courses/course-v1:MITxT+10.50.CH07x/", @@ -181,6 +184,7 @@ "name": "Chemical Engineering" } ], + "certificate_type": "Certificate of Completion", "page": { "feature_image_src": "/media/original_images/9-red.jpg?v=71fc9e04b14d210d7f5062dee34dc4e32db6967a", "page_url": "/courses/course-v1:MITxT+10.50.CH09x/", @@ -234,6 +238,7 @@ "name": "Chemical Engineering" } ], + "certificate_type": "Certificate of Completion", "page": { "feature_image_src": "/media/original_images/6-ltBlue.jpg?v=af26cc60badb0a7b289813f9ccbf4c8f77ad8490", "page_url": "/courses/course-v1:MITxT+10.50.CH06x/", @@ -287,6 +292,7 @@ "name": "Chemical Engineering" } ], + "certificate_type": "Certificate of Completion", "page": { "feature_image_src": "/media/original_images/drkGreen_375.png?v=67357416dbff6268f5463acc4ea8847b8a08428a", "page_url": "/courses/course-v1:MITxT+10.50.CH02x/", @@ -340,6 +346,7 @@ "name": "Chemical Engineering" } ], + "certificate_type": "Certificate of Completion", "page": { "feature_image_src": "/media/original_images/1-ltGreen.jpg?v=f40e3581924ca77414ae3977c8cdb1a94765e46e", "page_url": "/courses/course-v1:MITxT+10.50.CH01x/", @@ -393,6 +400,7 @@ "name": "Chemical Engineering" } ], + "certificate_type": "Certificate of Completion", "page": { "feature_image_src": "/media/original_images/8-orange.jpg?v=0be48100571b661b7c8a5b8de061f25232d91301", "page_url": "/courses/course-v1:MITxT+10.50.CH08x/", @@ -446,6 +454,7 @@ "name": "Chemical Engineering" } ], + "certificate_type": "Certificate of Completion", "page": { "feature_image_src": "/media/original_images/3-purple.jpg?v=82c25a37e88b44529a7f4439348f07f099308d1a", "page_url": "/courses/course-v1:MITxT+10.50.CH03x/", @@ -499,6 +508,7 @@ "name": "Chemical Engineering" } ], + "certificate_type": "Certificate of Completion", "page": { "feature_image_src": "/media/original_images/5-brown_2.2.jpg?v=ff1b83927266cef6dcfe8c0c30c0adf18205847b", "page_url": "/courses/course-v1:MITxT+10.50.CH05x/", @@ -552,6 +562,7 @@ "name": "Economics" } ], + "certificate_type": "Certificate of Completion", "page": { "feature_image_src": "/media/original_images/14.01x_Course_Image_MITxO.png?v=f6aacc1df41d983b5cda78201a7fc86751229a62", "page_url": "/courses/course-v1:MITxT+14.01x/", @@ -634,6 +645,7 @@ "name": "Management" } ], + "certificate_type": "Certificate of Completion", "page": { "feature_image_src": "/media/original_images/15-356-2x_Image.jpg?v=3a1d8836abb252670315f0cf70875d8a2feda19a", "page_url": "/courses/course-v1:MITxT+15.356.2x/", diff --git a/test_json/mitxonline_programs.json b/test_json/mitxonline_programs.json index bd38a94270..11b45d4b5c 100644 --- a/test_json/mitxonline_programs.json +++ b/test_json/mitxonline_programs.json @@ -116,7 +116,8 @@ "departments": ["Mathematics"], "live": true, "topics": [], - "availability": "scheduled" + "availability": "scheduled", + "certificate_type": "MicroMasters Credential" }, { "title": "Analysis of Transport Phenomena", @@ -279,7 +280,8 @@ "departments": ["Chemical Engineering"], "live": true, "topics": [], - "availability": "anytime" + "availability": "anytime", + "certificate_type": "Certificate of Completion" }, { "title": "Data, Economics, and Design of Policy", @@ -442,7 +444,8 @@ "name": "Social Sciences" } ], - "availability": null + "availability": null, + "certificate_type": "Certificate of Completion" }, { "title": "Data, Economics, and Design of Policy: International Development", @@ -593,7 +596,8 @@ "name": "Social Sciences" } ], - "availability": "scheduled" + "availability": "scheduled", + "certificate_type": "Certificate of Completion" }, { "title": "Data, Economics, and Design of Policy: Public Policy", @@ -722,7 +726,8 @@ "departments": ["Economics"], "live": true, "topics": [], - "availability": null + "availability": null, + "certificate_type": "Certificate of Completion" }, { "title": "Differential Calculus", @@ -813,7 +818,8 @@ "departments": ["Mathematics"], "live": true, "topics": [], - "availability": "scheduled" + "availability": "scheduled", + "certificate_type": "Certificate of Completion" }, { "title": "Introductory Electricity and Magnetics", @@ -904,7 +910,8 @@ "departments": ["Materials Science and Engineering"], "live": true, "topics": [], - "availability": "anytime" + "availability": "anytime", + "certificate_type": "Certificate of Completion" }, { "title": "xMinor in Materials for Electronic, Optical, and Magnetic Devices", @@ -995,7 +1002,8 @@ "departments": ["Materials Science and Engineering"], "live": true, "topics": [], - "availability": "scheduled" + "availability": "scheduled", + "certificate_type": "Certificate of Completion" }, { "title": "xSeries in Introduction to Mechanics", @@ -1098,7 +1106,8 @@ "departments": [], "live": true, "topics": [], - "availability": "anytime" + "availability": "anytime", + "certificate_type": "MicroMasters Credential" } ] } From 33acc09818891bdf58f2409c399ebaa7cbf4a1af Mon Sep 17 00:00:00 2001 From: Matt Bertrand Date: Mon, 29 Jul 2024 16:57:17 -0400 Subject: [PATCH 2/2] tweak cert type assignment --- learning_resources/etl/mitxonline.py | 6 +++++- learning_resources/etl/mitxonline_test.py | 12 +++++++----- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/learning_resources/etl/mitxonline.py b/learning_resources/etl/mitxonline.py index 73c6fb5de8..65ce28c65a 100644 --- a/learning_resources/etl/mitxonline.py +++ b/learning_resources/etl/mitxonline.py @@ -74,7 +74,11 @@ def parse_certificate_type(certification_type: str) -> str: "certificate of completion": CertificationType.completion.name, } - return cert_map.get(certification_type.lower(), CertificationType.none.name) + certification_code = cert_map.get(certification_type.lower()) + if not certification_code: + log.error("Unknown MITx Online certification type: %s", certification_type) + return CertificationType.completion.name + return certification_code def parse_page_attribute( diff --git a/learning_resources/etl/mitxonline_test.py b/learning_resources/etl/mitxonline_test.py index 3845969803..fab3246f45 100644 --- a/learning_resources/etl/mitxonline_test.py +++ b/learning_resources/etl/mitxonline_test.py @@ -486,13 +486,15 @@ def test_parse_prices(current_price, page_price, expected): @pytest.mark.parametrize( - ("cert_type", "expected_cert_type"), + ("cert_type", "expected_cert_type", "error"), [ - ("Certificate of Completion", CertificationType.completion.name), - ("MicroMasters Credential", CertificationType.micromasters.name), - ("Pro Cert", CertificationType.none.name), + ("Certificate of Completion", CertificationType.completion.name, False), + ("MicroMasters Credential", CertificationType.micromasters.name, False), + ("Pro Cert", CertificationType.completion.name, True), ], ) -def test_parse_certificate_type(cert_type, expected_cert_type): +def test_parse_certificate_type(mocker, cert_type, expected_cert_type, error): """Test that the certificate type is correctly parsed""" + mock_log = mocker.patch("learning_resources.etl.mitxonline.log.error") assert parse_certificate_type(cert_type) == expected_cert_type + assert mock_log.call_count == (1 if error else 0)