diff --git a/learning_resources/etl/constants.py b/learning_resources/etl/constants.py index f3dffd1c8d..9759842f20 100644 --- a/learning_resources/etl/constants.py +++ b/learning_resources/etl/constants.py @@ -26,8 +26,8 @@ CourseLoaderConfig = namedtuple( # noqa: PYI024 "CourseLoaderConfig", - ["prune", "offered_by", "runs"], - defaults=[True, OfferedByLoaderConfig(), LearningResourceRunLoaderConfig()], + ["prune", "offered_by", "runs", "fetch_only"], + defaults=[True, OfferedByLoaderConfig(), LearningResourceRunLoaderConfig(), False], ) ProgramLoaderConfig = namedtuple( # noqa: PYI024 diff --git a/learning_resources/etl/loaders.py b/learning_resources/etl/loaders.py index 52ba23ddf5..0da8afd06f 100644 --- a/learning_resources/etl/loaders.py +++ b/learning_resources/etl/loaders.py @@ -259,7 +259,7 @@ def load_run( return learning_resource_run -def load_course( +def load_course( # noqa: C901 resource_data: dict, blocklist: list[str], duplicates: list[dict], @@ -335,6 +335,20 @@ def load_course( unique_field_value, ) resource_id = deduplicated_course_id or readable_id + + if config.fetch_only: + # Do not upsert the course, it should already exist. + # Just find it and return it. + resource = LearningResource.objects.filter( + readable_id=resource_id, + platform=platform, + resource_type=LearningResourceType.course.name, + published=True, + ).first() + if not resource: + log.warning("No published resource found for %s", resource_id) + return resource + if unique_field_name != READABLE_ID_FIELD: # Some dupes may result, so we need to unpublish resources # with matching unique values and different readable_ids @@ -367,6 +381,10 @@ def load_course( if config.prune: # mark runs no longer included here as unpublished + # This generally should not be done when loading courses + # from a program (config.prune=False). + # The course ETL should be the ultimate source of truth for + # courses and their runs. for run in learning_resource.runs.exclude( run_id__in=run_ids_to_update_or_create ).filter(published=True): diff --git a/learning_resources/etl/loaders_test.py b/learning_resources/etl/loaders_test.py index e908b9f0af..79554c01ea 100644 --- a/learning_resources/etl/loaders_test.py +++ b/learning_resources/etl/loaders_test.py @@ -598,6 +598,36 @@ def test_load_course_dupe_urls(unique_url): assert course.published is (unique_url is False) +@pytest.mark.parametrize("course_exists", [True, False]) +def test_load_course_fetch_only(mocker, course_exists): + """When fetch_only is True, course should just be fetched from db""" + mock_next_runs_prices = mocker.patch( + "learning_resources.etl.loaders.load_next_start_date_and_prices" + ) + mock_warn = mocker.patch("learning_resources.etl.loaders.log.warning") + platform = LearningResourcePlatformFactory.create(code=PlatformType.mitpe.name) + if course_exists: + resource = LearningResourceFactory.create(is_course=True, platform=platform) + else: + resource = LearningResourceFactory.build(is_course=True, platform=platform) + + props = { + "readable_id": resource.readable_id, + "platform": platform.code, + "offered_by": {"code": OfferedBy.ocw.name}, + } + result = load_course(props, [], [], config=CourseLoaderConfig(fetch_only=True)) + if course_exists: + assert result == resource + mock_warn.assert_not_called() + else: + assert result is None + mock_warn.assert_called_once_with( + "No published resource found for %s", resource.readable_id + ) + mock_next_runs_prices.assert_not_called() + + @pytest.mark.parametrize("run_exists", [True, False]) @pytest.mark.parametrize( "availability", [RunAvailability.archived.value, RunAvailability.current.value] diff --git a/learning_resources/etl/pipelines.py b/learning_resources/etl/pipelines.py index 7ccf48d062..c694f8a9ba 100644 --- a/learning_resources/etl/pipelines.py +++ b/learning_resources/etl/pipelines.py @@ -35,7 +35,9 @@ micromasters_etl = compose( load_programs( ETLSource.micromasters.name, - config=ProgramLoaderConfig(prune=True, courses=CourseLoaderConfig()), + config=ProgramLoaderConfig( + prune=True, courses=CourseLoaderConfig(fetch_only=True) + ), ), micromasters.transform, micromasters.extract, @@ -53,7 +55,9 @@ mitxonline_programs_etl = compose( load_programs( ETLSource.mitxonline.name, - config=ProgramLoaderConfig(courses=CourseLoaderConfig(prune=True), prune=True), + config=ProgramLoaderConfig( + courses=CourseLoaderConfig(fetch_only=True), prune=True + ), ), mitxonline.transform_programs, mitxonline.extract_programs, @@ -74,7 +78,7 @@ prolearn_programs_etl = compose( load_programs( ETLSource.prolearn.name, - config=ProgramLoaderConfig(courses=CourseLoaderConfig(prune=True)), + config=ProgramLoaderConfig(courses=CourseLoaderConfig(fetch_only=True)), ), prolearn.transform_programs, prolearn.extract_programs, @@ -91,7 +95,9 @@ xpro_programs_etl = compose( load_programs( ETLSource.xpro.name, - config=ProgramLoaderConfig(courses=CourseLoaderConfig(prune=True)), + config=ProgramLoaderConfig( + courses=CourseLoaderConfig(fetch_only=True), prune=True + ), ), xpro.transform_programs, xpro.extract_programs, diff --git a/learning_resources/etl/pipelines_test.py b/learning_resources/etl/pipelines_test.py index af94bda19d..6872e5feb9 100644 --- a/learning_resources/etl/pipelines_test.py +++ b/learning_resources/etl/pipelines_test.py @@ -79,7 +79,9 @@ def test_mitxonline_programs_etl(): mock_load_programs.assert_called_once_with( ETLSource.mitxonline.name, mock_transform.return_value, - config=ProgramLoaderConfig(courses=CourseLoaderConfig(prune=True), prune=True), + config=ProgramLoaderConfig( + courses=CourseLoaderConfig(fetch_only=True), prune=True + ), ) assert result == mock_load_programs.return_value @@ -146,7 +148,9 @@ def test_xpro_programs_etl(): mock_load_programs.assert_called_once_with( ETLSource.xpro.name, mock_transform.return_value, - config=ProgramLoaderConfig(courses=CourseLoaderConfig(prune=True)), + config=ProgramLoaderConfig( + courses=CourseLoaderConfig(fetch_only=True), prune=True + ), ) assert result == mock_load_programs.return_value @@ -296,7 +300,9 @@ def test_micromasters_etl(): mock_load_programs.assert_called_once_with( ETLSource.micromasters.name, mock_transform.return_value, - config=ProgramLoaderConfig(prune=True, courses=CourseLoaderConfig()), + config=ProgramLoaderConfig( + prune=True, courses=CourseLoaderConfig(fetch_only=True) + ), ) assert result == mock_load_programs.return_value @@ -317,7 +323,7 @@ def test_prolearn_programs_etl(): mock_load_programs.assert_called_once_with( ETLSource.prolearn.name, mock_transform.return_value, - config=ProgramLoaderConfig(courses=CourseLoaderConfig(prune=True)), + config=ProgramLoaderConfig(courses=CourseLoaderConfig(fetch_only=True)), ) assert result == mock_load_programs.return_value