Skip to content

Commit

Permalink
feat: add cron task that runs the minimal training pipeline nightly
Browse files Browse the repository at this point in the history
  • Loading branch information
bhearsum committed Dec 18, 2024
1 parent 70307e3 commit 1201a66
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 0 deletions.
13 changes: 13 additions & 0 deletions .cron.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
jobs:
# Run the minimal pipeline once a day to support integration testing before
# worker image changes are made (see https://bugzilla.mozilla.org/show_bug.cgi?id=1937882).
# These runs will pick up cached tasks, so most of the time this will simply
# end up running `all-pipeline`.
- name: run-pipeline
job:
type: decision-task
# We don't use Treeherder, but this is a required field.
treeherder-symbol: pipeline
target-tasks-method: train-target-tasks
when:
- {hour: 0, minute: 0}
15 changes: 15 additions & 0 deletions taskcluster/translations_taskgraph/actions/train.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import collections.abc
import json
import logging

Expand Down Expand Up @@ -58,6 +59,15 @@ def validate_pretrained_models(params):
)


def deep_update(d, u):
    """Recursively merge mapping ``u`` into ``d`` and return ``d``.

    Values from ``u`` take precedence. A mapping value in ``u`` is merged
    key-by-key into the matching entry of ``d`` instead of replacing it
    wholesale; any other value simply overwrites the entry in ``d``.

    ``d`` is modified in place (and also returned); ``u`` is not modified.
    Callers that must keep ``d`` pristine should pass in a deep copy.

    Args:
        d: The mutable mapping to update.
        u: The mapping whose contents are overlaid on top of ``d``.

    Returns:
        The same ``d`` object, after merging.
    """
    for key, value in u.items():
        if isinstance(value, collections.abc.Mapping):
            current = d.get(key)
            # An existing non-mapping entry (scalar, list, None, ...) cannot
            # be merged into, so the mapping from `u` replaces it outright
            # instead of crashing on `.get` during the recursive call.
            if not isinstance(current, collections.abc.Mapping):
                current = {}
            d[key] = deep_update(current, value)
        else:
            d[key] = value
    return d


@register_callback_action(
name="train",
title="Train",
Expand Down Expand Up @@ -397,6 +407,11 @@ def train_action(parameters, graph_config, input, task_group_id, task_id):
# etc.

parameters = dict(parameters)
# Although we provide defaults to everything in the `schema` when
# registering the action in the decorator above, these are not passed
# along when the action runs. To make sure we have the proper defaults
# we use the defaults with the input provided overlaid on top of them.
input = deep_update(defaults, input)

start_stage = input.pop("start-stage", None)
if start_stage:
Expand Down
9 changes: 9 additions & 0 deletions taskcluster/translations_taskgraph/parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

import logging
from pathlib import Path
from taskgraph.parameters import extend_parameters_schema
from voluptuous import Extra, Optional, Required
import yaml

logger = logging.getLogger(__name__)

# By default, provide a very minimal config for CI that runs very quickly. This allows
# the pipeline to be validated in CI. The production training configs should override
Expand Down Expand Up @@ -103,3 +105,10 @@ def deep_setdefault(dict_, defaults):
def get_decision_parameters(graph_config, parameters):
    """Fill in training-config defaults on ``parameters`` (mutated in place).

    Ensures a ``training_config`` entry exists, applies the values returned by
    ``get_ci_training_config()`` as defaults via ``deep_setdefault``, and, for
    cron-triggered ``train-target-tasks`` runs, forces ``wandb-publication``
    off.

    ``graph_config`` is accepted but not used here.
    """
    parameters.setdefault("training_config", {})
    deep_setdefault(parameters, get_ci_training_config())

    # Only cron-triggered runs of the training pipeline need the override
    # below; everything else is finished at this point.
    if parameters["tasks_for"] != "cron":
        return
    if parameters["target_tasks_method"] != "train-target-tasks":
        return

    # The pipeline runs on a cron schedule to enable integration testing when
    # worker images change (see https://bugzilla.mozilla.org/show_bug.cgi?id=1937882).
    # Those runs must _never_ publish to W&B, so that it is not cluttered
    # with data of no value.
    logger.info("Overriding wandb-publication to be False for cron pipeline run")
    parameters["training_config"]["wandb-publication"] = False

0 comments on commit 1201a66

Please sign in to comment.