
Commit

fix
tommydangerous committed May 14, 2024
1 parent 0279580 commit 3377b7a
Showing 4 changed files with 51 additions and 50 deletions.
2 changes: 1 addition & 1 deletion mlops/unit_2_training/custom/dashboard_data_source.py
@@ -10,12 +10,12 @@

@custom
def source(
model: Booster,
settings: Tuple[
Dict[str, Union[bool, float, int, str]],
csr_matrix,
Series,
],
model: Booster,
**kwargs,
) -> Tuple[Booster, csr_matrix, csr_matrix]:
X_train, y_train, _ = settings
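
For context on this hunk: in Mage, the outputs of the blocks listed under a block's upstream_blocks are passed to the decorated function as positional arguments in that same order, so moving model after settings suggests the settings-producing block now precedes the model-producing block upstream. A minimal sketch of that convention, assuming Mage's standard block template (names are illustrative, not taken from this commit):

if 'custom' not in globals():
    from mage_ai.data_preparation.decorators import custom


@custom
def source(first_upstream_output, second_upstream_output, **kwargs):
    # Upstream outputs arrive positionally in the order the blocks appear
    # under `upstream_blocks` in the pipeline's metadata.yaml, so a parameter
    # swap like the one in this hunk mirrors a change in that ordering.
    return first_upstream_output, second_upstream_output
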
6 changes: 4 additions & 2 deletions mlops/unit_2_training/data_exporters/xgboost.py
@@ -12,6 +12,7 @@

@data_exporter
def train(
training_set: Dict[str, Union[Series, csr_matrix]],
settings: Tuple[
Dict[str, Union[bool, float, int, str]],
csr_matrix,
@@ -31,5 +31,6 @@ def train(
hyperparameters,
verbose_eval=kwargs.get('verbose_eval', 100),
)

return model

# DictVectorizer for online inference.
return model, training_set['build'][6]
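
The exporter now returns the fitted Booster together with training_set['build'][6], which the new comment identifies as the DictVectorizer to reuse for online inference. A minimal sketch of how that pair would be used at serving time (the predict helper and feature payload are assumptions, not part of this commit):

from sklearn.feature_extraction import DictVectorizer
from xgboost import Booster, DMatrix


def predict(model: Booster, vectorizer: DictVectorizer, features: dict) -> float:
    # Encode the raw feature dict with the same DictVectorizer fitted during
    # training so the sparse matrix columns line up with what the Booster saw.
    X = vectorizer.transform([features])
    return float(model.predict(DMatrix(X))[0])
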
10 changes: 4 additions & 6 deletions mlops/unit_4_triggering/pipelines/xgboost_training/metadata.yaml
@@ -52,14 +52,14 @@ blocks:
language: python
name: XGBoost
retry_config: null
status: executed
status: updated
timeout: null
type: data_exporter
upstream_blocks:
- training_set
- hyperparameter_tuning/xgboost
uuid: xgboost
- all_upstream_blocks_executed: true
- all_upstream_blocks_executed: false
color: pink
configuration:
file_path: custom/dashboard_data_source.py
@@ -72,7 +72,7 @@ blocks:
language: python
name: Dashboard data source
retry_config: null
status: failed
status: executed
timeout: null
type: custom
upstream_blocks:
@@ -106,7 +106,5 @@ variables:
early_stopping_rounds: 1
max_depth: 1
max_evaluations: 1
verbose_eval: 10000
verbosity: 0
variables_dir: /home/src/mage_data/unit_4_triggering
variables_dir: /root/.mage_data/unit_4_triggering
widgets: []
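
On the variables hunk: Mage exposes the pipeline variables defined in metadata.yaml to blocks through **kwargs, which is how verbose_eval=kwargs.get('verbose_eval', 100) in xgboost.py reads its value; judging from the hunk counts, the verbose_eval: 10000 and verbosity: 0 overrides are removed here, so the exporter falls back to its in-code default of 100. A hypothetical illustration of that lookup pattern (defaults other than verbose_eval are assumed):

def train(training_set, settings, **kwargs):
    # Pipeline variables from metadata.yaml arrive in kwargs alongside other
    # runtime settings; missing keys fall back to the in-code defaults.
    early_stopping_rounds = kwargs.get('early_stopping_rounds', 50)  # assumed default
    max_evaluations = kwargs.get('max_evaluations', 50)              # assumed default
    verbose_eval = kwargs.get('verbose_eval', 100)  # no pipeline override after this commit
    ...
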
83 changes: 42 additions & 41 deletions mlops/utils/logging.py
@@ -98,47 +98,48 @@ def track_experiment(

dataset_inputs = []

for dataset_name, dataset, tags in [
('dataset', training_set, dict(context='training')),
(
'targets',
training_targets.to_numpy() if training_targets is not None else None,
dict(context='training'),
),
('dataset', validation_set, dict(context='validation')),
(
'targets',
validation_targets.to_numpy() if validation_targets is not None else None,
dict(context='validation'),
),
('predictions', predictions, dict(context='training')),
]:
if dataset is None:
continue

dataset_from = None
if isinstance(dataset, pd.DataFrame):
dataset_from = from_pandas
elif isinstance(dataset, np.ndarray):
dataset_from = from_numpy

if dataset_from:
ds = dataset_from(dataset, name=dataset_name)._to_mlflow_entity()
ds_input = DatasetInput(ds, tags=[InputTag(k, v) for k, v in tags.items()])
dataset_inputs.append(ds_input)

if verbosity:
context = tags['context']
if dataset_from:
print(f'Logged input for {context} {dataset_name}.')
else:
print(
f'Unable to log input for {context} {dataset_name}, '
f'{type(dataset)} not registered.'
)

if len(dataset_inputs) >= 1:
client.log_inputs(run_id, dataset_inputs)
# This increases memory too much.
# for dataset_name, dataset, tags in [
# ('dataset', training_set, dict(context='training')),
# (
# 'targets',
# training_targets.to_numpy() if training_targets is not None else None,
# dict(context='training'),
# ),
# ('dataset', validation_set, dict(context='validation')),
# (
# 'targets',
# validation_targets.to_numpy() if validation_targets is not None else None,
# dict(context='validation'),
# ),
# ('predictions', predictions, dict(context='training')),
# ]:
# if dataset is None:
# continue

# dataset_from = None
# if isinstance(dataset, pd.DataFrame):
# dataset_from = from_pandas
# elif isinstance(dataset, np.ndarray):
# dataset_from = from_numpy

# if dataset_from:
# ds = dataset_from(dataset, name=dataset_name)._to_mlflow_entity()
# ds_input = DatasetInput(ds, tags=[InputTag(k, v) for k, v in tags.items()])
# dataset_inputs.append(ds_input)

# if verbosity:
# context = tags['context']
# if dataset_from:
# print(f'Logged input for {context} {dataset_name}.')
# else:
# print(
# f'Unable to log input for {context} {dataset_name}, '
# f'{type(dataset)} not registered.'
# )

# if len(dataset_inputs) >= 1:
# client.log_inputs(run_id, dataset_inputs)

if model:
log_model = None
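
The logging.py change comments out the MLflow dataset-input logging path (from_pandas / from_numpy -> DatasetInput -> client.log_inputs) because, per the new leading comment, it increases memory too much. If some record of the inputs is still wanted, a lighter-weight alternative (a sketch, not what this commit does) would be to log only dataset shapes as run parameters:

import numpy as np
import pandas as pd
from mlflow import MlflowClient


def log_dataset_shapes(client: MlflowClient, run_id: str, datasets: dict) -> None:
    # Record only the shape of each dataset as a run parameter instead of
    # constructing full MLflow dataset entities, keeping memory overhead small.
    for name, dataset in datasets.items():
        if dataset is None:
            continue
        if isinstance(dataset, (pd.DataFrame, np.ndarray)):
            client.log_param(run_id, f'{name}_shape', str(dataset.shape))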
