Skip to content

Commit

Permalink
Fix tensorboard logging dir
Browse files Browse the repository at this point in the history
  • Loading branch information
chiragjn committed Dec 27, 2023
1 parent cde4bc6 commit ab685e5
Showing 1 changed file with 4 additions and 2 deletions.
6 changes: 4 additions & 2 deletions train.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
unset_hf_deepspeed_config,
)
from transformers.integrations.integration_utils import TensorBoardCallback
from transformers.training_args import ParallelMode
from transformers.training_args import ParallelMode, default_logdir
from transformers.utils import is_torch_tf32_available
from transformers.utils import logging as hf_logging_utils

Expand Down Expand Up @@ -100,6 +100,9 @@ def __post_init__(self):
raise ValueError(
f"Auto batch size finder is not supported with Deepspeed because of bugs with model preparation: https://github.com/huggingface/transformers/issues/24558"
)
if self.logging_dir is not None:
self.logging_dir = os.path.join(self.logging_dir, default_logdir())
os.makedirs(self.logging_dir, exist_ok=True)
super().__post_init__()


Expand Down Expand Up @@ -409,7 +412,6 @@ def setup(training_arguments: HFTrainingArguments, other_arguments: OtherArgumen
_setup_logging(training_arguments=training_arguments)
_maybe_set_custom_tempdir()
_maybe_set_torch_max_memory(device=training_arguments.local_rank)

if other_arguments.use_flash_attention:
import flash_attn as _

Expand Down

0 comments on commit ab685e5

Please sign in to comment.