Skip to content

Commit

Permalink
Remove conformer OOM fixes from this branch
Browse files (browse the repository at this point in the history)
  • Loading branch information
priyakasimbeg committed Oct 11, 2023
1 parent 24edc3b commit 09ceeec
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,7 @@ def train_stddev(self):

@property
def max_allowed_runtime_sec(self) -> int:
return 122136 # ~34h extended max_allowed_run_time for conformer OOM issue

return 61_068 # ~17 hours

@property
def eval_period_time_sec(self) -> int:
Expand Down
8 changes: 4 additions & 4 deletions submission_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@

# disable only for deepspeech if it works fine for other workloads.
os.environ['XLA_FLAGS'] = '--xla_gpu_enable_triton_gemm=false'
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:256'

# TODO(znado): make a nicer registry of workloads to look up in.
BASE_WORKLOADS_DIR = workloads.BASE_WORKLOADS_DIR
Expand Down Expand Up @@ -216,8 +215,10 @@ def train_once(
model_params, model_state = workload.init_model_fn(
model_init_rng, dropout_rate, aux_dropout_rate)
if FLAGS.framework == 'pytorch' and FLAGS.torch_compile:
compile_error_workloads = ['ogbg', 'criteo1tb', 'librispeech_conformer']
eager_backend_workloads = ['librispeech_deepspeech']
compile_error_workloads = ['ogbg', 'criteo1tb']
eager_backend_workloads = [
'librispeech_conformer', 'librispeech_deepspeech'
]
aot_eager_backend_workloads = []
if FLAGS.workload in compile_error_workloads:
logging.warning(
Expand Down Expand Up @@ -601,7 +602,6 @@ def main(_):
# Prevent OOM on librispeech conformer.
if FLAGS.workload == 'librispeech_conformer':
os.environ['XLA_PYTHON_CLIENT_MEM_FRACTION'] = '0.85'
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:256'

# Extend path according to framework.
workload_metadata['workload_path'] = os.path.join(
Expand Down

0 comments on commit 09ceeec

Please sign in to comment.