Issue 452: Added the assertion for consistency check and evaluation frequency check #788

Open · wants to merge 3 commits into main

Changes from all commits
16 changes: 8 additions & 8 deletions algorithmic_efficiency/random_utils.py
@@ -18,30 +18,30 @@

 # Annoyingly, RandomState(seed) requires seed to be in [0, 2 ** 32 - 1] (an
 # unsigned int), while RandomState.randint only accepts and returns signed ints.
-MAX_INT32 = 2**31
-MIN_INT32 = -MAX_INT32
+MAX_UINT32 = 2**32 - 1
+MIN_UINT32 = 0

 SeedType = Union[int, list, np.ndarray]


 def _signed_to_unsigned(seed: SeedType) -> SeedType:
   if isinstance(seed, int):
-    return seed % 2**32
+    return seed % MAX_UINT32
   if isinstance(seed, list):
-    return [s % 2**32 for s in seed]
+    return [s % MAX_UINT32 for s in seed]
   if isinstance(seed, np.ndarray):
-    return np.array([s % 2**32 for s in seed.tolist()])
+    return np.array([s % MAX_UINT32 for s in seed.tolist()])


 def _fold_in(seed: SeedType, data: Any) -> List[Union[SeedType, Any]]:
   rng = np.random.RandomState(seed=_signed_to_unsigned(seed))
-  new_seed = rng.randint(MIN_INT32, MAX_INT32, dtype=np.int32)
+  new_seed = rng.randint(MIN_UINT32, MAX_UINT32, dtype=np.uint32)
   return [new_seed, data]


 def _split(seed: SeedType, num: int = 2) -> SeedType:
   rng = np.random.RandomState(seed=_signed_to_unsigned(seed))
-  return rng.randint(MIN_INT32, MAX_INT32, dtype=np.int32, size=[num, 2])
+  return rng.randint(MIN_UINT32, MAX_UINT32, dtype=np.uint32, size=[num, 2])


 def _PRNGKey(seed: SeedType) -> SeedType:  # pylint: disable=invalid-name
@@ -75,5 +75,5 @@ def split(seed: SeedType, num: int = 2) -> SeedType:
 def PRNGKey(seed: SeedType) -> SeedType:  # pylint: disable=invalid-name
   if FLAGS.framework == 'jax':
     _check_jax_install()
-    return jax_rng.PRNGKey(seed)
+    return jax_rng.key(seed)
   return _PRNGKey(seed)
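
For context only (not part of the diff): a minimal sketch of the constraint described by the module comment above. np.random.RandomState rejects seeds outside [0, 2**32 - 1], which is why _signed_to_unsigned folds signed seeds into that range before the generator is constructed; the seed value below is arbitrary.

import numpy as np

signed_seed = -12345
try:
  np.random.RandomState(seed=signed_seed)  # raises ValueError: seed out of range
except ValueError as err:
  print('rejected:', err)

# Folding the seed into [0, 2**32 - 1], as _signed_to_unsigned does, makes it valid.
rng = np.random.RandomState(seed=signed_seed % 2**32)
print(rng.randint(0, 2**32 - 1, dtype=np.uint32))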
25 changes: 20 additions & 5 deletions submission_runner.py
@@ -12,6 +12,7 @@
     --num_tuning_trials=3 \
     --experiment_dir=/home/znado/experiment_dir \
     --experiment_name=baseline
+    --skip_eval=True/False
 """

 import datetime
@@ -88,6 +89,10 @@
 flags.DEFINE_string('librispeech_tokenizer_vocab_path',
                     '',
                     'Location to librispeech tokenizer.')
+flags.DEFINE_boolean(
+    'skip_eval',
+    True,
+    help='True to skip eval on the datasets and False otherwise.')

 flags.DEFINE_enum(
     'framework',
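
As a usage sketch (not part of the diff, and the argv below is hypothetical): the new flag behaves like any other absl boolean flag, so it can be disabled on the command line with --skip_eval=False or --noskip_eval.

from absl import flags

FLAGS = flags.FLAGS
flags.DEFINE_boolean('skip_eval', True,
                     'True to skip eval on the datasets and False otherwise.')

# Parse a hypothetical argv; with the real runner this corresponds to
# `python3 submission_runner.py ... --skip_eval=False`.
FLAGS(['submission_runner.py', '--skip_eval=False'])
print(FLAGS.skip_eval)  # -> False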
@@ -327,7 +332,10 @@ def train_once(
   train_state['last_step_end_time'] = global_start_time

   logging.info('Starting training loop.')
-  goals_reached = (
+  if FLAGS.skip_eval:
+    goals_reached = train_state['validation_goal_reached']
+  else:
+    goals_reached = (
       train_state['validation_goal_reached'] and
       train_state['test_goal_reached'])
   while train_state['is_time_remaining'] and \
@@ -402,9 +410,12 @@
       train_state['test_goal_reached'] = (
           workload.has_reached_test_target(latest_eval_result) or
           train_state['test_goal_reached'])
-      goals_reached = (
-          train_state['validation_goal_reached'] and
-          train_state['test_goal_reached'])
+      if FLAGS.skip_eval:
+        goals_reached = train_state['validation_goal_reached']
+      else:
+        goals_reached = (
+            train_state['validation_goal_reached'] and
+            train_state['test_goal_reached'])
       # Save last eval time.
       eval_end_time = get_time()
       train_state['last_eval_time'] = eval_end_time
@@ -487,7 +498,11 @@ def train_once(
       preemption_count=preemption_count,
       checkpoint_dir=log_dir,
       save_intermediate_checkpoints=FLAGS.save_intermediate_checkpoints)

+  assert abs(metrics['eval_results'][-1][1]['total_duration'] -
+             (train_state['accumulated_submission_time'] +
+              train_state['accumulated_logging_time'] +
+              train_state['accumulated_eval_time'])) <= 10
+  assert int(train_state['accumulated_submission_time'] // workload.eval_period_time_sec) <= len(metrics['eval_results']) + 2
   return train_state['accumulated_submission_time'], metrics


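For readability (not part of the diff), here are the two new checks written out with hypothetical numbers; the variable names below simply stand in for the corresponding train_state, metrics, and workload fields.

# Illustrative values only.
total_duration = 3605.0   # stored with the last entry of metrics['eval_results']
submission_time = 3000.0  # train_state['accumulated_submission_time']
logging_time = 300.0      # train_state['accumulated_logging_time']
eval_time = 310.0         # train_state['accumulated_eval_time']
eval_period_sec = 600.0   # workload.eval_period_time_sec
num_eval_results = 7      # len(metrics['eval_results'])

# Consistency check: the reported duration should agree with the sum of the
# accumulated timers to within a 10-second tolerance.
assert abs(total_duration - (submission_time + logging_time + eval_time)) <= 10

# Evaluation frequency check: with roughly one eval per eval_period_sec of
# submission time, the recorded evals should not lag the expected count by
# more than a slack of 2.
assert int(submission_time // eval_period_sec) <= num_eval_results + 2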