Skip to content

Commit

Permalink
chore: re-enable WandB initialization and metric logging in neurons/miner.py
Browse files Browse the repository at this point in the history
  • Loading branch information
distributedstatemachine committed Dec 24, 2024
1 parent 4550f19 commit 9fce7d7
Showing 1 changed file with 33 additions and 33 deletions.
66 changes: 33 additions & 33 deletions neurons/miner.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,31 +136,31 @@ def __init__(self):
tplr.logger.error(f"Commitment error: {str(e)}")
tplr.commit(self.subtensor, self.wallet, self.config.netuid)

# # Init Wandb.
# # Ensure the wandb directory exists
# wandb_dir = os.path.join(os.getcwd(), 'wandb')
# os.makedirs(wandb_dir, exist_ok=True)

# # Define the run ID file path inside the wandb directory
# run_id_file = os.path.join(wandb_dir, f"wandb_run_id_M{self.uid}_{tplr.__version__}.txt")

# # Attempt to read the existing run ID
# if os.path.exists(run_id_file):
# with open(run_id_file, 'r') as f:
# run_id = f.read().strip()
# tplr.logger.info(f"Resuming WandB run with id {run_id}")
# else:
# run_id = None
# tplr.logger.info("Starting a new WandB run.")

# # Initialize WandB
# self.wandb = tplr.initialize_wandb(
# run_prefix='M',
# uid=self.uid,
# config=self.config,
# group='miner',
# job_type='training'
# )
# Init Wandb.
# Ensure the wandb directory exists so the run-ID file below has a home.
wandb_dir = os.path.join(os.getcwd(), 'wandb')
os.makedirs(wandb_dir, exist_ok=True)

# Define the run ID file path inside the wandb directory.
# The file name is keyed by miner UID and tplr version, so a version bump
# intentionally starts a fresh run instead of resuming the old one.
run_id_file = os.path.join(wandb_dir, f"wandb_run_id_M{self.uid}_{tplr.__version__}.txt")

# Attempt to read the existing run ID
if os.path.exists(run_id_file):
with open(run_id_file, 'r') as f:
run_id = f.read().strip()
tplr.logger.info(f"Resuming WandB run with id {run_id}")
else:
run_id = None
tplr.logger.info("Starting a new WandB run.")

# Initialize WandB.
# NOTE(review): `run_id` is computed above but never passed into
# `tplr.initialize_wandb(...)`, and nothing in this hunk writes
# `run_id_file` for a new run — the resume logic looks inert as written.
# Presumably `initialize_wandb` should receive `run_id` (and persist a new
# one to `run_id_file`); confirm against tplr's implementation.
self.wandb = tplr.initialize_wandb(
run_prefix='M',
uid=self.uid,
config=self.config,
group='miner',
job_type='training'
)

# Init model.
tplr.logger.info('\n' + '-' * 40 + ' Hparams ' + '-' * 40)
Expand Down Expand Up @@ -537,14 +537,14 @@ async def run(self):
# Remaining time budget for this window; negative (red) means the step
# overran the window, positive (green) means it finished early.
window_time_delta = self.window_time - end_step
window_delta_str = f"[red]{window_time_delta:.2f}[/red]" if window_time_delta < 0 else f"[green]+{window_time_delta:.2f}[/green]"
tplr.logger.info(f"{tplr.P(window, end_step - start_step)}[{window_delta_str}]: Finished step.")
# wandb.log({
# "miner/loss": step_loss,
# "miner/tokens_per_step": tokens_per_step,
# "miner/tokens_per_second": tokens_per_second,
# "miner/sample_rate": self.sample_rate,
# "miner/utilization": train_duration / (end_step - start_step),
# "miner/learning_rate": self.scheduler.get_last_lr()[0]
# }, step=self.global_step)
# NOTE(review): this calls the module-level `wandb.log(...)`, but the
# __init__ in this same commit stores the run handle as
# `self.wandb = tplr.initialize_wandb(...)`. Unless `wandb` is imported
# and `initialize_wandb` makes the run globally active, this likely
# raises NameError or logs to no run — probably should be
# `self.wandb.log(...)`; confirm.
wandb.log({
"miner/loss": step_loss,
"miner/tokens_per_step": tokens_per_step,
"miner/tokens_per_second": tokens_per_second,
"miner/sample_rate": self.sample_rate,
"miner/utilization": train_duration / (end_step - start_step),
"miner/learning_rate": self.scheduler.get_last_lr()[0]
}, step=self.global_step)

# Catch keyboard interrupt.
except KeyboardInterrupt:
Expand Down

0 comments on commit 9fce7d7

Please sign in to comment.