Skip to content

Commit

Permalink
add results + configs
Browse files Browse the repository at this point in the history
  • Loading branch information
markus583 committed Jun 17, 2024
1 parent 08b34c0 commit d7af259
Show file tree
Hide file tree
Showing 323 changed files with 117,262 additions and 1,482 deletions.
22 changes: 22 additions & 0 deletions configs/SM/sat_sm_12l.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"output_dir": "sat_sm_12l",
"lim_lookahead": false,
"block_size": 256,
"no_sm_corruption": false,
"overwrite_output_dir": true,
"evaluation_strategy": "steps",
"eval_steps": 250,
"report_to": "wandb",
"learning_rate": 0.00003,
"warmup_steps": 500,
"per_device_train_batch_size": 128,
"per_device_eval_batch_size": 128,
"weight_decay": 0.01,
"push_to_hub": false,
"save_total_limit": 1,
"save_strategy": "steps",
"save_steps": 1000,
"load_best_model_at_end": false,
"max_steps": 20000,
"num_layers": 12
}
22 changes: 22 additions & 0 deletions configs/SM/sat_sm_12l_ll.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
    "output_dir": "sat_sm_12l_ll",
"lim_lookahead": true,
"block_size": 256,
"no_sm_corruption": false,
"overwrite_output_dir": true,
"evaluation_strategy": "steps",
"eval_steps": 250,
"report_to": "wandb",
"learning_rate": 0.00003,
"warmup_steps": 500,
"per_device_train_batch_size": 128,
"per_device_eval_batch_size": 128,
"weight_decay": 0.01,
"push_to_hub": false,
"save_total_limit": 1,
"save_strategy": "steps",
"save_steps": 1000,
"load_best_model_at_end": false,
"max_steps": 20000,
"num_layers": 12
}
23 changes: 23 additions & 0 deletions configs/SM/sat_sm_12l_no-pretraining.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{
    "output_dir": "sat_sm_12l_no-pretraining",
"lim_lookahead": true,
"block_size": 256,
"no_sm_corruption": false,
    "without_pretraining": true,
"overwrite_output_dir": true,
"evaluation_strategy": "steps",
"eval_steps": 250,
"report_to": "wandb",
"learning_rate": 0.00003,
"warmup_steps": 500,
"per_device_train_batch_size": 128,
"per_device_eval_batch_size": 128,
"weight_decay": 0.01,
"push_to_hub": false,
"save_total_limit": 1,
"save_strategy": "steps",
"save_steps": 1000,
"load_best_model_at_end": false,
"max_steps": 20000,
"num_layers": 12
}
22 changes: 22 additions & 0 deletions configs/SM/sat_sm_12l_only_clean.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
    "output_dir": "sat_sm_12l_only_clean",
"lim_lookahead": true,
"block_size": 256,
"no_sm_corruption": true,
"overwrite_output_dir": true,
"evaluation_strategy": "steps",
"eval_steps": 250,
"report_to": "wandb",
"learning_rate": 0.00003,
"warmup_steps": 500,
"per_device_train_batch_size": 128,
"per_device_eval_batch_size": 128,
"weight_decay": 0.01,
"push_to_hub": false,
"save_total_limit": 1,
"save_strategy": "steps",
"save_steps": 1000,
"load_best_model_at_end": false,
"max_steps": 20000,
"num_layers": 12
}
22 changes: 22 additions & 0 deletions configs/SM/sat_sm_1l.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"output_dir": "sat_sm_1l",
"lim_lookahead": false,
"block_size": 256,
"no_sm_corruption": false,
"overwrite_output_dir": true,
"evaluation_strategy": "steps",
"eval_steps": 250,
"report_to": "wandb",
"learning_rate": 0.00003,
"warmup_steps": 500,
"per_device_train_batch_size": 128,
"per_device_eval_batch_size": 128,
"weight_decay": 0.01,
"push_to_hub": false,
"save_total_limit": 1,
"save_strategy": "steps",
"save_steps": 1000,
"load_best_model_at_end": false,
"max_steps": 20000,
"num_layers": 1
}
22 changes: 22 additions & 0 deletions configs/SM/sat_sm_3l.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"output_dir": "sat_sm_3l",
"lim_lookahead": false,
"block_size": 256,
"no_sm_corruption": false,
"overwrite_output_dir": true,
"evaluation_strategy": "steps",
"eval_steps": 250,
"report_to": "wandb",
"learning_rate": 0.00003,
"warmup_steps": 500,
"per_device_train_batch_size": 128,
"per_device_eval_batch_size": 128,
"weight_decay": 0.01,
"push_to_hub": false,
"save_total_limit": 1,
"save_strategy": "steps",
"save_steps": 1000,
"load_best_model_at_end": false,
"max_steps": 20000,
"num_layers": 3
}
22 changes: 22 additions & 0 deletions configs/SM/sat_sm_6l.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"output_dir": "sat_sm_6l",
"lim_lookahead": false,
"block_size": 256,
"no_sm_corruption": false,
"overwrite_output_dir": true,
"evaluation_strategy": "steps",
"eval_steps": 250,
"report_to": "wandb",
"learning_rate": 0.00003,
"warmup_steps": 500,
"per_device_train_batch_size": 128,
"per_device_eval_batch_size": 128,
"weight_decay": 0.01,
"push_to_hub": false,
"save_total_limit": 1,
"save_strategy": "steps",
"save_steps": 1000,
"load_best_model_at_end": false,
"max_steps": 20000,
"num_layers": 6
}
22 changes: 22 additions & 0 deletions configs/SM/sat_sm_9l.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
    "output_dir": "sat_sm_9l",
"lim_lookahead": false,
"block_size": 256,
"no_sm_corruption": false,
"overwrite_output_dir": true,
"evaluation_strategy": "steps",
"eval_steps": 250,
"report_to": "wandb",
"learning_rate": 0.00003,
"warmup_steps": 500,
"per_device_train_batch_size": 128,
"per_device_eval_batch_size": 128,
"weight_decay": 0.01,
"push_to_hub": false,
"save_total_limit": 1,
"save_strategy": "steps",
"save_steps": 1000,
"load_best_model_at_end": false,
"max_steps": 20000,
"num_layers": 9
}
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
40 changes: 0 additions & 40 deletions configs/canine_stratify_0.1_3layers_lookahead_128.json

This file was deleted.

6 changes: 3 additions & 3 deletions configs/peft/lora.json → configs/lora/lora_12l.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"model_name_or_path": "xlmr-3l-v3_look48_lc0.1-mix2",
"output_dir": "xlmr-3l-v4_LL_lora-v2_ep30_s10k",
"model_name_or_path": "segment-any-text/sat-12l",
"output_dir": "sat-12l-LL_lora",
"block_size": 256,
"eval_stride": 128,
"do_train": true,
Expand All @@ -16,7 +16,7 @@
"num_train_epochs": 30,
"logging_steps": 50,
"report_to": "wandb",
"wandb_project": "sentence-peft-v2",
"wandb_project": "sentence",
"save_steps": 100000000,
"remove_unused_columns": false,
"one_sample_per_line": false,
Expand Down
13 changes: 7 additions & 6 deletions configs/peft/adapter.json → configs/lora/lora_3l.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"model_name_or_path": "xlmr-normal-p-v3",
"output_dir": "xlmr-3l-v3_adapter_rf32_ep20_v2_100-1k-10k",
"model_name_or_path": "segment-any-text/sat-3l",
"output_dir": "sat-3l-LL_lora",
"block_size": 256,
"eval_stride": 128,
"do_train": true,
Expand All @@ -13,9 +13,10 @@
"preprocessing_num_workers": 1,
"learning_rate": 3e-4,
"fp16": false,
"num_train_epochs": 20,
"num_train_epochs": 30,
"logging_steps": 50,
"report_to": "wandb",
"wandb_project": "sentence",
"save_steps": 100000000,
"remove_unused_columns": false,
"one_sample_per_line": false,
Expand All @@ -29,9 +30,9 @@
"use_subwords": true,
"custom_punctuation_file": "punctuation_xlmr_unk.txt",
"log_level": "warning",
"adapter_config": "seq_bn[reduction_factor=32]",
"adapter_config": "lora[r=16,alpha=32,intermediate_lora=True]",
"weight_decay": 0.0,
"auxiliary_remove_prob": 0.0,
"do_process": false,
"n_train_steps": [100, 1000, 10000]
"train_adapter": true,
"subsample": 10000
}
42 changes: 42 additions & 0 deletions configs/lora/lora_lyrics.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
{
"model_name_or_path": "segment-any-text/sat-12l",
"output_dir": "sat-12l-no-LL_lora_lyrics",
"block_size": 512,
"eval_stride": 256,
"do_train": true,
"do_eval": true,
"per_device_train_batch_size": 64,
"per_device_eval_batch_size": 32,
"gradient_accumulation_steps": 1,
"eval_accumulation_steps": 8,
"evaluation_strategy": "epoch",
"dataloader_num_workers": 1,
"preprocessing_num_workers": 1,
"learning_rate": 3e-4,
"fp16": false,
"num_train_epochs": 30,
"logging_steps": 50,
"report_to": "wandb",
"wandb_project": "sentence",
"save_steps": 100000000,
"remove_unused_columns": false,
"one_sample_per_line": true,
"do_sentence_training": true,
"do_auxiliary_training": false,
"warmup_ratio": 0.1,
"non_punctuation_sample_ratio": null,
"prediction_loss_only": true,
"use_auxiliary": true,
"ddp_timeout": 3600,
"use_subwords": true,
"custom_punctuation_file": "punctuation_xlmr_unk.txt",
"log_level": "warning",
"adapter_config": "lora[r=16,alpha=32,intermediate_lora=True]",
"weight_decay": 0.0,
"auxiliary_remove_prob": 0.0,
"text_path": "data/lyrics.pth",
"skip_eval_loss": false,
"shuffle": false,
"train_adapter": true,
"subsample": 10000
}
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
{
"model_name_or_path": "xlm-roberta-base",
"output_dir": "xlm-roberta-base_lora-v2_ep30_mldbW-verses_bs512",
"output_dir": "xlmr-12l_lora_lyrics",
"block_size": 512,
"eval_stride": 256,
"do_train": true,
"do_eval": true,
"per_device_train_batch_size": 32,
"per_device_train_batch_size": 64,
"per_device_eval_batch_size": 32,
"gradient_accumulation_steps": 2,
"gradient_accumulation_steps": 1,
"eval_accumulation_steps": 8,
"evaluation_strategy": "epoch",
"dataloader_num_workers": 1,
Expand All @@ -16,7 +17,7 @@
"num_train_epochs": 30,
"logging_steps": 50,
"report_to": "wandb",
"wandb_project": "lyrics-peft",
"wandb_project": "sentence",
"save_steps": 100000000,
"remove_unused_columns": false,
"one_sample_per_line": true,
Expand All @@ -25,17 +26,17 @@
"warmup_ratio": 0.1,
"non_punctuation_sample_ratio": null,
"prediction_loss_only": true,
"use_auxiliary": false,
"use_auxiliary": true,
"ddp_timeout": 3600,
"use_subwords": true,
"custom_punctuation_file": "punctuation_xlmr_unk.txt",
"log_level": "warning",
"adapter_config": "lora[r=16,alpha=32,intermediate_lora=True]",
"weight_decay": 0.0,
"auxiliary_remove_prob": 0.0,
"text_path": "data/all_data_11_05-lyrics.pth",
"text_path": "data/lyrics.pth",
"skip_eval_loss": false,
"shuffle": false,
"train_adapter": true,
"subsample": null
"subsample": 10000
}
Loading

0 comments on commit d7af259

Please sign in to comment.