Skip to content
This repository has been archived by the owner on Mar 19, 2024. It is now read-only.

[WIP] Byol Repro (v2) #454

Open
wants to merge 19 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
# @package _global_
# BYOL: Bootstrap Your Own Latent - https://arxiv.org/abs/2006.07733
# Configuration based on BYOL paper - Appendix C -- C.1
# Linear evaluation benchmark: the pretrained trunk is frozen and linear
# heads are probed at several feature depths (conv1 .. res5).
config:
  VERBOSE: True
  LOG_FREQUENCY: 200
  TEST_ONLY: False
  TEST_EVERY_NUM_EPOCH: 1
  TEST_MODEL: True
  SEED_VALUE: 1
  MULTI_PROCESSING_METHOD: forkserver
  HOOKS:
    PERF_STATS:
      MONITOR_PERF_STATS: True
  DATA:
    NUM_DATALOADER_WORKERS: 5
    TRAIN:
      DATA_SOURCES: [disk_folder]
      LABEL_SOURCES: [disk_folder]
      DATASET_NAMES: [imagenet1k_folder]
      BATCHSIZE_PER_REPLICA: 32
      TRANSFORMS:
        - name: RandomResizedCrop
          size: 224
          interpolation: 3
        - name: RandomHorizontalFlip
        - name: ToTensor
        - name: Normalize
          mean: [0.485, 0.456, 0.406]
          std: [0.229, 0.224, 0.225]
      MMAP_MODE: True
      COPY_TO_LOCAL_DISK: False
      COPY_DESTINATION_DIR: /tmp/imagenet1k/
    TEST:
      DATA_SOURCES: [disk_folder]
      LABEL_SOURCES: [disk_folder]
      DATASET_NAMES: [imagenet1k_folder]
      BATCHSIZE_PER_REPLICA: 32
      TRANSFORMS:
        - name: Resize
          size: 256
          interpolation: 3
        - name: CenterCrop
          size: 224
        - name: ToTensor
        - name: Normalize
          mean: [0.485, 0.456, 0.406]
          std: [0.229, 0.224, 0.225]
      MMAP_MODE: True
      COPY_TO_LOCAL_DISK: False
      COPY_DESTINATION_DIR: /tmp/imagenet1k/
  METERS:
    name: accuracy_list_meter
    accuracy_list_meter:
      # One accuracy meter per probed feature depth (conv1, res2..res5).
      num_meters: 5
      topk_values: [1, 5]
  TRAINER:
    TRAIN_STEP_NAME: standard_train_step
  MODEL:
    FEATURE_EVAL_SETTINGS:
      EVAL_MODE_ON: True
      FREEZE_TRUNK_ONLY: True
      SHOULD_FLATTEN_FEATS: False
      # Per-layer pooling so each probed feature map reaches a comparable
      # flattened size before its linear head: [kernel, stride, padding].
      LINEAR_EVAL_FEAT_POOL_OPS_MAP: [
        ["conv1", ["AvgPool2d", [[10, 10], 10, 4]]],
        ["res2", ["AvgPool2d", [[16, 16], 8, 0]]],
        ["res3", ["AvgPool2d", [[13, 13], 5, 0]]],
        ["res4", ["AvgPool2d", [[8, 8], 3, 0]]],
        ["res5", ["AvgPool2d", [[6, 6], 1, 0]]],
      ]
    TRUNK:
      NAME: resnet
      RESNETS:
        DEPTH: 50
    HEAD:
      # One eval head per probed layer; in_channels matches the ResNet-50
      # stage width, dims are [pooled_flattened_feats, num_classes].
      PARAMS: [
        ["eval_mlp", {"in_channels": 64, "dims": [9216, 1000]}],
        ["eval_mlp", {"in_channels": 256, "dims": [9216, 1000]}],
        ["eval_mlp", {"in_channels": 512, "dims": [8192, 1000]}],
        ["eval_mlp", {"in_channels": 1024, "dims": [9216, 1000]}],
        ["eval_mlp", {"in_channels": 2048, "dims": [8192, 1000]}],
      ]
    WEIGHTS_INIT:
      PARAMS_FILE: "specify the model weights"
      STATE_DICT_KEY_NAME: classy_state_dict
    SYNC_BN_CONFIG:
      CONVERT_BN_TO_SYNC_BN: False
      SYNC_BN_TYPE: apex
      GROUP_SIZE: 8
  LOSS:
    name: cross_entropy_multiple_output_single_target
    cross_entropy_multiple_output_single_target:
      ignore_index: -1
  OPTIMIZER:
    name: sgd
    momentum: 0.9
    num_epochs: 80
    weight_decay: 0
    nesterov: True
    regularize_bn: False
    regularize_bias: True
    param_schedulers:
      lr:
        auto_lr_scaling:
          # if set to True, learning rate will be scaled.
          auto_scale: True
          # base learning rate value that will be scaled.
          base_value: 0.2
          # batch size for which the base learning rate is specified. The current batch size
          # is used to determine how to scale the base learning rate value.
          # scaled_lr = ((batchsize_per_gpu * world_size) * base_value ) / base_lr_batch_size
          base_lr_batch_size: 256
          # scaling_type can be set to "sqrt" to reduce the impact of scaling on the base value
          scaling_type: "linear"
        name: constant
        update_interval: "epoch"
        value: 0.2
  DISTRIBUTED:
    BACKEND: nccl
    NUM_NODES: 4
    NUM_PROC_PER_NODE: 8
    INIT_METHOD: tcp
    RUN_ID: auto
  MACHINE:
    DEVICE: gpu
  CHECKPOINT:
    DIR: "."
    AUTO_RESUME: True
    CHECKPOINT_FREQUENCY: 1
123 changes: 123 additions & 0 deletions configs/config/pretrain/byol/byol_1gpu_resnet18_imagenette.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
# @package _global_
# BYOL: Bootstrap Your Own Latent - https://arxiv.org/pdf/2006.07733.pdf
# Official BYOL implementation - https://github.com/deepmind/deepmind-research/blob/master/byol
# Small-scale repro: ResNet-18 on Imagenette (160px) on a single GPU.
config:
  VERBOSE: False
  LOG_FREQUENCY: 200
  TEST_ONLY: False
  TEST_MODEL: False
  SEED_VALUE: 1337 # BYOL Paper - Page 33 - Appendix J.1
  MULTI_PROCESSING_METHOD: forkserver
  HOOKS:
    PERF_STATS:
      MONITOR_PERF_STATS: True
      ROLLING_BTIME_FREQ: 313
    TENSORBOARD_SETUP:
      USE_TENSORBOARD: False
      EXPERIMENT_LOG_DIR: "byol_reference"
      LOG_PARAMS: False
      FLUSH_EVERY_N_MIN: 20
  DATA:
    NUM_DATALOADER_WORKERS: 5
    TRAIN:
      DATA_SOURCES: [disk_folder]
      DATASET_NAMES: [imagenette_160_folder]
      BATCHSIZE_PER_REPLICA: 256
      LABEL_TYPE: sample_index # Just an implementation detail. Label isn't used
      TRANSFORMS: # BYOL Paper - Page 16 - Appendix B
        - name: ImgReplicatePil
          num_times: 2
        - name: RandomResizedCrop
          size: 128
          interpolation: 3
        - name: RandomHorizontalFlip
          p: 0.5
        - name: ImgPilColorDistortion
          strength: 0.5
          saturation: 0.4
        # Asymmetric augmentation: blur is always applied to view 1,
        # only 10% of the time to view 2 (per-crop probabilities below).
        - name: ImgPilMultiCropRandomApply
          transforms:
            - name: ImgPilGaussianBlur
              p: 1
              radius_min: 0.1
              radius_max: 2.0
          prob: [1.0, 0.1]
        # Solarization: never on view 1, 20% of the time on view 2.
        - name: ImgPilMultiCropRandomApply
          transforms:
            - name: ImgPilRandomSolarize
              p: 1.0
          prob: [0.0, 0.2]
        - name: ToTensor
        - name: Normalize
          mean: [0.485, 0.456, 0.406]
          std: [0.229, 0.224, 0.225]
      COLLATE_FUNCTION: simclr_collator
      MMAP_MODE: True
      COPY_TO_LOCAL_DISK: False
      COPY_DESTINATION_DIR: /tmp/imagenet1k/
      DROP_LAST: True
      USE_DEBUGGING_SAMPLER: False
  TRAINER:
    TRAIN_STEP_NAME: standard_train_step
  METERS:
    name: ""
  MODEL: # BYOL Paper - Page 5 - 3.3 Implementation details -- Architecture
    TRUNK:
      NAME: resnet
      RESNETS:
        DEPTH: 18
        ZERO_INIT_RESIDUAL: False
    HEAD:
      # Projector MLP followed by predictor MLP (online network only).
      PARAMS: [
        ["mlp", {"dims": [2048, 4096, 256], "use_relu": True, "use_bn": True}],
        ["mlp", {"dims": [256, 4096, 256], "use_relu": True, "use_bn": True}]
      ]
    SYNC_BN_CONFIG:
      CONVERT_BN_TO_SYNC_BN: True
      SYNC_BN_TYPE: pytorch
    AMP_PARAMS:
      USE_AMP: False
  LOSS: # BYOL Paper - Page 33 - Appendix J.1
    name: byol_loss
    byol_loss:
      embedding_dim: 256
      # EMA coefficient for the target network update.
      momentum: 0.99
  OPTIMIZER:
    name: lars
    eta: 0.001
    weight_decay: 1.0e-6
    momentum: 0.9
    nesterov: False
    num_epochs: 1000
    regularize_bn: False
    regularize_bias: False
    exclude_bias_and_norm: True
    param_schedulers:
      lr:
        auto_lr_scaling:
          auto_scale: True
          base_value: 2.0
          base_lr_batch_size: 256
        name: composite
        schedulers:
          - name: linear
            start_value: 0.00002
            end_value: 2.0 # Automatically rescaled if needed
          - name: cosine
            start_value: 2.0 # Automatically rescaled if needed
            end_value: 0.000002 # Automatically rescaled if needed
        update_interval: step
        interval_scaling: [rescaled, rescaled]
        lengths: [0.001, 0.999] # 0.001 * 1000 epochs = 1 epoch of linear warmup
  DISTRIBUTED:
    BACKEND: nccl
    NUM_NODES: 1
    NUM_PROC_PER_NODE: 1
    INIT_METHOD: tcp
    RUN_ID: auto
  MACHINE:
    DEVICE: gpu
  CHECKPOINT:
    AUTO_RESUME: True
    CHECKPOINT_FREQUENCY: 10
    CHECKPOINT_ITER_FREQUENCY: -1 # set this variable to checkpoint every few iterations
123 changes: 123 additions & 0 deletions configs/config/pretrain/byol/byol_4node_resnet.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
# @package _global_
# BYOL: Bootstrap Your Own Latent - https://arxiv.org/pdf/2006.07733.pdf
# Official BYOL implementation - https://github.com/deepmind/deepmind-research/blob/master/byol
# Full-scale pretraining: ResNet-50 on ImageNet-1k, multi-node.
config:
  VERBOSE: False
  LOG_FREQUENCY: 200
  TEST_ONLY: False
  TEST_MODEL: False
  SEED_VALUE: 1337 # BYOL Paper - Page 33 - Appendix J.1
  MULTI_PROCESSING_METHOD: forkserver
  HOOKS:
    PERF_STATS:
      MONITOR_PERF_STATS: True
      ROLLING_BTIME_FREQ: 313
    TENSORBOARD_SETUP:
      USE_TENSORBOARD: False
      EXPERIMENT_LOG_DIR: "byol_reference"
      LOG_PARAMS: False
      FLUSH_EVERY_N_MIN: 20
  DATA:
    NUM_DATALOADER_WORKERS: 5
    TRAIN:
      DATA_SOURCES: [disk_filelist]
      DATASET_NAMES: [imagenet1k_filelist]
      BATCHSIZE_PER_REPLICA: 128
      LABEL_TYPE: sample_index # Just an implementation detail. Label isn't used
      TRANSFORMS: # BYOL Paper - Page 16 - Appendix B
        - name: ImgReplicatePil
          num_times: 2
        - name: RandomResizedCrop
          size: 224
          interpolation: 3
        - name: RandomHorizontalFlip
          p: 0.5
        - name: ImgPilColorDistortion
          strength: 0.5
          saturation: 0.4
        # Asymmetric augmentation: blur is always applied to view 1,
        # only 10% of the time to view 2 (per-crop probabilities below).
        - name: ImgPilMultiCropRandomApply
          transforms:
            - name: ImgPilGaussianBlur
              p: 1
              radius_min: 0.1
              radius_max: 2.0
          prob: [1.0, 0.1]
        # Solarization: never on view 1, 20% of the time on view 2.
        - name: ImgPilMultiCropRandomApply
          transforms:
            - name: ImgPilRandomSolarize
              p: 1.0
          prob: [0.0, 0.2]
        - name: ToTensor
        - name: Normalize
          mean: [0.485, 0.456, 0.406]
          std: [0.229, 0.224, 0.225]
      COLLATE_FUNCTION: simclr_collator
      MMAP_MODE: True
      COPY_TO_LOCAL_DISK: False
      COPY_DESTINATION_DIR: /tmp/imagenet1k/
      DROP_LAST: True
      USE_DEBUGGING_SAMPLER: False
  TRAINER:
    TRAIN_STEP_NAME: standard_train_step
  METERS:
    name: ""
  MODEL: # BYOL Paper - Page 5 - 3.3 Implementation details -- Architecture
    TRUNK:
      NAME: resnet
      RESNETS:
        DEPTH: 50
        ZERO_INIT_RESIDUAL: True
    HEAD:
      # Projector MLP followed by predictor MLP (online network only).
      PARAMS: [
        ["mlp", {"dims": [2048, 4096, 256], "use_relu": True, "use_bn": True}],
        ["mlp", {"dims": [256, 4096, 256], "use_relu": True, "use_bn": True}]
      ]
    SYNC_BN_CONFIG:
      CONVERT_BN_TO_SYNC_BN: True
      SYNC_BN_TYPE: pytorch
    AMP_PARAMS:
      USE_AMP: False
  LOSS: # BYOL Paper - Page 33 - Appendix J.1
    name: byol_loss
    byol_loss:
      embedding_dim: 256
      # EMA coefficient for the target network update.
      momentum: 0.99
  OPTIMIZER:
    name: lars # CHECKED
    eta: 0.001
    weight_decay: 1.0e-6 # CHECKED
    momentum: 0.9 # CHECKED
    nesterov: False # CHECKED
    num_epochs: 300 # CHECKED
    regularize_bn: False # CHECKED
    regularize_bias: False # CHECKED
    exclude_bias_and_norm: True # CHECKED
    param_schedulers:
      lr:
        auto_lr_scaling:
          auto_scale: True
          base_value: 0.3
          base_lr_batch_size: 256
        name: composite
        schedulers:
          - name: linear
            start_value: 0.00001
            end_value: 0.3 # Automatically rescaled if needed
          - name: cosine
            start_value: 0.3 # Automatically rescaled if needed
            end_value: 0.000003 # Automatically rescaled if needed
        update_interval: step
        interval_scaling: [rescaled, rescaled]
        lengths: [0.0334, 0.9666] # 0.0334 * 300 epochs ~= 10ep warmup
  DISTRIBUTED:
    BACKEND: nccl
    # NOTE(review): filename says "4node" but 8 nodes are configured — confirm
    # which world size the repro targets (LR auto-scaling depends on it).
    NUM_NODES: 8
    NUM_PROC_PER_NODE: 8
    INIT_METHOD: tcp
    RUN_ID: auto
  MACHINE:
    DEVICE: gpu
  CHECKPOINT:
    AUTO_RESUME: True
    CHECKPOINT_FREQUENCY: 10
    CHECKPOINT_ITER_FREQUENCY: -1 # set this variable to checkpoint every few iterations
Loading