diff --git a/.gitignore b/.gitignore index 47713d8..b9a2132 100644 --- a/.gitignore +++ b/.gitignore @@ -130,3 +130,6 @@ dmypy.json # don't save logs logs/ + +# don't save vscode settings +.vscode/ \ No newline at end of file diff --git a/README.md b/README.md index b2f23d9..4ce85a5 100644 --- a/README.md +++ b/README.md @@ -10,19 +10,19 @@ This repo provides different pytorch implementation for training a deep learning pytorch-templates/ │ ├── train_simple.py : A single-GPU implementation - | + │ ├── train_multi.py : A multi-GPU implementation │ ├── train_pl.py : Pytorch-lightning implementation along with Tensorboard logging │ - ├── train_pl.ipynb : Jupyter notebook for Pytorch-lightning implementation along with Tensorboard logging - │ ├── pl_hydra/ - contains all the files pertaining to pytorch-lightning hydra implementation - │ └──data_loaders.py - | - └── utils/ - small utility functions - ├── util.py - └── ... + │ └──... + │ + ├── utils/ - small utility functions + │ ├── util.py + │ └── ... + │ + └── requirements.txt : file to install python dependencies ``` ## Single-GPU implementation @@ -50,9 +50,61 @@ optional arguments: --momentum MOMENTUM Momentum value in SGD. --gamma GAMMA gamma value for MultiStepLR. 
``` + +``` +# Start training with default parameters: +python train_simple.py --run_name=test_single + +# You can also pass parameters through the commandline, for e.g.: +python train_simple.py -bs=64 -ep=2 --run_name=test_single + +# You can also set parameters in run_simple.sh file and start the training as following: +source run_simple.sh +``` + ## Multi-GPU implementation +``` +# Training with default parameters and 2 GPUs: +python -m torch.distributed.launch --nproc_per_node=2 --master_port=9995 train_multi.py --run_name=test_multi + +# You can also pass parameters through commandline (single GPU training), for e.g.: +python -m torch.distributed.launch --nproc_per_node=1 --master_port=9995 train_multi.py -ep=5 --run_name=test_single + +# You can also set parameters in run_multi.sh file and start the training as following: +source run_multi.sh +``` ## Pytorch-lightning implementation +``` +# Training with 1 GPU: +python train_pl.py --epochs=5 --run_name=test_pl --gpus=1 + +# Training with 2 GPUs: +python train_pl.py --epochs=5 --run_name=test_pl --gpus=2 +``` + +``` +# Running the Tensorboard: +tensorboard --logdir ./logs/ +``` ## Pytorch-lightning Hydra implementation +[Tensorboard containing the runs comparing different architectures on CIFAR10](https://tensorboard.dev/experiment/JUrYiGdOQqC0iGNoWtdPlg/#scalars&run=densenet%2F2022-05-06_00-27-19%2Ftensorboard%2Fdensenet&runSelectionState=eyJkZW5zZW5ldC8yMDIyLTA1LTA2XzAwLTI3LTE5L3RlbnNvcmJvYXJkL2RlbnNlbmV0Ijp0cnVlLCJnb29nbGVuZXQvMjAyMi0wNS0wNl8wOC00OS01My90ZW5zb3Jib2FyZC9nb29nbGVuZXQiOnRydWUsInJlc25ldC8yMDIyLTA1LTA2XzEwLTM1LTM5L3RlbnNvcmJvYXJkL3Jlc25ldCI6dHJ1ZSwidmdnLzIwMjItMDUtMDVfMTUtNTYtMDAvdGVuc29yYm9hcmQvdmdnIjp0cnVlLCJ2aXQvMjAyMi0wNS0wNV8xNS0wMS01NS90ZW5zb3Jib2FyZC92aXQiOnRydWV9) + + +## Quickstart +``` +# clone project +git clone https://github.com/garg-aayush/pytorch-pl-hydra-templates +cd pytorch-pl-hydra-templates + +# create conda environment +conda create -n pl_hydra python=3.8 +conda activate 
pl_hydra + +# install requirements +pip install -r requirements.txt +``` +## Feedback +To give feedback, ask a question, or report environment setup issues, you can use the [Github Discussions](https://github.com/garg-aayush/pytorch-pl-hydra-templates/discussions). \ No newline at end of file diff --git a/run_simple.sh b/run_simple.sh index c52c264..1b0cf61 100755 --- a/run_simple.sh +++ b/run_simple.sh @@ -5,7 +5,6 @@ run_name='train_simple.py' logfile=./logs/train_simple.log train_script='train_simple.py' ngpus=1 -port=9994 epochs=150 epochs_per_test=1 batch_size=128