-
Notifications
You must be signed in to change notification settings - Fork 12
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Signed-off-by: Yu Chin Fabian Lim <[email protected]>
- Loading branch information
Showing
2 changed files
with
113 additions
and
24 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,25 +1,25 @@ | ||
epoch,framework_config,gradient_accumulation_steps,mem_nvidia_mem_reserved,model_name_or_path,num_gpus,per_device_train_batch_size,torch_dtype,train_loss,train_runtime,train_samples_per_second,train_steps_per_second,train_tokens_per_second | ||
0.25,none,16.0,70749.0,ibm/PowerMoE-3b,1,8,bfloat16,0.9477007621526718,2350.0523,5.447,0.043,1508.732 | ||
0.25,none,8.0,46699.0,ibm/PowerMoE-3b,2,8,bfloat16,0.9477724695205688,1341.9179,9.539,0.075,1321.094 | ||
0.25,none,4.0,38885.0,ibm/PowerMoE-3b,4,8,bfloat16,0.9478064042329788,712.2347,17.972,0.14,1244.534 | ||
0.25,moe-scattermoe-granite-ep1,16.0,71049.0,ibm/PowerMoE-3b,1,8,bfloat16,0.9477236008644104,741.1462,17.271,0.135,4783.942 | ||
0.25,moe-scattermoe-granite-ep1,8.0,52294.0,ibm/PowerMoE-3b,2,8,bfloat16,0.9511111199855804,484.7077,26.408,0.206,3657.462 | ||
0.25,moe-scattermoe-granite-ep1,4.0,51251.5,ibm/PowerMoE-3b,4,8,bfloat16,0.9541541540622712,264.6776,48.361,0.378,3348.98 | ||
,moe-scattermoe-granite-ep2,16.0,3.0,ibm/PowerMoE-3b,1,8,bfloat16,,,,, | ||
0.25,moe-scattermoe-granite-ep2,8.0,39854.0,ibm/PowerMoE-3b,2,8,bfloat16,0.9480846971273422,602.4418,21.247,0.166,2942.691 | ||
0.25,moe-scattermoe-granite-ep2,4.0,40937.0,ibm/PowerMoE-3b,4,8,bfloat16,0.9512380701303482,305.5111,41.897,0.327,2901.367 | ||
,moe-scattermoe-granite-ep4,16.0,3.0,ibm/PowerMoE-3b,1,8,bfloat16,,,,, | ||
,moe-scattermoe-granite-ep4,8.0,213.0,ibm/PowerMoE-3b,2,8,bfloat16,,,,, | ||
0.25,moe-scattermoe-granite-ep4,4.0,32128.0,ibm/PowerMoE-3b,4,8,bfloat16,0.9484522187709808,314.6519,40.68,0.318,2817.082 | ||
0.25,moe-scattermoe-granite-ep1-padding-free,16.0,46789.0,ibm/PowerMoE-3b,1,8,bfloat16,0.9477115905284882,642.9549,19.908,0.156,3852.214 | ||
0.25,moe-scattermoe-granite-ep1-padding-free,8.0,43327.5,ibm/PowerMoE-3b,2,8,bfloat16,0.951027640104294,455.3931,28.108,0.22,2719.409 | ||
0.25,moe-scattermoe-granite-ep1-padding-free,4.0,39417.75,ibm/PowerMoE-3b,4,8,bfloat16,0.9540604627132416,243.6897,52.526,0.41,2540.936 | ||
,moe-scattermoe-granite-ep2-padding-free,16.0,0.0,ibm/PowerMoE-3b,1,8,bfloat16,,,,, | ||
0.25,moe-scattermoe-granite-ep2-padding-free,8.0,31575.5,ibm/PowerMoE-3b,2,8,bfloat16,0.9480111581087112,549.3424,23.301,0.182,2254.332 | ||
0.25,moe-scattermoe-granite-ep2-padding-free,4.0,29068.75,ibm/PowerMoE-3b,4,8,bfloat16,0.9512316131591796,286.8631,44.621,0.349,2158.521 | ||
,moe-scattermoe-granite-ep4-padding-free,16.0,0.0,ibm/PowerMoE-3b,1,8,bfloat16,,,,, | ||
,moe-scattermoe-granite-ep4-padding-free,8.0,210.0,ibm/PowerMoE-3b,2,8,bfloat16,,,,, | ||
0.25,moe-scattermoe-granite-ep4-padding-free,4.0,22499.75,ibm/PowerMoE-3b,4,8,bfloat16,0.9483268648386002,289.7612,44.174,0.345,2136.932 | ||
,none,,65630.125,mistralai/Mixtral-8x7B-Instruct-v0.1,8,1,bfloat16,0.8610308569669723,4188.2636,3.056,0.024,80.224 | ||
,moe-scattermoe-granite-ep8,,51988.125,mistralai/Mixtral-8x7B-Instruct-v0.1,8,1,bfloat16,0.856986141204834,1065.0627,12.018,0.094,315.474 | ||
,moe-scattermoe-granite-ep8-padding-free,,52019.375,mistralai/Mixtral-8x7B-Instruct-v0.1,8,1,bfloat16,0.8575272679328918,1051.6374,12.171,0.095,319.502 | ||
0.25,none,16.0,71199.0,ibm-granite/granite-3.0-3b-a800m-instruct,1,8,bfloat16,0.9438143467903136,2371.9316,5.396,0.042,1505.608 | ||
0.25,none,8.0,46829.0,ibm-granite/granite-3.0-3b-a800m-instruct,2,8,bfloat16,0.9437569552659988,1355.7096,9.442,0.074,1317.096 | ||
0.25,none,4.0,37996.0,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.9437739425897598,708.3914,18.069,0.141,1260.32 | ||
0.25,moe-scattermoe-granite-ep1,16.0,71187.0,ibm-granite/granite-3.0-3b-a800m-instruct,1,8,bfloat16,0.9439476370811464,742.739,17.234,0.135,4808.149 | ||
0.25,moe-scattermoe-granite-ep1,8.0,52503.0,ibm-granite/granite-3.0-3b-a800m-instruct,2,8,bfloat16,0.9506204092502594,485.5103,26.364,0.206,3677.78 | ||
0.25,moe-scattermoe-granite-ep1,4.0,51145.0,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.9572784686088562,262.9566,48.677,0.38,3395.238 | ||
0.25,moe-scattermoe-granite-ep2,8.0,40193.0,ibm-granite/granite-3.0-3b-a800m-instruct,2,8,bfloat16,0.9437192791700364,577.2164,22.175,0.173,3093.467 | ||
0.25,moe-scattermoe-granite-ep2,4.0,40878.5,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.9509018939733506,300.285,42.626,0.333,2973.176 | ||
0.25,moe-scattermoe-granite-ep4,4.0,31777.5,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.9434539985656738,307.1264,41.677,0.326,2906.946 | ||
0.25,moe-scattermoe-granite-ep1-padding-free,16.0,48401.0,ibm-granite/granite-3.0-3b-a800m-instruct,1,8,bfloat16,0.9437484860420228,631.9756,20.254,0.158,3924.202 | ||
0.25,moe-scattermoe-granite-ep1-padding-free,8.0,42452.0,ibm-granite/granite-3.0-3b-a800m-instruct,2,8,bfloat16,0.9506663566827774,454.3444,28.172,0.22,2729.207 | ||
0.25,moe-scattermoe-granite-ep1-padding-free,4.0,38560.0,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.957276314496994,241.2967,53.047,0.414,2569.451 | ||
0.25,moe-scattermoe-granite-ep2-padding-free,8.0,31012.0,ibm-granite/granite-3.0-3b-a800m-instruct,2,8,bfloat16,0.943688799738884,546.507,23.421,0.183,2268.955 | ||
0.25,moe-scattermoe-granite-ep2-padding-free,4.0,28133.0,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.9505942213535308,283.5444,45.143,0.353,2186.607 | ||
0.25,moe-scattermoe-granite-ep4-padding-free,4.0,21585.5,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.9441865116357804,284.6079,44.974,0.351,2178.436 | ||
0.25,moe-scattermoe-granite-ep1-padding-free-foak,16.0,42651.0,ibm-granite/granite-3.0-3b-a800m-instruct,1,8,bfloat16,0.9437448275089264,615.4528,20.798,0.162,4029.554 | ||
0.25,moe-scattermoe-granite-ep1-padding-free-foak,8.0,37743.0,ibm-granite/granite-3.0-3b-a800m-instruct,2,8,bfloat16,0.950773031115532,433.4811,29.528,0.231,2860.563 | ||
0.25,moe-scattermoe-granite-ep1-padding-free-foak,4.0,35153.0,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.9572476959228516,232.0428,55.162,0.431,2671.921 | ||
0.25,moe-scattermoe-granite-ep2-padding-free-foak,8.0,26075.0,ibm-granite/granite-3.0-3b-a800m-instruct,2,8,bfloat16,0.9437651455402374,524.7751,24.391,0.191,2362.917 | ||
0.25,moe-scattermoe-granite-ep2-padding-free-foak,4.0,24665.5,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.9507779973745346,274.126,46.694,0.365,2261.733 | ||
0.25,moe-scattermoe-granite-ep4-padding-free-foak,4.0,18368.0,ibm-granite/granite-3.0-3b-a800m-instruct,4,8,bfloat16,0.943427557349205,278.1245,46.023,0.36,2229.217 | ||
,none,,65607.25,mistralai/Mixtral-8x7B-Instruct-v0.1,8,1,bfloat16,0.8599078696966171,4180.9544,3.062,0.024,80.364 | ||
,moe-scattermoe-granite-ep8,,52004.75,mistralai/Mixtral-8x7B-Instruct-v0.1,8,1,bfloat16,0.8588122856616974,1071.1967,11.949,0.093,313.668 | ||
,moe-scattermoe-granite-ep8-foak,,51961.25,mistralai/Mixtral-8x7B-Instruct-v0.1,8,1,bfloat16,0.8599798053503036,1043.6675,12.264,0.096,321.942 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
accelerate==1.0.1 | ||
aiohappyeyeballs==2.4.3 | ||
aiohttp==3.10.10 | ||
aiosignal==1.3.1 | ||
async-timeout==4.0.3 | ||
attrs==24.2.0 | ||
bitsandbytes==0.43.3 | ||
certifi==2024.8.30 | ||
charset-normalizer==3.4.0 | ||
contourpy==1.3.0 | ||
cycler==0.12.1 | ||
datasets==2.21.0 | ||
dill==0.3.8 | ||
docstring_parser==0.16 | ||
einops==0.8.0 | ||
filelock==3.16.1 | ||
flash-attn==2.6.3 | ||
-e git+https://github.com/foundation-model-stack/fms-acceleration.git@21af5fb9f2989b3dbf443c016e4c0470b536a593#egg=fms_acceleration&subdirectory=plugins/framework | ||
-e git+https://github.com/foundation-model-stack/fms-acceleration.git@21af5fb9f2989b3dbf443c016e4c0470b536a593#egg=fms_acceleration_aadp&subdirectory=plugins/attention-and-distributed-packing | ||
-e git+https://github.com/foundation-model-stack/fms-acceleration.git@21af5fb9f2989b3dbf443c016e4c0470b536a593#egg=fms_acceleration_foak&subdirectory=plugins/fused-ops-and-kernels | ||
-e git+https://github.com/foundation-model-stack/fms-acceleration.git@21af5fb9f2989b3dbf443c016e4c0470b536a593#egg=fms_acceleration_moe&subdirectory=plugins/accelerated-moe | ||
-e git+https://github.com/foundation-model-stack/fms-acceleration.git@21af5fb9f2989b3dbf443c016e4c0470b536a593#egg=fms_acceleration_peft&subdirectory=plugins/accelerated-peft | ||
fms-hf-tuning @ git+https://github.com/foundation-model-stack/fms-hf-tuning.git@398c2a8fe26d734344240555585d95e05299faa8 | ||
fonttools==4.54.1 | ||
frozenlist==1.5.0 | ||
fsspec==2024.6.1 | ||
huggingface-hub==0.26.2 | ||
idna==3.10 | ||
Jinja2==3.1.4 | ||
kernel-hyperdrive @ git+https://github.com/fabianlim/kernel-hyperdrive.git@45036497e12444ca98a6f0072204538aee4543ba | ||
kiwisolver==1.4.7 | ||
llvmlite==0.43.0 | ||
markdown-it-py==3.0.0 | ||
MarkupSafe==3.0.2 | ||
matplotlib==3.9.2 | ||
mdurl==0.1.2 | ||
mpmath==1.3.0 | ||
multidict==6.1.0 | ||
multiprocess==0.70.16 | ||
networkx==3.4.2 | ||
numba==0.60.0 | ||
numpy==1.26.4 | ||
nvidia-cublas-cu12==12.1.3.1 | ||
nvidia-cuda-cupti-cu12==12.1.105 | ||
nvidia-cuda-nvrtc-cu12==12.1.105 | ||
nvidia-cuda-runtime-cu12==12.1.105 | ||
nvidia-cudnn-cu12==9.1.0.70 | ||
nvidia-cufft-cu12==11.0.2.54 | ||
nvidia-curand-cu12==10.3.2.106 | ||
nvidia-cusolver-cu12==11.4.5.107 | ||
nvidia-cusparse-cu12==12.1.0.106 | ||
nvidia-nccl-cu12==2.20.5 | ||
nvidia-nvjitlink-cu12==12.4.127 | ||
nvidia-nvtx-cu12==12.1.105 | ||
packaging==24.2 | ||
pandas==2.2.3 | ||
peft==0.13.2 | ||
pillow==11.0.0 | ||
propcache==0.2.0 | ||
protobuf==5.28.3 | ||
psutil==6.1.0 | ||
pyarrow==18.0.0 | ||
Pygments==2.18.0 | ||
pyparsing==3.2.0 | ||
python-dateutil==2.9.0.post0 | ||
pytz==2024.2 | ||
PyYAML==6.0.2 | ||
regex==2024.11.6 | ||
requests==2.32.3 | ||
rich==13.9.4 | ||
safetensors==0.4.5 | ||
sentencepiece==0.2.0 | ||
shtab==1.7.1 | ||
simpleeval==0.9.13 | ||
six==1.16.0 | ||
sympy==1.13.1 | ||
threadpoolctl==3.5.0 | ||
tokenizers==0.20.3 | ||
torch==2.4.1 | ||
tqdm==4.67.0 | ||
transformers==4.45.2 | ||
triton==3.0.0 | ||
trl==0.11.4 | ||
typing_extensions==4.12.2 | ||
tyro==0.8.14 | ||
tzdata==2024.2 | ||
urllib3==2.2.3 | ||
xxhash==3.5.0 | ||
yarl==1.17.1 |