Adding mixtral. (#2093)
Summary: Pull Request resolved: #2093

Reviewed By: aaronenyeshi

Differential Revision: D52806835

Pulled By: xuzhao9

fbshipit-source-id: b47e37e77a2823a758dff668c2968351925d4e83
MaanavD authored and facebook-github-bot committed Jan 18, 2024
1 parent e85d944 commit 19685da
Showing 5 changed files with 45 additions and 1 deletion.
17 changes: 17 additions & 0 deletions torchbenchmark/canary_models/hf_mixtral/__init__.py
@@ -0,0 +1,17 @@
from torchbenchmark.tasks import NLP
from torchbenchmark.util.framework.huggingface.model_factory import HuggingFaceModel

class Model(HuggingFaceModel):
    task = NLP.LANGUAGE_MODELING
    # DEFAULT_TRAIN_BSIZE not specified since we're not implementing a train test
    # DEFAULT_TRAIN_BSIZE = 1
    DEFAULT_EVAL_BSIZE = 1

    def __init__(self, test, device, batch_size=None, extra_args=[]):
        super().__init__(name="hf_mixtral", test=test, device=device, batch_size=batch_size, extra_args=extra_args)

    # # def train(self):
    # #     return NotImplementedError("Not implemented")

    # def eval(self):
    #     super().eval()
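
The new canary model inherits everything from HuggingFaceModel and only exercises the eval path, since DEFAULT_TRAIN_BSIZE is left unset. As a rough smoke-test sketch (not part of the commit), assuming the standard torchbenchmark constructor shown above and the inherited eval() entry point that the commented-out stub references:

# Minimal sketch: instantiate the new canary model and run its eval path.
# Assumes a CUDA device with enough memory for Mixtral and that the checkpoint
# has already been cached by install.py.
from torchbenchmark.canary_models.hf_mixtral import Model

model = Model(test="eval", device="cuda", batch_size=1)
model.eval()  # provided by HuggingFaceModel, as the commented-out stub suggests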
13 changes: 13 additions & 0 deletions torchbenchmark/canary_models/hf_mixtral/install.py
@@ -0,0 +1,13 @@
import subprocess
import sys
import os
from torchbenchmark.util.framework.huggingface.patch_hf import patch_transformers, cache_model

def pip_install_requirements():
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', '-q', '-r', 'requirements.txt'])

if __name__ == '__main__':
    pip_install_requirements()
    patch_transformers()
    model_name = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
    cache_model(model_name, trust_remote_code=True)
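
The install script derives the model name from its directory (hf_mixtral) and delegates caching to the shared HuggingFace helpers; the exact behavior of cache_model lives in patch_hf.py. As a hypothetical illustration of what the caching step roughly amounts to with plain transformers (the hub id comes from the model_factory.py entry below), not the torchbenchmark implementation itself:

# Illustrative only: pre-fetch the Mixtral config (and, if uncommented, the
# weights) so later runs hit the local HuggingFace cache instead of the network.
from transformers import AutoConfig, AutoModelForCausalLM

config = AutoConfig.from_pretrained("mistralai/Mixtral-8x7B-v0.1")
# model = AutoModelForCausalLM.from_pretrained("mistralai/Mixtral-8x7B-v0.1")  # tens of GB of weights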
11 changes: 11 additions & 0 deletions torchbenchmark/canary_models/hf_mixtral/metadata.yaml
@@ -0,0 +1,11 @@
devices:
  NVIDIA A100-SXM4-40GB:
    eval_batch_size: 1
eval_benchmark: false
eval_deterministic: false
eval_nograd: true
train_benchmark: false
train_deterministic: false
not_implemented:
  - device: NVIDIA A10G
  # - device: cpu
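
The metadata pins an eval batch size of 1 on the A100-40GB and marks the model as not implemented on the NVIDIA A10G (the CPU exclusion is left commented out). As a hedged sketch of how such a file can be consulted, not necessarily how torchbenchmark itself does it, the not_implemented list is plain YAML (assumes PyYAML and the path below; field names come straight from metadata.yaml):

# Hypothetical reader for the metadata above.
import yaml

with open("torchbenchmark/canary_models/hf_mixtral/metadata.yaml") as f:
    meta = yaml.safe_load(f)

skipped = {entry["device"] for entry in meta.get("not_implemented", [])}
print("NVIDIA A10G" in skipped)  # True -> a harness should skip this device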
3 changes: 3 additions & 0 deletions torchbenchmark/canary_models/hf_mixtral/requirements.txt
@@ -0,0 +1,3 @@
bitsandbytes
transformers>=4.36.2
numba
2 changes: 1 addition & 1 deletion torchbenchmark/util/framework/huggingface/model_factory.py
@@ -29,6 +29,7 @@
    'hf_Bert_large': (512, 512, 'BertConfig(hidden_size=1024, num_hidden_layers=24, num_attention_heads=16)', 'AutoModelForMaskedLM'),
    'hf_Whisper': (1024, 1024, 'WhisperConfig()', 'AutoModelForAudioClassification'),
    'hf_distil_whisper': (1024, 1024, 'AutoConfig.from_pretrained("distil-whisper/distil-medium.en")', 'AutoModelForAudioClassification'),
    'hf_mixtral' : (512,512, 'AutoConfig.from_pretrained("mistralai/Mixtral-8x7B-v0.1")', 'AutoModelForCausalLM'),
    # default num_hidden_layers=32 but that OOMs, feel free to change this config to something more real
    'llama_v2_7b_16h' : (128,512, 'LlamaConfig(num_hidden_layers=16)', 'AutoModelForCausalLM'),
    'hf_MPT_7b_instruct': (512, 512, 'AutoConfig.from_pretrained("mosaicml/mpt-7b-instruct", trust_remote_code=True)', 'AutoModelForCausalLM'),
@@ -39,7 +40,6 @@
    # as per this page https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1 trust_remote_code=True is not required
    'mistral_7b_instruct' : (128, 128, 'AutoConfig.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")', 'AutoModelForCausalLM'),
    'hf_Yi' : (512, 512, 'AutoConfig.from_pretrained("01-ai/Yi-6B", trust_remote_code=True)', 'AutoModelForCausalLM'),

}

cpu_input_slice = {
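
The one-line model_factory.py change registers hf_mixtral alongside the other generative models: what look like 512/512 train/eval sequence lengths, a config expression, and the transformers Auto class to build from it. A rough, illustrative sketch of how such an entry can be turned into a model (not the factory's actual code path; requires transformers>=4.36.2 per requirements.txt and a large amount of host memory):

# Illustrative only: build a randomly initialized Mixtral from the registry-style
# tuple added above. Real benchmarking goes through HuggingFaceModel instead.
import transformers
from transformers import AutoConfig  # referenced by the config expression below

train_len, eval_len, config_expr, cls_name = (
    512,
    512,
    'AutoConfig.from_pretrained("mistralai/Mixtral-8x7B-v0.1")',
    'AutoModelForCausalLM',
)
config = eval(config_expr)  # fetches the Mixtral config from the Hub
model = getattr(transformers, cls_name).from_config(config)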
