align export model code between ET and AOTI #74

Closed · wants to merge 5 commits
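This PR aligns the ExecuTorch (ET) and AOT Inductor (AOTI) export paths behind a single interface: export.py now builds the example input once in main() and hands it to both backends, instead of each exporter constructing its own input (previously with different shapes and dtypes). model_wrapper gains a max_seq_length parameter, and the CI workflows converge on a shared MODEL_OUT output directory, with the macos-12 runner split out into its own workflow. Below is a minimal sketch of the resulting call flow, reconstructed from the hunks that follow; export_main is a hypothetical condensation of main(), and checkpoint loading plus quantization setup are elided because the diff does not touch them.

```python
import torch

# Sketch of the aligned export flow after this PR (names taken from the diff;
# export_main is a hypothetical stand-in for export.py's main()).
def export_main(model, device, args):
    model = model_wrapper(model, device=device)  # sets up KV caches (default max_seq_length=350)

    # One example input, built once and shared by both export backends.
    input = (
        torch.tensor([[1, 9038, 2501, 263, 931]], dtype=torch.int, device=device),
        torch.tensor([0, 1, 2, 3, 4], dtype=torch.int, device=device),
    )

    if args.output_pte_path:  # ExecuTorch
        export_model_et(model, input, device, args.output_pte_path, args)
    if args.output_dso_path:  # AOT Inductor
        export_model_aoti(model, input, device, args.output_dso_path, args)
```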
61 changes: 61 additions & 0 deletions .github/workflows/compile-macos12.yml
@@ -0,0 +1,61 @@
+name: Compile main
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+  workflow_dispatch:
+
+jobs:
+  run-tinystories:
+    strategy:
+      matrix:
+        runner: [macos-12]
+    runs-on: ${{matrix.runner}}
+    steps:
+      - name: Checkout repo
+        uses: actions/checkout@v2
+      - name: Setup Python
+        uses: actions/setup-python@v2
+        with:
+          python-version: 3.11
+      - name: Print machine info
+        run: |
+          uname -a
+          if [ $(uname -s) == Darwin ]; then
+            sysctl machdep.cpu.brand_string
+            sysctl machdep.cpu.core_count
+          fi
+      - name: Install requirements
+        run: |
+          pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cpu
+          pip install -r requirements.txt
+      - name: Download checkpoints
+        run: |
+          mkdir -p checkpoints/stories15M
+          pushd checkpoints/stories15M
+          wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.pt
+          wget https://github.com/karpathy/llama2.c/raw/master/tokenizer.model
+          popd
+      - name: Run inference
+        run: |
+          export MODEL_PATH=checkpoints/stories15M/stories15M.pt
+          export MODEL_NAME=stories15M
+          export MODEL_OUT=/tmp
+          python generate.py --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
+          cat ./output_eager
+          python generate.py --compile --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_compiled
+          cat ./output_compiled
+          python export.py --checkpoint-path ${MODEL_PATH} --output-dso-path ${MODEL_OUT}/${MODEL_NAME}.so
+          python generate.py --checkpoint-path ${MODEL_PATH} --temperature 0 --dso-path ${MODEL_OUT}/${MODEL_NAME}.so > ./output_aoti
+          cat ./output_aoti
+          echo "tests complete"
+          echo "******************************************"
+          # echo "********* EAGER vs TORCH.COMPILE *********"
+          # echo "******************************************"
+          # diff output_eager output_compiled
+          # echo "******************************************"
+          # echo "********* EAGER vs AOT INDUCTOR *********"
+          # echo "******************************************"
+          # diff output_eager output_aoti
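This new workflow duplicates the compile.yml job for the macos-12 runner only; the next file removes macos-12 from the shared matrix, so the net effect is to give that runner its own job. Note that the eager-vs-compiled and eager-vs-AOTI diff checks are kept but commented out, so on macos-12 the job currently only verifies that each path runs to completion.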
24 changes: 12 additions & 12 deletions .github/workflows/compile.yml
@@ -11,7 +11,7 @@ jobs:
   run-tinystories:
     strategy:
       matrix:
-        runner: [ubuntu-latest, macos-12, macos-14]
+        runner: [ubuntu-latest, macos-14]
     runs-on: ${{matrix.runner}}
     steps:
       - name: Checkout repo
@@ -42,13 +42,13 @@ jobs:
         run: |
           export MODEL_PATH=checkpoints/stories15M/stories15M.pt
           export MODEL_NAME=stories15M
-          export MODEL_DIR=/tmp
+          export MODEL_OUT=/tmp
           python generate.py --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager
           cat ./output_eager
           python generate.py --compile --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_compiled
           cat ./output_compiled
-          python export.py --checkpoint-path ${MODEL_PATH} --output-dso-path ${MODEL_DIR}/${MODEL_NAME}.so
-          python generate.py --checkpoint-path ${MODEL_PATH} --temperature 0 --dso-path ${MODEL_DIR}/${MODEL_NAME}.so > ./output_aoti
+          python export.py --checkpoint-path ${MODEL_PATH} --output-dso-path ${MODEL_OUT}/${MODEL_NAME}.so
+          python generate.py --checkpoint-path ${MODEL_PATH} --temperature 0 --dso-path ${MODEL_OUT}/${MODEL_NAME}.so > ./output_aoti
           cat ./output_aoti
 
           echo "******************************************"
@@ -58,8 +58,8 @@ jobs:
           cat ./output_eager
           python generate.py --compile --quant '{"embedding" : {"bitwidth": 8, "group_size": 0}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_compiled
           cat ./output_compiled
-          python export.py --quant '{"embedding" : {"bitwidth": 8, "group_size": 0}}' --checkpoint-path ${MODEL_PATH} --output-dso-path ${MODEL_DIR}/${MODEL_NAME}.so
-          python generate.py --checkpoint-path ${MODEL_PATH} --temperature 0 --dso-path ${MODEL_DIR}/${MODEL_NAME}.so > ./output_aoti
+          python export.py --quant '{"embedding" : {"bitwidth": 8, "group_size": 0}}' --checkpoint-path ${MODEL_PATH} --output-dso-path ${MODEL_OUT}/${MODEL_NAME}.so
+          python generate.py --checkpoint-path ${MODEL_PATH} --temperature 0 --dso-path ${MODEL_OUT}/${MODEL_NAME}.so > ./output_aoti
           cat ./output_aoti
 
           echo "******************************************"
@@ -69,8 +69,8 @@ jobs:
           cat ./output_eager
           python generate.py --compile --quant '{"embedding" : {"bitwidth": 8, "group_size": 8}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_compiled
           cat ./output_compiled
-          python export.py --quant '{"embedding" : {"bitwidth": 8, "group_size": 8}}' --checkpoint-path ${MODEL_PATH} --output-dso-path ${MODEL_DIR}/${MODEL_NAME}.so
-          python generate.py --checkpoint-path ${MODEL_PATH} --temperature 0 --dso-path ${MODEL_DIR}/${MODEL_NAME}.so > ./output_aoti
+          python export.py --quant '{"embedding" : {"bitwidth": 8, "group_size": 8}}' --checkpoint-path ${MODEL_PATH} --output-dso-path ${MODEL_OUT}/${MODEL_NAME}.so
+          python generate.py --checkpoint-path ${MODEL_PATH} --temperature 0 --dso-path ${MODEL_OUT}/${MODEL_NAME}.so > ./output_aoti
           cat ./output_aoti
 
           echo "******************************************"
@@ -80,8 +80,8 @@ jobs:
           cat ./output_eager
           python generate.py --compile --quant '{"linear:int8" : {"bitwidth": 8, "group_size": 0}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_compiled
           cat ./output_compiled
-          python export.py --quant '{"linear:int8" : {"bitwidth": 8, "group_size": 0}}' --checkpoint-path ${MODEL_PATH} --output-dso-path ${MODEL_DIR}/${MODEL_NAME}.so
-          python generate.py --checkpoint-path ${MODEL_PATH} --temperature 0 --dso-path ${MODEL_DIR}/${MODEL_NAME}.so > ./output_aoti
+          python export.py --quant '{"linear:int8" : {"bitwidth": 8, "group_size": 0}}' --checkpoint-path ${MODEL_PATH} --output-dso-path ${MODEL_OUT}/${MODEL_NAME}.so
+          python generate.py --checkpoint-path ${MODEL_PATH} --temperature 0 --dso-path ${MODEL_OUT}/${MODEL_NAME}.so > ./output_aoti
           cat ./output_aoti
 
           echo "******************************************"
@@ -91,8 +91,8 @@ jobs:
           cat ./output_eager
           python generate.py --compile --quant '{"linear:int8" : {"bitwidth": 8, "group_size": 8}}' --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_compiled
           cat ./output_compiled
-          python export.py --quant '{"linear:int8" : {"bitwidth": 8, "group_size": 8}}' --checkpoint-path ${MODEL_PATH} --output-dso-path ${MODEL_DIR}/${MODEL_NAME}.so
-          python generate.py --checkpoint-path ${MODEL_PATH} --temperature 0 --dso-path ${MODEL_DIR}/${MODEL_NAME}.so > ./output_aoti
+          python export.py --quant '{"linear:int8" : {"bitwidth": 8, "group_size": 8}}' --checkpoint-path ${MODEL_PATH} --output-dso-path ${MODEL_OUT}/${MODEL_NAME}.so
+          python generate.py --checkpoint-path ${MODEL_PATH} --temperature 0 --dso-path ${MODEL_OUT}/${MODEL_NAME}.so > ./output_aoti
           cat ./output_aoti
 
           echo "tests complete"
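Besides dropping macos-12 from the matrix, the only change here is the rename of MODEL_DIR to MODEL_OUT, which brings this workflow's environment in line with compile-macos12.yml above and et.yml below.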
13 changes: 7 additions & 6 deletions .github/workflows/et.yml
@@ -64,12 +64,13 @@ jobs:
         run: |
           export MODEL_PATH=${PWD}/checkpoints/stories15M/stories15M.pt
           export MODEL_NAME=stories15M
+          export MODEL_OUT=/tmp
 
-          python generate.py --checkpoint-path ${MODEL_PATH} --temperature 0 > ${PWD}/output_eager
-          cat ${PWD}/output_eager
+          python generate.py --checkpoint-path ${MODEL_PATH} --temperature 0 > ${MODEL_OUT}/output_eager
+          cat ${MODEL_OUT}/output_eager
 
-          python export.py --checkpoint-path ${MODEL_PATH} --output-pte-path ${PWD}/${MODEL_NAME}.pte
-          python generate.py --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${PWD}/${MODEL_NAME}.pte > ${PWD}/output_et
-          cat ${PWD}/output_et
+          python export.py --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_OUT}/${MODEL_NAME}.pte
+          python generate.py --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_OUT}/${MODEL_NAME}.pte > ${MODEL_OUT}/output_et
+          cat ${MODEL_OUT}/output_et
 
           echo "Tests complete."
19 changes: 11 additions & 8 deletions export.py
@@ -16,7 +16,7 @@
     executorch_export_available = True
     from export_et import export_model as export_model_et
 except Exception as e:
-    print("ET EXPORT EXCEPTION: ", e)  # TODO: remove
+    # print("ET EXPORT EXCEPTION: ", e)  # TODO: remove
     executorch_export_available = False
 
 from export_aoti import export_model as export_model_aoti
@@ -39,15 +39,13 @@ def device_sync(device):
 
 class model_wrapper(nn.Module):
-    def __init__(self, model, device):
+    def __init__(self, model, device, max_seq_length = 350):
         super().__init__()
 
-        max_seq_length = 350
         with torch.device(device):
             model.setup_caches(max_batch_size=1, max_seq_length=max_seq_length)
 
         self.model = model
-        # init model here if necessary
 
 
     def forward(self, idx, input_pos):
         # input_pos: [B, 1]
@@ -74,7 +72,12 @@ def main(checkpoint_path, device, quantize = "{ }", args = None):
 
     quantize_model(model, args.quantize)
     model = model_wrapper(model, device=device)
 
+    input = (
+        torch.tensor([[1, 9038, 2501, 263, 931]], dtype=torch.int, device=device),
+        torch.tensor([0, 1, 2, 3, 4], dtype=torch.int, device=device),
+    )
+
     output_pte_path = args.output_pte_path
     output_dso_path = args.output_dso_path
 
@@ -84,13 +87,13 @@ def main(checkpoint_path, device, quantize = "{ }", args = None):
         print(f">{output_pte_path}<")
         if executorch_export_available:
             print(f"Exporting model using Executorch to {output_pte_path}")
-            export_model_et(model, device, args.output_pte_path, args)
+            export_model_et(model, input, device, args.output_pte_path, args)
         else:
             print("Export with executorch requested but Executorch could not be loaded")
     if output_dso_path:
         output_dso_path = str(os.path.abspath(output_dso_path))
         print(f"Exporting model using AOT Inductor to {output_dso_path}")
-        export_model_aoti(model, device, output_dso_path, args)
+        export_model_aoti(model, input, device, output_dso_path, args)
 
 
 def cli():
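Since max_seq_length is now a constructor argument rather than a hard-coded local, callers can size the KV caches at wrap time. A small usage sketch follows; the default of 350 matches the diff, while the 1024 override is hypothetical.

```python
import torch

# Default: caches sized for 350 positions, same behavior as before this PR.
wrapped = model_wrapper(model, device=device)

# Hypothetical override for a longer context window:
wrapped_long = model_wrapper(model, device=device, max_seq_length=1024)

# The forward signature is unchanged: token ids plus their positions.
logits = wrapped(
    torch.tensor([[1, 9038, 2501, 263, 931]], dtype=torch.int, device=device),
    torch.tensor([0, 1, 2, 3, 4], dtype=torch.int, device=device),
)
```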
6 changes: 1 addition & 5 deletions export_aoti.py
@@ -31,15 +31,11 @@ def device_sync(device):
         print(f"device={device} is not yet supported")
 
 
-def export_model(model: nn.Module, device, output_path, args=None):
+def export_model(model: nn.Module, input, device, output_path, args=None):
     max_seq_length = 350
     # with torch.device(device):
     #     model.setup_caches(max_batch_size=1, max_seq_length=max_seq_length)
 
-    input = (
-        torch.tensor([[1, 9038, 2501, 263, 931]], dtype=torch.int, device=device),
-        torch.tensor([0, 1, 2, 3, 4], dtype=torch.int, device=device),
-    )
 
     print(f"len(input)={len(input)}")
 
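With the example input now supplied by the caller, the local max_seq_length = 350 left in export_model (and the commented-out setup_caches call above it) appears to have no remaining effect in the visible hunk; cache setup happens in model_wrapper before export_model is called.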
10 changes: 5 additions & 5 deletions export_et.py
@@ -74,17 +74,17 @@ def canonical_path(path):
     return path
 
 
-def export_model(model, device, output_path, args=None) -> str:  # noqa: C901
+def export_model(model, input, device, output_path, args=None) -> str:  # noqa: C901
 
     # applied wrapper already in export.
     # export_model = model_wrapper(model, device=device)
     export_model = model
     print(export_model)
 
-    input = (
-        torch.tensor([[1]], dtype=torch.long, device=device),
-        torch.tensor([0], dtype=torch.long, device=device),
-    )
+    #input = (
+    #    torch.tensor([[1]], dtype=torch.long, device=device),
+    #    torch.tensor([0], dtype=torch.long, device=device),
+    #)
 
     state_dict = model.state_dict()
     state_dict_dtype = state_dict[next(iter(state_dict))].dtype
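Previously the ET path traced with its own single-token torch.long example; it now receives the shared five-token torch.int input built in export.py, so both backends export against the same example. The old input construction is kept here as a comment rather than deleted.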
4 changes: 2 additions & 2 deletions model_et.py
@@ -11,9 +11,9 @@ def __init__(self, config, path) -> None:
         self.config = config
         self.model_ = exec_lib._load_for_executorch(str(path))
 
-    def forward(self, x, input_pos):
+    def forward(self, idx, input_pos):
         # model_.forward expects inputs to be wrapped in a tuple
-        forward_inputs = (x.to(torch.long), input_pos.to(torch.long))
+        forward_inputs = (idx.to(torch.long), input_pos.to(torch.long))
         logits = self.model_.forward(forward_inputs)
 
         # After wrapping in a tuple, we get a list back, so we need to grab
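The x to idx rename makes the ExecuTorch runner's forward match model_wrapper.forward(idx, input_pos) in export.py, so the eager, AOTI, and ET paths now share the same argument names end to end.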