Skip to content

Commit

Permalink
update
Browse files — browse the repository at this point in the history
  • Loading branch information
wangyems committed Jan 25, 2024
1 parent 97ee6ee commit 990c1da
Show file tree
Hide file tree
Showing 2 changed files with 253 additions and 238 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -297,13 +297,13 @@ def optimize_phi2_onnx(self, onnx_path: str, onnx_path_opt: str, use_fp16: bool
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

converter = ConvertPhi2ToONNX(model_class, device)
converter.dynamo_export("phi-2_temp.onnx")
# TODO: the preprocessed ONNX model takes up a large amount of disk space
converter.preprocess_onnx(
"phi-2_temp.onnx",
"phi-2.onnx",
"modeling_phi_PhiModel_model_1",
use_gqa=True,
)
converter.erase_onnx_model("phi-2_temp.onnx")
# converter.dynamo_export("phi-2_temp.onnx")
# # TODO: the preprocessed ONNX model takes up a large amount of disk space
# converter.preprocess_onnx(
# "phi-2_temp.onnx",
# "phi-2.onnx",
# "modeling_phi_PhiModel_model_1",
# use_gqa=True,
# )
# converter.erase_onnx_model("phi-2_temp.onnx")
converter.optimize_phi2_onnx("phi-2.onnx", "phi-2_opt.onnx", use_fp16=True)
Loading

0 comments on commit 990c1da

Please sign in to comment.