Improve the script to add Q, DQ nodes around EPContext node (#20107)
Improve the script to add Q, DQ nodes around the EPContext node so that the wrapper model uses float data as inputs and outputs. Users don't need to quantize or dequantize the data in their application.
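For context, the inserted QuantizeLinear/DequantizeLinear pair performs the float-to-integer mapping that users previously had to do by hand. A minimal sketch of that computation (not part of the commit), assuming hypothetical uint8 parameters taken from a QNN JSON:

    import numpy as np

    scale, zero_point = 0.0235, 128  # hypothetical values; zero_point = -(QNN offset)

    def quantize_linear(x):
        # ONNX QuantizeLinear for uint8: saturate(round(x / scale) + zero_point)
        return np.clip(np.round(x / scale) + zero_point, 0, 255).astype(np.uint8)

    def dequantize_linear(xq):
        # ONNX DequantizeLinear: (xq - zero_point) * scale
        return (xq.astype(np.float32) - zero_point) * scale

    x = np.linspace(-1.0, 1.0, 5, dtype=np.float32)
    assert np.allclose(dequantize_linear(quantize_linear(x)), x, atol=scale)

With this commit, that arithmetic happens inside the wrapper graph instead of in application code.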
HectorSVC authored Apr 5, 2024
1 parent c529e05 commit 1ccb164
Showing 1 changed file with 78 additions and 7 deletions.
onnxruntime/python/tools/qnn/gen_qnn_ctx_onnx_model.py (78 additions, 7 deletions)
@@ -14,9 +14,17 @@ class QnnTensorStruct:
     def __init__(self):
         self.name = ""
         self.onnx_data_type = TensorProto.FLOAT
+        self.is_quantized = False
+        self.scale = 0.0
+        self.offset = 0
         self.dim = []


+def is_quantized_data_type(qnn_data_type):
+    # QNN_DATATYPE_UFIXED_POINT_8 QNN_DATATYPE_UFIXED_POINT_16 QNN_DATATYPE_FIXED_POINT_8 QNN_DATATYPE_FIXED_POINT_16
+    return qnn_data_type == 0x0408 or qnn_data_type == 0x0416 or qnn_data_type == 0x0308 or qnn_data_type == 0x0316
+
+
 def qnn_data_type_to_onnx_data_type(qnn_data_type):
     # QNN_DATATYPE_UFIXED_POINT_8 QNN_DATATYPE_UINT_8
     if qnn_data_type == 0x0408 or qnn_data_type == 0x0108:
@@ -73,15 +81,29 @@ def parse_qnn_json_file(qnn_json_file_path, qnn_input_tensor_dic, qnn_output_tensor_dic):
                 qnn_tensor = QnnTensorStruct()
                 qnn_tensor.name = qnn_tensor_name
                 qnn_tensor.onnx_data_type = qnn_data_type_to_onnx_data_type(qnn_tensor_attribute["data_type"])
+                qnn_tensor.is_quantized = is_quantized_data_type(qnn_tensor_attribute["data_type"])
                 qnn_tensor.dim = qnn_tensor_attribute["dims"]
+                if (
+                    qnn_tensor_attribute["quant_params"]["definition"] == 1
+                    and qnn_tensor_attribute["quant_params"]["encoding"] == 0
+                ):
+                    qnn_tensor.scale = qnn_tensor_attribute["quant_params"]["scale_offset"]["scale"]
+                    qnn_tensor.offset = 0 - qnn_tensor_attribute["quant_params"]["scale_offset"]["offset"]
                 qnn_input_tensor_dic[qnn_tensor_name] = qnn_tensor

             # Get all graph outputs
             if qnn_tensor_attribute["type"] == 1:
                 qnn_tensor = QnnTensorStruct()
                 qnn_tensor.name = qnn_tensor_name
                 qnn_tensor.onnx_data_type = qnn_data_type_to_onnx_data_type(qnn_tensor_attribute["data_type"])
+                qnn_tensor.is_quantized = is_quantized_data_type(qnn_tensor_attribute["data_type"])
                 qnn_tensor.dim = qnn_tensor_attribute["dims"]
+                if (
+                    qnn_tensor_attribute["quant_params"]["definition"] == 1
+                    and qnn_tensor_attribute["quant_params"]["encoding"] == 0
+                ):
+                    qnn_tensor.scale = qnn_tensor_attribute["quant_params"]["scale_offset"]["scale"]
+                    qnn_tensor.offset = 0 - qnn_tensor_attribute["quant_params"]["scale_offset"]["offset"]
                 qnn_output_tensor_dic[qnn_tensor_name] = qnn_tensor

     assert (
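A note on the `0 - ...["offset"]` negation above: QNN's scale_offset encoding treats the offset as the negative of an ONNX zero point (float = scale * (quant + offset) in QNN terms versus float = scale * (quant - zero_point) in ONNX terms), so the script flips the sign when populating the tensor struct. A tiny illustration with made-up numbers:

    qnn_scale, qnn_offset = 0.1, -128  # hypothetical values from a QNN JSON
    onnx_zero_point = 0 - qnn_offset   # 128, as the script computes

    quantized = 200
    assert qnn_scale * (quantized + qnn_offset) == qnn_scale * (quantized - onnx_zero_point)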
@@ -120,13 +142,33 @@ def main():
             ep_cache_context_content = file.read()
             ctx_embed_mode = 1

-    qnn_inputs = []
-    for qnn_input in qnn_input_tensor_dic.values():
-        qnn_inputs.append(helper.make_tensor_value_info(qnn_input.name, qnn_input.onnx_data_type, qnn_input.dim))
+    graph_nodes = []
+    ini_list = []
+    value_infos = []

-    qnn_outputs = []
-    for qnn_output in qnn_output_tensor_dic.values():
-        qnn_outputs.append(helper.make_tensor_value_info(qnn_output.name, qnn_output.onnx_data_type, qnn_output.dim))
+    model_inputs = []
+    for qnn_input in qnn_input_tensor_dic.values():
+        if qnn_input.is_quantized:
+            q_scale_input_name = qnn_input.name + "_scale"
+            q_offset_input_name = qnn_input.name + "_zp"
+            q_scale = helper.make_tensor(q_scale_input_name, TensorProto.FLOAT, [], [qnn_input.scale])
+            ini_list.append(q_scale)
+            q_offset = helper.make_tensor(q_offset_input_name, qnn_input.onnx_data_type, [], [qnn_input.offset])
+            ini_list.append(q_offset)
+            input_name = qnn_input.name + "_dq"
+
+            q_node = helper.make_node(
+                "QuantizeLinear",
+                name=qnn_input.name,
+                inputs=[input_name, q_scale_input_name, q_offset_input_name],
+                outputs=[qnn_input.name],
+            )
+
+            graph_nodes.append(q_node)
+            model_inputs.append(helper.make_tensor_value_info(input_name, TensorProto.FLOAT, qnn_input.dim))
+            value_infos.append(helper.make_tensor_value_info(qnn_input.name, qnn_input.onnx_data_type, qnn_input.dim))
+        else:
+            model_inputs.append(helper.make_tensor_value_info(qnn_input.name, qnn_input.onnx_data_type, qnn_input.dim))

     qnn_ep_context_node = helper.make_node(
         "EPContext",
@@ -138,8 +180,37 @@
         source="Qnn",
         domain="com.microsoft",
     )
+    graph_nodes.append(qnn_ep_context_node)

-    graph_def = helper.make_graph([qnn_ep_context_node], "qnn-onnx-model", qnn_inputs, qnn_outputs)
+    model_outputs = []
+    for qnn_output in qnn_output_tensor_dic.values():
+        if qnn_output.is_quantized:
+            dq_scale_input_name = qnn_output.name + "_scale"
+            dq_offset_input_name = qnn_output.name + "_zp"
+            dq_scale = helper.make_tensor(dq_scale_input_name, TensorProto.FLOAT, [], [qnn_output.scale])
+            ini_list.append(dq_scale)
+            dq_offset = helper.make_tensor(dq_offset_input_name, qnn_output.onnx_data_type, [], [qnn_output.offset])
+            ini_list.append(dq_offset)
+            output_name = qnn_output.name + "_dq"
+
+            dq_node = helper.make_node(
+                "DequantizeLinear",
+                name=output_name,
+                inputs=[qnn_output.name, dq_scale_input_name, dq_offset_input_name],
+                outputs=[output_name],
+            )
+
+            graph_nodes.append(dq_node)
+            model_outputs.append(helper.make_tensor_value_info(output_name, TensorProto.FLOAT, qnn_output.dim))
+            value_infos.append(
+                helper.make_tensor_value_info(qnn_output.name, qnn_output.onnx_data_type, qnn_output.dim)
+            )
+        else:
+            model_outputs.append(
+                helper.make_tensor_value_info(qnn_output.name, qnn_output.onnx_data_type, qnn_output.dim)
+            )
+
+    graph_def = helper.make_graph(graph_nodes, "qnn-onnx-model", model_inputs, model_outputs, ini_list, "", value_infos)

     model_def = helper.make_model(graph_def, producer_name="MS")
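The net effect: the generated wrapper model accepts and returns float tensors, with quantization handled inside the graph. A hypothetical usage sketch (file name and input shape are placeholders, not from this commit):

    import numpy as np
    import onnxruntime as ort

    # Hypothetical file name; the wrapper model is whatever
    # gen_qnn_ctx_onnx_model.py generated for your QNN context binary.
    session = ort.InferenceSession("qnn_ctx_wrapper.onnx", providers=["QNNExecutionProvider"])

    input_name = session.get_inputs()[0].name
    x = np.random.rand(1, 3, 224, 224).astype(np.float32)  # float in...
    outputs = session.run(None, {input_name: x})  # ...float out; no manual Q/DQ needed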
