Skip to content

Commit

Permalink
QNN Compilation Support (#171)
Browse files Browse the repository at this point in the history
QNN Compilation Support in Command Line Interface commands

1. Infer/Compile API changes to include --enable_qnn [Optional QNN Config File].
2. Added qnn_config.json file format.
3. Added generate_qnn_network_specialization_config.py to create the custom_io_config.yaml file for the QNN compilation step.
4. Modified utils/constants.py to include the QnnConstants required to support QNN compilation.
5. Updated quick_start.md to include QNN compilation steps.
6. Added QNN compilation utilities in _utils.py.
7. Added unit tests for the QNN compilation path.

Signed-off-by: Shubham Agrawal <[email protected]>
  • Loading branch information
shubhagr-quic authored Dec 18, 2024
1 parent 1d7c624 commit dc2c509
Show file tree
Hide file tree
Showing 14 changed files with 880 additions and 44 deletions.
14 changes: 14 additions & 0 deletions QEfficient/cloud/compile.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,20 @@
action="store_true",
help="If passed, this option allows MXINT8 compression of MDP IO traffic",
)
# ``--enable_qnn`` is a plain on/off switch: ``store_true`` consumes no value.
# The config path a user types after it is picked up by the optional
# ``qnn_config`` positional registered right below.
parser.add_argument(
    "--enable_qnn",
    "--enable-qnn",
    action="store_true",
    default=False,
    # Adjacent literals with explicit trailing spaces keep the sentences
    # separated regardless of how the source lines are indented (a
    # backslash continuation inside the literal depends on that indentation).
    help=(
        "Enables QNN. Optionally, a configuration file can be provided with [--enable_qnn CONFIG_FILE]. "
        "If not provided, the default configuration will be used. "
        "Sample Config: QEfficient/cloud/compile/qnn_config.json"
    ),
)
parser.add_argument(
    "qnn_config",
    nargs="?",  # optional positional: stays None when no path is given
    type=str,
    help="Optional path to a QNN config file, given after --enable_qnn. Defaults to None.",
)
# FIXME(ochougul): Allow extra compilation arguments
args = parser.parse_args()
# Every parsed option is forwarded as a keyword argument, so the argparse
# destination names double as the keyword names passed to the compile call.
QEfficient.compile(**vars(args))
32 changes: 31 additions & 1 deletion QEfficient/cloud/infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ def main(
cache_dir: Optional[str] = None,
hf_token: Optional[str] = None,
allow_mxint8_mdp_io: bool = False,
enable_qnn: Optional[bool] = False,
qnn_config: Optional[str] = None,
) -> None:
"""
1. Check if compiled qpc for given config already exists, if it does jump to execute, else
Expand All @@ -62,6 +64,8 @@ def main(
:cache_dir (str): Cache dir where downloaded HuggingFace files are stored. ``Defaults to None.``
:hf_token (str): HuggingFace login token to access private repos. ``Defaults to None.``
:allow_mxint8_mdp_io (bool): Allows MXINT8 compression of MDP IO traffic. ``Defaults to False.``
:enable_qnn (bool): Enables QNN Compilation. ``Defaults to False.``
:qnn_config (str): Path of QNN Config parameters file. ``Defaults to None.``
.. code-block:: bash
Expand All @@ -76,7 +80,17 @@ def main(
)

qpc_dir_path = get_qpc_dir_path(
model_name, num_cores, mos, batch_size, prompt_len, ctx_len, mxfp6, mxint8, device_group, full_batch_size
model_name,
num_cores,
mos,
batch_size,
prompt_len,
ctx_len,
mxfp6,
mxint8,
device_group,
full_batch_size,
enable_qnn=enable_qnn,
)

# Handle qpc generation
Expand Down Expand Up @@ -107,6 +121,8 @@ def main(
device_group=device_group,
full_batch_size=full_batch_size,
allow_mxint8_mdp_io=allow_mxint8_mdp_io,
enable_qnn=enable_qnn,
qnn_config=qnn_config,
)

#########
Expand Down Expand Up @@ -206,6 +222,20 @@ def main(
action="store_true",
help="If passed, this option allows MXINT8 compression of MDP IO traffic",
)
# ``--enable_qnn`` is a plain on/off switch: ``store_true`` consumes no value.
# The config path a user types after it is picked up by the optional
# ``qnn_config`` positional registered right below.
parser.add_argument(
    "--enable_qnn",
    "--enable-qnn",
    action="store_true",
    default=False,
    # Adjacent literals with explicit trailing spaces keep the sentences
    # separated regardless of how the source lines are indented (a
    # backslash continuation inside the literal depends on that indentation).
    help=(
        "Enables QNN. Optionally, a configuration file can be provided with [--enable_qnn CONFIG_FILE]. "
        "If not provided, the default configuration will be used. "
        "Sample Config: QEfficient/cloud/compile/qnn_config.json"
    ),
)
parser.add_argument(
    "qnn_config",
    nargs="?",  # optional positional: stays None when no path is given
    type=str,
    help="Optional path to a QNN config file, given after --enable_qnn. Defaults to None.",
)

args = parser.parse_args()
if args.verbose:
Expand Down
71 changes: 47 additions & 24 deletions QEfficient/compile/compile_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import warnings
from typing import List, Optional, Tuple

from QEfficient.compile.qnn_compiler import compile as qnn_compile
from QEfficient.utils.logging_utils import logger


Expand Down Expand Up @@ -133,6 +134,8 @@ def compile(
custom_io_file_path: Optional[str] = None,
full_batch_size: Optional[int] = None,
allow_mxint8_mdp_io: Optional[bool] = False,
enable_qnn: Optional[bool] = False,
qnn_config: Optional[str] = None,
**kwargs,
) -> str:
"""
Expand All @@ -157,6 +160,8 @@ def compile(
:mxint8 (bool): Compress Present/Past KV to ``MXINT8`` using ``CustomIO`` config. ``Defaults to False.``
:custom_io_file_path (str): Path to ``customIO`` file (formatted as a string). ``Defaults to None.``
:allow_mxint8_mdp_io (bool): Allows MXINT8 compression of MDP IO traffic ``Defaults to False.``
:enable_qnn (bool): Enables QNN Compilation. ``Defaults to False.``
:qnn_config (str): Path of QNN Config parameters file. ``Defaults to None.``
Returns:
:str: Path to compiled ``qpc`` package.
Expand All @@ -175,29 +180,47 @@ def compile(
full_batch_size=full_batch_size,
)

# Select the customIO config based on the mx flag.
custom_io_file_name = "custom_io_int8.yaml" if mxint8 else "custom_io_fp16.yaml"

if custom_io_file_path is None:
custom_io_file_path = os.path.join(os.path.dirname(onnx_path), custom_io_file_name)

if not os.path.isfile(custom_io_file_path):
raise FileNotFoundError(
f"Custom IO file {custom_io_file_name} is not present at the expected path {custom_io_file_path}. Please pass the correct file path or rerun infer/export API"
if enable_qnn:
qpc_path = qnn_compile(
onnx_path=onnx_path,
qpc_path=qpc_path,
num_cores=num_cores,
batch_size=batch_size,
prompt_len=prompt_len,
ctx_len=ctx_len,
mxfp6=mxfp6,
mxint8=mxint8,
allow_mxint8_mdp_io=allow_mxint8_mdp_io,
aic_enable_depth_first=aic_enable_depth_first,
mos=mos,
device_group=device_group,
full_batch_size=full_batch_size,
qnn_config=qnn_config,
)

_, qpc_path = compile_kv_model_on_cloud_ai_100(
onnx_path=onnx_path,
specializations_json=specialization_json_path,
num_cores=num_cores,
custom_io_path=custom_io_file_path,
base_path=qpc_path,
mxfp6=mxfp6,
aic_enable_depth_first=aic_enable_depth_first,
allow_mxint8_mdp_io=allow_mxint8_mdp_io,
mos=mos,
device_group=device_group,
)

logger.info(f"Compiled QPC files can be found here: {qpc_path}")
logger.info(f"QNN Compiled QPC files can be found here: {qpc_path}")
else:
# Select the customIO config based on the mx flag.
custom_io_file_name = "custom_io_int8.yaml" if mxint8 else "custom_io_fp16.yaml"

if custom_io_file_path is None:
custom_io_file_path = os.path.join(os.path.dirname(onnx_path), custom_io_file_name)

if not os.path.isfile(custom_io_file_path):
raise FileNotFoundError(
f"Custom IO file {custom_io_file_name} is not present at the expected path {custom_io_file_path}. Please pass the correct file path or rerun infer/export API"
)

_, qpc_path = compile_kv_model_on_cloud_ai_100(
onnx_path=onnx_path,
specializations_json=specialization_json_path,
num_cores=num_cores,
custom_io_path=custom_io_file_path,
base_path=qpc_path,
mxfp6=mxfp6,
aic_enable_depth_first=aic_enable_depth_first,
allow_mxint8_mdp_io=allow_mxint8_mdp_io,
mos=mos,
device_group=device_group,
)
logger.info(f"Compiled QPC files can be found here: {qpc_path}")
return qpc_path
Loading

0 comments on commit dc2c509

Please sign in to comment.