Code for SDK configs Inclusion #203

Draft: wants to merge 7 commits into main
29 changes: 28 additions & 1 deletion QEfficient/transformers/models/modeling_auto.py
@@ -23,7 +23,7 @@
from QEfficient.transformers.models.pytorch_transforms import CustomOpsTransform, KVCacheTransform, SpDTransform
from QEfficient.transformers.quantizers.auto import QEFF_AUTO_QUANTIZATION_CONFIG_MAPPING, with_replaced_quantizers
from QEfficient.transformers.quantizers.quant_transforms import AwqToMatmulNbitsTransform, GPTQToMatmulNbitsTransform
-from QEfficient.utils import constants, get_padding_shape_from_config
+from QEfficient.utils import constants, create_and_dump_qconfigs, get_padding_shape_from_config
from QEfficient.utils.cache import to_hashable

logger = logging.getLogger(__file__)
@@ -380,6 +380,33 @@ def compile(
            aic_num_cores=num_cores,
            **compiler_options,
        )

        # Construct the qconfig JSON file
        huggingface_config = self.model.config.__dict__
        pytorch_transforms = [cls.__name__ for cls in self._pytorch_transforms]
        onnx_transforms = [cls.__name__ for cls in self._onnx_transforms]
        try:
            create_and_dump_qconfigs(
                qpc_path,
                onnx_path,
                huggingface_config,
                pytorch_transforms,
                onnx_transforms,
                prefill_seq_len,
                ctx_len,
                batch_size,
                full_batch_size,
                num_devices,
                num_cores,
                mxfp6_matmul,
                mxint8_kv_cache,
                num_speculative_tokens,
                enable_qnn,
                qnn_config,
            )
        except Exception as e:
            logger.warning(f"Failed to create the qconfig file: {e}")

        return qpc_path

    # FIXME: Update this method to match with transformers AutoModelForCausalLM.generate
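For reference, a minimal sketch of how a caller can read the dumped qconfig back after compile() returns (qpc_path stands for whatever compile() returned; the key names follow the qconfigs dict added in _utils.py below):

import json
import os

# Assumes qpc_path is the value returned by qeff_model.compile(...)
qconfig_path = os.path.join(os.path.dirname(qpc_path), "qconfig.json")
with open(qconfig_path) as f:
    qconfigs = json.load(f)

compilation = qconfigs["qpc_config"]["compilation_config"]
print(compilation["num_cores"], compilation["ctx_len"])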
1 change: 1 addition & 0 deletions QEfficient/utils/__init__.py
@@ -11,6 +11,7 @@
)
from QEfficient.utils._utils import (  # noqa: F401
    check_and_assign_cache_dir,
    create_and_dump_qconfigs,
    get_num_layers_from_config,
    get_onnx_dir_name,
    get_padding_shape_from_config,
102 changes: 101 additions & 1 deletion QEfficient/utils/_utils.py
@@ -8,14 +8,16 @@
import json
import os
import subprocess
import xml.etree.ElementTree as ET
from typing import Any, Dict, List, Optional, Tuple, Union

import requests
import yaml
from huggingface_hub import login, snapshot_download
from requests.exceptions import HTTPError
from transformers import AutoTokenizer, PreTrainedTokenizer, PreTrainedTokenizerFast

-from QEfficient.utils.constants import QEFF_MODELS_DIR, Constants
+from QEfficient.utils.constants import QEFF_MODELS_DIR, Constants, QnnConstants
from QEfficient.utils.logging_utils import logger


@@ -394,3 +396,101 @@ def create_json(file_path: str, json_data: object):
        json.dump(json_data, file, indent=4)
    except Exception as e:
        print(f"Failed to create JSON File {file_path}: {e}")


def create_and_dump_qconfigs(
    qpc_path,
    onnx_path,
    huggingface_config,
    pytorch_transforms,
    onnx_transforms,
    prefill_seq_len,
    ctx_len,
    batch_size,
    full_batch_size,
    num_devices,
    num_cores,
    mxfp6_matmul,
    mxint8_kv_cache,
    num_speculative_tokens,
    enable_qnn,
    qnn_config,
):
    """
    Create a JSON file that records all the configs for a model, such as the
    huggingface config, QEff transforms, QAIC SDK version, QNN SDK details,
    compilation dir, qpc dir, and other compilation options.
    """
    qconfig_file_path = os.path.join(os.path.dirname(qpc_path), "qconfig.json")
    onnx_path = str(onnx_path)
    specializations_file_path = str(os.path.join(os.path.dirname(qpc_path), "specializations.json"))
    compile_dir = str(os.path.dirname(qpc_path))
    qnn_config_path = (
        (qnn_config if qnn_config is not None else "QEfficient/compile/qnn_config.json") if enable_qnn else None
    )

    # Extract the QAIC SDK Apps version from the SDK XML file
    try:
        tree = ET.parse(Constants.SDK_APPS_XML)
        root = tree.getroot()
        qaic_version = root.find(".//base_version").text
    except Exception as e:
        logger.warning(f"Failed to open XML file {Constants.SDK_APPS_XML}: {e}")
        qaic_version = None

    # Extract QNN SDK details from its YAML file if the environment variable is set
    qnn_sdk_details = None
    qnn_sdk_path = os.getenv(QnnConstants.QNN_SDK_PATH_ENV_VAR_NAME)
    if qnn_sdk_path:
        qnn_sdk_yaml_path = os.path.join(qnn_sdk_path, QnnConstants.QNN_SDK_YAML)
        try:
            with open(qnn_sdk_yaml_path, "r") as file:
                qnn_sdk_details = yaml.safe_load(file)
        except Exception as e:
            logger.warning(f"Failed to open YAML file {qnn_sdk_yaml_path}: {e}")
            qnn_sdk_details = None

    # Ensure all objects in the configs dictionary are JSON serializable
    def make_serializable(obj):
        if isinstance(obj, (int, float, str, bool, type(None))):
            return obj
        elif isinstance(obj, (list, tuple)):
            return [make_serializable(item) for item in obj]
        elif isinstance(obj, dict):
            return {key: make_serializable(value) for key, value in obj.items()}
        else:
            return str(obj)

    qconfigs = {
        "huggingface_config": make_serializable(huggingface_config),
        "qpc_config": {
            "QEff_config": {
                "pytorch_transforms": make_serializable(pytorch_transforms),
                "onnx_transforms": make_serializable(onnx_transforms),
                "onnx_path": onnx_path,
            },
            "compilation_config": {
                "apps_sdk_version": qaic_version,
                "compile_dir": compile_dir,
                "specializations_file_path": specializations_file_path,
                "prefill_seq_len": prefill_seq_len,
                "ctx_len": ctx_len,
                "batch_size": batch_size,
                "full_batch_size": full_batch_size,
                "num_devices": num_devices,
                "num_cores": num_cores,
                "mxfp6_matmul": mxfp6_matmul,
                "mxint8_kv_cache": mxint8_kv_cache,
                "num_speculative_tokens": num_speculative_tokens,
            },
            "qnn_config": {
                "enable_qnn": enable_qnn,
                "qnn_config_path": qnn_config_path,
            },
        },
    }

    if qnn_sdk_details:
        qconfigs["qpc_config"]["qnn_config"].update(qnn_sdk_details)

    create_json(qconfig_file_path, qconfigs)
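As a quick, self-contained illustration of the make_serializable fallback above (the helper is copied from the diff, slightly condensed, so the snippet runs on its own; the sample dict is made up):

def make_serializable(obj):
    # Same logic as the helper above: lists/tuples and dicts recurse,
    # anything non-JSON-native is stringified.
    if isinstance(obj, (int, float, str, bool, type(None))):
        return obj
    if isinstance(obj, (list, tuple)):
        return [make_serializable(item) for item in obj]
    if isinstance(obj, dict):
        return {key: make_serializable(value) for key, value in obj.items()}
    return str(obj)

print(make_serializable({"shape": (1, 32), "dtype": object()}))
# -> {'shape': [1, 32], 'dtype': '<object object at 0x...>'}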
2 changes: 2 additions & 0 deletions QEfficient/utils/constants.py
@@ -63,12 +63,14 @@ class Constants:
    MAX_QPC_LIMIT = 30
    MAX_RETRIES = 5  # Sets the maximum number of retry attempts for downloading a model using huggingface_hub snapshot_download
    NUM_SPECULATIVE_TOKENS = 2
    SDK_APPS_XML = "/opt/qti-aic/versions/apps.xml"  # This XML file is parsed to find out the SDK version.


@dataclass
class QnnConstants:
    # QNN SDK path, read from an environment variable.
    QNN_SDK_PATH_ENV_VAR_NAME = "QNN_SDK_ROOT"
    QNN_SDK_YAML = "sdk.yaml"

    # QNN compilation tools
    QAIRT_CONVERTER = "{}/bin/{}/qairt-converter"
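For context, the base_version lookup in _utils.py assumes that the apps.xml pointed to by SDK_APPS_XML nests a <base_version> element somewhere below the root; a runnable sketch against a made-up fragment (the real file's layout may differ):

import xml.etree.ElementTree as ET

# Illustrative fragment only, not the actual contents of /opt/qti-aic/versions/apps.xml
sample = """<versions>
    <apps>
        <base_version>1.18.2.0</base_version>
    </apps>
</versions>"""

root = ET.fromstring(sample)
print(root.find(".//base_version").text)  # -> 1.18.2.0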
5 changes: 4 additions & 1 deletion tests/qnn_tests/test_causal_lm_models_qnn.py
@@ -5,6 +5,8 @@
#
# -----------------------------------------------------------------------------

import os

import numpy as np
import pytest
from transformers import AutoModelForCausalLM
@@ -98,14 +100,15 @@ def check_causal_lm_pytorch_vs_kv_vs_ort_vs_ai100(
    if not get_available_device_id():
        pytest.skip("No available devices to run model on Cloud AI 100")

-    _ = qeff_model.compile(
+    qpc_path = qeff_model.compile(
        prefill_seq_len=prompt_len,
        ctx_len=ctx_len,
        num_cores=14,
        mxfp6=False,
        aic_enable_depth_first=False,
        enable_qnn=True,
    )
    assert os.path.isfile(os.path.join(os.path.dirname(qpc_path), "qconfig.json"))
    exec_info = qeff_model.generate(tokenizer, prompts=Constants.INPUT_STR)
    cloud_ai_100_tokens = exec_info.generated_ids[0]  # Because we always run for single input and single batch size
    gen_len = ort_tokens.shape[-1]
4 changes: 3 additions & 1 deletion tests/transformers/models/test_causal_lm_models.py
@@ -5,6 +5,7 @@
#
# -----------------------------------------------------------------------------

import os
from typing import Optional

import numpy as np
@@ -122,14 +123,15 @@ def check_causal_lm_pytorch_vs_kv_vs_ort_vs_ai100(
    if not get_available_device_id():
        pytest.skip("No available devices to run model on Cloud AI 100")

-    _ = qeff_model.compile(
+    qpc_path = qeff_model.compile(
        prefill_seq_len=prompt_len,
        ctx_len=ctx_len,
        num_cores=14,
        mxfp6=False,
        aic_enable_depth_first=False,
        num_speculative_tokens=num_speculative_tokens,
    )
    assert os.path.isfile(os.path.join(os.path.dirname(qpc_path), "qconfig.json"))
    exec_info = qeff_model.generate(tokenizer, prompts=Constants.INPUT_STR)
    cloud_ai_100_tokens = exec_info.generated_ids[0]  # Because we always run for single input and single batch size
    gen_len = ort_tokens.shape[-1]