From c96923732141bf3823f1b5acb666c407f07fa9c2 Mon Sep 17 00:00:00 2001 From: simonjub <78098752+simonjub@users.noreply.github.com> Date: Sun, 17 Sep 2023 15:19:32 -0400 Subject: [PATCH 1/3] [TRT EP] Fix ProviderOptions functions (#17567) ### Description When trying to use the TRT EP option trt_extra_plugin_lib_paths I noticed that my custom op library was not being loaded by the EP. After some digging I found that code was missing to update this option when UpdateTensorRTProviderOptions() is used to set it. At the same time I noticed that char arrays were allocated in that function and wondered where they are de-allocated. When I found it was done in ReleaseTensorRTProviderOptions(), I noticed that a few de-allocations were missing. ### Motivation and Context This PR fixes the problems described above. --- .../tensorrt/tensorrt_provider_factory.cc | 14 ++++++++++++++ onnxruntime/core/session/provider_bridge_ort.cc | 11 ++++++++--- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc index 90aeeb64c9d24..18ec113734b97 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc @@ -202,6 +202,20 @@ struct Tensorrt_Provider : Provider { trt_options.trt_tactic_sources = (const char*)dest; } + str_size = internal_options.extra_plugin_lib_paths.size(); + if (str_size == 0) { + trt_options.trt_extra_plugin_lib_paths = nullptr; + } else { + dest = new char[str_size + 1]; +#ifdef _MSC_VER + strncpy_s(dest, str_size + 1, internal_options.extra_plugin_lib_paths.c_str(), str_size); +#else + strncpy(dest, internal_options.extra_plugin_lib_paths.c_str(), str_size); +#endif + dest[str_size] = '\0'; + trt_options.trt_extra_plugin_lib_paths = (const char*)dest; + } + str_size = internal_options.profile_min_shapes.size(); if (str_size == 0) { 
trt_options.trt_profile_min_shapes = nullptr; diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index 8f0a5aeaa3975..bf7a3bbd9d380 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -1930,9 +1930,14 @@ ORT_API_STATUS_IMPL(OrtApis::GetTensorRTProviderOptionsByName, ORT_API(void, OrtApis::ReleaseTensorRTProviderOptions, _Frees_ptr_opt_ OrtTensorRTProviderOptionsV2* ptr) { #ifdef USE_TENSORRT if (ptr != nullptr) { - delete ptr->trt_int8_calibration_table_name; - delete ptr->trt_engine_cache_path; - delete ptr->trt_engine_decryption_lib_path; + delete[] ptr->trt_int8_calibration_table_name; + delete[] ptr->trt_engine_cache_path; + delete[] ptr->trt_engine_decryption_lib_path; + delete[] ptr->trt_tactic_sources; + delete[] ptr->trt_extra_plugin_lib_paths; + delete[] ptr->trt_profile_min_shapes; + delete[] ptr->trt_profile_max_shapes; + delete[] ptr->trt_profile_opt_shapes; } std::unique_ptr p(ptr); From af14ae8050aa77d40031e7794fb2bf38bf30acef Mon Sep 17 00:00:00 2001 From: PeixuanZuo <94887879+PeixuanZuo@users.noreply.github.com> Date: Mon, 18 Sep 2023 13:34:39 +0800 Subject: [PATCH 2/3] [ROCm] Update whisper benchmark script (#17391) - update whisper benchmark for ROCm EP. 
--- .../tools/transformers/benchmark_helper.py | 172 ++++++++++++------ .../transformers/models/whisper/benchmark.py | 60 ++++-- .../models/whisper/benchmark_all.py | 57 +++--- 3 files changed, 198 insertions(+), 91 deletions(-) diff --git a/onnxruntime/python/tools/transformers/benchmark_helper.py b/onnxruntime/python/tools/transformers/benchmark_helper.py index f4d3f2fa1c317..67d3c95922a87 100644 --- a/onnxruntime/python/tools/transformers/benchmark_helper.py +++ b/onnxruntime/python/tools/transformers/benchmark_helper.py @@ -8,7 +8,10 @@ import logging import os import random +import sys +import time import timeit +from abc import ABC, abstractmethod from concurrent.futures import ThreadPoolExecutor from datetime import datetime from enum import Enum @@ -439,68 +442,127 @@ def get_gpu_info() -> Optional[List[Dict[str, Any]]]: return None -def measure_memory(is_gpu, func): - class MemoryMonitor: - def __init__(self, keep_measuring=True): - self.keep_measuring = keep_measuring +class MemoryMonitor(ABC): + def __init__(self, keep_measuring=True): + self.keep_measuring = keep_measuring - def measure_cpu_usage(self): - import psutil + def measure_cpu_usage(self): + import psutil - max_usage = 0 + max_usage = 0 + while True: + max_usage = max(max_usage, psutil.Process(os.getpid()).memory_info().rss / 1024**2) + sleep(0.005) # 5ms + if not self.keep_measuring: + break + return max_usage + + @abstractmethod + def measure_gpu_usage(self) -> Optional[List[Dict[str, Any]]]: + raise NotImplementedError() + + +class CudaMemoryMonitor(MemoryMonitor): + def __init__(self, keep_measuring=True): + super().__init__(keep_measuring) + + def measure_gpu_usage(self) -> Optional[List[Dict[str, Any]]]: + from py3nvml.py3nvml import ( + NVMLError, + nvmlDeviceGetCount, + nvmlDeviceGetHandleByIndex, + nvmlDeviceGetMemoryInfo, + nvmlDeviceGetName, + nvmlInit, + nvmlShutdown, + ) + + max_gpu_usage = [] + gpu_name = [] + try: + nvmlInit() + device_count = nvmlDeviceGetCount() + if not 
isinstance(device_count, int): + logger.error(f"nvmlDeviceGetCount result is not integer: {device_count}") + return None + + max_gpu_usage = [0 for i in range(device_count)] + gpu_name = [nvmlDeviceGetName(nvmlDeviceGetHandleByIndex(i)) for i in range(device_count)] while True: - max_usage = max(max_usage, psutil.Process(os.getpid()).memory_info().rss / 1024**2) + for i in range(device_count): + info = nvmlDeviceGetMemoryInfo(nvmlDeviceGetHandleByIndex(i)) + if isinstance(info, str): + logger.error(f"nvmlDeviceGetMemoryInfo returns str: {info}") + return None + max_gpu_usage[i] = max(max_gpu_usage[i], info.used / 1024**2) sleep(0.005) # 5ms if not self.keep_measuring: break - return max_usage - - def measure_gpu_usage(self) -> Optional[List[Dict[str, Any]]]: - from py3nvml.py3nvml import ( - NVMLError, - nvmlDeviceGetCount, - nvmlDeviceGetHandleByIndex, - nvmlDeviceGetMemoryInfo, - nvmlDeviceGetName, - nvmlInit, - nvmlShutdown, - ) + nvmlShutdown() + return [ + { + "device_id": i, + "name": gpu_name[i], + "max_used_MB": max_gpu_usage[i], + } + for i in range(device_count) + ] + except NVMLError as error: + logger.error("Error fetching GPU information using nvml: %s", error) + return None - max_gpu_usage = [] - gpu_name = [] - try: - nvmlInit() - device_count = nvmlDeviceGetCount() - if not isinstance(device_count, int): - logger.error(f"nvmlDeviceGetCount result is not integer: {device_count}") - return None - - max_gpu_usage = [0 for i in range(device_count)] - gpu_name = [nvmlDeviceGetName(nvmlDeviceGetHandleByIndex(i)) for i in range(device_count)] - while True: - for i in range(device_count): - info = nvmlDeviceGetMemoryInfo(nvmlDeviceGetHandleByIndex(i)) - if isinstance(info, str): - logger.error(f"nvmlDeviceGetMemoryInfo returns str: {info}") - return None - max_gpu_usage[i] = max(max_gpu_usage[i], info.used / 1024**2) - sleep(0.005) # 5ms - if not self.keep_measuring: - break - nvmlShutdown() - return [ - { - "device_id": i, - "name": gpu_name[i], - 
"max_used_MB": max_gpu_usage[i], - } - for i in range(device_count) - ] - except NVMLError as error: - logger.error("Error fetching GPU information using nvml: %s", error) - return None - monitor = MemoryMonitor(False) +class RocmMemoryMonitor(MemoryMonitor): + def __init__(self, keep_measuring=True): + super().__init__(keep_measuring) + rocm_smi_path = "/opt/rocm/libexec/rocm_smi" + if os.path.exists(rocm_smi_path): + if rocm_smi_path not in sys.path: + sys.path.append(rocm_smi_path) + try: + import rocm_smi + + self.rocm_smi = rocm_smi + self.rocm_smi.initializeRsmi() + except ImportError: + self.rocm_smi = None + + def get_used_memory(self, dev): + if self.rocm_smi is None: + return -1 + return self.rocm_smi.getMemInfo(dev, "VRAM")[0] / 1024 / 1024 + + def measure_gpu_usage(self): + if self.rocm_smi is None: + return None + + device_count = len(self.rocm_smi.listDevices()) if self.rocm_smi is not None else 0 + max_gpu_usage = [0 for i in range(device_count)] + gpu_name = [f"GPU{i}" for i in range(device_count)] + while True: + for i in range(device_count): + max_gpu_usage[i] = max(max_gpu_usage[i], self.get_used_memory(i)) + time.sleep(0.005) # 2ms + if not self.keep_measuring: + break + return [ + { + "device_id": i, + "name": gpu_name[i], + "max_used_MB": max_gpu_usage[i], + } + for i in range(device_count) + ] + + +def measure_memory(is_gpu, func, monitor_type="cuda"): + memory_monitor_type = None + if monitor_type == "rocm": + memory_monitor_type = RocmMemoryMonitor + else: + memory_monitor_type = CudaMemoryMonitor + + monitor = memory_monitor_type(False) if is_gpu: memory_before_test = monitor.measure_gpu_usage() @@ -508,7 +570,7 @@ def measure_gpu_usage(self) -> Optional[List[Dict[str, Any]]]: return None with ThreadPoolExecutor() as executor: - monitor = MemoryMonitor() + monitor = memory_monitor_type() mem_thread = executor.submit(monitor.measure_gpu_usage) try: fn_thread = executor.submit(func) diff --git 
a/onnxruntime/python/tools/transformers/models/whisper/benchmark.py b/onnxruntime/python/tools/transformers/models/whisper/benchmark.py index 07995f0a38e26..283528bea7465 100644 --- a/onnxruntime/python/tools/transformers/models/whisper/benchmark.py +++ b/onnxruntime/python/tools/transformers/models/whisper/benchmark.py @@ -11,7 +11,7 @@ import psutil import torch import whisper -from benchmark_helper import setup_logger +from benchmark_helper import measure_memory, setup_logger from onnxruntime_extensions import get_library_path from optimum.onnxruntime import ORTModelForSpeechSeq2Seq from torch.profiler import ProfilerActivity, profile, record_function @@ -19,7 +19,6 @@ from transformers import AutoModelForSpeechSeq2Seq, WhisperConfig, WhisperProcessor import onnxruntime as ort -from onnxruntime.transformers.benchmark_helper import measure_memory logger = logging.getLogger(__name__) @@ -123,6 +122,9 @@ def get_model(args: argparse.Namespace): if args.verbose: sess_options.log_verbosity_level = 1 sess_options.log_severity_level = 1 + if args.tune: + ort.set_default_logger_severity(0) + ort.set_default_logger_verbosity(0) else: raise Exception(f"Cannot recognize {args.benchmark_type}") @@ -159,6 +161,9 @@ def get_model(args: argparse.Namespace): def time_fn(args, fn, inputs): + warmup_inputs = inputs[0] if type(inputs) is tuple else inputs + benchmark_inputs = inputs[1] if type(inputs) is tuple else inputs + # Warm up warmup_range = ( range(args.warmup_runs) @@ -167,11 +172,11 @@ def time_fn(args, fn, inputs): ) if args.verbose: - outputs = fn(inputs) + outputs = fn(warmup_inputs) logger.info(outputs) for _ in warmup_range: - fn(inputs) + fn(warmup_inputs) # Benchmark if args.device != "cpu": @@ -184,7 +189,7 @@ def time_fn(args, fn, inputs): else trange(args.num_runs, file=sys.stdout, desc="Benchmark") ) for _ in bench_range: - fn(inputs) + fn(benchmark_inputs) if args.device != "cpu": torch.cuda.synchronize() @@ -244,7 +249,7 @@ def measure_fn(args, fn, inputs): 
# Measure memory usage gc.collect() torch.cuda.empty_cache() - measure_memory(is_gpu=(args.device != "cpu"), func=lambda: fn(inputs)) + measure_memory(is_gpu=(args.device != "cpu"), func=lambda: fn(inputs), monitor_type=args.monitor_type) # Flush output so memory usage is printed sys.stdout.flush() @@ -255,7 +260,7 @@ def run_hf_inference(args, inputs, model): def get_pred_ids(inputs): # Inference pass with predicted token ids generation predicted_ids = model.generate(**inputs) - return predicted_ids, [""] + return predicted_ids def gen_and_dec(inputs): # Inference pass with generation and decoding @@ -315,7 +320,7 @@ def gen_and_dec(inputs): def run_ort_inference(args, inputs, model): - def prepare_ort_inputs(inputs): + def prepare_ort_inputs(inputs, warmup=False): # Check that all model inputs will be provided model_inputs = set(map(lambda model_input: model_input.name, model.get_inputs())) user_inputs = set(inputs.keys()) @@ -324,6 +329,9 @@ def prepare_ort_inputs(inputs): logger.error(f"The following model inputs are missing: {missing_inputs}") raise Exception("There are missing inputs to the model. 
Please add them and try again.") + if warmup and args.tune: + inputs["min_length"] = inputs["max_length"] + # Remove unnecessary inputs from model inputs unnecessary_inputs = user_inputs - model_inputs if len(unnecessary_inputs): @@ -352,6 +360,13 @@ def without_io_binding(inputs): outputs = model.run(None, inputs) return outputs + def handle_output(output): + if args.eos_token_id in output: + first_end = np.where(output == args.eos_token_id)[0][0] + return output[: first_end + 1] + + return output + generate_fn = with_io_binding if args.device != "cpu" else without_io_binding ort_inputs = prepare_ort_inputs(inputs) @@ -367,7 +382,12 @@ def without_io_binding(inputs): # ORT evaluation logger.info("\nEvaluating ONNX Runtime...") - time_fn(args, generate_fn, ort_inputs) + ort_evaluate_inputs = ort_inputs + if args.tune: + ort_warmup_inputs = prepare_ort_inputs(inputs, warmup=True) + ort_evaluate_inputs = (ort_warmup_inputs, ort_inputs) + + time_fn(args, generate_fn, ort_evaluate_inputs) ort_outputs = generate_fn(ort_inputs) if args.device != "cpu": ort_outputs = ort_outputs.copy_outputs_to_cpu() @@ -378,7 +398,10 @@ def without_io_binding(inputs): logger.info(f"Transcription: {ort_outputs[0][0]}") else: # convert_to_onnx model produces generated ids - logger.info(f"Generated token length: {len(ort_outputs[0][0])} tokens") + actual_output = handle_output(ort_outputs[0][0]) + logger.info(f"Generated token length: {len(actual_output)} tokens") + transcription = args.processor.batch_decode(ort_outputs[0], skip_special_tokens=True)[0] + logger.info(f"Transcription: {transcription}") measure_fn(args, generate_fn, ort_inputs) @@ -483,6 +506,12 @@ def parse_args(): parser.add_argument("--pt-num-rows", type=int, default=1000, help="Number of rows for PyTorch profiler to display") parser.add_argument("--verbose", default=False, action="store_true") parser.add_argument("--log-folder", type=str, default=os.path.join("."), help="Folder to cache log files") + parser.add_argument( 
+ "--tune", + default=False, + action="store_true", + help="Only used by ROCm EP, enable TunableOp tuning to select fastest kernel", + ) args = parser.parse_args() @@ -490,13 +519,21 @@ def parse_args(): np.random.seed(args.seed) torch.manual_seed(args.seed) + args.monitor_type = args.device # Set runtime properties if "ort" in args.benchmark_type: args.execution_provider = f"{args.device.upper()}ExecutionProvider" if args.execution_provider == "CUDAExecutionProvider": args.execution_provider = (args.execution_provider, {"device_id": args.device_id}) elif args.execution_provider == "ROCMExecutionProvider": - args.execution_provider = (args.execution_provider, {"device_id": args.device_id}) + args.execution_provider = ( + args.execution_provider, + { + "device_id": args.device_id, + "tunable_op_enable": 1, + "tunable_op_tuning_enable": 1 if args.tune else 0, + }, + ) args.device = "cuda" # Check that model paths have been specified for any benchmarking with ORT @@ -527,6 +564,7 @@ def main(): setattr(args, "target_device", target_device) # noqa: B010 setattr(args, "use_fp16", use_fp16) # noqa: B010 setattr(args, "has_audio_stream", False) # noqa: B010 + setattr(args, "eos_token_id", config.eos_token_id) # noqa: B010 logger.info(f"Forced decoder prompt ids: {args.decoder_input_ids}") diff --git a/onnxruntime/python/tools/transformers/models/whisper/benchmark_all.py b/onnxruntime/python/tools/transformers/models/whisper/benchmark_all.py index f12723f1af2df..08d7befec3cfd 100644 --- a/onnxruntime/python/tools/transformers/models/whisper/benchmark_all.py +++ b/onnxruntime/python/tools/transformers/models/whisper/benchmark_all.py @@ -109,6 +109,8 @@ def get_args(): help="Number of mins to attempt the benchmark before moving on", ) + parser.add_argument("--tune", default=False, action="store_true") + args = parser.parse_args() setattr(args, "model_size", args.model_name.split("/")[-1].replace(".", "-")) # noqa: B010 @@ -292,6 +294,7 @@ def main(): 
ort_decoder_input_ids_cmd = ( ["--decoder-input-ids", str(ort_forced_decoder_ids)] if args.language and args.task else [] ) + ort_tune_cmd = ["--tune"] if args.tune else [] all_results = [] for audio_file in os.listdir(args.audio_path): @@ -395,31 +398,35 @@ def main(): # Benchmark ONNX Runtime if args.ort_model_path: - benchmark_cmd = [ # noqa: RUF005 - "python3", - "-m", - "models.whisper.benchmark", - "--audio-path", - audio_path, - "--benchmark-type", - "ort", - "--ort-model-path", - args.ort_model_path, - "--model-name", - args.model_name, - "--precision", - args.precision, - "--device", - args.device, - "--device-id", - str(args.device_id), - "--warmup-runs", - str(args.warmup_runs), - "--num-runs", - str(args.num_runs), - "--log-folder", - args.log_folder, - ] + ort_decoder_input_ids_cmd + benchmark_cmd = ( + [ # noqa: RUF005 + "python3", + "-m", + "models.whisper.benchmark", + "--audio-path", + audio_path, + "--benchmark-type", + "ort", + "--ort-model-path", + args.ort_model_path, + "--model-name", + args.model_name, + "--precision", + args.precision, + "--device", + args.device, + "--device-id", + str(args.device_id), + "--warmup-runs", + str(args.warmup_runs), + "--num-runs", + str(args.num_runs), + "--log-folder", + args.log_folder, + ] + + ort_decoder_input_ids_cmd + + ort_tune_cmd + ) logger.info("Benchmark ONNX Runtime") results = benchmark(args, benchmark_cmd, "onnxruntime", audio_file, duration) all_results.extend(results) From dea425e7c140a7216727421c434a1c5a3fb1f54d Mon Sep 17 00:00:00 2001 From: Adrian Lizarraga Date: Mon, 18 Sep 2023 09:43:34 -0700 Subject: [PATCH 3/3] [QNN/CPU EP] Add 16-bit Quantize/Dequantize contrib ops (#17015) ### Description - Adds 16-bit integer support to: - Quantization kernel implementations: Intel, Neon, and Power intrinsics - DequantizeLinear and QuantizeLinear contrib ops - QNN EP Quantize and Dequantize operators - Python quantization scripts - Disables QDQ fusions for most 16-bit QDQ node groups (need to add 
16-bit support to QLinear* ops) - Retains support for dropping QDQ nodes from Split, Gather, Reshape, Transpose, Squeeze, and Unsqueeze node groups. Sample python code to generate QDQ model with 16-bit activations and 8-bit weights: ```python quantize_static( input_model_path, output_model_path, data_reader, quant_format=args.quant_format, per_channel=args.per_channel, activation_type=QuantType.QUInt16, weight_type=QuantType.QUInt8, extra_options={"DedicatedQDQPair": True, "ForceQuantizeNoInputCheck": True, "UseQDQContribOps": True}, ) ``` Note that enabling the `UseQDQContribOps` extra option is not strictly necessary. If the 16bit types are used without enabling `UseQDQContribOps`, the QDQ ops domains are overridden to 'com.microsoft', and a warning is printed to stdout. ### Automated Tests MLAS/CPU EP: - [x] 16-bit QuantizeLinear computation - [x] 16-bit DequantizeLinear computation Optimizer: - [x] Transpose QDQ fusion - [x] Gather QDQ fusion - [x] Reshape QDQ fusion - [x] Squeeze QDQ fusion - [x] Unsqueeze QDQ fusion - [x] Split drop QDQ - [x] DoubleQDQPairRemover - [x] Transpose optimization - [x] EnsureUniqueDQForNodeUnit - [x] Common subexpression elimination (DQ not removed) - [x] Constant folding QNN EP: - [x] Conv 16-bit activations, 8-bit weights - [x] MatMul 16-bit activations, 8-bit weights - [x] Unary 16-bit QDQ ops - [x] Binary 16-bit QDQ ops Quantization tool: - [x] Test creation of 16-bit QDQ model ### Motivation and Context Support mixed precision (8bit weights, 16bit activations) models. 
--------- Co-authored-by: Edward Chen <18449977+edgchen1@users.noreply.github.com> --- docs/ContribOperators.md | 13 +- docs/OperatorKernels.md | 4 +- .../contrib_ops/cpu/cpu_contrib_kernels.cc | 8 + .../cpu/quantization/quantize_ops.cc | 56 -- .../graph/contrib_ops/quantization_defs.cc | 16 +- onnxruntime/core/mlas/lib/mlasi.h | 24 + onnxruntime/core/mlas/lib/platform.cpp | 4 + .../core/mlas/lib/power/QuantizePower.cpp | 39 +- onnxruntime/core/mlas/lib/quantize.cpp | 321 ++++++++- .../optimizer/double_qdq_pairs_remover.cc | 283 ++++---- .../core/optimizer/double_qdq_pairs_remover.h | 32 +- .../qdq_selector_action_transformer.cc | 37 +- .../selectors_actions/qdq_selectors.cc | 101 ++- .../selectors_actions/qdq_selectors.h | 97 ++- .../onnx_transpose_optimization.cc | 2 +- .../cpu/quantization/quantize_linear.cc | 133 +++- .../cpu/quantization/quantize_linear.h | 45 -- .../builder/opbuilder/simple_op_builder.cc | 124 +++- .../qnn/builder/qnn_model_wrapper.cc | 10 + .../tools/quantization/onnx_quantizer.py | 2 +- .../tools/quantization/qdq_quantizer.py | 20 + .../python/tools/quantization/quant_utils.py | 63 +- .../python/tools/quantization/quantize.py | 10 + .../test/contrib_ops/quantize_ops_test.cc | 81 ++- .../mlas/unittest/test_quantizelinear.cpp | 40 +- .../ensure_unique_dq_for_node_unit_test.cc | 21 +- .../test/optimizer/graph_transform_test.cc | 282 ++++---- .../optimizer/graph_transform_test_builder.h | 12 + .../test/optimizer/qdq_transformer_test.cc | 433 ++++++++---- .../optimizer/transpose_optimizer_test.cc | 442 ++++++------ onnxruntime/test/providers/qnn/conv_test.cc | 653 ++++++++++++------ .../test/providers/qnn/matmul_test.cpp | 102 ++- .../test/providers/qnn/qnn_basic_test.cc | 2 +- .../test/providers/qnn/qnn_test_utils.cc | 8 +- .../test/providers/qnn/qnn_test_utils.h | 18 +- .../test/providers/qnn/reduce_op_test.cc | 2 +- .../test/providers/qnn/simple_op_htp_test.cc | 306 +++++++- .../test/python/quantization/test_qdq.py | 47 +- 
.../transform/convert_qdq_ops_to_ms_domain.py | 155 ++++- ...olding_dequantizelinear.qdq16_contrib.onnx | Bin 0 -> 3938 bytes ..._node_unit.graph_output.qdq16_contrib.onnx | Bin 0 -> 1433 bytes ...t_folding_qdq_node_unit.qdq16_contrib.onnx | Bin 0 -> 2061 bytes ...consumer_dq_nodes.fixed.qdq16_contrib.onnx | Bin 0 -> 272066 bytes 43 files changed, 2837 insertions(+), 1211 deletions(-) delete mode 100644 onnxruntime/contrib_ops/cpu/quantization/quantize_ops.cc delete mode 100644 onnxruntime/core/providers/cpu/quantization/quantize_linear.h create mode 100644 onnxruntime/test/testdata/transform/fusion/constant_folding_dequantizelinear.qdq16_contrib.onnx create mode 100644 onnxruntime/test/testdata/transform/fusion/constant_folding_qdq_node_unit.graph_output.qdq16_contrib.onnx create mode 100644 onnxruntime/test/testdata/transform/fusion/constant_folding_qdq_node_unit.qdq16_contrib.onnx create mode 100644 onnxruntime/test/testdata/transform/qdq_with_multi_consumer_dq_nodes.fixed.qdq16_contrib.onnx diff --git a/docs/ContribOperators.md b/docs/ContribOperators.md index 5bd1a89c0dea1..95dc8c3cde46c 100644 --- a/docs/ContribOperators.md +++ b/docs/ContribOperators.md @@ -1351,8 +1351,8 @@ This version of the operator has been available since version 1 of the 'com.micr #### Type Constraints
-<dt><tt>T1</tt> : tensor(int8), tensor(uint8), tensor(int32)</dt>
-<dd>Constrain 'x' and 'x_zero_point' to 8-bit integer tensors or 32-bit signed integer tensors.</dd>
+<dt><tt>T1</tt> : tensor(int8), tensor(uint8), tensor(int16), tensor(uint16), tensor(int32)</dt>
+<dd>Constrain 'x' and 'x_zero_point' to 8-bit integer tensors, 16-bit integer tensors, or 32-bit signed integer tensors.</dd>
 <dt><tt>T2</tt> : tensor(float16), tensor(float)</dt>
 <dd>Constrain 'y', 'x_scale' to float tensors.</dd>
@@ -4194,8 +4194,9 @@ This version of the operator has been available since version 1 of the 'com.micr ### **com.microsoft.QuantizeLinear** The linear quantization operator. It consumes a full precision data, a scale, a zero point to compute the low precision / quantized tensor. - The quantization formula is y = saturate ((x / y_scale) + y_zero_point).For saturation, it saturates to [0, 255] if it's uint8, or [-128, 127] if it's int8. - For (x / y_scale), it's rounding to nearest ties to even. Refer to https://en.wikipedia.org/wiki/Rounding for details. + The quantization formula is y = saturate ((x / y_scale) + y_zero_point). For saturation, it saturates to [0, 255] if it's uint8, [-128, 127] if it's int8, + [0, 65,535] if it's uint16, and [-32,768, 32,767] if it's int16. For (x / y_scale), it's rounding to nearest ties to even. + Refer to https://en.wikipedia.org/wiki/Rounding for details. Scale and zero point must have same shape. They must be either scalar (per tensor) or 1-D tensor (per 'axis'). #### Version @@ -4232,8 +4233,8 @@ This version of the operator has been available since version 1 of the 'com.micr
 <dt><tt>T1</tt> : tensor(float16), tensor(float)</dt>
 <dd>Constrain 'x', 'y_scale' to float tensors.</dd>
-<dt><tt>T2</tt> : tensor(int8), tensor(uint8)</dt>
-<dd>Constrain 'y_zero_point' and 'y' to 8-bit integer tensors.</dd>
+<dt><tt>T2</tt> : tensor(int8), tensor(uint8), tensor(int16), tensor(uint16)</dt>
+<dd>Constrain 'y_zero_point' and 'y' to 8-bit and 16-bit integer tensors.</dd>
diff --git a/docs/OperatorKernels.md b/docs/OperatorKernels.md index d46f3ed9bd262..33c187a28b62e 100644 --- a/docs/OperatorKernels.md +++ b/docs/OperatorKernels.md @@ -439,7 +439,7 @@ Do not modify directly.* |CDist|*in* A:**T**
*in* B:**T**
*out* C:**T**|1+|**T** = tensor(double), tensor(float)| |ConvTransposeWithDynamicPads|*in* X:**T**
*in* W:**T**
*in* Pads:**tensor(int64)**
*in* B:**T**
*out* Y:**T**|1+|**T** = tensor(float)| |CropAndResize|*in* X:**T1**
*in* rois:**T1**
*in* batch_indices:**T2**
*in* crop_size:**T2**
*out* Y:**T1**|1+|**T1** = tensor(float)
**T2** = tensor(int32)| -|DequantizeLinear|*in* x:**T1**
*in* x_scale:**T2**
*in* x_zero_point:**T1**
*out* y:**T2**|1+|**T1** = tensor(int32), tensor(int8), tensor(uint8)
**T2** = tensor(float)| +|DequantizeLinear|*in* x:**T1**
*in* x_scale:**T2**
*in* x_zero_point:**T1**
*out* y:**T2**|1+|**T1** = tensor(int16), tensor(int32), tensor(int8), tensor(uint16), tensor(uint8)
**T2** = tensor(float)| |DynamicQuantizeLSTM|*in* X:**T**
*in* W:**T2**
*in* R:**T2**
*in* B:**T**
*in* sequence_lens:**T1**
*in* initial_h:**T**
*in* initial_c:**T**
*in* P:**T**
*in* W_scale:**T**
*in* W_zero_point:**T2**
*in* R_scale:**T**
*in* R_zero_point:**T2**
*out* Y:**T**
*out* Y_h:**T**
*out* Y_c:**T**|1+|**T** = tensor(float)
**T1** = tensor(int32)
**T2** = tensor(int8), tensor(uint8)| |DynamicQuantizeMatMul|*in* A:**T1**
*in* B:**T2**
*in* b_scale:**T1**
*in* b_zero_point:**T2**
*in* bias:**T1**
*out* Y:**T1**|1+|**T1** = tensor(float)
**T2** = tensor(int8), tensor(uint8)| |EmbedLayerNormalization|*in* input_ids:**T1**
*in* segment_ids:**T1**
*in* word_embedding:**T**
*in* position_embedding:**T**
*in* segment_embedding:**T**
*in* gamma:**T**
*in* beta:**T**
*in* mask:**T1**
*in* position_ids:**T1**
*out* output:**T**
*out* mask_index:**T1**
*out* embedding_sum:**T**|1+|**T** = tensor(float)| @@ -472,7 +472,7 @@ Do not modify directly.* |QLinearSigmoid|*in* X:**T**
*in* X_scale:**tensor(float)**
*in* X_zero_point:**T**
*in* Y_scale:**tensor(float)**
*in* Y_zero_point:**T**
*out* Y:**T**|1+|**T** = tensor(int8), tensor(uint8)| |QLinearSoftmax|*in* X:**T**
*in* X_scale:**tensor(float)**
*in* x_zero_point:**T**
*in* y_scale:**tensor(float)**
*in* y_zero_point:**T**
*out* Y:**T**|1+|**T** = tensor(int8), tensor(uint8)| |QLinearWhere|*in* condition:**B**
*in* X:**T**
*in* x_scale:**TF**
*in* x_zero_point:**T**
*in* Y:**T**
*in* y_scale:**TF**
*in* y_zero_point:**T**
*in* z_scale:**TF**
*in* z_zero_point:**T**
*out* Z:**T**|1+|**T** = tensor(int8), tensor(uint8)| -|QuantizeLinear|*in* x:**T1**
*in* y_scale:**T1**
*in* y_zero_point:**T2**
*out* y:**T2**|1+|**T1** = tensor(float)
**T2** = tensor(int8), tensor(uint8)| +|QuantizeLinear|*in* x:**T1**
*in* y_scale:**T1**
*in* y_zero_point:**T2**
*out* y:**T2**|1+|**T1** = tensor(float)
**T2** = tensor(int16), tensor(int8), tensor(uint16), tensor(uint8)| |QuickGelu|*in* X:**T**
*out* Y:**T**|1+|**T** = tensor(float)| |Range|*in* start:**T**
*in* limit:**T**
*in* delta:**T**
*out* Y:**T**|1+|**T** = tensor(double), tensor(float), tensor(int16), tensor(int32), tensor(int64)| |SampleOp|*in* X:**T**
*out* Y:**T**|1+|**T** = tensor(float)| diff --git a/onnxruntime/contrib_ops/cpu/cpu_contrib_kernels.cc b/onnxruntime/contrib_ops/cpu/cpu_contrib_kernels.cc index 660c8bd9e0624..0ec5088808656 100644 --- a/onnxruntime/contrib_ops/cpu/cpu_contrib_kernels.cc +++ b/onnxruntime/contrib_ops/cpu/cpu_contrib_kernels.cc @@ -56,9 +56,13 @@ class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, QLine class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, QLinearAveragePool); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, uint8_t, DequantizeLinear); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, int8_t, DequantizeLinear); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, uint16_t, DequantizeLinear); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, int16_t, DequantizeLinear); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, int32_t, DequantizeLinear); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, uint8_t, QuantizeLinear); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, int8_t, QuantizeLinear); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, uint16_t, QuantizeLinear); +class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, int16_t, QuantizeLinear); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, uint8_t, QLinearLeakyRelu); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, int8_t, QLinearLeakyRelu); class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kMSDomain, 1, uint8_t, QLinearSigmoid); @@ -191,9 +195,13 @@ Status RegisterQuantizationKernels(KernelRegistry& kernel_registry) { BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, + BuildKernelCreateInfo, + 
BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, diff --git a/onnxruntime/contrib_ops/cpu/quantization/quantize_ops.cc b/onnxruntime/contrib_ops/cpu/quantization/quantize_ops.cc deleted file mode 100644 index 28a304bfc7f0e..0000000000000 --- a/onnxruntime/contrib_ops/cpu/quantization/quantize_ops.cc +++ /dev/null @@ -1,56 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. - -#include "core/providers/cpu/quantization/quantize_linear.h" -#include "core/providers/common.h" - -namespace onnxruntime { -namespace contrib { - -ONNX_CPU_OPERATOR_TYPED_MS_KERNEL( - DequantizeLinear, - 1, - uint8_t, - KernelDefBuilder() - .TypeConstraint("T1", DataTypeImpl::GetTensorType()) - .TypeConstraint("T2", DataTypeImpl::GetTensorType()), - DequantizeLinear); - -ONNX_CPU_OPERATOR_TYPED_MS_KERNEL( - DequantizeLinear, - 1, - int8_t, - KernelDefBuilder() - .TypeConstraint("T1", DataTypeImpl::GetTensorType()) - .TypeConstraint("T2", DataTypeImpl::GetTensorType()), - DequantizeLinear); - -ONNX_CPU_OPERATOR_TYPED_MS_KERNEL( - DequantizeLinear, - 1, - int32_t, - KernelDefBuilder() - .TypeConstraint("T1", DataTypeImpl::GetTensorType()) - .TypeConstraint("T2", DataTypeImpl::GetTensorType()), - DequantizeLinear); - -ONNX_CPU_OPERATOR_TYPED_MS_KERNEL( - QuantizeLinear, - 1, - uint8_t, - KernelDefBuilder() - .TypeConstraint("T1", DataTypeImpl::GetTensorType()) - .TypeConstraint("T2", DataTypeImpl::GetTensorType()), - QuantizeLinear); - -ONNX_CPU_OPERATOR_TYPED_MS_KERNEL( - QuantizeLinear, - 1, - int8_t, - KernelDefBuilder() - .TypeConstraint("T1", DataTypeImpl::GetTensorType()) - .TypeConstraint("T2", DataTypeImpl::GetTensorType()), - QuantizeLinear); - -} // namespace contrib -} // namespace onnxruntime diff --git a/onnxruntime/core/graph/contrib_ops/quantization_defs.cc 
b/onnxruntime/core/graph/contrib_ops/quantization_defs.cc index aa2ad9f1ff6b1..4313fae767fe5 100644 --- a/onnxruntime/core/graph/contrib_ops/quantization_defs.cc +++ b/onnxruntime/core/graph/contrib_ops/quantization_defs.cc @@ -136,8 +136,9 @@ Performs element-wise binary {name} on 8 bit data types (with Numpy-style broadc static const char* QuantizeLinear_ver1_doc = R"DOC( The linear quantization operator. It consumes a full precision data, a scale, a zero point to compute the low precision / quantized tensor. -The quantization formula is y = saturate ((x / y_scale) + y_zero_point).For saturation, it saturates to [0, 255] if it's uint8, or [-128, 127] if it's int8. -For (x / y_scale), it's rounding to nearest ties to even. Refer to https://en.wikipedia.org/wiki/Rounding for details. +The quantization formula is y = saturate ((x / y_scale) + y_zero_point). For saturation, it saturates to [0, 255] if it's uint8, [-128, 127] if it's int8, +[0, 65,535] if it's uint16, and [-32,768, 32,767] if it's int16. For (x / y_scale), it's rounding to nearest ties to even. +Refer to https://en.wikipedia.org/wiki/Rounding for details. Scale and zero point must have same shape. They must be either scalar (per tensor) or 1-D tensor (per 'axis').)DOC"; ONNX_MS_OPERATOR_SET_SCHEMA( @@ -161,8 +162,8 @@ ONNX_MS_OPERATOR_SET_SCHEMA( "T2", OpSchema::Optional) .Output(0, "y", "N-D quantized output tensor. 
It has same shape as input 'x'.", "T2") .TypeConstraint("T1", {"tensor(float16)", "tensor(float)"}, "Constrain 'x', 'y_scale' to float tensors.") - .TypeConstraint("T2", {"tensor(int8)", "tensor(uint8)"}, - "Constrain 'y_zero_point' and 'y' to 8-bit integer tensors.") + .TypeConstraint("T2", {"tensor(int8)", "tensor(uint8)", "tensor(int16)", "tensor(uint16)"}, + "Constrain 'y_zero_point' and 'y' to 8-bit and 16-bit integer tensors.") .SetDoc(QuantizeLinear_ver1_doc) .TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) { if (ctx.getNumInputs() == 3 && ctx.getInputType(2) != nullptr) { @@ -202,9 +203,10 @@ ONNX_MS_OPERATOR_SET_SCHEMA(DequantizeLinear, 1, "T1", OpSchema::Optional) .Output(0, "y", "N-D full precision output tensor. It has same shape as input 'x'.", "T2") - .TypeConstraint("T1", {"tensor(int8)", "tensor(uint8)", "tensor(int32)"}, - "Constrain 'x' and 'x_zero_point' to 8-bit integer tensors or 32-bit " - "signed integer tensors.") + .TypeConstraint("T1", {"tensor(int8)", "tensor(uint8)", "tensor(int16)", + "tensor(uint16)", "tensor(int32)"}, + "Constrain 'x' and 'x_zero_point' to 8-bit integer tensors, " + "16-bit integer tensors, or 32-bit signed integer tensors.") .TypeConstraint("T2", {"tensor(float16)", "tensor(float)"}, "Constrain 'y', 'x_scale' to float tensors.") .SetDoc(DequantizeLinear_ver1_doc) diff --git a/onnxruntime/core/mlas/lib/mlasi.h b/onnxruntime/core/mlas/lib/mlasi.h index f517be185b3fa..b6ac4a1ca1d6c 100644 --- a/onnxruntime/core/mlas/lib/mlasi.h +++ b/onnxruntime/core/mlas/lib/mlasi.h @@ -633,6 +633,24 @@ void int8_t ZeroPoint ); +typedef +void +(MLASCALL MLAS_QUANTIZE_LINEAR_U16_KERNEL)( + const float* Input, + uint16_t* Output, + size_t N, + float Scale, + uint16_t ZeroPoint); + +typedef +void +(MLASCALL MLAS_QUANTIZE_LINEAR_S16_KERNEL)( + const float* Input, + int16_t* Output, + size_t N, + float Scale, + int16_t ZeroPoint); + template struct MLAS_QUANT_KERNEL { @@ -749,6 +767,8 @@ extern "C" { 
MLAS_QLINEAR_BINARY_OP_U8_KERNEL MlasQLinearAddU8Kernel; MLAS_QUANTIZE_LINEAR_S8_KERNEL MlasQuantizeLinearS8Kernel; MLAS_QUANTIZE_LINEAR_U8_KERNEL MlasQuantizeLinearU8Kernel; + MLAS_QUANTIZE_LINEAR_S16_KERNEL MlasQuantizeLinearS16Kernel; + MLAS_QUANTIZE_LINEAR_U16_KERNEL MlasQuantizeLinearU16Kernel; #if defined(MLAS_TARGET_AMD64) MLAS_COMPUTE_UNARY_FLOAT_KERNEL MlasErfKernelFma3; MLAS_COMPUTE_UNARY_FLOAT_KERNEL MlasComputeExpF32KernelFma3; @@ -959,6 +979,8 @@ struct MLAS_PLATFORM { const MLAS_GEMM_QUANT_DISPATCH* GemmU8X8Dispatch; MLAS_QUANTIZE_LINEAR_S8_KERNEL* QuantizeLinearS8Kernel; MLAS_QUANTIZE_LINEAR_U8_KERNEL* QuantizeLinearU8Kernel; + MLAS_QUANTIZE_LINEAR_S16_KERNEL* QuantizeLinearS16Kernel; + MLAS_QUANTIZE_LINEAR_U16_KERNEL* QuantizeLinearU16Kernel; #endif #if defined(MLAS_TARGET_AMD64) MLAS_SGEMM_KERNEL_M1_ROUTINE* KernelM1Routine; @@ -986,6 +1008,8 @@ struct MLAS_PLATFORM { MLAS_REDUCE_MINIMUM_MAXIMUM_FLOAT_KERNEL* ReduceMinimumMaximumF32Kernel; MLAS_QUANTIZE_LINEAR_S8_KERNEL* QuantizeLinearS8Kernel; MLAS_QUANTIZE_LINEAR_U8_KERNEL* QuantizeLinearU8Kernel; + MLAS_QUANTIZE_LINEAR_S16_KERNEL* QuantizeLinearS16Kernel; + MLAS_QUANTIZE_LINEAR_U16_KERNEL* QuantizeLinearU16Kernel; uint32_t NchwcBlockSize; uint32_t PreferredBufferAlignment; int32_t MaximumThreadCount; diff --git a/onnxruntime/core/mlas/lib/platform.cpp b/onnxruntime/core/mlas/lib/platform.cpp index 86b7450a7c4e5..7e2b117d6f249 100644 --- a/onnxruntime/core/mlas/lib/platform.cpp +++ b/onnxruntime/core/mlas/lib/platform.cpp @@ -230,6 +230,8 @@ Return Value: this->QLinearAddU8Kernel = MlasQLinearAddU8Kernel; this->QuantizeLinearS8Kernel = MlasQuantizeLinearS8Kernel; this->QuantizeLinearU8Kernel = MlasQuantizeLinearU8Kernel; + this->QuantizeLinearS16Kernel = MlasQuantizeLinearS16Kernel; + this->QuantizeLinearU16Kernel = MlasQuantizeLinearU16Kernel; this->NchwcBlockSize = 8; this->PreferredBufferAlignment = MLAS_DEFAULT_PREFERRED_BUFFER_ALIGNMENT; @@ -475,6 +477,8 @@ Return Value: 
this->GemmDoubleKernel = MlasDgemmKernel; this->QuantizeLinearS8Kernel = MlasQuantizeLinearS8Kernel; this->QuantizeLinearU8Kernel = MlasQuantizeLinearU8Kernel; + this->QuantizeLinearS16Kernel = MlasQuantizeLinearS16Kernel; + this->QuantizeLinearU16Kernel = MlasQuantizeLinearU16Kernel; #if defined(__linux__) unsigned long hwcap2 = getauxval(AT_HWCAP2); diff --git a/onnxruntime/core/mlas/lib/power/QuantizePower.cpp b/onnxruntime/core/mlas/lib/power/QuantizePower.cpp index 0d38288c6d42c..830a3a6a492db 100644 --- a/onnxruntime/core/mlas/lib/power/QuantizePower.cpp +++ b/onnxruntime/core/mlas/lib/power/QuantizePower.cpp @@ -1,3 +1,4 @@ +#include #include "mlasi.h" #include @@ -82,8 +83,15 @@ Return Value: auto ShortVector0 = vec_pack(IntegerVector0, IntegerVector1); auto ShortVector1 = vec_pack(IntegerVector2, IntegerVector3); - auto CharVector = vec_pack(ShortVector0, ShortVector1); - vec_xst(CharVector, 0, (int8_t *) Output); + + if constexpr (std::is_same_v || std::is_same_v) { + auto CharVector = vec_pack(ShortVector0, ShortVector1); + vec_xst(CharVector, 0, Output); + } else { + static_assert(std::is_same_v || std::is_same_v); + vec_xst(ShortVector0, 0, Output); + vec_xst(ShortVector1, 0, &Output[8]); + } Output += 16; Input += 16; @@ -124,3 +132,30 @@ MlasQuantizeLinearS8Kernel( { MlasQuantizeLinearKernel(Input, Output, N, Scale, ZeroPoint); } + +void +MLASCALL +MlasQuantizeLinearU16Kernel( + const float* Input, + uint16_t* Output, + size_t N, + float Scale, + uint16_t ZeroPoint + ) +{ + MlasQuantizeLinearKernel(Input, Output, N, Scale, ZeroPoint); +} + +void +MLASCALL +MlasQuantizeLinearS16Kernel( + const float* Input, + int16_t* Output, + size_t N, + float Scale, + int16_t ZeroPoint + ) +{ + MlasQuantizeLinearKernel(Input, Output, N, Scale, ZeroPoint); +} + diff --git a/onnxruntime/core/mlas/lib/quantize.cpp b/onnxruntime/core/mlas/lib/quantize.cpp index c6e8af38c0020..133ad79594c55 100644 --- a/onnxruntime/core/mlas/lib/quantize.cpp +++ 
b/onnxruntime/core/mlas/lib/quantize.cpp @@ -21,6 +21,7 @@ Module Name: #include "mlasi.h" #if defined(MLAS_NEON64_INTRINSICS) || defined(MLAS_SSE2_INTRINSICS) +#include // // QuantizeLinear implementation using NEON or SSE2 intrinsics. @@ -79,6 +80,20 @@ MlasQuantizeLinearPackBytes( MLAS_INT32X4 IntegerVector ); +template +void +MlasQuantizeLinearStore4PackedValues( + MLAS_INT32X4 IntegerVector, + OutputType* Output + ); + +template +void +MlasQuantizeLinearStoreSingleValue( + MLAS_INT32X4 IntegerVector, + OutputType* Output + ); + #if defined(MLAS_NEON64_INTRINSICS) template @@ -100,6 +115,104 @@ MlasQuantizeLinearPackBytes( return vreinterpretq_s32_u8(ByteVector); } +template<> +MLAS_INT32X4 +MlasQuantizeLinearPackBytes( + MLAS_INT32X4 IntegerVector + ) +{ + // + // Swizzle the least significant u16 from each int32_t element to the + // bottom eight bytes of the vector register. + // + + uint16x8_t WordVector = vreinterpretq_u16_s32(IntegerVector); + WordVector = vuzp1q_u16(WordVector, WordVector); + return vreinterpretq_s32_u16(WordVector); +} + +template<> +MLAS_INT32X4 +MlasQuantizeLinearPackBytes( + MLAS_INT32X4 IntegerVector + ) +{ + // + // Swizzle the least significant u16 from each int32_t element to the + // bottom eight bytes of the vector register. + // + + int16x8_t WordVector = vreinterpretq_s16_s32(IntegerVector); + WordVector = vuzp1q_s16(WordVector, WordVector); + return vreinterpretq_s32_s16(WordVector); +} + +template +MLAS_FORCEINLINE +void +MlasQuantizeLinearStore4PackedValues( + MLAS_INT32X4 IntegerVector, + OutputType* Output + ) +{ + // Copies the lower 4 packed elements of the vector into memory (Output). 
+ + if constexpr (std::is_same_v || std::is_same_v) { + vst1q_lane_s32(reinterpret_cast(Output), IntegerVector, 0); + } else { + static_assert(std::is_same_v || std::is_same_v); + vst1q_lane_s64(reinterpret_cast(Output), vreinterpretq_s64_s32(IntegerVector), 0); + } +} + +template <> +MLAS_FORCEINLINE +void +MlasQuantizeLinearStoreSingleValue( + MLAS_INT32X4 IntegerVector, + uint8_t* Output + ) +{ + // Copies the lower 8-bit element of the vector into memory (Output). + vst1q_lane_u8(Output, vreinterpretq_u8_s32(IntegerVector), 0); +} + +template <> +MLAS_FORCEINLINE +void +MlasQuantizeLinearStoreSingleValue( + MLAS_INT32X4 IntegerVector, + int8_t* Output + ) +{ + // Copies the lower 8-bit element of the vector into memory (Output). + vst1q_lane_s8(Output, vreinterpretq_s8_s32(IntegerVector), 0); +} + +template <> +MLAS_FORCEINLINE +void +MlasQuantizeLinearStoreSingleValue( + MLAS_INT32X4 IntegerVector, + uint16_t* Output + ) +{ + // Copies the lower 16-bit element of the vector into memory (Output). + vst1q_lane_u16(Output, vreinterpretq_u16_s32(IntegerVector), 0); +} + +template <> +MLAS_FORCEINLINE +void +MlasQuantizeLinearStoreSingleValue( + MLAS_INT32X4 IntegerVector, + int16_t* Output + ) +{ + // Copies the lower 16-bit element of the vector into memory (Output). + vst1q_lane_s16(Output, vreinterpretq_s16_s32(IntegerVector), 0); +} + #else template<> @@ -128,6 +241,86 @@ MlasQuantizeLinearPackBytes( return IntegerVector; } +template<> +MLAS_FORCEINLINE +MLAS_INT32X4 +MlasQuantizeLinearPackBytes( + MLAS_INT32X4 IntegerVector + ) +{ +#if defined(MLAS_SSE41_INTRINSICS) + IntegerVector = _mm_packus_epi32(IntegerVector, IntegerVector); // 16-bit values packed in lower 8 bytes. +#else + // Cannot use _mm_packus_epi32 because that was not available until SSE4.1. + // Instead, emulate by sign-extending the first 16-bits of each packed 32-bit element. + // Afterwards, can use _mm_packs_epi32, which is available on SSE2. 
+ // See: https://stackoverflow.com/a/11028244 + + IntegerVector = _mm_slli_epi32(IntegerVector, 16); + IntegerVector = _mm_srai_epi32(IntegerVector, 16); // Sign-extend: undo left shift with right arithmetic shift + IntegerVector = _mm_packs_epi32(IntegerVector, IntegerVector); // 16-bit values packed in lower 8 bytes. +#endif // defined(MLAS_SSE41_INTRINSICS) + + return IntegerVector; +} + +template<> +MLAS_FORCEINLINE +MLAS_INT32X4 +MlasQuantizeLinearPackBytes( + MLAS_INT32X4 IntegerVector + ) +{ + IntegerVector = _mm_packs_epi32(IntegerVector, IntegerVector); // 16-bit values packed in lower 8 bytes. + + return IntegerVector; +} + +template +MLAS_FORCEINLINE +void +MlasQuantizeLinearStore4PackedValues( + MLAS_INT32X4 IntegerVector, + OutputType* Output + ) +{ + // Copies the lower 4 packed elements of the vector into memory (Output). + + if constexpr (std::is_same_v || std::is_same_v) { + *(reinterpret_cast(Output)) = _mm_cvtsi128_si32(IntegerVector); + } else { + static_assert(std::is_same_v || std::is_same_v); + +#if defined(MLAS_TARGET_IX86) + // x86 does not support _mm_cvtsi128_si64, so use _mm_maskmoveu_si128 instead. + constexpr uint32_t bytes_high_bit = 0x80808080; + const __m128i first_8_bytes_mask = _mm_set_epi32(0, 0, bytes_high_bit, bytes_high_bit); + _mm_maskmoveu_si128(IntegerVector, first_8_bytes_mask, reinterpret_cast(Output)); +#else + *(reinterpret_cast(Output)) = _mm_cvtsi128_si64(IntegerVector); +#endif // defined(MLAS_TARGET_IX86) + } +} + +template +MLAS_FORCEINLINE +void +MlasQuantizeLinearStoreSingleValue( + MLAS_INT32X4 IntegerVector, + OutputType* Output + ) +{ + static_assert(std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v); + + // Copies the lower element of the vector into memory (Output). + // Expects that the 32-bit element in lane 0 is already within the valid numerical + // range of the OutputType. 
+ *Output = static_cast(_mm_cvtsi128_si32(IntegerVector)); +} + #endif template @@ -180,12 +373,7 @@ Return Value: MinimumValueVector, MaximumValueVector, ZeroPointVector); IntegerVector = MlasQuantizeLinearPackBytes(IntegerVector); - -#if defined(MLAS_NEON64_INTRINSICS) - vst1q_lane_s32((int32_t*)Output, IntegerVector, 0); -#else - *((int32_t*)Output) = _mm_cvtsi128_si32(IntegerVector); -#endif + MlasQuantizeLinearStore4PackedValues(IntegerVector, Output); Input += 4; Output += 4; @@ -202,11 +390,7 @@ Return Value: auto IntegerVector = MlasQuantizeLinearVector(FloatVector, ScaleVector, MinimumValueVector, MaximumValueVector, ZeroPointVector); -#if defined(MLAS_NEON64_INTRINSICS) - vst1q_lane_u8((uint8_t*)Output + n, vreinterpretq_u8_s32(IntegerVector), 0); -#else - *((uint8_t*)Output + n) = (uint8_t)_mm_cvtsi128_si32(IntegerVector); -#endif + MlasQuantizeLinearStoreSingleValue(IntegerVector, &Output[n]); } } @@ -236,6 +420,32 @@ MlasQuantizeLinearU8Kernel( MlasQuantizeLinearKernel(Input, Output, N, Scale, ZeroPoint); } +void +MLASCALL +MlasQuantizeLinearU16Kernel( + const float* Input, + uint16_t* Output, + size_t N, + float Scale, + uint16_t ZeroPoint +) +{ + MlasQuantizeLinearKernel(Input, Output, N, Scale, ZeroPoint); +} + +void +MLASCALL +MlasQuantizeLinearS16Kernel( + const float* Input, + int16_t* Output, + size_t N, + float Scale, + int16_t ZeroPoint +) +{ + MlasQuantizeLinearKernel(Input, Output, N, Scale, ZeroPoint); +} + template<> void MLASCALL @@ -274,6 +484,44 @@ MlasQuantizeLinear( Input, Output, N, Scale, ZeroPoint); } +template<> +void +MLASCALL +MlasQuantizeLinear( + const float* Input, + uint16_t* Output, + size_t N, + float Scale, + uint16_t ZeroPoint + ) +{ +#if defined(MLAS_TARGET_AMD64) + GetMlasPlatform().QuantizeLinearU16Kernel( +#else + MlasQuantizeLinearU16Kernel( +#endif + Input, Output, N, Scale, ZeroPoint); +} + +template<> +void +MLASCALL +MlasQuantizeLinear( + const float* Input, + int16_t* Output, + size_t N, + float Scale, + 
int16_t ZeroPoint + ) +{ +#if defined(MLAS_TARGET_AMD64) + GetMlasPlatform().QuantizeLinearS16Kernel( +#else + MlasQuantizeLinearS16Kernel( +#endif + Input, Output, N, Scale, ZeroPoint); +} + #else #if defined(MLAS_TARGET_POWER) @@ -306,6 +554,34 @@ MlasQuantizeLinear( GetMlasPlatform().QuantizeLinearU8Kernel(Input, Output, N, Scale, ZeroPoint); } +template<> +void +MLASCALL +MlasQuantizeLinear( + const float* Input, + int16_t* Output, + size_t N, + float Scale, + int16_t ZeroPoint + ) +{ + GetMlasPlatform().QuantizeLinearS16Kernel(Input, Output, N, Scale, ZeroPoint); +} + +template<> +void +MLASCALL +MlasQuantizeLinear( + const float* Input, + uint16_t* Output, + size_t N, + float Scale, + uint16_t ZeroPoint + ) +{ + GetMlasPlatform().QuantizeLinearU16Kernel(Input, Output, N, Scale, ZeroPoint); +} + #endif // @@ -381,6 +657,29 @@ MlasQuantizeLinear( float Scale, uint8_t ZeroPoint ); + +template +void +MLASCALL +MlasQuantizeLinear( + const float* Input, + int16_t* Output, + size_t N, + float Scale, + int16_t ZeroPoint + ); + +template +void +MLASCALL +MlasQuantizeLinear( + const float* Input, + uint16_t* Output, + size_t N, + float Scale, + uint16_t ZeroPoint + ); + #endif #endif diff --git a/onnxruntime/core/optimizer/double_qdq_pairs_remover.cc b/onnxruntime/core/optimizer/double_qdq_pairs_remover.cc index b67f6d6ec0794..624679e7b1b4b 100644 --- a/onnxruntime/core/optimizer/double_qdq_pairs_remover.cc +++ b/onnxruntime/core/optimizer/double_qdq_pairs_remover.cc @@ -1,131 +1,37 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
#include "core/optimizer/double_qdq_pairs_remover.h" +#include #include "core/graph/graph_utils.h" #include "core/optimizer/initializer.h" +#include "core/optimizer/qdq_transformer/qdq_util.h" namespace onnxruntime { -Status DoubleQDQPairsRemover::ApplyImpl( - Graph& graph, - bool& modified, - int /*graph_level*/, - const logging::Logger& /*logger*/) const { - const GraphViewer graph_viewer(graph); - const auto& node_topology_list = graph_viewer.GetNodesInTopologicalOrder(); - - for (const auto& self_index : node_topology_list) { - NodeIndex parent_index = 0; - NodeIndex child_index = 0; - NodeIndex grandchild_index = 0; - if (IsNodeRemovable(graph, self_index, parent_index, child_index, grandchild_index)) { - graph.RemoveEdge(parent_index, self_index, 0, 0); - graph.RemoveEdge(self_index, child_index, 0, 0); - graph.RemoveEdge(child_index, grandchild_index, 0, 0); - graph_utils::ReplaceNodeInput(*graph.GetNode(grandchild_index), 0, *graph.GetNode(self_index)->MutableInputDefs()[0]); - graph.AddEdge(parent_index, grandchild_index, 0, 0); - graph.RemoveNode(child_index); - graph.RemoveNode(self_index); - modified = true; - } - } - return Status::OK(); -} - -bool DoubleQDQPairsRemover::IsNodeRemovable( - Graph& graph, - const NodeIndex& self_index, - NodeIndex& parent_index, - NodeIndex& child_index, - NodeIndex& grandchild_index) { - // Check if the self is a DQ, and have one parent and one child, and cannot be a graph output - Node* self = graph.GetNode(self_index); - if (self == nullptr || - self->OpType() != "DequantizeLinear" || - self->GetInputEdgesCount() != 1 || - self->GetOutputEdgesCount() != 1 || - self->InputDefs().size() != InputIndex::TOTAL_COUNT || - graph.NodeProducesGraphOutput(*self)) { - return false; - } - - // Type is either "tensor(uint8)" or "tensor(int8)" - const auto& self_zp_type = *self->InputDefs()[InputIndex::ZERO_POINT_ID]->Type(); - // child should be a Q, and have only one child, have the same type as self, and cannot be a graph output 
- child_index = self->OutputEdgesBegin()->GetNode().Index(); - const Node* child = graph.GetNode(child_index); - if (child == nullptr || - child->OpType() != "QuantizeLinear" || - child->GetOutputEdgesCount() != 1 || - child->InputDefs().size() != InputIndex::TOTAL_COUNT || - *child->InputDefs()[InputIndex::ZERO_POINT_ID]->Type() != self_zp_type || - graph.NodeProducesGraphOutput(*child)) { - return false; - } - - // parent should be a Q, and have only one output, and cannot be a graph output - parent_index = self->InputEdgesBegin()->GetNode().Index(); - Node* parent = graph.GetNode(parent_index); - if (parent == nullptr || - parent->GetOutputEdgesCount() != 1 || - parent->OpType() != "QuantizeLinear" || - graph.NodeProducesGraphOutput(*parent)) { - return false; - } - - // grandchild should be a DQ - grandchild_index = child->OutputEdgesBegin()->GetNode().Index(); - Node* grandchild = graph.GetNode(grandchild_index); - if (grandchild == nullptr || - grandchild->OpType() != "DequantizeLinear") { - return false; - } - const auto get_constant_initializer = [&graph](const std::string& initializer_name) { - return graph.GetConstantInitializer(initializer_name, true); - }; - if (!QDQ::IsQDQPairSupported(*parent, *self, get_constant_initializer, graph.ModelPath()) || - !QDQ::IsQDQPairSupported(*child, *grandchild, get_constant_initializer, graph.ModelPath())) { - return false; - } - bool skip_reset = false; - float new_scale = 0.0f; - if (self_zp_type == "tensor(uint8)") { - uint8_t new_zero_point = 0; - if (!FindNewZeroPointAndScale(graph, *self, *child, new_scale, new_zero_point, skip_reset)) { - return false; - } - if (skip_reset) { - return true; - } - ApplyNewInputValue(graph, *grandchild, InputIndex::SCALE_ID, new_scale); - ApplyNewInputValue(graph, *parent, InputIndex::SCALE_ID, new_scale); - ApplyNewInputValue(graph, *grandchild, InputIndex::ZERO_POINT_ID, new_zero_point); - ApplyNewInputValue(graph, *parent, InputIndex::ZERO_POINT_ID, new_zero_point); - } else { 
- int8_t new_zero_point = 0; - if (!FindNewZeroPointAndScale(graph, *self, *child, new_scale, new_zero_point, skip_reset)) { - return false; - } - if (skip_reset) { - return true; - } - ApplyNewInputValue(graph, *grandchild, InputIndex::SCALE_ID, new_scale); - ApplyNewInputValue(graph, *parent, InputIndex::SCALE_ID, new_scale); - ApplyNewInputValue(graph, *grandchild, InputIndex::ZERO_POINT_ID, new_zero_point); - ApplyNewInputValue(graph, *parent, InputIndex::ZERO_POINT_ID, new_zero_point); - } - return true; +// Applies a new zero point or scale as the input for a Q/DQ node. +template +static void ApplyNewInputValue(Graph& graph, Node& node, QDQ::InputIndex index, T value) { + const auto* input_tensor = graph_utils::GetConstantInitializer(graph, node.InputDefs()[index]->Name()); + Initializer input_init{*input_tensor, graph.ModelPath()}; + ONNX_NAMESPACE::TensorProto new_input_tensor(*input_tensor); + input_init.data()[0] = value; + input_init.ToProto(new_input_tensor); + auto new_name = graph.GenerateNodeArgName("DoubleQDQRemoved_" + node.InputDefs()[index]->Name()); + new_input_tensor.set_name(new_name); + NodeArg& new_input = graph_utils::AddInitializer(graph, new_input_tensor); + graph_utils::ReplaceNodeInput(node, index, new_input); } +// Returns a new zero point and scale value for the given Q/DQ nodes. 
template -bool DoubleQDQPairsRemover::FindNewZeroPointAndScale(const Graph& graph, const Node& node1, const Node& node2, - float& new_scale, T& new_zero_point, bool& skip_reset) { +static bool FindNewZeroPointAndScale(const Graph& graph, const Node& node1, const Node& node2, + float& new_scale, T& new_zero_point, bool& skip_reset) { // scale & zero point share same initializer, no need to reset the value - const std::string& node1_scale_name = node1.InputDefs()[InputIndex::SCALE_ID]->Name(); - const std::string& node2_scale_name = node2.InputDefs()[InputIndex::SCALE_ID]->Name(); - const std::string& node1_zp_name = node1.InputDefs()[InputIndex::ZERO_POINT_ID]->Name(); - const std::string& node2_zp_name = node2.InputDefs()[InputIndex::ZERO_POINT_ID]->Name(); + const std::string& node1_scale_name = node1.InputDefs()[QDQ::InputIndex::SCALE_ID]->Name(); + const std::string& node2_scale_name = node2.InputDefs()[QDQ::InputIndex::SCALE_ID]->Name(); + const std::string& node1_zp_name = node1.InputDefs()[QDQ::InputIndex::ZERO_POINT_ID]->Name(); + const std::string& node2_zp_name = node2.InputDefs()[QDQ::InputIndex::ZERO_POINT_ID]->Name(); skip_reset = false; if (node1_scale_name == node2_scale_name && node1_zp_name == node2_zp_name) { skip_reset = true; @@ -175,16 +81,141 @@ bool DoubleQDQPairsRemover::FindNewZeroPointAndScale(const Graph& graph, const N return true; } -template -void DoubleQDQPairsRemover::ApplyNewInputValue(Graph& graph, Node& node, const InputIndex& index, T value) { - const auto* input_tensor = graph_utils::GetConstantInitializer(graph, node.InputDefs()[index]->Name()); - Initializer input_init{*input_tensor, graph.ModelPath()}; - TensorProto new_input_tensor(*input_tensor); - input_init.data()[0] = value; - input_init.ToProto(new_input_tensor); - auto new_name = graph.GenerateNodeArgName("DoubleQDQRemoved_" + node.InputDefs()[index]->Name()); - new_input_tensor.set_name(new_name); - NodeArg& new_input = graph_utils::AddInitializer(graph, 
new_input_tensor); - graph_utils::ReplaceNodeInput(node, index, new_input); +// Recomputes the zero point and scale of the outer Q/DQ nodes (i.e., Q1 and DQ2). This is necessary because +// the original two QDQ pairs may have different zero-points and scales. Ex: Q1 -> DQ1 -> Q2 -> DQ2, where +// the first pair has (zp1, scale1) and the second pair has (zp2, scale2). +// After removing the middle two nodes, the zero point and scale of the final (outer) ops must be recomputed +// for correctness. +template +static bool RecomputeOuterQDQZeroPointAndScale(Graph& graph, Node& q1, const Node& dq1, const Node& q2, Node& dq2) { + bool skip_reset = false; + float new_scale = 0.0f; + ZeroPointType new_zero_point = 0; + if (!FindNewZeroPointAndScale(graph, dq1, q2, new_scale, new_zero_point, skip_reset)) { + return false; + } + if (skip_reset) { + return true; + } + ApplyNewInputValue(graph, dq2, QDQ::InputIndex::SCALE_ID, new_scale); + ApplyNewInputValue(graph, q1, QDQ::InputIndex::SCALE_ID, new_scale); + ApplyNewInputValue(graph, dq2, QDQ::InputIndex::ZERO_POINT_ID, new_zero_point); + ApplyNewInputValue(graph, q1, QDQ::InputIndex::ZERO_POINT_ID, new_zero_point); + + return true; +} + +// Checks if the provided node index (dq1_index) is a part of a valid double QDQ pair sequence +// (i.e., Q1 -> DQ1 -> Q2 -> DQ2) that can be reduced to the outer Q/DQ nodes (i.e., Q1 -> DQ2). +// If so, the zero point and scale of the outer Q/DQ nodes are recomputed and the node indices of the other nodes +// in the sequence (i.e., Q1, Q2, and DQ2) are returned via output parameters. 
+static bool IsReducibleDoubleQDQSequence(Graph& graph, NodeIndex& q1_index, NodeIndex dq1_index, + NodeIndex& q2_index, NodeIndex& dq2_index) { + // Ensure that dq1 is a DQ operator, has one parent and one child, and is not a graph output + Node* dq1 = graph.GetNode(dq1_index); + if (dq1 == nullptr || + dq1->OpType() != "DequantizeLinear" || + dq1->GetInputEdgesCount() != 1 || + dq1->GetOutputEdgesCount() != 1 || + graph.NodeProducesGraphOutput(*dq1)) { + return false; + } + + // Ensure that q2 is a Q operator, has only one child, and is not a graph output + q2_index = dq1->OutputEdgesBegin()->GetNode().Index(); + const Node* q2 = graph.GetNode(q2_index); + if (q2 == nullptr || + q2->OpType() != "QuantizeLinear" || + q2->GetOutputEdgesCount() != 1 || + graph.NodeProducesGraphOutput(*q2)) { + return false; + } + + // Ensure that q1 is a Q operator, has only one output, and is not a graph output + q1_index = dq1->InputEdgesBegin()->GetNode().Index(); + Node* q1 = graph.GetNode(q1_index); + if (q1 == nullptr || + q1->GetOutputEdgesCount() != 1 || + q1->OpType() != "QuantizeLinear" || + graph.NodeProducesGraphOutput(*q1)) { + return false; + } + + // Ensure the dq2 is a DQ operator. + dq2_index = q2->OutputEdgesBegin()->GetNode().Index(); + Node* dq2 = graph.GetNode(dq2_index); + if (dq2 == nullptr || + dq2->OpType() != "DequantizeLinear") { + return false; + } + + const auto get_constant_initializer = [&graph](const std::string& initializer_name) { + return graph.GetConstantInitializer(initializer_name, true); + }; + + // Each QDQ pair (i.e., q1 -> dq1, q2 -> dq2) has to meet the following additional requirements: + // - Scalar/constant zero-point and scale. + // - The DQ and Q ops within a pair must have the same scale and zero-point. + // However, each pair is allowed to have different scales and zero-points. + // + // TODO: IsQDQPairSupported() requires an explicit zero-point input, but technically a default + // value of 0 could be fine. 
+ if (!QDQ::IsQDQPairSupported(*q1, *dq1, get_constant_initializer, graph.ModelPath()) || + !QDQ::IsQDQPairSupported(*q2, *dq2, get_constant_initializer, graph.ModelPath())) { + return false; + } + + const auto& dq1_input_defs = dq1->InputDefs(); + const ONNX_NAMESPACE::TensorProto* dq1_zp_tensor_proto = graph.GetConstantInitializer( + dq1_input_defs[QDQ::InputIndex::ZERO_POINT_ID]->Name(), true); + + assert(dq1_zp_tensor_proto != nullptr); // IsQDQPairSupported should have checked that this exists. + + auto dq1_zp_type = dq1_zp_tensor_proto->data_type(); + + if (dq1_zp_type == ONNX_NAMESPACE::TensorProto_DataType_UINT8) { + return RecomputeOuterQDQZeroPointAndScale(graph, *q1, *dq1, *q2, *dq2); + } + + if (dq1_zp_type == ONNX_NAMESPACE::TensorProto_DataType_INT8) { + return RecomputeOuterQDQZeroPointAndScale(graph, *q1, *dq1, *q2, *dq2); + } + + if (dq1_zp_type == ONNX_NAMESPACE::TensorProto_DataType_UINT16) { + return RecomputeOuterQDQZeroPointAndScale(graph, *q1, *dq1, *q2, *dq2); + } + + if (dq1_zp_type == ONNX_NAMESPACE::TensorProto_DataType_INT16) { + return RecomputeOuterQDQZeroPointAndScale(graph, *q1, *dq1, *q2, *dq2); + } + + return false; // Unsupported zero-point type +} + +Status DoubleQDQPairsRemover::ApplyImpl( + Graph& graph, + bool& modified, + int /*graph_level*/, + const logging::Logger& /*logger*/) const { + const GraphViewer graph_viewer(graph); + const auto& node_topology_list = graph_viewer.GetNodesInTopologicalOrder(); + + for (const auto& dq1_index : node_topology_list) { + NodeIndex q1_index = 0; + NodeIndex q2_index = 0; + NodeIndex dq2_index = 0; + if (IsReducibleDoubleQDQSequence(graph, q1_index, dq1_index, q2_index, dq2_index)) { + graph.RemoveEdge(q1_index, dq1_index, 0, 0); + graph.RemoveEdge(dq1_index, q2_index, 0, 0); + graph.RemoveEdge(q2_index, dq2_index, 0, 0); + graph_utils::ReplaceNodeInput(*graph.GetNode(dq2_index), 0, *graph.GetNode(dq1_index)->MutableInputDefs()[0]); + graph.AddEdge(q1_index, dq2_index, 0, 0); + 
graph.RemoveNode(q2_index); + graph.RemoveNode(dq1_index); + modified = true; + } + } + return Status::OK(); } + } // namespace onnxruntime diff --git a/onnxruntime/core/optimizer/double_qdq_pairs_remover.h b/onnxruntime/core/optimizer/double_qdq_pairs_remover.h index c016f7181b7fe..1833b007674fd 100644 --- a/onnxruntime/core/optimizer/double_qdq_pairs_remover.h +++ b/onnxruntime/core/optimizer/double_qdq_pairs_remover.h @@ -3,19 +3,16 @@ #pragma once -#include "core/common/common.h" #include "core/optimizer/graph_transformer.h" -#include "core/optimizer/qdq_transformer/qdq_util.h" namespace onnxruntime { -using ONNX_NAMESPACE::TensorProto; -using ONNX_NAMESPACE::TensorProto_DataType; -using QDQ::InputIndex; - /** * @Class DoubleQDQPairsRemover * @brief Remove one pair of Q-DQ from Double Q-DQ pairs. + * Specifically, this transformer converts the sequence Q1 -> DQ1 -> Q2 -> DQ2, where the first pair has (zp1, scale1) + * and the second pair has (zp2, scale2), into the sequence Q1 -> DQ2 by removing the middle two nodes. The zero-point + * and scale of the final QDQ pair is recomputed to preserve equality to the original sequence. 
*/ class DoubleQDQPairsRemover : public GraphTransformer { public: @@ -27,28 +24,5 @@ class DoubleQDQPairsRemover : public GraphTransformer { bool& modified, int graph_level, const logging::Logger& logger) const override; - - static bool IsNodeRemovable( - Graph& graph, - const NodeIndex& self_index, - NodeIndex& parent_index, - NodeIndex& child_index, - NodeIndex& grandchild_index); - - template - static bool FindNewZeroPointAndScale( - const Graph& graph, - const Node& node1, - const Node& node2, - float& new_scale, - T& new_zero_point, - bool& skip_reset); - - template - static void ApplyNewInputValue( - Graph& graph, - Node& node, - const InputIndex& index, - T value); }; } // namespace onnxruntime diff --git a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc index d7039cb4b7cfc..0e383c3031ca6 100644 --- a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc +++ b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc @@ -2,6 +2,7 @@ // Licensed under the MIT License. #include "core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.h" +#include #include "core/mlas/inc/mlas.h" #include "core/optimizer/qdq_transformer/selectors_actions/qdq_actions.h" @@ -32,7 +33,8 @@ void SplitQDQRules(SelectorActionRegistry& qdq_selector_action_registry) { // create rules for ops that don't change the data void DropQDQNodesRules(SelectorActionRegistry& qdq_selector_action_registry) { // 3 nodes. DQ, target, Q. Merge into target and remove DQ and Q. 
- const std::string action_name{"drop"}; + const std::string drop_action_name{"drop"}; + const std::string drop_action_no_int16_name{"drop_no_int16_support"}; NTO::NodeLocation dq{NTO::NodeType::kInput, 0}; NTO::NodeLocation q{NTO::NodeType::kOutput, 0}; @@ -42,22 +44,33 @@ void DropQDQNodesRules(SelectorActionRegistry& qdq_selector_action_registry) { MoveToSlot(dq, ArgType::kInput, 0, ArgType::kInput, 0), MoveToSlot(q, ArgType::kOutput, 0, ArgType::kOutput, 0)}; - std::unique_ptr action = std::make_unique(std::move(moves)); + std::unique_ptr drop_action_no_int16 = std::make_unique( + std::vector(moves)); // Copy before std::move(moves) + std::unique_ptr drop_action = std::make_unique(std::move(moves)); #if !defined(ORT_MINIMAL_BUILD) - std::unique_ptr selector = std::make_unique(); - qdq_selector_action_registry.RegisterSelectorAndAction(action_name, + // Use a separate selector + action that disallows 16-bit types for MaxPool and Resize. + // int16 MaxPool is not supported by the ONNX specification. + // int16 Resize is not supported by the ORT implementation (although allowed by ONNX). 
+ std::unique_ptr selector_disallow_16bit = std::make_unique(false); + qdq_selector_action_registry.RegisterSelectorAndAction(drop_action_no_int16_name, + {{"MaxPool", {12}}, + {"Resize", {}}}, + std::move(selector_disallow_16bit), + std::move(drop_action_no_int16)); + + std::unique_ptr selector = std::make_unique(true); + qdq_selector_action_registry.RegisterSelectorAndAction(drop_action_name, {{"Gather", {}}, {"Reshape", {}}, {"Transpose", {}}, - {"MaxPool", {12}}, - {"Resize", {}}, {"Squeeze", {}}, {"Unsqueeze", {}}}, std::move(selector), - std::move(action)); + std::move(drop_action)); #else - qdq_selector_action_registry.RegisterAction(action_name, std::move(action)); + qdq_selector_action_registry.RegisterAction(drop_action_no_int16_name, std::move(drop_action_no_int16)); + qdq_selector_action_registry.RegisterAction(drop_action_name, std::move(drop_action)); #endif } @@ -74,6 +87,7 @@ void DropDQNodesRules(SelectorActionRegistry& qdq_selector_action_registry) { std::unique_ptr action = std::make_unique(std::move(moves)); #if !defined(ORT_MINIMAL_BUILD) + // TODO: Enable 16-bit types in selector when ArgMax supports 16-bit integer input tensors. std::unique_ptr selector = std::make_unique(); qdq_selector_action_registry.RegisterSelectorAndAction(action_name, {{"ArgMax", {}}}, @@ -91,6 +105,7 @@ void UnaryOpQDQRules(SelectorActionRegistry& qdq_selector_action_registry) { std::unique_ptr action = std::make_unique(kMSDomain); #if !defined(ORT_MINIMAL_BUILD) + // TODO: Enable 16-bit types in selector when unary QLinear* ops support 16-bit. std::unique_ptr selector = std::make_unique(); qdq_selector_action_registry.RegisterSelectorAndAction(action_name, {{"AveragePool", {}}, @@ -112,6 +127,7 @@ void BinaryOpQDQRules(SelectorActionRegistry& qdq_selector_action_registry) { std::unique_ptr action = std::make_unique(kMSDomain); #if !defined(ORT_MINIMAL_BUILD) + // TODO: Enable 16-bit types in selector when binary QLinear* ops support 16-bit. 
std::unique_ptr selector = std::make_unique(); qdq_selector_action_registry.RegisterSelectorAndAction(action_name, {{"Add", {}}, @@ -131,6 +147,7 @@ void VariadicOpQDQRules(SelectorActionRegistry& qdq_selector_action_registry) { std::unique_ptr action = std::make_unique(kMSDomain); #if !defined(ORT_MINIMAL_BUILD) + // TODO: Enable 16-bit types in selector when QLinearConcat supports 16-bit. std::unique_ptr selector = std::make_unique(); qdq_selector_action_registry.RegisterSelectorAndAction(action_name, @@ -152,6 +169,7 @@ void ConvQDQRules(SelectorActionRegistry& qdq_selector_action_registry, bool is_ std::unique_ptr action = std::make_unique(); #if !defined(ORT_MINIMAL_BUILD) + // TODO: Enable 16-bit types in selector when QLinearConv supports 16-bit. std::unique_ptr selector = std::make_unique(is_int8_allowed); qdq_selector_action_registry.RegisterSelectorAndAction(action_name, @@ -174,6 +192,7 @@ void MatMulQDQRules(SelectorActionRegistry& qdq_selector_action_registry, bool i std::unique_ptr action = std::make_unique(); #if !defined(ORT_MINIMAL_BUILD) + // TODO: Enable 16-bit types in selector when QLinearMatMul and MatMulInteger support 16-bit. std::unique_ptr selector = std::make_unique(is_int8_allowed); qdq_selector_action_registry.RegisterSelectorAndAction(action_name, {{"MatMul", {}}}, @@ -195,6 +214,7 @@ void GemmQDQRules(SelectorActionRegistry& qdq_selector_action_registry) { std::unique_ptr action = std::make_unique(); #if !defined(ORT_MINIMAL_BUILD) + // TODO: Enable 16-bit types in selector when QGemm supports 16-bit. std::unique_ptr selector = std::make_unique(); qdq_selector_action_registry.RegisterSelectorAndAction(action_name, {{"Gemm", {}}}, @@ -215,6 +235,7 @@ void WhereQDQRules(SelectorActionRegistry& qdq_selector_action_registry) { std::unique_ptr action = std::make_unique(); #if !defined(ORT_MINIMAL_BUILD) + // TODO: Enable 16-bit types in selector when QLinearWhere supports 16-bit. 
std::unique_ptr selector = std::make_unique(); qdq_selector_action_registry.RegisterSelectorAndAction(action_name, {{"Where", {}}}, diff --git a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selectors.cc b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selectors.cc index 02a7fb733813c..16c7bd5fce960 100644 --- a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selectors.cc +++ b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selectors.cc @@ -14,6 +14,12 @@ namespace onnxruntime { namespace QDQ { namespace { + +constexpr bool Is16BitIntType(int32_t data_type) { + return (data_type == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT16) || + (data_type == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT16); +} + // adjust for an optional input/output that has an entry but does not exist int NumActualValues(const Node& node, bool input) { const auto& defs = input ? node.InputDefs() : node.OutputDefs(); @@ -110,6 +116,17 @@ bool DropQDQNodeGroupSelector::Check(const GraphViewer& graph_viewer, return false; } + int32_t dt_input = dq_nodes[0]->InputDefs()[0]->TypeAsProto()->tensor_type().elem_type(); + int32_t dt_output = q_nodes[0]->OutputDefs()[0]->TypeAsProto()->tensor_type().elem_type(); + + if (dt_input != dt_output) { + return false; + } + + if (!allow_16bit_ && Is16BitIntType(dt_input)) { + return false; + } + const Node& dq_node = *dq_nodes.front(); const Node& q_node = *q_nodes.front(); @@ -124,7 +141,7 @@ bool DropDQNodeGroupSelector::Check(const GraphViewer& graph_viewer, const Node& node, const std::vector& dq_nodes, const std::vector& q_nodes) const { - int num_dq_inputs = NumActualValues(node, true); + constexpr int num_dq_inputs = 1; if (num_dq_inputs != gsl::narrow_cast(dq_nodes.size())) { return false; } @@ -136,6 +153,12 @@ bool DropDQNodeGroupSelector::Check(const GraphViewer& graph_viewer, (void)q_nodes; const Node& dq_node = *dq_nodes.front(); + const 
int32_t dt_input = dq_node.InputDefs()[0]->TypeAsProto()->tensor_type().elem_type(); + + // 16-bit int types must be explicitly allowed. + if (!allow_16bit_ && Is16BitIntType(dt_input)) { + return false; + } auto get_const_initializer = [&graph_viewer](const std::string& initializer_name) { return graph_viewer.GetConstantInitializer(initializer_name, true); @@ -154,7 +177,16 @@ bool UnaryNodeGroupSelector::Check(const GraphViewer& graph_viewer, const Node& int32_t dt_input = dq_nodes[0]->InputDefs()[0]->TypeAsProto()->tensor_type().elem_type(); int32_t dt_output = q_nodes[0]->OutputDefs()[0]->TypeAsProto()->tensor_type().elem_type(); - return dt_input == dt_output; + if (dt_input != dt_output) { + return false; + } + + // 16-bit int types must be explicitly allowed. + if (!allow_16bit_ && Is16BitIntType(dt_input)) { + return false; + } + + return true; } bool BinaryNodeGroupSelector::Check(const GraphViewer& graph_viewer, @@ -168,8 +200,18 @@ bool BinaryNodeGroupSelector::Check(const GraphViewer& graph_viewer, int32_t dt_input_1 = dq_nodes[0]->InputDefs()[0]->TypeAsProto()->tensor_type().elem_type(); int32_t dt_input_2 = dq_nodes[1]->InputDefs()[0]->TypeAsProto()->tensor_type().elem_type(); int32_t dt_output = q_nodes[0]->OutputDefs()[0]->TypeAsProto()->tensor_type().elem_type(); - return dt_input_1 == dt_input_2 && - dt_input_1 == dt_output; + + // All input and output types must match. + if (dt_input_1 != dt_input_2 || dt_input_1 != dt_output) { + return false; + } + + // 16-bit int types must be explicitly allowed. + if (!allow_16bit_ && Is16BitIntType(dt_input_1)) { + return false; + } + + return true; } bool VariadicNodeGroupSelector::Check(const GraphViewer& graph_viewer, @@ -194,7 +236,17 @@ bool VariadicNodeGroupSelector::Check(const GraphViewer& graph_viewer, return false; } } - return dt_input == dt_output; + + if (dt_input != dt_output) { + return false; + } + + // 16-bit int types must be explicitly allowed. 
+ if (!allow_16bit_ && Is16BitIntType(dt_input)) { + return false; + } + + return true; } void InputVariadicSelector::UpdateBuilder(NodesToOptimizeIndicesBuilder& builder) const { @@ -227,12 +279,19 @@ bool ConvNodeGroupSelector::Check(const GraphViewer& graph_viewer, } } - if (dq_nodes.size() < 3) { // no bias - return true; + if (dq_nodes.size() == 3) { // has bias + int32_t dt_bias = dq_nodes[2]->InputDefs()[0]->TypeAsProto()->tensor_type().elem_type(); + if (dt_bias != ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32) { + return false; + } } - int32_t dt_bias = dq_nodes[2]->InputDefs()[0]->TypeAsProto()->tensor_type().elem_type(); - return dt_bias == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32; + // 16-bit int types must be explicitly allowed. + if (!allow_16bit_ && (Is16BitIntType(dt_input) || Is16BitIntType(dt_weight))) { + return false; + } + + return true; } void ConvSelector::UpdateBuilder(NodesToOptimizeIndicesBuilder& builder) const { @@ -256,6 +315,11 @@ bool MatMulNodeGroupSelector::Check(const GraphViewer& graph_viewer, } } + // 16-bit int types must be explicitly allowed. + if (!allow_16bit_ && (Is16BitIntType(dt_input) || Is16BitIntType(dt_weight))) { + return false; + } + // potential match for QLinearMatMul or MatMulIntegerToFloat bool qlinear = !q_nodes.empty(); @@ -299,6 +363,11 @@ bool GemmNodeGroupSelector::Check(const GraphViewer& graph_viewer, } } + // 16-bit int types must be explicitly allowed. 
+ if (!allow_16bit_ && (Is16BitIntType(dt_A) || Is16BitIntType(dt_B))) { + return false; + } + if (dq_nodes.size() < 3) { // no bias return true; } @@ -326,8 +395,18 @@ bool WhereNodeGroupSelector::Check(const GraphViewer& graph_viewer, const Node& const int32_t dt_input_1 = dq_nodes[0]->InputDefs()[0]->TypeAsProto()->tensor_type().elem_type(); const int32_t dt_input_2 = dq_nodes[1]->InputDefs()[0]->TypeAsProto()->tensor_type().elem_type(); const int32_t dt_output = q_nodes[0]->OutputDefs()[0]->TypeAsProto()->tensor_type().elem_type(); - return dt_input_1 == dt_input_2 && - dt_input_1 == dt_output; + + // All input and output types must match. + if (dt_input_1 != dt_input_2 || dt_input_1 != dt_output) { + return false; + } + + // 16-bit int types must be explicitly allowed. + if (!allow_16bit_ && Is16BitIntType(dt_input_1)) { + return false; + } + + return true; } bool PadNodeGroupSelector::Check(const GraphViewer& graph_viewer, const Node& node, diff --git a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selectors.h b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selectors.h index 58ebf81508962..d8fefdd8dc3d9 100644 --- a/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selectors.h +++ b/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selectors.h @@ -52,45 +52,75 @@ class NodeGroupSelector { // Single DQ -> node that does not change data -> Q. // Zero point and scale are constant scalars and must match class DropQDQNodeGroupSelector : public NodeGroupSelector { + public: + explicit DropQDQNodeGroupSelector(bool allow_16bit = true) : allow_16bit_(allow_16bit) {} + + private: bool Check(const GraphViewer& graph_viewer, const Node& node, const std::vector& dq_nodes, const std::vector& q_nodes) const override; + + bool allow_16bit_; }; // Single DQ -> node. 
class DropDQNodeGroupSelector : public NodeGroupSelector { + public: + explicit DropDQNodeGroupSelector(bool allow_16bit = true) : allow_16bit_(allow_16bit) {} + + private: bool Check(const GraphViewer& graph_viewer, const Node& node, const std::vector& dq_nodes, const std::vector& q_nodes) const override; + + bool allow_16bit_; }; // single input. default is to only support uint8. class UnaryNodeGroupSelector : public NodeGroupSelector { + public: + explicit UnaryNodeGroupSelector(bool allow_16bit = true) : allow_16bit_(allow_16bit) {} + + private: bool Check(const GraphViewer& graph_viewer, const Node& node, const std::vector& dq_nodes, const std::vector& q_nodes) const override; + + bool allow_16bit_; }; // 2 DQ nodes providing input -> node -> Q class BinaryNodeGroupSelector : public NodeGroupSelector { + public: + explicit BinaryNodeGroupSelector(bool allow_16bit = true) : allow_16bit_(allow_16bit) {} + + private: bool Check(const GraphViewer& graph_viewer, const Node& node, const std::vector& dq_nodes, const std::vector& q_nodes) const override; + + bool allow_16bit_; }; // Variadic DQ nodes -> node -> Q class VariadicNodeGroupSelector : public NodeGroupSelector { + public: + explicit VariadicNodeGroupSelector(bool allow_16bit = true) : allow_16bit_(allow_16bit) {} + private: bool Check(const GraphViewer& graph_viewer, const Node& node, const std::vector& dq_nodes, const std::vector& q_nodes) const override; + + bool allow_16bit_; }; // DQ nodes for X, W and optionally B -> node -> Q class ConvNodeGroupSelector : public NodeGroupSelector { public: // default to 'true' - ConvNodeGroupSelector(bool int8_allowed = true) : int8_allowed_(int8_allowed) {} + ConvNodeGroupSelector(bool int8_allowed = true, bool allow_16bit = true) + : int8_allowed_(int8_allowed), allow_16bit_(allow_16bit) {} private: bool Check(const GraphViewer& graph_viewer, const Node& node, @@ -98,16 +128,20 @@ class ConvNodeGroupSelector : public NodeGroupSelector { const std::vector& q_nodes) 
const override; bool int8_allowed_; + bool allow_16bit_; }; class WhereNodeGroupSelector : public NodeGroupSelector { public: - WhereNodeGroupSelector() = default; + explicit WhereNodeGroupSelector(bool allow_16bit = true) + : allow_16bit_(allow_16bit) {} private: bool Check(const GraphViewer& graph_viewer, const Node& node, const std::vector& dq_nodes, const std::vector& q_nodes) const override; + + bool allow_16bit_; }; class PadNodeGroupSelector : public NodeGroupSelector { @@ -125,9 +159,11 @@ class PadNodeGroupSelector : public NodeGroupSelector { class MatMulNodeGroupSelector : public NodeGroupSelector { public: MatMulNodeGroupSelector(bool int8_allowed = true, - bool matmulintegertofloat_allowed = false) + bool matmulintegertofloat_allowed = false, + bool allow_16bit = true) : int8_allowed_(int8_allowed), - matmulintegertofloat_allowed_(matmulintegertofloat_allowed) { + matmulintegertofloat_allowed_(matmulintegertofloat_allowed), + allow_16bit_(allow_16bit) { } private: @@ -136,15 +172,21 @@ class MatMulNodeGroupSelector : public NodeGroupSelector { const std::vector& q_nodes) const override; bool int8_allowed_; bool matmulintegertofloat_allowed_; + bool allow_16bit_; }; // Input: DQ nodes for A, B and optional C // Output: optional Q node for Y class GemmNodeGroupSelector : public NodeGroupSelector { + public: + explicit GemmNodeGroupSelector(bool allow_16bit = true) : allow_16bit_(allow_16bit) {} + private: bool Check(const GraphViewer& graph_viewer, const Node& node, const std::vector& dq_nodes, const std::vector& q_nodes) const override; + + bool allow_16bit_; }; // Input: DQ nodes for input, scale, and B @@ -207,28 +249,33 @@ class BaseSelector : public NodeSelector { class DropQDQNodesSelector : public BaseSelector { public: - DropQDQNodesSelector() : BaseSelector(std::make_unique()) {} + explicit DropQDQNodesSelector(bool allow_16bit = false) + : BaseSelector(std::make_unique(allow_16bit)) {} }; class DropDQNodesSelector : public BaseSelector { 
public: - DropDQNodesSelector() : BaseSelector(std::make_unique()) {} + explicit DropDQNodesSelector(bool allow_16bit = false) + : BaseSelector(std::make_unique(allow_16bit)) {} }; class UnarySelector : public BaseSelector { public: - UnarySelector() : BaseSelector(std::make_unique()) {} + explicit UnarySelector(bool allow_16bit = false) + : BaseSelector(std::make_unique(allow_16bit)) {} }; class BinarySelector : public BaseSelector { public: - BinarySelector() : BaseSelector(std::make_unique()) {} + explicit BinarySelector(bool allow_16bit = false) + : BaseSelector(std::make_unique(allow_16bit)) {} }; // Variadic DQ nodes -> node -> Q class InputVariadicSelector : public BaseSelector { public: - InputVariadicSelector() : BaseSelector(std::make_unique()) {} + explicit InputVariadicSelector(bool allow_16bit = false) + : BaseSelector(std::make_unique(allow_16bit)) {} void UpdateBuilder(NodesToOptimizeIndicesBuilder&) const override; }; @@ -244,46 +291,36 @@ class OutputVariadicSelector : public BaseSelector { // DQ nodes for X, W and optionally B -> node -> Q class ConvSelector : public BaseSelector { public: - ConvSelector(bool int8_allowed = false) : BaseSelector(std::make_unique(int8_allowed)) {} + ConvSelector(bool int8_allowed = false, bool allow_16bit = false) + : BaseSelector(std::make_unique(int8_allowed, allow_16bit)) {} void UpdateBuilder(NodesToOptimizeIndicesBuilder&) const override; }; + class WhereSelector : public BaseSelector { public: - WhereSelector() : BaseSelector(std::make_unique()) {} + explicit WhereSelector(bool allow_16bit = false) + : BaseSelector(std::make_unique(allow_16bit)) {} }; + // 2 DQ nodes for input -> node -> optional Q if QLinearMatMul, MatMulIntegerToFloat if not class MatMulSelector : public BaseSelector { public: - MatMulSelector(bool int8_allowed) - : BaseSelector(std::make_unique(int8_allowed, /*matmulintegertofloat_allowed*/ true)) {} + MatMulSelector(bool int8_allowed, bool allow_16bit = false) + : 
BaseSelector(std::make_unique(int8_allowed, /*matmulintegertofloat_allowed*/ true, + allow_16bit)) {} }; // Input: DQ nodes for A, B and optional C // Output: optional Q node for Y class GemmSelector : public BaseSelector { public: - GemmSelector() - : BaseSelector(std::make_unique()) {} + explicit GemmSelector(bool allow_16bit = false) + : BaseSelector(std::make_unique(allow_16bit)) {} void UpdateBuilder(NodesToOptimizeIndicesBuilder&) const override; }; -// Input: DQ nodes for input, scale, and B (bias) -// Output: Q node for output -class InstanceNormalizationSelector : public BaseSelector { - public: - InstanceNormalizationSelector() - : BaseSelector(std::make_unique()) {} -}; - -// DQ nodes for X, W and optionally B, (mean, var not required) -> node -> Q -class BatchNormalizationSelector : public BaseSelector { - public: - BatchNormalizationSelector(bool int8_allowed = false) - : BaseSelector(std::make_unique(int8_allowed)) {} -}; - } // namespace QDQ } // namespace onnxruntime diff --git a/onnxruntime/core/optimizer/transpose_optimization/onnx_transpose_optimization.cc b/onnxruntime/core/optimizer/transpose_optimization/onnx_transpose_optimization.cc index 3723ee6032582..2c11bf144999e 100644 --- a/onnxruntime/core/optimizer/transpose_optimization/onnx_transpose_optimization.cc +++ b/onnxruntime/core/optimizer/transpose_optimization/onnx_transpose_optimization.cc @@ -1195,7 +1195,7 @@ bool TransposeQuantizeDequantizeAxis(const api::GraphRef& graph, const std::vect static bool HandleQuantizeDequantizeAxis(const api::GraphRef& graph, const std::vector& perm, api::NodeRef& node, int64_t opset) { if (opset < 13) { - // no `axis` value until opset 13 + // no `axis` attribute until opset 13 return true; } diff --git a/onnxruntime/core/providers/cpu/quantization/quantize_linear.cc b/onnxruntime/core/providers/cpu/quantization/quantize_linear.cc index 67a9a5991939a..a0d75e8cc0e69 100644 --- a/onnxruntime/core/providers/cpu/quantization/quantize_linear.cc +++ 
b/onnxruntime/core/providers/cpu/quantization/quantize_linear.cc @@ -5,13 +5,47 @@ #include "core/framework/element_type_lists.h" #include "core/framework/float8.h" #include "core/framework/float16.h" -#include "core/providers/cpu/quantization/quantize_linear.h" +#include "core/framework/op_kernel.h" #include "core/providers/common.h" #include "core/mlas/inc/mlas.h" #include "core/util/qmath.h" namespace onnxruntime { +template +class DequantizeLinear final : public OpKernel { + public: + explicit DequantizeLinear(const OpKernelInfo& info) : OpKernel(info) { + if (!info.GetAttr("axis", &axis_).IsOK()) { + axis_ = 1; + } + } + + Status Compute(OpKernelContext* context) const override; + + private: + int64_t axis_; +}; + +template +class QuantizeLinear final : public OpKernel { + public: + explicit QuantizeLinear(const OpKernelInfo& info) : OpKernel(info) { + if (!info.GetAttr("axis", &axis_).IsOK()) { + axis_ = 1; + } + if (!info.GetAttr("saturate", &saturate_).IsOK()) { + saturate_ = 1; + } + } + + Status Compute(OpKernelContext* context) const override; + + private: + int64_t axis_; + int64_t saturate_; +}; + static void PrepareForQDQ(const TensorShape& input_shape, const Tensor& scale, const Tensor* zero_point_ptr, @@ -86,6 +120,59 @@ REGISTER_DEQUANTIZELINEAR_VERSIONED(int8_t) REGISTER_DEQUANTIZELINEAR_VERSIONED(uint8_t) REGISTER_DEQUANTIZELINEAR_VERSIONED(int32_t) +#if !defined(DISABLE_CONTRIB_OPS) +namespace contrib { + +// Register alternate MS domain versions of the DequantizeLinear kernel. +// The MS domain versions additionally support 16-bit integer quantization types. 
+ONNX_CPU_OPERATOR_TYPED_MS_KERNEL( + DequantizeLinear, + 1, + uint8_t, + KernelDefBuilder() + .TypeConstraint("T1", DataTypeImpl::GetTensorType()) + .TypeConstraint("T2", DataTypeImpl::GetTensorType()), + DequantizeLinear); + +ONNX_CPU_OPERATOR_TYPED_MS_KERNEL( + DequantizeLinear, + 1, + int8_t, + KernelDefBuilder() + .TypeConstraint("T1", DataTypeImpl::GetTensorType()) + .TypeConstraint("T2", DataTypeImpl::GetTensorType()), + DequantizeLinear); + +ONNX_CPU_OPERATOR_TYPED_MS_KERNEL( + DequantizeLinear, + 1, + uint16_t, + KernelDefBuilder() + .TypeConstraint("T1", DataTypeImpl::GetTensorType()) + .TypeConstraint("T2", DataTypeImpl::GetTensorType()), + DequantizeLinear); + +ONNX_CPU_OPERATOR_TYPED_MS_KERNEL( + DequantizeLinear, + 1, + int16_t, + KernelDefBuilder() + .TypeConstraint("T1", DataTypeImpl::GetTensorType()) + .TypeConstraint("T2", DataTypeImpl::GetTensorType()), + DequantizeLinear); + +ONNX_CPU_OPERATOR_TYPED_MS_KERNEL( + DequantizeLinear, + 1, + int32_t, + KernelDefBuilder() + .TypeConstraint("T1", DataTypeImpl::GetTensorType()) + .TypeConstraint("T2", DataTypeImpl::GetTensorType()), + DequantizeLinear); + +} // namespace contrib +#endif // !defined(DISABLE_CONTRIB_OPS) + template struct DequantizeLinearApply { void op(int64_t N, int64_t broadcast_dim, int64_t block_size, const T* input, const OutT* scale, OutT* output, const T* zero_point) { @@ -220,6 +307,49 @@ REGISTER_QUANTIZELINEAR(Float8E5M2FNUZ) REGISTER_QUANTIZELINEAR_VERSIONED(int8_t) REGISTER_QUANTIZELINEAR_VERSIONED(uint8_t) +#if !defined(DISABLE_CONTRIB_OPS) +namespace contrib { + +// Register alternate MS domain versions of the QuantizeLinear kernel. +// The MS domain versions additionally support 16-bit integer quantization types. 
+ONNX_CPU_OPERATOR_TYPED_MS_KERNEL( + QuantizeLinear, + 1, + uint8_t, + KernelDefBuilder() + .TypeConstraint("T1", DataTypeImpl::GetTensorType()) + .TypeConstraint("T2", DataTypeImpl::GetTensorType()), + QuantizeLinear); + +ONNX_CPU_OPERATOR_TYPED_MS_KERNEL( + QuantizeLinear, + 1, + int8_t, + KernelDefBuilder() + .TypeConstraint("T1", DataTypeImpl::GetTensorType()) + .TypeConstraint("T2", DataTypeImpl::GetTensorType()), + QuantizeLinear); + +ONNX_CPU_OPERATOR_TYPED_MS_KERNEL( + QuantizeLinear, + 1, + uint16_t, + KernelDefBuilder() + .TypeConstraint("T1", DataTypeImpl::GetTensorType()) + .TypeConstraint("T2", DataTypeImpl::GetTensorType()), + QuantizeLinear); + +ONNX_CPU_OPERATOR_TYPED_MS_KERNEL( + QuantizeLinear, + 1, + int16_t, + KernelDefBuilder() + .TypeConstraint("T1", DataTypeImpl::GetTensorType()) + .TypeConstraint("T2", DataTypeImpl::GetTensorType()), + QuantizeLinear); +} // namespace contrib +#endif // !defined(DISABLE_CONTRIB_OPS) + template void ParQuantizeLinear(const InputType* Input, OutputType* Output, @@ -279,5 +409,4 @@ Status QuantizeLinear::Compute(OpKernelContext* ctx) const { return Status::OK(); } - } // namespace onnxruntime diff --git a/onnxruntime/core/providers/cpu/quantization/quantize_linear.h b/onnxruntime/core/providers/cpu/quantization/quantize_linear.h deleted file mode 100644 index 60e9d09665ab2..0000000000000 --- a/onnxruntime/core/providers/cpu/quantization/quantize_linear.h +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. 
- -#pragma once - -#include "core/common/common.h" -#include "core/framework/op_kernel.h" -#include "core/util/math_cpuonly.h" - -namespace onnxruntime { - -template -class DequantizeLinear final : public OpKernel { - public: - DequantizeLinear(const OpKernelInfo& info) : OpKernel(info) { - if (!info.GetAttr("axis", &axis_).IsOK()) { - axis_ = 1; - } - } - - Status Compute(OpKernelContext* context) const override; - - private: - int64_t axis_; -}; - -template -class QuantizeLinear final : public OpKernel { - public: - QuantizeLinear(const OpKernelInfo& info) : OpKernel(info) { - if (!info.GetAttr("axis", &axis_).IsOK()) { - axis_ = 1; - } - if (!info.GetAttr("saturate", &saturate_).IsOK()) { - saturate_ = 1; - } - } - - Status Compute(OpKernelContext* context) const override; - - private: - int64_t axis_; - int64_t saturate_; -}; -} // namespace onnxruntime diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/simple_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/simple_op_builder.cc index 556a86bb1519b..8081033c35618 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/simple_op_builder.cc +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/simple_op_builder.cc @@ -30,6 +30,12 @@ class SimpleOpBuilder : public BaseOpBuilder { private: Status ExplicitOpCheck(const QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit) const; + Status ProcessSigmoidOrTanhOutput(QnnModelWrapper& qnn_model_wrapper, + const NodeUnit& node_unit, + std::vector&& input_names, + std::vector&& param_tensor_names, + const logging::Logger& logger, + bool do_op_validation) const ORT_MUST_USE_RESULT; static constexpr std::array gridsample_supported_modes = {"bilinear", "nearest"}; static constexpr std::array gridsample_supported_padding_modes = {"zeros", "border", "reflection"}; @@ -279,10 +285,120 @@ Status SimpleOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_w ORT_RETURN_IF_ERROR(ProcessGridSampleAttributes(qnn_model_wrapper, 
node_unit, param_tensor_names)); } - ORT_RETURN_IF_ERROR(ProcessOutputs(qnn_model_wrapper, node_unit, - std::move(input_names), - std::move(param_tensor_names), - logger, do_op_validation, GetQnnOpType(op_type))); + if (op_type == "Sigmoid" || op_type == "Tanh") { + // QNN requires 16-bit QDQ Sigmoid and Tanh to use specific output scale and zero-point values + // regardless of floating-point range. + return ProcessSigmoidOrTanhOutput(qnn_model_wrapper, + node_unit, + std::move(input_names), + std::move(param_tensor_names), + logger, do_op_validation); + } + + return ProcessOutputs(qnn_model_wrapper, node_unit, + std::move(input_names), + std::move(param_tensor_names), + logger, do_op_validation, GetQnnOpType(op_type)); +} + +/** + * Overrides offset and scale quantization parameters for operators (e.g., Sigmoid or Tanh) that require + * specific values. Returns true if the quantization parameters were overridden. + * + * \param op_type The ONNX operator type. + * \param qnn_data_type The QNN tensor data type. + * \param quant_params Output scale/offset parameter that may be overridden. + * \return True if the offset and scale were overridden. + */ +static bool OverrideQuantParams(const std::string& op_type, Qnn_DataType_t qnn_data_type, + Qnn_ScaleOffset_t& quant_params) { + const int32_t orig_offset = quant_params.offset; + const float orig_scale = quant_params.scale; + + if (op_type == "Sigmoid") { + switch (qnn_data_type) { + case QNN_DATATYPE_UFIXED_POINT_16: + quant_params.offset = 0; + quant_params.scale = 1.0f / 65536.0f; + break; + case QNN_DATATYPE_SFIXED_POINT_16: + quant_params.offset = 0; + quant_params.scale = 1.0f / 32768.0f; + break; + default: + break; // Do nothing. 
+ } + } + + if (op_type == "Tanh") { + switch (qnn_data_type) { + case QNN_DATATYPE_UFIXED_POINT_16: + quant_params.offset = -32768; + quant_params.scale = 1.0f / 32768.0f; + break; + case QNN_DATATYPE_SFIXED_POINT_16: + quant_params.offset = 0; + quant_params.scale = 1.0f / 32768.0f; + break; + default: + break; // Do nothing. + } + } + + return quant_params.offset != orig_offset || quant_params.scale != orig_scale; +} + +/** + * Processes the output for Sigmoid or Tanh operators and creates the corresponding QNN operator. + * These operator types are handled separately because QNN requires 16-bit QDQ Sigmoid and Tanh operators to use + * specific scale and zero-point values regardless of floating-point range. + * + * \param qnn_model_wrapper The QNN model wrapper object. + * \param node_unit The QDQ node unit for the Sigmoid or Tanh node. + * \param input_names List of input names. + * \param param_tensor_names List of param tensor names. + * \param logger Logger used to report information. + * \param do_op_validation True if the new QNN node should be validated. + */ +Status SimpleOpBuilder::ProcessSigmoidOrTanhOutput(QnnModelWrapper& qnn_model_wrapper, + const NodeUnit& node_unit, + std::vector&& input_names, + std::vector&& param_tensor_names, + const logging::Logger& logger, + bool do_op_validation) const { + const std::string& op_type = node_unit.OpType(); + const auto& output = node_unit.Outputs()[0]; + const std::string& output_name = output.node_arg.Name(); + + OnnxInputInfo output_info = {}; + + // TODO(adrianlizarraga): Rename GetOnnxInputInfo() since it can be used for outputs as well. 
+ ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetOnnxInputInfo(output, output_info)); + + if (output_info.quant_param.quantizationEncoding == QNN_QUANTIZATION_ENCODING_SCALE_OFFSET) { + if (OverrideQuantParams(op_type, output_info.qnn_data_type, output_info.quant_param.scaleOffsetEncoding)) { + const int32_t offset = output_info.quant_param.scaleOffsetEncoding.offset; + const float scale = output_info.quant_param.scaleOffsetEncoding.scale; + + LOGS(logger, VERBOSE) << "QNN requires that 16-bit quantized " << op_type << " operators use offset/scale values " + << "of <" << offset << ", " << scale << ">. QNN EP will override the original values."; + } + } + + Qnn_TensorType_t tensor_type = qnn_model_wrapper.IsGraphOutput(output_name) ? QNN_TENSOR_TYPE_APP_READ + : QNN_TENSOR_TYPE_NATIVE; + QnnTensorWrapper output_tensorwrapper(output_name, tensor_type, output_info.qnn_data_type, output_info.quant_param, + std::move(output_info.shape)); + ORT_RETURN_IF_NOT(qnn_model_wrapper.AddTensorWrapper(std::move(output_tensorwrapper)), "Failed to add tensor."); + ORT_RETURN_IF_NOT(qnn_model_wrapper.CreateQnnNode(GetNodeName(node_unit), + QNN_OP_PACKAGE_NAME_QTI_AISW, + GetQnnOpType(op_type), + std::move(input_names), + {output_name}, + std::move(param_tensor_names), + do_op_validation), + "Failed to add node."); + return Status::OK(); } diff --git a/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc b/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc index eebe75d839b12..9d339387b0a43 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc +++ b/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc @@ -301,6 +301,16 @@ bool QnnModelWrapper::ProcessOffset(const std::string& offset_name, offset_value = 0 - (uint8_span.data()[0]); break; } + case ONNX_NAMESPACE::TensorProto_DataType_UINT16: { + auto uint16_span = ReinterpretAsSpan(gsl::make_span(unpacked_tensor)); + offset_value = -static_cast(uint16_span.data()[0]); + break; + } + case 
ONNX_NAMESPACE::TensorProto_DataType_INT16: { + auto int16_span = ReinterpretAsSpan(gsl::make_span(unpacked_tensor)); + offset_value = -static_cast(int16_span.data()[0]); + break; + } case ONNX_NAMESPACE::TensorProto_DataType_INT32: { auto int32_span = ReinterpretAsSpan(gsl::make_span(unpacked_tensor)); offset_value = -(int32_span.data()[0]); diff --git a/onnxruntime/python/tools/quantization/onnx_quantizer.py b/onnxruntime/python/tools/quantization/onnx_quantizer.py index 924d4c72b6390..2d1e418f9d2b4 100644 --- a/onnxruntime/python/tools/quantization/onnx_quantizer.py +++ b/onnxruntime/python/tools/quantization/onnx_quantizer.py @@ -104,7 +104,7 @@ def __init__( ) self.q_matmul_const_b_only = "MatMulConstBOnly" in self.extra_options and self.extra_options["MatMulConstBOnly"] self.is_weight_symmetric = ( - weight_qType in (QuantType.QInt8, QuantType.QFLOAT8E4M3FN) + weight_qType in (QuantType.QInt8, QuantType.QInt16, QuantType.QFLOAT8E4M3FN) if "WeightSymmetric" not in self.extra_options else self.extra_options["WeightSymmetric"] ) diff --git a/onnxruntime/python/tools/quantization/qdq_quantizer.py b/onnxruntime/python/tools/quantization/qdq_quantizer.py index f87a9d8228bac..e595b580b20df 100644 --- a/onnxruntime/python/tools/quantization/qdq_quantizer.py +++ b/onnxruntime/python/tools/quantization/qdq_quantizer.py @@ -25,6 +25,7 @@ add_quant_output_suffix, add_quant_suffix, find_by_name, + ms_domain, ) from .registry import CreateQDQQuantizer @@ -119,6 +120,20 @@ def __init__( else extra_options["QDQOpTypePerChannelSupportToAxis"] ) + self.qdq_op_domain = ms_domain if extra_options.get("UseQDQContribOps", False) else None + + # The ONNX spec does not yet support 16-bit Q/DQ ops. So, must override the Q/DQ op domain to 'com.microsoft' + # if the activation or weight types are 16-bit integers. + # TODO: Remove this override (and use only the 'UseQDQContribOps' option) if/when ONNX adds 16-bit support. 
+ int16_types = (TensorProto.UINT16, TensorProto.INT16) + if not self.qdq_op_domain and (self.activation_qType in int16_types or self.weight_qType in int16_types): + logging.warning( + "ONNX QuantizeLinear and DequantizeLinear operators do not support 16-bit integer quantization types. " + f"The domain of QuantizeLinear and DequantizeLinear operators will be set to '{ms_domain}' to " + "enable support." + ) + self.qdq_op_domain = ms_domain + def _is_tensor_quantizable(self, tensor_name): """ Check if tensor can be quantized @@ -249,6 +264,7 @@ def _create_qdq_nodes( [q_output], quant_node_name, axis=axis, + domain=self.qdq_op_domain, ) dequant_node = onnx.helper.make_node( DEQUANT_OP_NAME, @@ -256,6 +272,7 @@ def _create_qdq_nodes( [dq_output], dequant_node_name, axis=axis, + domain=self.qdq_op_domain, ) self.model.add_nodes([qlinear_node, dequant_node]) @@ -300,6 +317,7 @@ def _add_qdq_pair_for_initializer(self, weight_proto, tensor_type, axis=None): [weight_dequant_output], add_dequant_suffix(weight_name), axis=axis, + domain=self.qdq_op_domain, ) self.model.add_node(dequant_node) @@ -443,6 +461,7 @@ def _quantize_bias_tensors(self): [bias_name], node_name, axis=quant_value.axis, + domain=self.qdq_op_domain, ) else: dequant_node = onnx.helper.make_node( @@ -450,6 +469,7 @@ def _quantize_bias_tensors(self): inputs, [bias_name], node_name, + domain=self.qdq_op_domain, ) else: raise RuntimeError(f"Unexpected operator type {quant_value.node_type!r}.") diff --git a/onnxruntime/python/tools/quantization/quant_utils.py b/onnxruntime/python/tools/quantization/quant_utils.py index 4d5bcca29618f..74e54c3f1fa37 100644 --- a/onnxruntime/python/tools/quantization/quant_utils.py +++ b/onnxruntime/python/tools/quantization/quant_utils.py @@ -72,6 +72,8 @@ class QuantType(Enum): QInt8 = 0 QUInt8 = 1 QFLOAT8E4M3FN = 2 + QInt16 = 3 + QUInt16 = 4 def __str__(self): return self.name @@ -89,6 +91,10 @@ def tensor_type(self): return TensorProto.INT8 if self == QuantType.QUInt8: return 
TensorProto.UINT8 + if self == QuantType.QUInt16: + return TensorProto.UINT16 + if self == QuantType.QInt16: + return TensorProto.INT16 if self == QuantType.QFLOAT8E4M3FN: return TensorProto.FLOAT8E4M3FN raise ValueError(f"Unexpected value qtype={self!r}.") @@ -112,12 +118,35 @@ def from_string(format): ONNX_TYPE_TO_NP_TYPE = { onnx_proto.TensorProto.INT8: numpy.dtype("int8"), onnx_proto.TensorProto.UINT8: numpy.dtype("uint8"), + onnx_proto.TensorProto.INT16: numpy.dtype("int16"), + onnx_proto.TensorProto.UINT16: numpy.dtype("uint16"), onnx_proto.TensorProto.FLOAT8E4M3FN: float8e4m3fn, } +ONNX_INT_TYPE_RANGE = { + onnx_proto.TensorProto.UINT8: (0, 255), + onnx_proto.TensorProto.INT8: (-128, 127), + onnx_proto.TensorProto.UINT16: (0, 65535), + onnx_proto.TensorProto.INT16: (-32768, 32767), +} + +ONNX_INT_TYPE_SYMMETRIC_RANGE = { + onnx_proto.TensorProto.INT8: (-127, 127), + onnx_proto.TensorProto.INT16: (-32767, 32767), +} + +ONNX_INT_TYPE_REDUCED_RANGE = { + onnx_proto.TensorProto.UINT8: (0, 127), + onnx_proto.TensorProto.INT8: (-64, 64), + onnx_proto.TensorProto.UINT16: (0, 32767), + onnx_proto.TensorProto.INT16: (-16384, 16384), +} + def quantize_nparray(qType, arr, scale, zero_point, low=None, high=None): - assert qType in ONNX_TYPE_TO_NP_TYPE, f"Unexpected data type {qType} requested. Only INT8 and UINT8 are supported." + assert ( + qType in ONNX_TYPE_TO_NP_TYPE + ), f"Unexpected data type {qType} requested. Only INT8, UINT8, INT16, and UINT16 are supported." 
if qType in ( onnx_proto.TensorProto.FLOAT8E4M3FN, onnx_proto.TensorProto.FLOAT8E4M3FNUZ, @@ -146,8 +175,10 @@ def quantize_nparray(qType, arr, scale, zero_point, low=None, high=None): return ref.run(None, {"X": arr.astype(numpy.float32), "scale": scale.astype(numpy.float32)})[0] else: dtype = ONNX_TYPE_TO_NP_TYPE[qType] - cliplow = max(0 if dtype == numpy.uint8 else -127, -127 if low is None else low) - cliphigh = min(255 if dtype == numpy.uint8 else 127, 255 if high is None else high) + (qmin, qmax) = get_qmin_qmax_for_qType(qType, reduce_range=False, symmetric=True) + + cliplow = max(qmin, low) if low is not None else qmin + cliphigh = min(qmax, high) if high is not None else qmax arr_fp32 = numpy.asarray((arr.astype(numpy.float32) / scale).round() + zero_point) numpy.clip(arr_fp32, cliplow, cliphigh, out=arr_fp32) return arr_fp32.astype(dtype) @@ -267,7 +298,7 @@ def quantize_data(data, qType, symmetric, reduce_range=False): ) return rmin, rmax, zero_point, scale, quantized_data - if qType in (TensorProto.INT8, TensorProto.UINT8): + if qType in (TensorProto.INT8, TensorProto.UINT8, TensorProto.INT16, TensorProto.UINT16): if len(data): qmin, qmax = get_qmin_qmax_for_qType(qType, reduce_range, symmetric=symmetric) zero_point, scale = compute_scale_zp(rmin, rmax, qmin, qmax, symmetric) @@ -283,18 +314,22 @@ def get_qmin_qmax_for_qType(qType, reduce_range=False, symmetric=False): # noqa :parameter qType: onnx.onnx_pb.TensorProto.UINT8 or onnx.onnx_pb.TensorProto.UINT8 :return: qmin, qmax """ - if qType == onnx_proto.TensorProto.UINT8: - (qmin, qmax) = (0, 127) if reduce_range else (0, 255) - elif qType == onnx_proto.TensorProto.INT8: - if symmetric: - (qmin, qmax) = (-64, 64) if reduce_range else (-127, 127) - else: - (qmin, qmax) = (-64, 64) if reduce_range else (-128, 127) - elif qType == onnx_proto.TensorProto.FLOAT8E4M3FN: + if qType == onnx_proto.TensorProto.FLOAT8E4M3FN: raise NotImplementedError("This function is not implemented for float 8 as not needed.") 
+ + qrange = None + + if reduce_range: + qrange = ONNX_INT_TYPE_REDUCED_RANGE.get(qType) + elif symmetric and qType in ONNX_INT_TYPE_SYMMETRIC_RANGE: + qrange = ONNX_INT_TYPE_SYMMETRIC_RANGE[qType] else: - raise ValueError(f"Unexpected data type {qType} requested. Only INT8 and UINT8 are supported.") - return qmin, qmax + qrange = ONNX_INT_TYPE_RANGE.get(qType) + + if not qrange: + raise ValueError(f"Unexpected data type {qType} requested. Only INT8, UINT8, INT16, and UINT16 are supported.") + + return qrange def get_qrange_for_qType(qType, reduce_range=False, symmetric=False): # noqa: N802 diff --git a/onnxruntime/python/tools/quantization/quantize.py b/onnxruntime/python/tools/quantization/quantize.py index 6b1646aec9679..706047fe32400 100644 --- a/onnxruntime/python/tools/quantization/quantize.py +++ b/onnxruntime/python/tools/quantization/quantize.py @@ -240,6 +240,11 @@ def check_static_quant_arguments(quant_format: QuantFormat, activation_type: Qua f"weight_type={weight_type}!=QuantType.QFLOAT8E4M3FN" ) + q16_types = [QuantType.QInt16, QuantType.QUInt16] + + if (activation_type in q16_types or weight_type in q16_types) and quant_format != QuantFormat.QDQ: + raise ValueError("Only QuantFormat.QDQ supports 16-bit quantization types.") + if activation_type == QuantType.QInt8 and weight_type == QuantType.QInt8 and quant_format != QuantFormat.QDQ: logging.warning( "Please use QuantFormat.QDQ for activation type QInt8 and weight type QInt8. " @@ -356,6 +361,11 @@ def quantize_static( SmoothQuantFolding = True/False : Default is True. It only works if SmoothQuant is True. If enabled, inserted Mul ops during SmoothQuant will be folded into the previous op if the previous op is foldable. + UseQDQContribOps = True/False : + Default is False. If enabled, the inserted QuantizeLinear and DequantizeLinear ops will have the + `com.microsoft` domain, which forces use of ONNX Runtime's QuantizeLinear and DequantizeLinear + contrib op implementations. 
The contrib op implementations may support features not standardized + into the ONNX specification (e.g., 16-bit quantization types). """ if activation_type == QuantType.QFLOAT8E4M3FN or weight_type == QuantType.QFLOAT8E4M3FN: if calibrate_method != CalibrationMethod.Distribution: diff --git a/onnxruntime/test/contrib_ops/quantize_ops_test.cc b/onnxruntime/test/contrib_ops/quantize_ops_test.cc index af29f972a64cf..64a97ed4f945b 100644 --- a/onnxruntime/test/contrib_ops/quantize_ops_test.cc +++ b/onnxruntime/test/contrib_ops/quantize_ops_test.cc @@ -4,6 +4,7 @@ #include "gtest/gtest.h" #include "test/common/tensor_op_test_utils.h" #include "test/providers/provider_test_utils.h" +#include "test/util/include/default_providers.h" namespace onnxruntime { namespace test { @@ -40,7 +41,31 @@ TEST(DequantizeLinearOpTest, DequantizeLinear_per_tensor_float_int8) { test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); } -// Scalar zero & scale with int32 +// Test int16 com.microsoft.DequantizeLinear (per tensor) +TEST(DequantizeLinearOpTest, DequantizeLinear_per_tensor_float_int16_cpu) { + OpTester test("DequantizeLinear", 1, onnxruntime::kMSDomain); + std::vector dims{4}; + test.AddInput("x", dims, {-300, -30, -1025, 1270}); + test.AddInput("scale", {}, {2.0f}, true); + test.AddInput("zero_point", {}, {-1024}, true); + test.AddOutput("y", dims, {1448.0f, 1988.0f, -2.0f, 4588.0f}); + // Disable Tensorrt EP due to error: unsupported data type + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); +} + +// Test uint16 com.microsoft.DequantizeLinear (per tensor) +TEST(DequantizeLinearOpTest, DequantizeLinear_per_tensor_float_uint16_cpu) { + OpTester test("DequantizeLinear", 1, onnxruntime::kMSDomain); + std::vector dims{4}; + test.AddInput("x", dims, {30000, 31000, 32768, 33000}); + test.AddInput("scale", {}, {2.0f}, true); + test.AddInput("zero_point", {}, {32767}, true); + test.AddOutput("y", dims, {-5534.0f, 
-3534.0f, 2.0f, 466.0f}); + // Disable Tensorrt EP due to error: unsupported data type + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); +} + +// Test int32 DequantizeLinear with scalar zero-point & scale. TEST(DequantizeLinearOpTest, DequantizeLinear_per_tensor_float_int32_cpu) { OpTester test("DequantizeLinear", 1, onnxruntime::kMSDomain); std::vector dims{4}; @@ -256,6 +281,60 @@ TEST(QuantizeLinearContribOpTest, QuantizeLinear_per_tensor_float_int8) { test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); } +// Test uint16 com.microsoft.QuantizeLinear (per tensor) +TEST(QuantizeLinearContribOpTest, QuantizeLinear_per_tensor_float_uint16) { + OpTester test("QuantizeLinear", 1, onnxruntime::kMSDomain); + std::vector dims{12}; + test.AddInput("x", dims, { + 0.f, -128.f, 3.f, -3.f, // rounding half to even + 2.9f, -2.9f, // round < .5 + 3.1f, -3.1f, // round > .5 + 65536.f, -65534.f, // critical point + 70000.f, -70000.f // saturate case + }); + test.AddInput("scale", {}, {2.0f}, true); + test.AddInput("zero_point", {}, {32767}, true); + test.AddOutput("y", dims, + {32767, 32703, + 32769, 32765, + 32768, 32766, + 32769, 32765, + 65535, 0, + 65535, 0}); + + // Disable Tensorrt EP due to error: unsupported data type + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); +} + +// Test int16 com.microsoft.QuantizeLinear (per tensor) +TEST(QuantizeLinearContribOpTest, QuantizeLinear_per_tensor_float_int16) { + OpTester test("QuantizeLinear", 1, onnxruntime::kMSDomain); + std::vector dims{16}; + test.AddInput("x", dims, { + 0.f, -514.f, 3.f, -3.f, // rounding half to even + 2.9f, -2.9f, // round < .5 + 3.1f, -3.1f, // round > .5 + 65022.f, -66046.f, // critical point + 65023.f, -66047.f, // critical point + 65024.f, -66048.f, // critical point + 70000.f, -70000.f // saturate case + }); + test.AddInput("scale", {}, {2.0f}, true); + test.AddInput("zero_point", {}, {256}, true); 
+ test.AddOutput("y", dims, + {256, -1, + 258, 254, + 257, 255, + 258, 254, + 32767, -32767, + 32767, -32768, + 32767, -32768, + 32767, -32768}); + + // Disable Tensorrt EP due to error: unsupported data type + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); +} + #ifdef USE_CUDA TEST(QuantizeLinearContribOpTest, QuantizeLinear_per_tensor_half_uint8) { OpTester test("QuantizeLinear", 1, onnxruntime::kMSDomain); diff --git a/onnxruntime/test/mlas/unittest/test_quantizelinear.cpp b/onnxruntime/test/mlas/unittest/test_quantizelinear.cpp index 55d1a2f4f3608..2832598fef1a9 100644 --- a/onnxruntime/test/mlas/unittest/test_quantizelinear.cpp +++ b/onnxruntime/test/mlas/unittest/test_quantizelinear.cpp @@ -3,26 +3,26 @@ #include "test_util.h" -template +template class MlasQuantizeLinearTest : public MlasTestBase { private: MatrixGuardBuffer BufferInput; - MatrixGuardBuffer BufferOutput; - MatrixGuardBuffer BufferOutputReference; + MatrixGuardBuffer BufferOutput; + MatrixGuardBuffer BufferOutputReference; - void GenerateReference(const float* Input, xint8_t* OutputReference, size_t N, float Scale, xint8_t ZeroPoint) { + void GenerateReference(const float* Input, QuantInt* OutputReference, size_t N, float Scale, QuantInt ZeroPoint) { for (size_t n = 0; n < N; n++) { float FloatValue = std::nearbyintf(Input[n] / Scale) + float(ZeroPoint); - FloatValue = std::max(FloatValue, float(std::numeric_limits::min())); - FloatValue = std::min(FloatValue, float(std::numeric_limits::max())); - OutputReference[n] = (xint8_t)FloatValue; + FloatValue = std::max(FloatValue, static_cast(std::numeric_limits::min())); + FloatValue = std::min(FloatValue, static_cast(std::numeric_limits::max())); + OutputReference[n] = static_cast(FloatValue); } } void Test(size_t N) { float* Input = BufferInput.GetBuffer(N); - xint8_t* Output = BufferOutput.GetBuffer(N); - xint8_t* OutputReference = BufferOutputReference.GetBuffer(N); + QuantInt* Output = 
BufferOutput.GetBuffer(N); + QuantInt* OutputReference = BufferOutputReference.GetBuffer(N); std::default_random_engine generator(static_cast(N)); @@ -34,8 +34,9 @@ class MlasQuantizeLinearTest : public MlasTestBase { float Scale = (MaximumValue - MinimumValue) / 512.f; - std::uniform_int_distribution zp_distribution(std::numeric_limits::min(), std::numeric_limits::max()); - xint8_t ZeroPoint = static_cast(zp_distribution(generator)); + std::uniform_int_distribution zp_distribution(std::numeric_limits::min(), + std::numeric_limits::max()); + QuantInt ZeroPoint = static_cast(zp_distribution(generator)); std::uniform_real_distribution distribution(MinimumValue, MaximumValue); for (size_t n = 0; n < N; n++) { @@ -52,8 +53,15 @@ class MlasQuantizeLinearTest : public MlasTestBase { public: static const char* GetTestSuiteName() { - static const std::string suite_name(std::is_signed::value ? "QuantizeLinearS8" : "QuantizeLinearU8"); - return suite_name.c_str(); + if constexpr (std::is_same_v) { + return "QuantizeLinearS8"; + } else if (std::is_same_v) { + return "QuantizeLinearU8"; + } else if (std::is_same_v) { + return "QuantizeLinearS16"; + } else { + return "QuantizeLinearU16"; + } } void ExecuteShort(void) override { @@ -67,12 +75,18 @@ template <> MlasQuantizeLinearTest* MlasTestFixture>::mlas_tester(nullptr); template <> MlasQuantizeLinearTest* MlasTestFixture>::mlas_tester(nullptr); +template <> +MlasQuantizeLinearTest* MlasTestFixture>::mlas_tester(nullptr); +template <> +MlasQuantizeLinearTest* MlasTestFixture>::mlas_tester(nullptr); static UNUSED_VARIABLE bool added_to_main = AddTestRegister([](bool is_short_execute) { size_t count = 0; if (is_short_execute) { count += MlasDirectShortExecuteTests>::RegisterShortExecute(); count += MlasDirectShortExecuteTests>::RegisterShortExecute(); + count += MlasDirectShortExecuteTests>::RegisterShortExecute(); + count += MlasDirectShortExecuteTests>::RegisterShortExecute(); } return count; }); diff --git 
a/onnxruntime/test/optimizer/ensure_unique_dq_for_node_unit_test.cc b/onnxruntime/test/optimizer/ensure_unique_dq_for_node_unit_test.cc index d0ce4898a472c..feff607703341 100644 --- a/onnxruntime/test/optimizer/ensure_unique_dq_for_node_unit_test.cc +++ b/onnxruntime/test/optimizer/ensure_unique_dq_for_node_unit_test.cc @@ -20,15 +20,17 @@ struct GraphConfig { bool has_subgraph_consumer{false}; }; -auto GetGraphBuilder(const GraphConfig& config, bool use_ms_domain_qdq_ops) { +template +std::function GetGraphBuilder(const GraphConfig& config, bool use_ms_domain_qdq_ops) { return [config, use_ms_domain_qdq_ops](ModelTestBuilder& builder) { const auto input_shape = std::vector{1, 2, 4}; constexpr float scale = 0.5f; - constexpr uint8_t zero_point = 0; + constexpr QuantType zero_point = 0; - auto* dq_input = builder.MakeInput(input_shape, uint8_t{0}, uint8_t{255}); + auto* dq_input = builder.MakeInput(input_shape, std::numeric_limits::min(), + std::numeric_limits::max()); auto* dq_output = config.has_graph_output ? builder.MakeOutput() : builder.MakeIntermediate(); - builder.AddDequantizeLinearNode(dq_input, scale, zero_point, dq_output, use_ms_domain_qdq_ops); + builder.AddDequantizeLinearNode(dq_input, scale, zero_point, dq_output, use_ms_domain_qdq_ops); for (size_t i = 0; i < config.num_explicit_consumer_nodes; ++i) { // use Concat for the explicit consumer node as it supports a variadic number of inputs @@ -71,10 +73,12 @@ auto GetGraphBuilder(const GraphConfig& config, bool use_ms_domain_qdq_ops) { } void RunEnsureUniqueDQForNodeUnitTest(const GraphConfig& config, int expected_dq_count) { - auto run_tests = [config, expected_dq_count](bool use_ms_domain_qdq_ops) { + auto run_tests = [config, expected_dq_count](bool use_ms_domain_qdq_ops, bool use_16bit_qdq_ops) { constexpr int opset_version = 12; const char* dequantize_linear_key = use_ms_domain_qdq_ops ? 
"com.microsoft.DequantizeLinear" : "DequantizeLinear"; - std::function graph_builder_fn = GetGraphBuilder(config, use_ms_domain_qdq_ops); + std::function graph_builder_fn = use_16bit_qdq_ops + ? GetGraphBuilder(config, use_ms_domain_qdq_ops) + : GetGraphBuilder(config, use_ms_domain_qdq_ops); { SCOPED_TRACE("test with standalone transformer"); @@ -117,9 +121,10 @@ void RunEnsureUniqueDQForNodeUnitTest(const GraphConfig& config, int expected_dq } }; - run_tests(false); + run_tests(false, false); #if !defined(DISABLE_CONTRIB_OPS) - run_tests(true); // Use contrib QDQ ops. + run_tests(true, false); // Use contrib QDQ ops. + run_tests(true, true); // Use 16-bit contrib QDQ ops. #endif } diff --git a/onnxruntime/test/optimizer/graph_transform_test.cc b/onnxruntime/test/optimizer/graph_transform_test.cc index 553fcca92aa78..dce1f2d40e8b9 100755 --- a/onnxruntime/test/optimizer/graph_transform_test.cc +++ b/onnxruntime/test/optimizer/graph_transform_test.cc @@ -83,6 +83,7 @@ #include "test/util/include/test_utils.h" #include "core/optimizer/pre_shape_node_elimination.h" #include "core/optimizer/double_qdq_pairs_remover.h" +#include "core/optimizer/qdq_transformer/qdq_util.h" #ifdef ENABLE_TRAINING #include "orttraining/core/optimizer/bitmask_dropout_replacement.h" #endif @@ -155,44 +156,43 @@ TEST_F(GraphTransformationTests, IdentityWithSharedNodeArgNotEliminated) { ASSERT_TRUE(op_to_count["Add"] == 1); } +// Runs a model to ensure that common subexpression elimination does not eliminate +// DequantizeLinear nodes. 
TEST_F(GraphTransformationTests, DequantizeLinearNodeNotEliminated) { - constexpr const ORTCHAR_T* model_uri = MODEL_FOLDER "qdq_with_multi_consumer_dq_nodes.fixed.onnx"; - std::shared_ptr model; - ASSERT_STATUS_OK(Model::Load(model_uri, model, nullptr, *logger_)); - Graph& graph = model->MainGraph(); - std::map op_to_count = CountOpsInGraph(graph); - ASSERT_EQ(op_to_count["DequantizeLinear"], 25); + auto test_case = [](const ORTCHAR_T* model_uri, + bool use_contrib_qdq, + const logging::Logger& logger) { + const char* dq_key = use_contrib_qdq ? "com.microsoft.DequantizeLinear" : "DequantizeLinear"; + std::shared_ptr model; + ASSERT_STATUS_OK(Model::Load(model_uri, model, nullptr, logger)); + Graph& graph = model->MainGraph(); + std::map op_to_count = CountOpsInGraph(graph); + ASSERT_EQ(op_to_count[dq_key], 25); - onnxruntime::GraphTransformerManager graph_transformation_mgr{5}; - ASSERT_STATUS_OK(graph_transformation_mgr.Register(std::make_unique(), - TransformerLevel::Level1)); - ASSERT_STATUS_OK(graph_transformation_mgr.ApplyTransformers(graph, TransformerLevel::Level1, *logger_)); + onnxruntime::GraphTransformerManager graph_transformation_mgr{5}; + ASSERT_STATUS_OK(graph_transformation_mgr.Register(std::make_unique(), + TransformerLevel::Level1)); + ASSERT_STATUS_OK(graph_transformation_mgr.ApplyTransformers(graph, TransformerLevel::Level1, logger)); - // CommonSubexpressionElimination should skip the DequantizeLinear nodes - op_to_count = CountOpsInGraph(graph); - ASSERT_EQ(op_to_count["DequantizeLinear"], 25); -} + // CommonSubexpressionElimination should skip the DequantizeLinear nodes + op_to_count = CountOpsInGraph(graph); + ASSERT_EQ(op_to_count[dq_key], 25); + }; + test_case(MODEL_FOLDER "qdq_with_multi_consumer_dq_nodes.fixed.onnx", + false, // use_contrib_qdq + *logger_); #if !defined(DISABLE_CONTRIB_OPS) -// Test that com.microsoft.DequantizeLinear is not eliminated in CommonSubexpressionElimination -TEST_F(GraphTransformationTests, 
MsDomainDequantizeLinearNodeNotEliminated) { - constexpr const ORTCHAR_T* model_uri = MODEL_FOLDER "qdq_with_multi_consumer_dq_nodes.fixed.qdq_contrib.onnx"; - std::shared_ptr model; - ASSERT_STATUS_OK(Model::Load(model_uri, model, nullptr, *logger_)); - Graph& graph = model->MainGraph(); - std::map op_to_count = CountOpsInGraph(graph); - ASSERT_EQ(op_to_count["com.microsoft.DequantizeLinear"], 25); - - onnxruntime::GraphTransformerManager graph_transformation_mgr{5}; - ASSERT_STATUS_OK(graph_transformation_mgr.Register(std::make_unique(), - TransformerLevel::Level1)); - ASSERT_STATUS_OK(graph_transformation_mgr.ApplyTransformers(graph, TransformerLevel::Level1, *logger_)); - - // CommonSubexpressionElimination should skip the DequantizeLinear nodes - op_to_count = CountOpsInGraph(graph); - ASSERT_EQ(op_to_count["com.microsoft.DequantizeLinear"], 25); -} + // Test with 8-bit com.microsoft.DequantizeLinear + test_case(MODEL_FOLDER "qdq_with_multi_consumer_dq_nodes.fixed.qdq_contrib.onnx", + true, // use_contrib_qdq + *logger_); + // Test with 16-bit com.microsoft.DequantizeLinear + test_case(MODEL_FOLDER "qdq_with_multi_consumer_dq_nodes.fixed.qdq16_contrib.onnx", + true, // use_contrib_qdq + *logger_); #endif // !defined(DISABLE_CONTRIB_OPS) +} TEST_F(GraphTransformationTests, IdentityInputIsGraphOutputNotEliminated) { constexpr const ORTCHAR_T* model_uri = MODEL_FOLDER "scan9_sum.onnx"; @@ -836,158 +836,120 @@ static void VerifyConstantFoldingWithDequantizeLinear(const std::unordered_map model; - ASSERT_STATUS_OK(Model::Load(model_uri, model, nullptr, *logger_)); - Graph& graph = model->MainGraph(); - std::map op_to_count = CountOpsInGraph(graph); - ASSERT_TRUE(op_to_count["QuantizeLinear"] == 1); - ASSERT_TRUE(op_to_count["DequantizeLinear"] == 3); - ASSERT_TRUE(op_to_count["Conv"] == 1); - - std::unordered_map expected_op_counts = {{"QuantizeLinear", 1}, - {"DequantizeLinear", 3}, - {"Conv", 1}}; - - SessionOptions session_options; - // Check DequantizeLinear 
aren't constant folded for default setting. - VerifyConstantFoldingWithDequantizeLinear(expected_op_counts, graph, session_options, *logger_); - - // set kOrtSessionOptionsDisableQuantQDQ to enable it explicitly - ASSERT_STATUS_OK(session_options.config_options.AddConfigEntry(kOrtSessionOptionsDisableQuantQDQ, "0")); - VerifyConstantFoldingWithDequantizeLinear(expected_op_counts, graph, session_options, *logger_); + auto test_case = [](const ORTCHAR_T* model_uri, + bool use_contrib_qdq, + const logging::Logger& logger) { + const char* q_key = use_contrib_qdq ? "com.microsoft.QuantizeLinear" : "QuantizeLinear"; + const char* dq_key = use_contrib_qdq ? "com.microsoft.DequantizeLinear" : "DequantizeLinear"; - // set SessionOptionsEnableQuantQDQ to disable it - expected_op_counts["DequantizeLinear"] = 1; - ASSERT_STATUS_OK(session_options.config_options.AddConfigEntry(kOrtSessionOptionsDisableQuantQDQ, "1")); - VerifyConstantFoldingWithDequantizeLinear(expected_op_counts, graph, session_options, *logger_); -} + std::shared_ptr model; + ASSERT_STATUS_OK(Model::Load(model_uri, model, nullptr, logger)); + Graph& graph = model->MainGraph(); + std::map op_to_count = CountOpsInGraph(graph); + ASSERT_TRUE(op_to_count[q_key] == 1); + ASSERT_TRUE(op_to_count[dq_key] == 3); + ASSERT_TRUE(op_to_count["Conv"] == 1); -#if !defined(DISABLE_CONTRIB_OPS) -// Test constant folding with a com.microsoft.DequantizeLinear node -TEST_F(GraphTransformationTests, ConstantFoldingWithMsDomainDequantizeLinear) { - constexpr const ORTCHAR_T* model_uri = MODEL_FOLDER "fusion/constant_folding_dequantizelinear.qdq_contrib.onnx"; - std::shared_ptr model; - ASSERT_STATUS_OK(Model::Load(model_uri, model, nullptr, *logger_)); - Graph& graph = model->MainGraph(); - std::map op_to_count = CountOpsInGraph(graph); - ASSERT_EQ(op_to_count["com.microsoft.QuantizeLinear"], 1); - ASSERT_EQ(op_to_count["com.microsoft.DequantizeLinear"], 3); - ASSERT_EQ(op_to_count["Conv"], 1); + std::unordered_map 
expected_op_counts = {{q_key, 1}, + {dq_key, 3}, + {"Conv", 1}}; - std::unordered_map expected_op_counts = {{"com.microsoft.QuantizeLinear", 1}, - {"com.microsoft.DequantizeLinear", 3}, - {"Conv", 1}}; + SessionOptions session_options; + // Check DequantizeLinear aren't constant folded for default setting. + VerifyConstantFoldingWithDequantizeLinear(expected_op_counts, graph, session_options, logger); - SessionOptions session_options; - // Check DequantizeLinear aren't constant folded for default setting. - VerifyConstantFoldingWithDequantizeLinear(expected_op_counts, graph, session_options, *logger_); + // set kOrtSessionOptionsDisableQuantQDQ to enable it explicitly + ASSERT_STATUS_OK(session_options.config_options.AddConfigEntry(kOrtSessionOptionsDisableQuantQDQ, "0")); + VerifyConstantFoldingWithDequantizeLinear(expected_op_counts, graph, session_options, logger); - // set kOrtSessionOptionsDisableQuantQDQ to enable it explicitly - ASSERT_STATUS_OK(session_options.config_options.AddConfigEntry(kOrtSessionOptionsDisableQuantQDQ, "0")); - VerifyConstantFoldingWithDequantizeLinear(expected_op_counts, graph, session_options, *logger_); + // set SessionOptionsEnableQuantQDQ to disable it + expected_op_counts[dq_key] = 1; + ASSERT_STATUS_OK(session_options.config_options.AddConfigEntry(kOrtSessionOptionsDisableQuantQDQ, "1")); + VerifyConstantFoldingWithDequantizeLinear(expected_op_counts, graph, session_options, logger); + }; - // set SessionOptionsEnableQuantQDQ to disable it - expected_op_counts["com.microsoft.DequantizeLinear"] = 1; - ASSERT_STATUS_OK(session_options.config_options.AddConfigEntry(kOrtSessionOptionsDisableQuantQDQ, "1")); - VerifyConstantFoldingWithDequantizeLinear(expected_op_counts, graph, session_options, *logger_); -} + test_case(MODEL_FOLDER "fusion/constant_folding_dequantizelinear.onnx", + false, *logger_); +#if !defined(DISABLE_CONTRIB_OPS) + // Test with 8-bit contrib QDQ ops + test_case(MODEL_FOLDER 
"fusion/constant_folding_dequantizelinear.qdq_contrib.onnx", + true, *logger_); + // Test with 16-bit contrib QDQ ops + test_case(MODEL_FOLDER "fusion/constant_folding_dequantizelinear.qdq16_contrib.onnx", + true, *logger_); #endif // !defined(DISABLE_CONTRIB_OPS) +} // model with 2 QDQ node units that can be constant folded as they are simple DQ -> Node -> Q where DQ and Node have // single consumer and do not produce graph outputs. Node is deterministic. // there are also other DQ nodes that should be ignored. TEST_F(GraphTransformationTests, ConstantFoldingQDQNodeUnit) { - constexpr const ORTCHAR_T* model_uri = MODEL_FOLDER "fusion/constant_folding_qdq_node_unit.onnx"; - std::shared_ptr model; - ASSERT_STATUS_OK(Model::Load(model_uri, model, nullptr, *logger_)); - Graph& graph = model->MainGraph(); - std::map op_to_count = CountOpsInGraph(graph); - ASSERT_TRUE(op_to_count["QuantizeLinear"] == 3); - ASSERT_TRUE(op_to_count["DequantizeLinear"] == 4); - ASSERT_TRUE(op_to_count["Unsqueeze"] == 1); - ASSERT_TRUE(op_to_count["Transpose"] == 1); + auto test_case = [](const ORTCHAR_T* model_uri, bool use_contrib_qdq, const logging::Logger& logger) { + const char* q_key = use_contrib_qdq ? "com.microsoft.QuantizeLinear" : "QuantizeLinear"; + const char* dq_key = use_contrib_qdq ? 
"com.microsoft.DequantizeLinear" : "DequantizeLinear"; - SessionOptions session_options; - - // 2 QDQ node units should be constant folded and go away - std::unordered_map expected_op_counts = {{"QuantizeLinear", 1}, - {"DequantizeLinear", 2}, - {"Transpose", 0}, - {"Unsqueeze", 0}}; - - VerifyConstantFoldingWithDequantizeLinear(expected_op_counts, graph, session_options, *logger_); -} + std::shared_ptr model; + ASSERT_STATUS_OK(Model::Load(model_uri, model, nullptr, logger)); + Graph& graph = model->MainGraph(); + std::map op_to_count = CountOpsInGraph(graph); + ASSERT_TRUE(op_to_count[q_key] == 3); + ASSERT_TRUE(op_to_count[dq_key] == 4); + ASSERT_TRUE(op_to_count["Unsqueeze"] == 1); + ASSERT_TRUE(op_to_count["Transpose"] == 1); -#if !defined(DISABLE_CONTRIB_OPS) -// model with 2 (com.microsoft) QDQ node units that can be constant folded as they are simple DQ -> Node -> Q where -// DQ and Node have single consumer and do not produce graph outputs. Node is deterministic. -// there are also other DQ nodes that should be ignored. 
-TEST_F(GraphTransformationTests, ConstantFoldingMsDomainQDQNodeUnit) { - constexpr const ORTCHAR_T* model_uri = MODEL_FOLDER "fusion/constant_folding_qdq_node_unit.qdq_contrib.onnx"; - std::shared_ptr model; - ASSERT_STATUS_OK(Model::Load(model_uri, model, nullptr, *logger_)); - Graph& graph = model->MainGraph(); - std::map op_to_count = CountOpsInGraph(graph); - ASSERT_EQ(op_to_count["com.microsoft.QuantizeLinear"], 3); - ASSERT_EQ(op_to_count["com.microsoft.DequantizeLinear"], 4); - ASSERT_EQ(op_to_count["Unsqueeze"], 1); - ASSERT_EQ(op_to_count["Transpose"], 1); + SessionOptions session_options; - SessionOptions session_options; + // 2 QDQ node units should be constant folded and go away + std::unordered_map expected_op_counts = {{q_key, 1}, + {dq_key, 2}, + {"Transpose", 0}, + {"Unsqueeze", 0}}; - // 2 QDQ node units should be constant folded and go away - std::unordered_map expected_op_counts = {{"com.microsoft.QuantizeLinear", 1}, - {"com.microsoft.DequantizeLinear", 2}, - {"Transpose", 0}, - {"Unsqueeze", 0}}; + VerifyConstantFoldingWithDequantizeLinear(expected_op_counts, graph, session_options, logger); + }; - VerifyConstantFoldingWithDequantizeLinear(expected_op_counts, graph, session_options, *logger_); -} + test_case(MODEL_FOLDER "fusion/constant_folding_qdq_node_unit.onnx", false, *logger_); +#if !defined(DISABLE_CONTRIB_OPS) + // Test with 8-bit com.microsoft.Q/DQ + test_case(MODEL_FOLDER "fusion/constant_folding_qdq_node_unit.qdq_contrib.onnx", true, *logger_); + // Test with 16-bit com.microsoft.Q/DQ + test_case(MODEL_FOLDER "fusion/constant_folding_qdq_node_unit.qdq16_contrib.onnx", true, *logger_); #endif // !defined(DISABLE_CONTRIB_OPS) +} // Simple QDQ Node Unit but shouldn't be constant folded as the node in the middle produces a graph output TEST_F(GraphTransformationTests, ConstantFoldingQDQNodeUnitGraphOutput) { - constexpr const ORTCHAR_T* model_uri = MODEL_FOLDER "fusion/constant_folding_qdq_node_unit.graph_output.onnx"; - std::shared_ptr 
model; - ASSERT_STATUS_OK(Model::Load(model_uri, model, nullptr, *logger_)); - Graph& graph = model->MainGraph(); - std::map op_to_count = CountOpsInGraph(graph); - ASSERT_TRUE(op_to_count["QuantizeLinear"] == 2); - ASSERT_TRUE(op_to_count["DequantizeLinear"] == 3); - ASSERT_TRUE(op_to_count["Unsqueeze"] == 1); + auto test_case = [](const ORTCHAR_T* model_uri, bool use_contrib_qdq, const logging::Logger& logger) { + const char* q_key = use_contrib_qdq ? "com.microsoft.QuantizeLinear" : "QuantizeLinear"; + const char* dq_key = use_contrib_qdq ? "com.microsoft.DequantizeLinear" : "DequantizeLinear"; - std::unordered_map expected_op_counts = {{"QuantizeLinear", 2}, - {"DequantizeLinear", 3}, - {"Unsqueeze", 1}}; + std::shared_ptr model; + ASSERT_STATUS_OK(Model::Load(model_uri, model, nullptr, logger)); + Graph& graph = model->MainGraph(); + std::map op_to_count = CountOpsInGraph(graph); + ASSERT_TRUE(op_to_count[q_key] == 2); + ASSERT_TRUE(op_to_count[dq_key] == 3); + ASSERT_TRUE(op_to_count["Unsqueeze"] == 1); - SessionOptions session_options; - VerifyConstantFoldingWithDequantizeLinear(expected_op_counts, graph, session_options, *logger_); -} + std::unordered_map expected_op_counts = {{q_key, 2}, + {dq_key, 3}, + {"Unsqueeze", 1}}; -#if !defined(DISABLE_CONTRIB_OPS) -// Simple (com.microsoft) QDQ Node Unit but shouldn't be constant folded as the node in the middle produces a -// graph output -TEST_F(GraphTransformationTests, ConstantFoldingMsDomainQDQNodeUnitGraphOutput) { - constexpr const ORTCHAR_T* model_uri = - MODEL_FOLDER "fusion/constant_folding_qdq_node_unit.graph_output.qdq_contrib.onnx"; - std::shared_ptr model; - ASSERT_STATUS_OK(Model::Load(model_uri, model, nullptr, *logger_)); - Graph& graph = model->MainGraph(); - std::map op_to_count = CountOpsInGraph(graph); - ASSERT_EQ(op_to_count["com.microsoft.QuantizeLinear"], 2); - ASSERT_EQ(op_to_count["com.microsoft.DequantizeLinear"], 3); - ASSERT_EQ(op_to_count["Unsqueeze"], 1); + SessionOptions 
session_options; + VerifyConstantFoldingWithDequantizeLinear(expected_op_counts, graph, session_options, logger); + }; - std::unordered_map expected_op_counts = {{"com.microsoft.QuantizeLinear", 2}, - {"com.microsoft.DequantizeLinear", 3}, - {"Unsqueeze", 1}}; + test_case(MODEL_FOLDER "fusion/constant_folding_qdq_node_unit.graph_output.onnx", false, *logger_); +#if !defined(DISABLE_CONTRIB_OPS) + // Test with 8-bit contrib QDQ ops + test_case(MODEL_FOLDER "fusion/constant_folding_qdq_node_unit.graph_output.qdq_contrib.onnx", true, *logger_); - SessionOptions session_options; - VerifyConstantFoldingWithDequantizeLinear(expected_op_counts, graph, session_options, *logger_); -} + // Test with 16-bit contrib QDQ ops + test_case(MODEL_FOLDER "fusion/constant_folding_qdq_node_unit.graph_output.qdq16_contrib.onnx", true, *logger_); #endif // !defined(DISABLE_CONTRIB_OPS) +} TEST_F(GraphTransformationTests, ConstantFolding_RemoveDanglingInputNodesToConstantFoldedNode) { constexpr const ORTCHAR_T* model_uri = MODEL_FOLDER "fusion/constant_folding_remove_dangling_inputs.onnx"; @@ -3898,12 +3860,12 @@ TEST_F(GraphTransformationTests, DoublQDQRemover_RemoveDupQDQ) { std::string zp_name_after_reshape_node; for (auto& node : graph.Nodes()) { if (node.Name() == "dq_2") { - dq_scale_name_before_reshape_node = node.InputDefs()[InputIndex::SCALE_ID]->Name(); - zp_name_before_reshape_node = node.InputDefs()[InputIndex::ZERO_POINT_ID]->Name(); + dq_scale_name_before_reshape_node = node.InputDefs()[QDQ::InputIndex::SCALE_ID]->Name(); + zp_name_before_reshape_node = node.InputDefs()[QDQ::InputIndex::ZERO_POINT_ID]->Name(); } if (node.Name() == "q_3") { - dq_scale_name_after_reshape_node = node.InputDefs()[InputIndex::SCALE_ID]->Name(); - zp_name_after_reshape_node = node.InputDefs()[InputIndex::ZERO_POINT_ID]->Name(); + dq_scale_name_after_reshape_node = node.InputDefs()[QDQ::InputIndex::SCALE_ID]->Name(); + zp_name_after_reshape_node = 
node.InputDefs()[QDQ::InputIndex::ZERO_POINT_ID]->Name(); } } EXPECT_EQ(dq_scale_name_before_reshape_node.empty(), false); diff --git a/onnxruntime/test/optimizer/graph_transform_test_builder.h b/onnxruntime/test/optimizer/graph_transform_test_builder.h index 743faee3ee2a5..63577131480c6 100644 --- a/onnxruntime/test/optimizer/graph_transform_test_builder.h +++ b/onnxruntime/test/optimizer/graph_transform_test_builder.h @@ -39,9 +39,21 @@ namespace test { template struct IsTypeQuantLinearCompatible : utils::IsByteType {}; +template <> +struct IsTypeQuantLinearCompatible : std::true_type {}; + +template <> +struct IsTypeQuantLinearCompatible : std::true_type {}; + template struct IsTypeDequantLinearCompatible : utils::IsByteType {}; +template <> +struct IsTypeDequantLinearCompatible : std::true_type {}; + +template <> +struct IsTypeDequantLinearCompatible : std::true_type {}; + template <> struct IsTypeDequantLinearCompatible : std::true_type {}; diff --git a/onnxruntime/test/optimizer/qdq_transformer_test.cc b/onnxruntime/test/optimizer/qdq_transformer_test.cc index 0dfeb599d0ae3..a438a61cb9b36 100644 --- a/onnxruntime/test/optimizer/qdq_transformer_test.cc +++ b/onnxruntime/test/optimizer/qdq_transformer_test.cc @@ -891,37 +891,139 @@ TEST(QDQTransformerTests, Gemm_S8S8U8) { QDQTransformerGemmTests(); } +// Runs a test case that checks if Q/DQ nodes are dropped from DQ -> Gather -> Q. 
+template +static void RunGatherDropQDQTestCase(const std::vector& input1_shape, + const std::vector& weights_shape, + bool use_contrib_qdq = false) { + auto build_test_case = [input1_shape, weights_shape, use_contrib_qdq](ModelTestBuilder& builder) { + auto* input1_arg = builder.MakeInput(input1_shape, 0, weights_shape[0] - 1); + auto* output_arg = builder.MakeOutput(); + + // add Gather + auto* weight = builder.MakeInitializer(weights_shape, std::numeric_limits::min(), + std::numeric_limits::max()); + auto* dq_w_output = builder.MakeIntermediate(); + auto* gather_output = builder.MakeIntermediate(); + builder.AddDequantizeLinearNode(weight, .003f, 1, dq_w_output, use_contrib_qdq); + builder.AddNode("Gather", {dq_w_output, input1_arg}, {gather_output}); + + // add Q + builder.AddQuantizeLinearNode(gather_output, .003f, 1, output_arg, use_contrib_qdq); + }; + + auto check_graph = [use_contrib_qdq](InferenceSessionWrapper& session) { + auto op_to_count = CountOpsInGraph(session.GetGraph()); + const QDQOpKeys qdq_keys = GetQDQOpKeys(use_contrib_qdq); + EXPECT_EQ(op_to_count["Gather"], 1); + EXPECT_EQ(op_to_count[qdq_keys.quantize_linear], 0); + EXPECT_EQ(op_to_count[qdq_keys.dequantize_linear], 0); + }; + + TransformerTester(build_test_case, check_graph, TransformerLevel::Level1, TransformerLevel::Level2); +} + +// Checks that Q/DQ nodes are dropped from DQ -> Gather -> Q. Uses 8-bit and 16-bit Q/DQ ops. 
TEST(QDQTransformerTests, Gather) { - auto test_case = [&](const std::vector& input1_shape, const std::vector& weights_shape, - bool use_contrib_qdq = false) { - auto build_test_case = [&](ModelTestBuilder& builder) { - auto* input1_arg = builder.MakeInput(input1_shape, 0, weights_shape[0] - 1); - auto* output_arg = builder.MakeOutput(); + RunGatherDropQDQTestCase({12, 37}, {24, 12}); + RunGatherDropQDQTestCase({12, 37}, {24, 12}, true); // Use com.microsoft QDQ ops + RunGatherDropQDQTestCase({12, 37}, {24, 12}, true); // Use int16 com.microsoft QDQ ops +} - // add Gather - auto* weight = builder.MakeInitializer(weights_shape, -128, 127); - auto* dq_w_output = builder.MakeIntermediate(); - auto* gather_output = builder.MakeIntermediate(); - builder.AddDequantizeLinearNode(weight, .003f, 1, dq_w_output, use_contrib_qdq); - builder.AddNode("Gather", {dq_w_output, input1_arg}, {gather_output}); +// Runs a test case that checks if Q/DQ nodes are dropped from DQ -> Reshape -> Q. +template +static void RunReshapeDropQDQTestCase(const std::vector& input_shape, + const std::vector& new_shape, + bool use_contrib_qdq = false) { + auto build_test_case = [input_shape, new_shape, use_contrib_qdq](ModelTestBuilder& builder) { + constexpr QuantType qmin = std::numeric_limits::min(); + constexpr QuantType qmax = std::numeric_limits::max(); + + auto* input_arg = builder.MakeInput(input_shape, qmin, qmax); + auto* output_arg = builder.MakeOutput(); + QuantType zero_point = 1 + (qmax + qmin) / 2; + + // Add Reshape node + auto* new_shape_arg = builder.Make1DInitializer(new_shape); + auto* input_arg_dq = builder.MakeIntermediate(); + auto* reshape_output = builder.MakeIntermediate(); + builder.AddDequantizeLinearNode(input_arg, .003f, zero_point, input_arg_dq, use_contrib_qdq); + builder.AddNode("Reshape", {input_arg_dq, new_shape_arg}, {reshape_output}); + + // add Q + builder.AddQuantizeLinearNode(reshape_output, .003f, zero_point, output_arg, use_contrib_qdq); + }; - // add Q - 
builder.AddQuantizeLinearNode(gather_output, .003f, 1, output_arg, use_contrib_qdq); - }; + auto check_graph = [use_contrib_qdq](InferenceSessionWrapper& session) { + auto op_to_count = CountOpsInGraph(session.GetGraph()); + const QDQOpKeys qdq_keys = GetQDQOpKeys(use_contrib_qdq); + EXPECT_EQ(op_to_count["Reshape"], 1); + EXPECT_EQ(op_to_count[qdq_keys.quantize_linear], 0); + EXPECT_EQ(op_to_count[qdq_keys.dequantize_linear], 0); + }; - auto check_graph = [&](InferenceSessionWrapper& session) { - auto op_to_count = CountOpsInGraph(session.GetGraph()); - const QDQOpKeys qdq_keys = GetQDQOpKeys(use_contrib_qdq); - EXPECT_EQ(op_to_count["Gather"], 1); - EXPECT_EQ(op_to_count[qdq_keys.quantize_linear], 0); - EXPECT_EQ(op_to_count[qdq_keys.dequantize_linear], 0); - }; + TransformerTester(build_test_case, check_graph, TransformerLevel::Level1, TransformerLevel::Level2); +} - TransformerTester(build_test_case, check_graph, TransformerLevel::Level1, TransformerLevel::Level2); +// Checks that Q/DQ nodes are dropped from DQ -> Reshape -> Q. Uses 8-bit and 16-bit Q/DQ ops. +TEST(QDQTransformerTests, ReshapeDropQDQ) { + RunReshapeDropQDQTestCase({1, 3, 2, 2}, {1, 12}); + RunReshapeDropQDQTestCase({1, 3, 2, 2}, {1, 12}, true); // Use com.microsoft QDQ ops + RunReshapeDropQDQTestCase({1, 3, 2, 2}, {1, 12}, true); // Use int16 com.microsoft QDQ ops + RunReshapeDropQDQTestCase({1, 3, 2, 2}, {1, 12}, true); // Use int16 com.microsoft QDQ ops +} + +// Runs a test case that checks if Q/DQ nodes are dropped from DQ -> (Un)Squeeze -> Q. 
+template +static void RunSqueezeUnsqueezeDropQDQTestCase(const std::string& squeeze_type, + const std::vector& input_shape, + const std::vector& axes, + bool use_contrib_qdq = false) { + auto build_test_case = [squeeze_type, input_shape, axes, use_contrib_qdq](ModelTestBuilder& builder) { + constexpr QuantType qmin = std::numeric_limits::min(); + constexpr QuantType qmax = std::numeric_limits::max(); + + auto* input_arg = builder.MakeInput(input_shape, qmin, qmax); + auto* output_arg = builder.MakeOutput(); + QuantType zero_point = 1 + (qmax + qmin) / 2; + + // Add Squeeze node + auto* axes_arg = builder.Make1DInitializer(axes); + auto* input_arg_dq = builder.MakeIntermediate(); + auto* xsqueeze_output = builder.MakeIntermediate(); + builder.AddDequantizeLinearNode(input_arg, .003f, zero_point, input_arg_dq, use_contrib_qdq); + builder.AddNode(squeeze_type, {input_arg_dq, axes_arg}, {xsqueeze_output}); + + // add Q + builder.AddQuantizeLinearNode(xsqueeze_output, .003f, zero_point, output_arg, use_contrib_qdq); }; - test_case({12, 37}, {24, 12}); - test_case({12, 37}, {24, 12}, true); // Use com.microsoft QDQ ops + auto check_graph = [squeeze_type, use_contrib_qdq](InferenceSessionWrapper& session) { + auto op_to_count = CountOpsInGraph(session.GetGraph()); + const QDQOpKeys qdq_keys = GetQDQOpKeys(use_contrib_qdq); + EXPECT_EQ(op_to_count[squeeze_type], 1); + EXPECT_EQ(op_to_count[qdq_keys.quantize_linear], 0); + EXPECT_EQ(op_to_count[qdq_keys.dequantize_linear], 0); + }; + + TransformerTester(build_test_case, check_graph, TransformerLevel::Level1, TransformerLevel::Level2, + 13 /* opset_version */); +} + +// Checks that Q/DQ nodes are dropped from DQ -> Squeeze -> Q. Uses 8-bit and 16-bit Q/DQ ops. 
+TEST(QDQTransformerTests, SqueezeDropQDQ) { + RunSqueezeUnsqueezeDropQDQTestCase("Squeeze", {1, 3, 2, 2}, {0}); + RunSqueezeUnsqueezeDropQDQTestCase("Squeeze", {1, 3, 2, 2}, {0}, true); // Use MS domain QDQ ops + RunSqueezeUnsqueezeDropQDQTestCase("Squeeze", {1, 3, 2, 2}, {0}, true); // Use int16 MS domain QDQ ops + RunSqueezeUnsqueezeDropQDQTestCase("Squeeze", {1, 3, 2, 2}, {0}, true); // Use int16 MS domain QDQ ops +} + +// Checks that Q/DQ nodes are dropped from DQ -> Unsqueeze -> Q. Uses 8-bit and 16-bit Q/DQ ops. +TEST(QDQTransformerTests, UnsqueezeDropQDQ) { + RunSqueezeUnsqueezeDropQDQTestCase("Unsqueeze", {1, 3, 2, 2}, {0}); + RunSqueezeUnsqueezeDropQDQTestCase("Unsqueeze", {1, 3, 2, 2}, {0}, true); // Use MS domain QDQ ops + RunSqueezeUnsqueezeDropQDQTestCase("Unsqueeze", {1, 3, 2, 2}, {0}, true); // Use int16 MS domain QDQ ops + RunSqueezeUnsqueezeDropQDQTestCase("Unsqueeze", {1, 3, 2, 2}, {0}, true); // Use int16 MS domain QDQ ops } TEST(QDQTransformerTests, DoubleQDQ) { @@ -1066,52 +1168,69 @@ TEST(QDQTransformerTests, DoubleQDQ) { bad_float_point, good_float_point_2, true); // Use com.microsoft QDQ ops } -TEST(QDQTransformerTests, DoubleQDQ_Without_Last_Node_Being_Output) { - auto test_case = [&](int output_index, int expected_Q_count, int expected_DQ_count, - bool use_contrib_qdq = false) { - auto graph = [&](InferenceSessionWrapper& session) { - auto op_to_count = CountOpsInGraph(session.GetGraph()); - const QDQOpKeys qdq_keys = GetQDQOpKeys(use_contrib_qdq); - EXPECT_EQ(op_to_count[qdq_keys.quantize_linear], expected_Q_count); - EXPECT_EQ(op_to_count[qdq_keys.dequantize_linear], expected_DQ_count); - }; - TransformerTester( - BuildDoubleQDQWithoutLastOutput(output_index, use_contrib_qdq), - graph, - TransformerLevel::Default, - TransformerLevel::Level1); +template +static void RunDoubleQDQWithoutLastNodeBeingOutput(int output_index, int expected_Q_count, int expected_DQ_count, + bool use_contrib_qdq = false) { + auto graph = 
[&](InferenceSessionWrapper& session) { + auto op_to_count = CountOpsInGraph(session.GetGraph()); + const QDQOpKeys qdq_keys = GetQDQOpKeys(use_contrib_qdq); + EXPECT_EQ(op_to_count[qdq_keys.quantize_linear], expected_Q_count); + EXPECT_EQ(op_to_count[qdq_keys.dequantize_linear], expected_DQ_count); }; + TransformerTester( + BuildDoubleQDQWithoutLastOutput(output_index, use_contrib_qdq), + graph, + TransformerLevel::Default, + TransformerLevel::Level1); +} + +TEST(QDQTransformerTests, DoubleQDQ_Without_Last_Node_Being_Output) { constexpr bool use_contrib_qdq = true; // For readability. - test_case(0, 2, 2); - test_case(0, 2, 2, use_contrib_qdq); - test_case(1, 2, 3); // EnsureUniqueDQForNodeUnit will duplicate first DQ, so expect one more (3) - test_case(1, 2, 3, use_contrib_qdq); // EnsureUniqueDQForNodeUnit will duplicate first DQ, so expect one more (3) - test_case(2, 2, 2); - test_case(2, 2, 2, use_contrib_qdq); - test_case(3, 1, 1); - test_case(3, 1, 1, use_contrib_qdq); + RunDoubleQDQWithoutLastNodeBeingOutput(0, 2, 2); + RunDoubleQDQWithoutLastNodeBeingOutput(0, 2, 2, use_contrib_qdq); + RunDoubleQDQWithoutLastNodeBeingOutput(0, 2, 2, use_contrib_qdq); + RunDoubleQDQWithoutLastNodeBeingOutput(0, 2, 2, use_contrib_qdq); + + // EnsureUniqueDQForNodeUnit will duplicate first DQ, so expected one more (3) + RunDoubleQDQWithoutLastNodeBeingOutput(1, 2, 3); + RunDoubleQDQWithoutLastNodeBeingOutput(1, 2, 3, use_contrib_qdq); + RunDoubleQDQWithoutLastNodeBeingOutput(1, 2, 3, use_contrib_qdq); + RunDoubleQDQWithoutLastNodeBeingOutput(1, 2, 3, use_contrib_qdq); + + RunDoubleQDQWithoutLastNodeBeingOutput(2, 2, 2); + RunDoubleQDQWithoutLastNodeBeingOutput(2, 2, 2, use_contrib_qdq); + RunDoubleQDQWithoutLastNodeBeingOutput(2, 2, 2, use_contrib_qdq); + + RunDoubleQDQWithoutLastNodeBeingOutput(3, 1, 1); + RunDoubleQDQWithoutLastNodeBeingOutput(3, 1, 1, use_contrib_qdq); + RunDoubleQDQWithoutLastNodeBeingOutput(3, 1, 1, use_contrib_qdq); +} + +// Runs a test that checks if 
DQ -> Split -> Q (many) is replaced with just Split. +template +static void RunDropSplitQDQTestCase(const std::vector& input_shape, int64_t axis, + bool use_contrib_qdq = false) { + auto check_graph = [use_contrib_qdq](InferenceSessionWrapper& session) { + auto op_to_count = CountOpsInGraph(session.GetGraph()); + const QDQOpKeys qdq_keys = GetQDQOpKeys(use_contrib_qdq); + EXPECT_EQ(op_to_count["Split"], 1); + EXPECT_EQ(op_to_count[qdq_keys.quantize_linear], 0); + EXPECT_EQ(op_to_count[qdq_keys.dequantize_linear], 0); + }; + TransformerTester(BuildQDQSplitTestCase(input_shape, axis, use_contrib_qdq), + check_graph, + TransformerLevel::Level1, + TransformerLevel::Level2, + {12, 18, 19}); } -// Because split isn't one the supported ops, this will stay the same +// Test that DQ -> Split -> Q (many) is replaced with just Split for various quantization types. TEST(QDQTransformerTests, Split) { - auto test_case = [&](const std::vector& input_shape, const int64_t& axis, - bool use_contrib_qdq = false) { - auto check_graph = [&](InferenceSessionWrapper& session) { - auto op_to_count = CountOpsInGraph(session.GetGraph()); - const QDQOpKeys qdq_keys = GetQDQOpKeys(use_contrib_qdq); - EXPECT_EQ(op_to_count["Split"], 1); - EXPECT_EQ(op_to_count[qdq_keys.quantize_linear], 0); - EXPECT_EQ(op_to_count[qdq_keys.dequantize_linear], 0); - }; - TransformerTester(BuildQDQSplitTestCase(input_shape, axis, use_contrib_qdq), - check_graph, - TransformerLevel::Level1, - TransformerLevel::Level2, - {12, 18, 19}); - }; - test_case({6, 18, 54}, 0); - test_case({6, 18, 54}, 0, true); // Use com.microsoft QDQ ops + RunDropSplitQDQTestCase({6, 18, 54}, 0); + RunDropSplitQDQTestCase({6, 18, 54}, 0, true); // Use com.microsoft int8 QDQ ops + RunDropSplitQDQTestCase({6, 18, 54}, 0, true); // Use com.microsoft int16 QDQ ops + RunDropSplitQDQTestCase({6, 18, 54}, 0, true); // Use com.microsoft uint16 QDQ ops } // Because split isn't one the supported ops, this will stay the same @@ -1174,59 +1293,66 
@@ TEST(QDQTransformerTests, Where) { test_case({1}, {1}, {1}, true /*use_contrib_qdq*/); } -TEST(QDQTransformerTests, Transpose) { - auto test_case = [&](const std::vector& input_shape, const std::vector& perms, - bool use_contrib_qdq = false) { - auto check_graph = [&](InferenceSessionWrapper& session) { - auto op_to_count = CountOpsInGraph(session.GetGraph()); - const QDQOpKeys qdq_keys = GetQDQOpKeys(use_contrib_qdq); - EXPECT_EQ(op_to_count["Transpose"], 1); - EXPECT_EQ(op_to_count[qdq_keys.quantize_linear], 0); - EXPECT_EQ(op_to_count[qdq_keys.dequantize_linear], 0); - }; - - TransformerTester(BuildQDQTransposeTestCase(input_shape, perms, use_contrib_qdq), - check_graph, - TransformerLevel::Level1, - TransformerLevel::Level2); +template +static void RunDropQDQTransposeTestCase(const std::vector& input_shape, const std::vector& perms, + bool use_contrib_qdq = false) { + auto check_graph = [&](InferenceSessionWrapper& session) { + auto op_to_count = CountOpsInGraph(session.GetGraph()); + const QDQOpKeys qdq_keys = GetQDQOpKeys(use_contrib_qdq); + EXPECT_EQ(op_to_count["Transpose"], 1); + EXPECT_EQ(op_to_count[qdq_keys.quantize_linear], 0); + EXPECT_EQ(op_to_count[qdq_keys.dequantize_linear], 0); }; - test_case({2, 13, 12, 37}, {0, 3, 1, 2}); - test_case({2, 13, 12, 37}, {0, 3, 1, 2}, true /*use_contrib_qdq*/); + TransformerTester(BuildQDQTransposeTestCase(input_shape, perms, use_contrib_qdq), + check_graph, + TransformerLevel::Level1, + TransformerLevel::Level2); } -TEST(QDQTransformerTests, Transpose_No_Fusion) { - auto test_case = [&](const std::vector& input1_shape, const std::vector& perms, - bool use_contrib_qdq = false) { - auto build_test_case = [&](ModelTestBuilder& builder) { - auto* input1_arg = builder.MakeInput(input1_shape, -128, 127); - auto* output_arg = builder.MakeOutput(); - - // add DQ - auto* dq_output = builder.MakeIntermediate(); - builder.AddDequantizeLinearNode(input1_arg, .003f, 1, dq_output, use_contrib_qdq); - - // add Transpose - 
auto* transpose_output = builder.MakeOutput(); // transpose output is graph output - Node& transpose_node = builder.AddNode("Transpose", {dq_output}, {transpose_output}); - transpose_node.AddAttribute("perm", perms); - - // add Q - builder.AddQuantizeLinearNode(transpose_output, .003f, 1, output_arg, use_contrib_qdq); - }; - - auto check_graph = [&](InferenceSessionWrapper& session) { - auto op_to_count = CountOpsInGraph(session.GetGraph()); - const QDQOpKeys qdq_keys = GetQDQOpKeys(use_contrib_qdq); - EXPECT_EQ(op_to_count[qdq_keys.quantize_linear], 1); - EXPECT_EQ(op_to_count[qdq_keys.dequantize_linear], 1); - }; +TEST(QDQTransformerTests, Transpose) { + RunDropQDQTransposeTestCase({2, 13, 12, 37}, {0, 3, 1, 2}); + RunDropQDQTransposeTestCase({2, 13, 12, 37}, {0, 3, 1, 2}, true /*use_contrib_qdq*/); + RunDropQDQTransposeTestCase({2, 13, 12, 37}, {0, 3, 1, 2}, true /*use_contrib_qdq*/); + RunDropQDQTransposeTestCase({2, 13, 12, 37}, {0, 3, 1, 2}, true /*use_contrib_qdq*/); +} + +template +static void RunQDQTransposeNoFusionTestCase(const std::vector& input1_shape, const std::vector& perms, + bool use_contrib_qdq = false) { + auto build_test_case = [&](ModelTestBuilder& builder) { + auto* input1_arg = builder.MakeInput(input1_shape, std::numeric_limits::min(), + std::numeric_limits::max()); + auto* output_arg = builder.MakeOutput(); + + // add DQ + auto* dq_output = builder.MakeIntermediate(); + builder.AddDequantizeLinearNode(input1_arg, .003f, 1, dq_output, use_contrib_qdq); + + // add Transpose + auto* transpose_output = builder.MakeOutput(); // transpose output is graph output + Node& transpose_node = builder.AddNode("Transpose", {dq_output}, {transpose_output}); + transpose_node.AddAttribute("perm", perms); + + // add Q + builder.AddQuantizeLinearNode(transpose_output, .003f, 1, output_arg, use_contrib_qdq); + }; - TransformerTester(build_test_case, check_graph, TransformerLevel::Level1, TransformerLevel::Level2); + auto check_graph = 
[&](InferenceSessionWrapper& session) { + auto op_to_count = CountOpsInGraph(session.GetGraph()); + const QDQOpKeys qdq_keys = GetQDQOpKeys(use_contrib_qdq); + EXPECT_EQ(op_to_count[qdq_keys.quantize_linear], 1); + EXPECT_EQ(op_to_count[qdq_keys.dequantize_linear], 1); }; - test_case({2, 13, 12, 37}, {0, 3, 1, 2}); - test_case({2, 13, 12, 37}, {0, 3, 1, 2}, true /*use_contrib_qdq*/); + TransformerTester(build_test_case, check_graph, TransformerLevel::Level1, TransformerLevel::Level2); +} + +TEST(QDQTransformerTests, Transpose_No_Fusion) { + RunQDQTransposeNoFusionTestCase({2, 13, 12, 37}, {0, 3, 1, 2}); + RunQDQTransposeNoFusionTestCase({2, 13, 12, 37}, {0, 3, 1, 2}, true /*use_contrib_qdq*/); + RunQDQTransposeNoFusionTestCase({2, 13, 12, 37}, {0, 3, 1, 2}, true /*use_contrib_qdq*/); + RunQDQTransposeNoFusionTestCase({2, 13, 12, 37}, {0, 3, 1, 2}, true /*use_contrib_qdq*/); } TEST(QDQTransformerTests, Resize) { @@ -1376,50 +1502,59 @@ TEST(QDQTransformerTests, ResizeReshapeSqueezeUnsqueeze) { test_case({1, 2, 26, 42}, {4}, true /*use_contrib_qdq*/); } -TEST(QDQTransformerTests, ArgMax) { - auto test_case = [&](const std::vector& input_shape, - int axis, - int keepdims, - int select_last_index, - bool use_contrib_qdq) { - auto build_test_case = [&](ModelTestBuilder& builder) { - auto* input_arg = builder.MakeInput(input_shape, - std::numeric_limits::min(), - std::numeric_limits::max()); - auto* output_arg = builder.MakeOutput(); +// Runs a test case that checks if the DQ node is dropped from DQ -> Op (e.g., ArgMax). 
+template +static void RunArgMaxDropDQTestCase(const std::vector& input_shape, + int axis, + int keepdims, + int select_last_index, + bool use_contrib_qdq, + bool expect_drop_dq = true) { + auto build_test_case = [&](ModelTestBuilder& builder) { + auto* input_arg = builder.MakeInput(input_shape, + std::numeric_limits::min(), + std::numeric_limits::max()); + auto* output_arg = builder.MakeOutput(); + + // add DQ + auto* dq_output = builder.MakeIntermediate(); + builder.AddDequantizeLinearNode(input_arg, .003f, 1, dq_output, use_contrib_qdq); + + // add ArgMax + Node& argmax_node = builder.AddNode("ArgMax", {dq_output}, {output_arg}); + argmax_node.AddAttribute("axis", static_cast(axis)); + argmax_node.AddAttribute("keepdims", static_cast(keepdims)); + argmax_node.AddAttribute("select_last_index", static_cast(select_last_index)); + }; - // add DQ - auto* dq_output = builder.MakeIntermediate(); - builder.AddDequantizeLinearNode(input_arg, .003f, 1, dq_output, use_contrib_qdq); + auto check_graph = [use_contrib_qdq, expect_drop_dq](InferenceSessionWrapper& session) { + auto op_to_count = CountOpsInGraph(session.GetGraph()); + const QDQOpKeys qdq_keys = GetQDQOpKeys(use_contrib_qdq); + EXPECT_EQ(op_to_count["ArgMax"], 1); + EXPECT_EQ(op_to_count[qdq_keys.dequantize_linear], expect_drop_dq ? 
0 : 1); + }; - // add ArgMax - Node& argmax_node = builder.AddNode("ArgMax", {dq_output}, {output_arg}); - argmax_node.AddAttribute("axis", static_cast(axis)); - argmax_node.AddAttribute("keepdims", static_cast(keepdims)); - argmax_node.AddAttribute("select_last_index", static_cast(select_last_index)); - }; + TransformerTester(build_test_case, check_graph, + TransformerLevel::Level1, + TransformerLevel::Level2, + /* opset_version */ 13); + TransformerTester(build_test_case, check_graph, + TransformerLevel::Level1, + TransformerLevel::Level2, + /* opset_version */ 19); +} - auto check_graph = [&](InferenceSessionWrapper& session) { - auto op_to_count = CountOpsInGraph(session.GetGraph()); - const QDQOpKeys qdq_keys = GetQDQOpKeys(use_contrib_qdq); - EXPECT_EQ(op_to_count["ArgMax"], 1); - EXPECT_EQ(op_to_count[qdq_keys.dequantize_linear], 0); - }; +// Checks that the DQ node is dropped from DQ -> ArgMax. Uses 8-bit and 16-bit Q/DQ ops. +TEST(QDQTransformerTests, ArgMax) { + RunArgMaxDropDQTestCase({2, 13, 12, 37}, 1, 0, 0, false); + RunArgMaxDropDQTestCase({2, 13, 12, 37}, 1, 0, 0, true /*use_contrib_qdq*/); - TransformerTester(build_test_case, check_graph, - TransformerLevel::Level1, - TransformerLevel::Level2, - /* opset_version */ 13); - TransformerTester(build_test_case, check_graph, - TransformerLevel::Level1, - TransformerLevel::Level2, - /* opset_version */ 19); - }; + // Should *not* drop DQ for 16-bit DQ -> ArgMax (because ORT does not support 16-bit input types for ArgMax). 
+ RunArgMaxDropDQTestCase({2, 13, 12, 37}, 1, 0, 0, true /*use_contrib_qdq*/, false /*expect_drop_dq*/); + RunArgMaxDropDQTestCase({2, 13, 12, 37}, 1, 0, 0, true /*use_contrib_qdq*/, false /*expect_drop_dq*/); - test_case({2, 13, 12, 37}, 1, 0, 0, false /*use_contrib_qdq*/); - test_case({2, 13, 12, 37}, 1, 0, 0, true /*use_contrib_qdq*/); - test_case({2, 13, 12, 37}, 0, 1, 0, false /*use_contrib_qdq*/); - test_case({2, 13, 12, 37}, 0, 0, 1, false /*use_contrib_qdq*/); + RunArgMaxDropDQTestCase({2, 13, 12, 37}, 0, 1, 0, false); + RunArgMaxDropDQTestCase({2, 13, 12, 37}, 0, 0, 1, false); } TEST(QDQTransformerTests, QLinearMatMul) { diff --git a/onnxruntime/test/optimizer/transpose_optimizer_test.cc b/onnxruntime/test/optimizer/transpose_optimizer_test.cc index e5aa36fc379f4..1f4c499985ad0 100644 --- a/onnxruntime/test/optimizer/transpose_optimizer_test.cc +++ b/onnxruntime/test/optimizer/transpose_optimizer_test.cc @@ -1,6 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
+#include #include #include @@ -8,6 +9,7 @@ #include "gmock/gmock.h" #include "core/graph/graph.h" +#include "core/graph/node_attr_utils.h" #include "core/framework/op_node_proto_helper.h" #include "core/framework/utils.h" #include "core/session/onnxruntime_session_options_config_keys.h" @@ -3501,150 +3503,116 @@ TEST(TransposeOptimizerTests, TestWhere) { /*opset_version*/ {15, 18}); } -TEST(TransposeOptimizerTests, TestQuantizeLinearScalar) { - auto test_case = [&](const std::string& q_domain = "") { - auto build_test_case_1 = [&](ModelTestBuilder& builder) { - auto* input0_arg = MakeInput(builder, {{2, -1, 6, 3}}, {2, 4, 6, 3}, 0.0, 1.0); - auto* input1_arg = MakeInput(builder, {std::vector{}}, std::vector{}, {2.3f}); - auto* input2_arg = MakeInput(builder, {std::vector{}}, std::vector{}, {10}); - auto* transpose_1_out_0 = builder.MakeIntermediate(); - auto* quantizelinear_1_out_0 = builder.MakeIntermediate(); - auto* transpose_2_out_0 = builder.MakeOutput(); - - auto& transpose_1 = builder.AddNode("Transpose", {input0_arg}, {transpose_1_out_0}); - transpose_1.AddAttribute("perm", std::vector{0, 3, 1, 2}); - builder.AddNode("QuantizeLinear", {transpose_1_out_0, input1_arg, input2_arg}, {quantizelinear_1_out_0}, - q_domain); - auto& transpose_2 = builder.AddNode("Transpose", {quantizelinear_1_out_0}, {transpose_2_out_0}); - transpose_2.AddAttribute("perm", std::vector{0, 2, 3, 1}); - }; +// Utility function that runs TransformerTester for the graph Transpose -> QuantizeLinear -> Transpose. +// Expects the Transpose nodes to cancel.
+template +static void RunQuantizeLinearTestCase(const std::optional>& zp_input_shape, + const std::vector& zp_value_shape, + std::optional axis, + const std::string& q_domain = "") { + auto build_test_case = [&](ModelTestBuilder& builder) { + constexpr QuantType qmin = std::numeric_limits::min(); + constexpr QuantType qmax = std::numeric_limits::max(); - auto check_optimized_graph_1 = [&](InferenceSessionWrapper& session) { - int transpose_cost = EstimateTransposeCost(session.GetGraph()); - EXPECT_EQ(transpose_cost, 0); - }; + auto* input0_arg = MakeInput(builder, {{2, -1, 6, 3}}, {2, 4, 6, 3}, 0.0, 1.0); + + NodeArg* scale_arg = nullptr; + NodeArg* zero_point_arg = nullptr; + + if (zp_value_shape.empty()) { // Per-tensor quantization + QuantType zp = (qmax + qmin) / 2; + scale_arg = MakeInput(builder, zp_input_shape, zp_value_shape, {0.05f}); + zero_point_arg = MakeInput(builder, zp_input_shape, zp_value_shape, {zp}); + } else { // Per-axis quantization + scale_arg = MakeInput(builder, zp_input_shape, zp_value_shape, 0.0f, 1.0f); + zero_point_arg = MakeInput(builder, zp_input_shape, zp_value_shape, qmin, qmax); + } + auto* transpose_1_out_0 = builder.MakeIntermediate(); + auto* quantizelinear_1_out_0 = builder.MakeIntermediate(); + auto* transpose_2_out_0 = builder.MakeOutput(); + + auto& transpose_1 = builder.AddNode("Transpose", {input0_arg}, {transpose_1_out_0}); + transpose_1.AddAttribute("perm", std::vector{0, 3, 1, 2}); + auto& quantizelinear_1 = builder.AddNode("QuantizeLinear", {transpose_1_out_0, scale_arg, zero_point_arg}, + {quantizelinear_1_out_0}, q_domain); - TransformerTester(build_test_case_1, - check_optimized_graph_1, - TransformerLevel::Default, - TransformerLevel::Level1, - /*opset_version*/ {15, 18}); + if (axis.has_value()) { + quantizelinear_1.AddAttributeProto(*axis); + } + + auto& transpose_2 = builder.AddNode("Transpose", {quantizelinear_1_out_0}, {transpose_2_out_0}); + transpose_2.AddAttribute("perm", std::vector{0, 2, 3, 1}); + }; + + 
auto check_optimized_graph = [](InferenceSessionWrapper& session) { + int transpose_cost = EstimateTransposeCost(session.GetGraph()); + EXPECT_EQ(transpose_cost, 0); }; - test_case(); + TransformerTester(build_test_case, + check_optimized_graph, + TransformerLevel::Default, + TransformerLevel::Level1, + /*opset_version*/ {15, 18}); +} + +TEST(TransposeOptimizerTests, TestQuantizeLinearScalar) { + std::optional> zp_input_shape = std::vector{}; + std::vector zp_value_shape{}; + std::optional empty_axis; // No axis value. + + RunQuantizeLinearTestCase(zp_input_shape, zp_value_shape, empty_axis, kOnnxDomain); + #if !defined(DISABLE_CONTRIB_OPS) - test_case(kMSDomain); // Use com.microsoft.QuantizeLinear + // Use com.microsoft.QuantizeLinear op. + RunQuantizeLinearTestCase(zp_input_shape, zp_value_shape, empty_axis, kMSDomain); + RunQuantizeLinearTestCase(zp_input_shape, zp_value_shape, empty_axis, kMSDomain); + RunQuantizeLinearTestCase(zp_input_shape, zp_value_shape, empty_axis, kMSDomain); #endif } TEST(TransposeOptimizerTests, TestQuantizeLinearScalarIgnoreAxis) { - auto test_case = [&](const std::string& q_domain = "") { - auto build_test_case_1 = [&](ModelTestBuilder& builder) { - auto* input0_arg = MakeInput(builder, {{2, -1, 6, 3}}, {2, 4, 6, 3}, 0.0, 1.0); - auto* input1_arg = MakeInput(builder, {std::vector{}}, std::vector{}, {2.3f}); - auto* input2_arg = MakeInput(builder, {std::vector{}}, std::vector{}, {10}); - auto* transpose_1_out_0 = builder.MakeIntermediate(); - auto* quantizelinear_1_out_0 = builder.MakeIntermediate(); - auto* transpose_2_out_0 = builder.MakeOutput(); - - auto& transpose_1 = builder.AddNode("Transpose", {input0_arg}, {transpose_1_out_0}); - transpose_1.AddAttribute("perm", std::vector{0, 3, 1, 2}); - auto& quantizelinear_1 = builder.AddNode("QuantizeLinear", {transpose_1_out_0, input1_arg, input2_arg}, - {quantizelinear_1_out_0}, q_domain); - quantizelinear_1.AddAttribute("axis", (int64_t)10); - auto& transpose_2 = 
builder.AddNode("Transpose", {quantizelinear_1_out_0}, {transpose_2_out_0}); - transpose_2.AddAttribute("perm", std::vector{0, 2, 3, 1}); - }; - - auto check_optimized_graph_1 = [&](InferenceSessionWrapper& session) { - int transpose_cost = EstimateTransposeCost(session.GetGraph()); - EXPECT_EQ(transpose_cost, 0); - }; + std::optional> zp_input_shape = std::vector{}; + std::vector zp_value_shape{}; + auto ignored_axis = utils::MakeAttribute("axis", static_cast(10)); // Should be ignored for per-tensor Q - TransformerTester(build_test_case_1, - check_optimized_graph_1, - TransformerLevel::Default, - TransformerLevel::Level1, - /*opset_version*/ {15, 18}); - }; + RunQuantizeLinearTestCase(zp_input_shape, zp_value_shape, ignored_axis, kOnnxDomain); - test_case(); #if !defined(DISABLE_CONTRIB_OPS) - test_case(kMSDomain); // Use com.microsoft.QuantizeLinear + // Use com.microsoft.QuantizeLinear op. + RunQuantizeLinearTestCase(zp_input_shape, zp_value_shape, ignored_axis, kMSDomain); + RunQuantizeLinearTestCase(zp_input_shape, zp_value_shape, ignored_axis, kMSDomain); + RunQuantizeLinearTestCase(zp_input_shape, zp_value_shape, ignored_axis, kMSDomain); #endif } TEST(TransposeOptimizerTests, TestQuantizeLinearVector) { - auto test_case = [&](const std::string& q_domain = "") { - auto build_test_case_1 = [&](ModelTestBuilder& builder) { - auto* input0_arg = MakeInput(builder, {{2, -1, 6, 3}}, {2, 4, 6, 3}, 0.0, 1.0); - auto* input1_arg = MakeInput(builder, {{-1}}, {2}, {2.3f, 2.4f}); - auto* input2_arg = MakeInput(builder, {{-1}}, {2}, {10, 12}); - auto* transpose_1_out_0 = builder.MakeIntermediate(); - auto* quantizelinear_1_out_0 = builder.MakeIntermediate(); - auto* transpose_2_out_0 = builder.MakeOutput(); - - auto& transpose_1 = builder.AddNode("Transpose", {input0_arg}, {transpose_1_out_0}); - transpose_1.AddAttribute("perm", std::vector{0, 3, 1, 2}); - auto& quantizelinear_1 = builder.AddNode("QuantizeLinear", {transpose_1_out_0, input1_arg, input2_arg}, - 
{quantizelinear_1_out_0}, q_domain); - quantizelinear_1.AddAttribute("axis", (int64_t)0); - auto& transpose_2 = builder.AddNode("Transpose", {quantizelinear_1_out_0}, {transpose_2_out_0}); - transpose_2.AddAttribute("perm", std::vector{0, 2, 3, 1}); - }; + std::optional> zp_input_shape = std::vector{-1}; + std::vector zp_value_shape = {2}; + auto axis = utils::MakeAttribute("axis", static_cast(0)); - auto check_optimized_graph_1 = [&](InferenceSessionWrapper& session) { - int transpose_cost = EstimateTransposeCost(session.GetGraph()); - EXPECT_EQ(transpose_cost, 0); - }; + RunQuantizeLinearTestCase(zp_input_shape, zp_value_shape, axis, kOnnxDomain); - TransformerTester(build_test_case_1, - check_optimized_graph_1, - TransformerLevel::Default, - TransformerLevel::Level1, - /*opset_version*/ {15, 18}); - }; - - test_case(); #if !defined(DISABLE_CONTRIB_OPS) - test_case(kMSDomain); // Use com.microsoft.QuantizeLinear + // Use com.microsoft.QuantizeLinear op. + RunQuantizeLinearTestCase(zp_input_shape, zp_value_shape, axis, kMSDomain); + RunQuantizeLinearTestCase(zp_input_shape, zp_value_shape, axis, kMSDomain); + RunQuantizeLinearTestCase(zp_input_shape, zp_value_shape, axis, kMSDomain); #endif } TEST(TransposeOptimizerTests, TestQuantizeLinearVectorUnknownRank) { - auto test_case = [&](const std::string& q_domain = "") { - auto build_test_case_1 = [&](ModelTestBuilder& builder) { - auto* input0_arg = MakeInput(builder, {{2, -1, 6, 3}}, {2, 4, 6, 3}, 0.0, 1.0); - auto* input1_arg = MakeInput(builder, std::nullopt, {3}, {2.3f, 2.4f, 2.5f}); - auto* input2_arg = MakeInput(builder, std::nullopt, {3}, {10, 12, 13}); - auto* transpose_1_out_0 = builder.MakeIntermediate(); - auto* quantizelinear_1_out_0 = builder.MakeIntermediate(); - auto* transpose_2_out_0 = builder.MakeOutput(); - - auto& transpose_1 = builder.AddNode("Transpose", {input0_arg}, {transpose_1_out_0}); - transpose_1.AddAttribute("perm", std::vector{0, 3, 1, 2}); - auto& quantizelinear_1 = 
builder.AddNode("QuantizeLinear", {transpose_1_out_0, input1_arg, input2_arg}, - {quantizelinear_1_out_0}, q_domain); - quantizelinear_1.AddAttribute("axis", (int64_t)1); - auto& transpose_2 = builder.AddNode("Transpose", {quantizelinear_1_out_0}, {transpose_2_out_0}); - transpose_2.AddAttribute("perm", std::vector{0, 2, 3, 1}); - }; + std::optional> zp_unknown_shape; // Empty shape + std::vector zp_value_shape = {3}; + auto axis = utils::MakeAttribute("axis", static_cast(1)); - auto check_optimized_graph_1 = [&](InferenceSessionWrapper& session) { - int transpose_cost = EstimateTransposeCost(session.GetGraph()); - EXPECT_EQ(transpose_cost, 0); - }; + RunQuantizeLinearTestCase(zp_unknown_shape, zp_value_shape, axis, kOnnxDomain); - TransformerTester(build_test_case_1, - check_optimized_graph_1, - TransformerLevel::Default, - TransformerLevel::Level1, - /*opset_version*/ {15, 18}); - }; - - test_case(); #if !defined(DISABLE_CONTRIB_OPS) - test_case(kMSDomain); // Use com.microsoft.QuantizeLinear + // Use com.microsoft.QuantizeLinear op. 
+ RunQuantizeLinearTestCase(zp_unknown_shape, zp_value_shape, axis, kMSDomain); + RunQuantizeLinearTestCase(zp_unknown_shape, zp_value_shape, axis, kMSDomain); + RunQuantizeLinearTestCase(zp_unknown_shape, zp_value_shape, axis, kMSDomain); #endif } @@ -3676,158 +3644,158 @@ TEST(TransposeOptimizerTests, TestQuantizeLinearScalarOpset10) { /*opset_version*/ 10); } -TEST(TransposeOptimizerTests, TestDequantizeLinearScalarIgnoreAxis) { - auto test_case = [&](const std::string& dq_domain = "") { - auto build_test_case_1 = [&](ModelTestBuilder& builder) { - auto* input0_arg = MakeInput(builder, {{2, -1, 6, 3}}, {2, 4, 6, 3}, 0, 5); - auto* input1_arg = MakeInput(builder, {std::vector{}}, std::vector{}, {2.3f}); - auto* input2_arg = MakeInput(builder, {std::vector{}}, std::vector{}, {10}); - auto* transpose_1_out_0 = builder.MakeIntermediate(); - auto* dequantizelinear_1_out_0 = builder.MakeIntermediate(); - auto* transpose_2_out_0 = builder.MakeOutput(); - - auto& transpose_1 = builder.AddNode("Transpose", {input0_arg}, {transpose_1_out_0}); - transpose_1.AddAttribute("perm", std::vector{0, 3, 1, 2}); - auto& dequantizelinear_1 = builder.AddNode("DequantizeLinear", {transpose_1_out_0, input1_arg, input2_arg}, - {dequantizelinear_1_out_0}, dq_domain); - dequantizelinear_1.AddAttribute("axis", (int64_t)10); - auto& transpose_2 = builder.AddNode("Transpose", {dequantizelinear_1_out_0}, {transpose_2_out_0}); - transpose_2.AddAttribute("perm", std::vector{0, 2, 3, 1}); - }; +// Utility function that runs TransformerTester for the graph Transpose -> DequantizeLinear -> Transpose. +// Expects the Transpose nodes to cancel. 
+template +static void RunDequantizeLinearTestCase(const std::optional>& zp_input_shape, + const std::vector& zp_value_shape, + std::optional axis, + const std::string& q_domain = "") { + auto build_test_case = [&](ModelTestBuilder& builder) { + constexpr QuantType qmin = std::numeric_limits::min(); + constexpr QuantType qmax = std::numeric_limits::max(); - auto check_optimized_graph_1 = [&](InferenceSessionWrapper& session) { - int transpose_cost = EstimateTransposeCost(session.GetGraph()); - EXPECT_EQ(transpose_cost, 0); - }; + auto* input0_arg = MakeInput(builder, {{2, -1, 6, 3}}, {2, 4, 6, 3}, qmin, qmax); + + NodeArg* scale_arg = nullptr; + NodeArg* zero_point_arg = nullptr; + + if (zp_value_shape.empty()) { // Per-tensor quantization + QuantType zp = (qmax + qmin) / 2; + scale_arg = MakeInput(builder, zp_input_shape, zp_value_shape, {0.05f}); + zero_point_arg = MakeInput(builder, zp_input_shape, zp_value_shape, {zp}); + } else { // Per-axis quantization + scale_arg = MakeInput(builder, zp_input_shape, zp_value_shape, 0.0f, 1.0f); + zero_point_arg = MakeInput(builder, zp_input_shape, zp_value_shape, qmin, qmax); + } + auto* transpose_1_out_0 = builder.MakeIntermediate(); + auto* dequantizelinear_1_out_0 = builder.MakeIntermediate(); + auto* transpose_2_out_0 = builder.MakeOutput(); + + auto& transpose_1 = builder.AddNode("Transpose", {input0_arg}, {transpose_1_out_0}); + transpose_1.AddAttribute("perm", std::vector{0, 3, 1, 2}); + auto& dequantizelinear_1 = builder.AddNode("DequantizeLinear", {transpose_1_out_0, scale_arg, zero_point_arg}, + {dequantizelinear_1_out_0}, q_domain); + + if (axis.has_value()) { + dequantizelinear_1.AddAttributeProto(*axis); + } + + auto& transpose_2 = builder.AddNode("Transpose", {dequantizelinear_1_out_0}, {transpose_2_out_0}); + transpose_2.AddAttribute("perm", std::vector{0, 2, 3, 1}); + }; - TransformerTester(build_test_case_1, - check_optimized_graph_1, - TransformerLevel::Default, - TransformerLevel::Level1, - 
/*opset_version*/ {15, 18}); + auto check_optimized_graph = [](InferenceSessionWrapper& session) { + int transpose_cost = EstimateTransposeCost(session.GetGraph()); + EXPECT_EQ(transpose_cost, 0); }; - test_case(); + TransformerTester(build_test_case, + check_optimized_graph, + TransformerLevel::Default, + TransformerLevel::Level1, + /*opset_version*/ {15, 18}); +} + +TEST(TransposeOptimizerTests, TestDequantizeLinearScalarIgnoreAxis) { + std::optional> zp_input_shape = std::vector{}; + std::vector zp_value_shape{}; + auto ignored_axis = utils::MakeAttribute("axis", static_cast(10)); // Should be ignored for per-tensor Q + + RunDequantizeLinearTestCase(zp_input_shape, zp_value_shape, ignored_axis, kOnnxDomain); #if !defined(DISABLE_CONTRIB_OPS) - test_case(kMSDomain); // Use com.microsoft.DequantizeLinear + // Use com.microsoft.DequantizeLinear ops + RunDequantizeLinearTestCase(zp_input_shape, zp_value_shape, ignored_axis, kMSDomain); + RunDequantizeLinearTestCase(zp_input_shape, zp_value_shape, ignored_axis, kMSDomain); + RunDequantizeLinearTestCase(zp_input_shape, zp_value_shape, ignored_axis, kMSDomain); #endif } TEST(TransposeOptimizerTests, TestDequantizeLinearVector) { - auto test_case = [&](const std::string& dq_domain = "") { - auto build_test_case_1 = [&](ModelTestBuilder& builder) { - auto* input0_arg = MakeInput(builder, {{2, -1, 6, 3}}, {2, 4, 6, 3}, 0, 5); - auto* input1_arg = MakeInput(builder, {{2}}, {2}, {2.3f, 2.4f}); - auto* input2_arg = MakeInput(builder, {{2}}, {2}, {10, 12}); - auto* transpose_1_out_0 = builder.MakeIntermediate(); - auto* dequantizelinear_1_out_0 = builder.MakeIntermediate(); - auto* transpose_2_out_0 = builder.MakeOutput(); - - auto& transpose_1 = builder.AddNode("Transpose", {input0_arg}, {transpose_1_out_0}); - transpose_1.AddAttribute("perm", std::vector{0, 3, 1, 2}); - auto& dequantizelinear_1 = builder.AddNode("DequantizeLinear", {transpose_1_out_0, input1_arg, input2_arg}, - {dequantizelinear_1_out_0}, dq_domain); - 
dequantizelinear_1.AddAttribute("axis", (int64_t)-4); - auto& transpose_2 = builder.AddNode("Transpose", {dequantizelinear_1_out_0}, {transpose_2_out_0}); - transpose_2.AddAttribute("perm", std::vector{0, 2, 3, 1}); - }; + std::optional> zp_input_shape = std::vector{2}; + std::vector zp_value_shape = {2}; + auto axis = utils::MakeAttribute("axis", static_cast(-4)); - auto check_optimized_graph_1 = [&](InferenceSessionWrapper& session) { - int transpose_cost = EstimateTransposeCost(session.GetGraph()); - EXPECT_EQ(transpose_cost, 0); - }; + RunDequantizeLinearTestCase(zp_input_shape, zp_value_shape, axis, kOnnxDomain); +#if !defined(DISABLE_CONTRIB_OPS) + // Use com.microsoft.DequantizeLinear ops + RunDequantizeLinearTestCase(zp_input_shape, zp_value_shape, axis, kMSDomain); + RunDequantizeLinearTestCase(zp_input_shape, zp_value_shape, axis, kMSDomain); + RunDequantizeLinearTestCase(zp_input_shape, zp_value_shape, axis, kMSDomain); +#endif +} - TransformerTester(build_test_case_1, - check_optimized_graph_1, - TransformerLevel::Default, - TransformerLevel::Level1, - /*opset_version*/ {15, 18}); - }; +TEST(TransposeOptimizerTests, TestDequantizeLinearNoAxis) { + std::optional> zp_input_shape = std::vector{}; + std::vector zp_value_shape{}; + std::optional no_axis; // Empty axis value will not be set. 
- test_case(); + RunDequantizeLinearTestCase(zp_input_shape, zp_value_shape, no_axis, kOnnxDomain); #if !defined(DISABLE_CONTRIB_OPS) - test_case(kMSDomain); // Use com.microsoft.DequantizeLinear + // Use com.microsoft.DequantizeLinear ops + RunDequantizeLinearTestCase(zp_input_shape, zp_value_shape, no_axis, kMSDomain); + RunDequantizeLinearTestCase(zp_input_shape, zp_value_shape, no_axis, kMSDomain); + RunDequantizeLinearTestCase(zp_input_shape, zp_value_shape, no_axis, kMSDomain); #endif } -TEST(TransposeOptimizerTests, TestDequantizeLinearNoAxis) { - auto build_test_case_1 = [&](ModelTestBuilder& builder) { - auto* input0_arg = MakeInput(builder, {{2, -1, 6, 3}}, {2, 4, 6, 3}, 0, 5); - auto* input1_arg = MakeInput(builder, {std::vector{}}, std::vector{}, {2.3f}); - auto* input2_arg = MakeInput(builder, {std::vector{}}, std::vector{}, {10}); - auto* transpose_1_out_0 = builder.MakeIntermediate(); +// Utility function that runs TransformerTester for the graph in which a single DequantizeLinear node is +// the parent of two Transpose nodes. The DQ should be duplicated by EnsureUniqueDQForNodeUnit, and the +// Transposes should be pushed. 
+template +static void RunDequantizeLinearTransposePropagationTestCase(const std::string& dq_domain = "") { + auto build_test_case = [dq_domain](ModelTestBuilder& builder) { + auto* input0_arg = MakeInput(builder, {{2, -1, 6, 3}}, {2, 4, 6, 3}, 0, 5); + auto* scale_arg = MakeInput(builder, {std::vector{}}, std::vector{}, {2.3f}); + auto* zero_point_arg = MakeInput(builder, {std::vector{}}, std::vector{}, {10}); auto* dequantizelinear_1_out_0 = builder.MakeIntermediate(); + auto* transpose_1_out_0 = builder.MakeOutput(); auto* transpose_2_out_0 = builder.MakeOutput(); - auto& transpose_1 = builder.AddNode("Transpose", {input0_arg}, {transpose_1_out_0}); + builder.AddNode("DequantizeLinear", {input0_arg, scale_arg, zero_point_arg}, {dequantizelinear_1_out_0}, + dq_domain); + + auto& transpose_1 = builder.AddNode("Transpose", {dequantizelinear_1_out_0}, {transpose_1_out_0}); transpose_1.AddAttribute("perm", std::vector{0, 3, 1, 2}); - builder.AddNode("DequantizeLinear", {transpose_1_out_0, input1_arg, input2_arg}, {dequantizelinear_1_out_0}); + auto& transpose_2 = builder.AddNode("Transpose", {dequantizelinear_1_out_0}, {transpose_2_out_0}); transpose_2.AddAttribute("perm", std::vector{0, 2, 3, 1}); }; - auto check_optimized_graph_1 = [&](InferenceSessionWrapper& session) { - int transpose_cost = EstimateTransposeCost(session.GetGraph()); - EXPECT_EQ(transpose_cost, 0); + auto check_graph = [dq_domain](InferenceSessionWrapper& session) { + const auto& graph = session.GetGraph(); + + const char* dq_count_key = (dq_domain == kMSDomain) ? 
"com.microsoft.DequantizeLinear" : "DequantizeLinear"; + const auto op_count = CountOpsInGraph(graph); + decltype(op_count) expected_op_count{ + {dq_count_key, 2}, // EnsureUniqueDQForNodeUnit should duplicate the original DQ + {"Transpose", 2}, + }; + ASSERT_EQ(op_count, expected_op_count); + + // Transposes should be pushed, so check for Transpose -> DQ edges + for (const auto& node : graph.Nodes()) { + if (node.OpType() == "Transpose") { + ASSERT_EQ(node.GetOutputEdgesCount(), static_cast(1)); + ASSERT_EQ(node.OutputEdgesBegin()->GetNode().OpType(), "DequantizeLinear"); + } + } }; - TransformerTester(build_test_case_1, - check_optimized_graph_1, + TransformerTester(build_test_case, + check_graph, TransformerLevel::Default, TransformerLevel::Level1, /*opset_version*/ 10); } TEST(TransposeOptimizerTests, TestDequantizeLinearTransposePropagation) { - auto test_case = [&](const std::string& dq_domain = "") { - auto build_test_case_1 = [&](ModelTestBuilder& builder) { - auto* input0_arg = MakeInput(builder, {{2, -1, 6, 3}}, {2, 4, 6, 3}, 0, 5); - auto* input1_arg = MakeInput(builder, {std::vector{}}, std::vector{}, {2.3f}); - auto* input2_arg = MakeInput(builder, {std::vector{}}, std::vector{}, {10}); - auto* dequantizelinear_1_out_0 = builder.MakeIntermediate(); - auto* transpose_1_out_0 = builder.MakeOutput(); - auto* transpose_2_out_0 = builder.MakeOutput(); - - builder.AddNode("DequantizeLinear", {input0_arg, input1_arg, input2_arg}, {dequantizelinear_1_out_0}, - dq_domain); - - auto& transpose_1 = builder.AddNode("Transpose", {dequantizelinear_1_out_0}, {transpose_1_out_0}); - transpose_1.AddAttribute("perm", std::vector{0, 3, 1, 2}); - - auto& transpose_2 = builder.AddNode("Transpose", {dequantizelinear_1_out_0}, {transpose_2_out_0}); - transpose_2.AddAttribute("perm", std::vector{0, 2, 3, 1}); - }; - - auto check_graph = [&](InferenceSessionWrapper& session) { - const auto& graph = session.GetGraph(); - - const char* dq_count_key = (dq_domain == kMSDomain) ? 
"com.microsoft.DequantizeLinear" : "DequantizeLinear"; - const auto op_count = CountOpsInGraph(graph); - decltype(op_count) expected_op_count{ - {dq_count_key, 2}, // EnsureUniqueDQForNodeUnit should duplicate the original DQ - {"Transpose", 2}, - }; - ASSERT_EQ(op_count, expected_op_count); - - // Transposes should be pushed, so check for Transpose -> DQ edges - for (const auto& node : graph.Nodes()) { - if (node.OpType() == "Transpose") { - ASSERT_EQ(node.GetOutputEdgesCount(), static_cast(1)); - ASSERT_EQ(node.OutputEdgesBegin()->GetNode().OpType(), "DequantizeLinear"); - } - } - }; - - TransformerTester(build_test_case_1, - check_graph, - TransformerLevel::Default, - TransformerLevel::Level1, - /*opset_version*/ 10); - }; - - test_case(); + RunDequantizeLinearTransposePropagationTestCase(); #if !defined(DISABLE_CONTRIB_OPS) - test_case(kMSDomain); // Use com.microsoft.DequantizeLinear + // Use com.microsoft.DequantizeLinear + RunDequantizeLinearTransposePropagationTestCase(kMSDomain); + RunDequantizeLinearTransposePropagationTestCase(kMSDomain); + RunDequantizeLinearTransposePropagationTestCase(kMSDomain); #endif } diff --git a/onnxruntime/test/providers/qnn/conv_test.cc b/onnxruntime/test/providers/qnn/conv_test.cc index e9e285411f0a7..0549051bc2387 100644 --- a/onnxruntime/test/providers/qnn/conv_test.cc +++ b/onnxruntime/test/providers/qnn/conv_test.cc @@ -21,7 +21,8 @@ static GetTestModelFn BuildF32ConvTestCase(const std::string& conv_op_type, cons const std::vector& pads, const std::vector& dilations, const std::string& auto_pad = "NOTSET") { - return [conv_op_type, input_def, weights_def, bias_def, strides, pads, dilations, auto_pad](ModelTestBuilder& builder) { + return [conv_op_type, input_def, weights_def, bias_def, strides, pads, + dilations, auto_pad](ModelTestBuilder& builder) { std::vector conv_inputs = { MakeTestInput(builder, input_def), MakeTestInput(builder, weights_def)}; @@ -77,29 +78,33 @@ static void RunCPUConvOpTest(const std::string& 
conv_op_type, const TestInputDef } // Creates a graph with a single Q/DQ Conv operator. Used for testing HTP backend. -template -static GetTestQDQModelFn BuildQDQConvTestCase(const std::string& conv_op_type, const TestInputDef& input_def, - const TestInputDef& weights_def, - const TestInputDef& bias_def, - const std::vector& strides, - const std::vector& pads, - const std::vector& dilations, - const std::string& auto_pad = "NOTSET") { +template +static GetTestQDQModelFn BuildQDQConvTestCase(const std::string& conv_op_type, + const TestInputDef& input_def, + const TestInputDef& weights_def, + const TestInputDef& bias_def, + const std::vector& strides, + const std::vector& pads, + const std::vector& dilations, + const std::string& auto_pad = "NOTSET", + bool use_contrib_qdq = false) { return [conv_op_type, input_def, weights_def, bias_def, strides, pads, - dilations, auto_pad](ModelTestBuilder& builder, - std::vector>& output_qparams) { + dilations, auto_pad, use_contrib_qdq](ModelTestBuilder& builder, + std::vector>& output_qparams) { std::vector conv_inputs; // input -> Q/DQ -> auto* input = MakeTestInput(builder, input_def); - QuantParams input_qparams = GetTestInputQuantParams(input_def); - auto* input_qdq = AddQDQNodePair(builder, input, input_qparams.scale, input_qparams.zero_point); + QuantParams input_qparams = GetTestInputQuantParams(input_def); + auto* input_qdq = AddQDQNodePair(builder, input, input_qparams.scale, input_qparams.zero_point, + use_contrib_qdq); conv_inputs.push_back(input_qdq); // weights -> Q/DQ -> auto* weights = MakeTestInput(builder, weights_def); - QuantParams weights_qparams = GetTestInputQuantParams(weights_def); - auto* weights_qdq = AddQDQNodePair(builder, weights, weights_qparams.scale, weights_qparams.zero_point); + QuantParams weights_qparams = GetTestInputQuantParams(weights_def); + auto* weights_qdq = AddQDQNodePair(builder, weights, weights_qparams.scale, + weights_qparams.zero_point, use_contrib_qdq); 
conv_inputs.push_back(weights_qdq); // bias -> @@ -107,7 +112,7 @@ static GetTestQDQModelFn BuildQDQConvTestCase(const std::string& con // Bias requirement taken from python quantization tool: onnx_quantizer.py::quantize_bias_static() const float bias_scale = input_qparams.scale * weights_qparams.scale; - conv_inputs.push_back(MakeTestQDQBiasInput(builder, bias_def, bias_scale)); + conv_inputs.push_back(MakeTestQDQBiasInput(builder, bias_def, bias_scale, use_contrib_qdq)); } auto* conv_output = builder.MakeIntermediate(); @@ -125,13 +130,14 @@ static GetTestQDQModelFn BuildQDQConvTestCase(const std::string& con conv_node.AddAttribute("dilations", dilations); } - AddQDQNodePairWithOutputAsGraphOutput(builder, conv_output, output_qparams[0].scale, output_qparams[0].zero_point); + AddQDQNodePairWithOutputAsGraphOutput(builder, conv_output, output_qparams[0].scale, + output_qparams[0].zero_point, use_contrib_qdq); }; } // Runs a Conv model on the QNN HTP backend. Checks the graph node assignment, and that inference // outputs for QNN EP and CPU EP match. 
-template +template static void RunHTPConvOpTest(const std::string& conv_op_type, const TestInputDef& input_def, const TestInputDef& weights_def, const TestInputDef& bias_def, @@ -140,6 +146,7 @@ static void RunHTPConvOpTest(const std::string& conv_op_type, const TestInputDef const std::vector& dilations, const std::string& auto_pad, ExpectedEPNodeAssignment expected_ep_assignment, + bool use_contrib_qdq = false, int opset = 13, float fp32_abs_err = 1e-5f) { ProviderOptions provider_options; @@ -150,9 +157,11 @@ static void RunHTPConvOpTest(const std::string& conv_op_type, const TestInputDef provider_options["backend_path"] = "libQnnHtp.so"; #endif - TestQDQModelAccuracy(BuildF32ConvTestCase(conv_op_type, input_def, weights_def, bias_def, strides, pads, dilations, auto_pad), - BuildQDQConvTestCase(conv_op_type, input_def, weights_def, bias_def, - strides, pads, dilations, auto_pad), + TestQDQModelAccuracy(BuildF32ConvTestCase(conv_op_type, input_def, weights_def, bias_def, strides, pads, dilations, + auto_pad), + BuildQDQConvTestCase(conv_op_type, input_def, weights_def, + bias_def, strides, pads, dilations, + auto_pad, use_contrib_qdq), provider_options, opset, expected_ep_assignment, @@ -279,52 +288,56 @@ TEST_F(QnnCPUBackendTests, Convf32_large_input2_nopad_bias_initializer) { // Test 1D Conv with static weights (implemented in QNN EP as 2D convolution with height of 1). 
TEST_F(QnnCPUBackendTests, Conv1Df32_StaticWeights_DefaultBias) { + std::vector input_data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f}; RunCPUConvOpTest("Conv", - TestInputDef({1, 2, 4}, false, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f}), // Dynamic input - TestInputDef({1, 2, 2}, true, {1.0f, 2.0f, 3.0f, 4.0f}), // Static weights - TestInputDef({1}, true, {1.0f}), // Bias of 1.f - {1}, // Strides - {0, 0}, // Pads - {1}, // Dilations + TestInputDef({1, 2, 4}, false, input_data), // Dynamic input + TestInputDef({1, 2, 2}, true, {1.0f, 2.0f, 3.0f, 4.0f}), // Static weights + TestInputDef({1}, true, {1.0f}), // Initializer Bias + {1}, // Strides + {0, 0}, // Pads + {1}, // Dilations "NOTSET", ExpectedEPNodeAssignment::All); } // Test 1D Conv with dynamic weights (implemented in QNN EP as 2D convolution with height of 1). TEST_F(QnnCPUBackendTests, Conv1Df32_DynamicWeights_DefaultBias) { + std::vector input_data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f}; RunCPUConvOpTest("Conv", - TestInputDef({1, 2, 4}, false, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f}), // Dynamic input - TestInputDef({1, 2, 2}, false, {1.0f, 2.0f, 3.0f, 4.0f}), // Dynamic weights - TestInputDef(), // Default bias - {1}, // Strides - {0, 0}, // Pads - {1}, // Dilations + TestInputDef({1, 2, 4}, false, input_data), // Dynamic input + TestInputDef({1, 2, 2}, false, {1.0f, 2.0f, 3.0f, 4.0f}), // Dynamic weights + TestInputDef(), // Default bias + {1}, // Strides + {0, 0}, // Pads + {1}, // Dilations "NOTSET", ExpectedEPNodeAssignment::All); } // Test 1D ConvTranspose with static weights (implemented in QNN EP as 2D convolution with height of 1). 
TEST_F(QnnCPUBackendTests, ConvTranspose1Df32_StaticWeights_DefaultBias) { + std::vector input_data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f}; RunCPUConvOpTest("ConvTranspose", - TestInputDef({1, 2, 4}, false, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f}), // Dynamic input - TestInputDef({2, 1, 2}, true, {1.0f, 2.0f, 3.0f, 4.0f}), // Static weights - TestInputDef({1}, true, {0.0f}), // Zero bias - {1}, // Strides - {0, 0}, // Pads - {1}, // Dilations + TestInputDef({1, 2, 4}, false, input_data), // Dynamic input + TestInputDef({2, 1, 2}, true, {1.0f, 2.0f, 3.0f, 4.0f}), // Static weights + TestInputDef({1}, true, {0.0f}), // Zero bias + {1}, // Strides + {0, 0}, // Pads + {1}, // Dilations "NOTSET", ExpectedEPNodeAssignment::All); } // Test 1D ConvTranspose with dynamic weights (implemented in QNN EP as 2D convolution with height of 1). TEST_F(QnnCPUBackendTests, ConvTranspose1Df32_DynamicWeights_DefaultBias) { + std::vector input_data = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f}; RunCPUConvOpTest("ConvTranspose", - TestInputDef({1, 2, 4}, false, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f}), // Dynamic input - TestInputDef({2, 1, 2}, false, {1.0f, 2.0f, 3.0f, 4.0f}), // Dynamic weights - TestInputDef({1}, true, {0.0f}), // Zero bias - {1}, // Strides - {0, 0}, // Pads - {1}, // Dilations + TestInputDef({1, 2, 4}, false, input_data), // Dynamic input + TestInputDef({2, 1, 2}, false, {1.0f, 2.0f, 3.0f, 4.0f}), // Dynamic weights + TestInputDef({1}, true, {0.0f}), // Zero bias + {1}, // Strides + {0, 0}, // Pads + {1}, // Dilations "NOTSET", ExpectedEPNodeAssignment::All); } @@ -397,218 +410,448 @@ TEST_F(QnnHTPBackendTests, Test_QDQConvWithDynamicWeightsFromMul) { // Check that QNN compiles DQ -> Conv -> Q as a single unit. // Tests bias as a dynamic input. 
-TEST_F(QnnHTPBackendTests, ConvU8S32_bias_dynamic_input) { - RunHTPConvOpTest("Conv", - TestInputDef({1, 1, 5, 5}, false, 0.0f, 10.0f), // Random dynamic input - TestInputDef({1, 1, 3, 3}, true, -10.0f, 10.0f), // Random static input - TestInputDef({1}, false, {2.0f}), // Dynamic bias = 2.0f - {1, 1}, // Strides - {0, 0, 0, 0}, // Pads - {1, 1}, // Dilations - "NOTSET", - ExpectedEPNodeAssignment::All); +TEST_F(QnnHTPBackendTests, ConvU8U8S32_bias_dynamic_input) { + RunHTPConvOpTest("Conv", + TestInputDef({1, 1, 5, 5}, false, 0.0f, 10.0f), // Random dynamic input + TestInputDef({1, 1, 3, 3}, true, -10.0f, 10.0f), // Random static input + TestInputDef({1}, false, {2.0f}), // Dynamic bias + {1, 1}, // Strides + {0, 0, 0, 0}, // Pads + {1, 1}, // Dilations + "NOTSET", + ExpectedEPNodeAssignment::All); +} + +// Tests 16-bit QDQ Conv with dynamic weights and bias (uses QNN's Conv2d) +// TODO: Inaccuracy detected for output 'output', element 0. +// Output quant params: scale=0.0040235077030956745, zero_point=0. +// Expected val: 87.354057312011719 +// QNN QDQ val: 0 (err 87.354057312011719) +// CPU QDQ val: 87.3583984375 (err 0.00434112548828125) +TEST_F(QnnHTPBackendTests, DISABLED_ConvU16S16S32_DynamicBias) { + TestInputDef input_def({1, 2, 5, 5}, false, GetFloatDataInRange(-10.0f, 10.0f, 50)); + TestInputDef weight_def({1, 2, 3, 3}, false, GetFloatDataInRange(-1.0f, 5.0f, 18)); + RunHTPConvOpTest("Conv", + input_def, // Input + weight_def.OverrideValueRange(-5.0f, 5.0f), // Weights (symmetric quant range) + TestInputDef({1}, false, {2.0f}), // Bias + {1, 1}, // Strides + {0, 0, 0, 0}, // Pads + {1, 1}, // Dilations + "NOTSET", + ExpectedEPNodeAssignment::All, + true); // Use com.microsoft QDQ ops for 16-bit +} + +// Tests 16-bit QDQ Conv with dynamic weights and bias (uses QNN's DepthwiseConv2d) +// TODO(adrianlizarraga): FAIL: Failed to finalize QNN graph. 
Error code 1002 +TEST_F(QnnHTPBackendTests, DISABLED_DepthwiseConvU16S16S32_DynamicBias) { + TestInputDef input_def({1, 1, 5, 5}, false, GetFloatDataInRange(-10.0f, 10.0f, 25)); + TestInputDef weight_def({1, 1, 3, 3}, false, GetFloatDataInRange(-1.0f, 5.0f, 9)); + RunHTPConvOpTest("Conv", + input_def, // Input + weight_def.OverrideValueRange(-5.0f, 5.0f), // Weights (symmetric quant range) + TestInputDef({1}, false, {2.0f}), // Bias + {1, 1}, // Strides + {0, 0, 0, 0}, // Pads + {1, 1}, // Dilations + "NOTSET", + ExpectedEPNodeAssignment::All, + true); // Use com.microsoft QDQ ops for 16-bit +} + +// Tests 16-bit QDQ Conv with dynamic weights and no bias. +// TODO: Inaccuracy detected for output 'output', element 0. +// Output quant params: scale=0.0039929896593093872, zero_point=0. +// Expected val: 85.354057312011719 +// QNN QDQ val: 0 (err 85.354057312011719) +// CPU QDQ val: 85.358139038085938 (err 0.00408172607421875) +TEST_F(QnnHTPBackendTests, DISABLED_ConvU16S16S32_NoBias) { + TestInputDef input_def({1, 2, 5, 5}, false, GetFloatDataInRange(-10.0f, 10.0f, 50)); + TestInputDef weight_def({1, 2, 3, 3}, false, GetFloatDataInRange(-1.0f, 5.0f, 18)); + RunHTPConvOpTest("Conv", + input_def, // Input + weight_def.OverrideValueRange(-5.0f, 5.0f), // Weights (symmetric quant range) + TestInputDef(), // Bias + {1, 1}, // Strides + {0, 0, 0, 0}, // Pads + {1, 1}, // Dilations + "NOTSET", + ExpectedEPNodeAssignment::All, + true); // Use com.microsoft QDQ ops for 16-bit +} + +// Tests 16-bit QDQ Conv with dynamic weights and no bias (uses QNN's DepthWiseConv2d) +// TODO(adrianlizarraga): FAIL: Failed to finalize QNN graph. 
Error code 1002 +TEST_F(QnnHTPBackendTests, DISABLED_DepthwiseConvU16S16S32_NoBias) { + std::vector input_data = GetFloatDataInRange(-10.0f, 10.0f, 25); + std::vector weight_data = GetFloatDataInRange(-10.0f, 10.0f, 9); + RunHTPConvOpTest("Conv", + TestInputDef({1, 1, 5, 5}, false, input_data), // Input + TestInputDef({1, 1, 3, 3}, false, weight_data), // Weights + TestInputDef(), // Bias + {1, 1}, // Strides + {0, 0, 0, 0}, // Pads + {1, 1}, // Dilations + "NOTSET", + ExpectedEPNodeAssignment::All, + true); // Use com.microsoft QDQ ops for 16-bit +} + +// Tests 16-bit activations, 8-bit static weights QDQ Conv with static bias. +// Uses QNN's DepthwiseConv2d operator. +// TODO: Inaccuracy detected for output 'output', element 8. +// Output quant params: scale=0.0027466239407658577, zero_point=10194. +// Expected val: 152 +// QNN QDQ val: 151.8004150390625 (err 0.1995849609375) +// CPU QDQ val: 151.9981689453125 (err 0.0018310546875) +TEST_F(QnnHTPBackendTests, DepthwiseConvU16U8S32_StaticBias) { + std::vector input_data = GetFloatDataInRange(-10.0f, 10.0f, 25); + std::vector weight_data = GetFloatDataInRange(-1.0f, 5.0f, 9); + RunHTPConvOpTest("Conv", + TestInputDef({1, 1, 5, 5}, false, input_data), // Input + TestInputDef({1, 1, 3, 3}, true, weight_data), // Weights + TestInputDef({1}, true, {2.0f}), // Bias + {1, 1}, // Strides + {0, 0, 0, 0}, // Pads + {1, 1}, // Dilations + "NOTSET", + ExpectedEPNodeAssignment::All, + true, // Use com.microsoft QDQ ops for 16-bit + 13, + 0.2f); +} + +// Tests 16-bit activations, 8-bit static weights QDQ Conv with static bias. +// TODO: Inaccuracy detected for output 'output', element 0. +// Output quant params: scale=0.0040235077030956745, zero_point=0. 
+// Expected val: 87.354057312011719 +// QNN QDQ val: 87.559577941894531 (err 0.2055206298828125) +// CPU QDQ val: 87.398635864257812 (err 0.04457855224609375) +TEST_F(QnnHTPBackendTests, ConvU16U8S32_StaticBias) { + std::vector input_data = GetFloatDataInRange(-10.0f, 10.0f, 50); + std::vector weight_data = GetFloatDataInRange(-1.0f, 5.0f, 18); + RunHTPConvOpTest("Conv", + TestInputDef({1, 2, 5, 5}, false, input_data), // Input + TestInputDef({1, 2, 3, 3}, true, weight_data), // Weights + TestInputDef({1}, true, {2.0f}), // Bias + {1, 1}, // Strides + {0, 0, 0, 0}, // Pads + {1, 1}, // Dilations + "NOTSET", + ExpectedEPNodeAssignment::All, + true, // Use com.microsoft QDQ ops for 16-bit + 13, + 0.6f); +} + +// Tests 16-bit activations, 8-bit static weights QDQ Conv with dynamic bias. +// Uses QNN's DepthwiseConv2d operator. +// TODO: Inaccuracy detected for output 'output', element 1. +// Output quant params: scale=0.0027466239407658577, zero_point=10194. +// Expected val: -13.000001907348633 +// QNN QDQ val: -13.095903396606445 (err 0.0959014892578125) +// CPU QDQ val: -12.999771118164062 (err 0.0002307891845703125) +TEST_F(QnnHTPBackendTests, DepthwiseConvU16U8S32_DynamicBias) { + std::vector input_data = GetFloatDataInRange(-10.0f, 10.0f, 25); + std::vector weight_data = GetFloatDataInRange(-1.0f, 5.0f, 9); + RunHTPConvOpTest("Conv", + TestInputDef({1, 1, 5, 5}, false, input_data), // Input + TestInputDef({1, 1, 3, 3}, true, weight_data), // Weights + TestInputDef({1}, false, {2.0f}), // Bias + {1, 1}, // Strides + {0, 0, 0, 0}, // Pads + {1, 1}, // Dilations + "NOTSET", + ExpectedEPNodeAssignment::All, + true, // Use com.microsoft QDQ ops for 16-bit + 13, + 0.2f); +} + +// Tests 16-bit activations, 8-bit static weights QDQ Conv with dynamic bias. +// TODO: Inaccuracy detected for output 'output', element 0. +// Output quant params: scale=0.0040235077030956745, zero_point=0. 
+// Expected val: 87.354057312011719 +// QNN QDQ val: 87.559577941894531 (err 0.2055206298828125) +// CPU QDQ val: 87.398635864257812 (err 0.04457855224609375) +TEST_F(QnnHTPBackendTests, ConvU16U8S32_DynamicBias) { + std::vector input_data = GetFloatDataInRange(-10.0f, 10.0f, 50); + std::vector weight_data = GetFloatDataInRange(-1.0f, 5.0f, 18); + RunHTPConvOpTest("Conv", + TestInputDef({1, 2, 5, 5}, false, input_data), // Input + TestInputDef({1, 2, 3, 3}, true, weight_data), // Weights + TestInputDef({1}, false, {2.0f}), // Bias + {1, 1}, // Strides + {0, 0, 0, 0}, // Pads + {1, 1}, // Dilations + "NOTSET", + ExpectedEPNodeAssignment::All, + true, // Use com.microsoft QDQ ops for 16-bit + 13, + 0.57f); +} + +// Tests 16-bit activations, 8-bit static weights QDQ Conv with no bias +// TODO: Inaccuracy detected for output 'output', element 7. +// Output quant params: scale=0.0039929896593093872, zero_point=0. +// Expected val: 246.98667907714844 +// QNN QDQ val: 247.82090759277344 (err 0.834228515625) +// CPU QDQ val: 247.24192810058594 (err 0.2552490234375) +TEST_F(QnnHTPBackendTests, ConvU16U8S32_NoBias) { + std::vector input_data = GetFloatDataInRange(-10.0f, 10.0f, 50); + std::vector weight_data = GetFloatDataInRange(-1.0f, 5.0f, 18); + RunHTPConvOpTest("Conv", + TestInputDef({1, 2, 5, 5}, false, input_data), // Input + TestInputDef({1, 2, 3, 3}, true, weight_data), // Weights + TestInputDef(), // Bias + {1, 1}, // Strides + {0, 0, 0, 0}, // Pads + {1, 1}, // Dilations + "NOTSET", + ExpectedEPNodeAssignment::All, + true, // Use com.microsoft QDQ ops for 16-bit + 13, + 0.58f); +} + +// Tests 16-bit activations, 8-bit static weights QDQ Conv with no bias +// Uses QNN's DepthwiseConv2d operator. +// TODO: Inaccuracy detected for output 'output', element 8. +// Output quant params: scale=0.0027466239407658577, zero_point=10923. 
+// Expected val: 150 +// QNN QDQ val: 149.80087280273438 (err 0.199127197265625) +// CPU QDQ val: 149.99862670898438 (err 0.001373291015625) +TEST_F(QnnHTPBackendTests, DepthwiseConvU16U8S32_NoBias) { + std::vector input_data = GetFloatDataInRange(-10.0f, 10.0f, 25); + std::vector weight_data = GetFloatDataInRange(-1.0f, 5.0f, 9); + RunHTPConvOpTest("Conv", + TestInputDef({1, 1, 5, 5}, false, input_data), // Input + TestInputDef({1, 1, 3, 3}, true, weight_data), // Weights + TestInputDef(), // Bias + {1, 1}, // Strides + {0, 0, 0, 0}, // Pads + {1, 1}, // Dilations + "NOTSET", + ExpectedEPNodeAssignment::All, + true, // Use com.microsoft QDQ ops for 16-bit + 13, + 0.2f); } // Test that dynamic weights with default bias works for Conv. This was previously not working // on older versions of QNN sdk. -TEST_F(QnnHTPBackendTests, ConvU8S32_DynamicWeight_NoBias) { - RunHTPConvOpTest("Conv", - TestInputDef({1, 3, 32, 32}, false, -10.0f, 10.0f), // Random dynamic input - TestInputDef({1, 3, 4, 4}, false, -10.0f, 10.0f), // Random dynamic weights - TestInputDef(), // Default bias - {1, 1}, // Strides - {0, 0, 0, 0}, // Pads - {1, 1}, // Dilations - "NOTSET", - ExpectedEPNodeAssignment::All); +TEST_F(QnnHTPBackendTests, ConvU8U8S32_DynamicWeight_NoBias) { + RunHTPConvOpTest("Conv", + TestInputDef({1, 3, 32, 32}, false, -10.0f, 10.0f), // Input + TestInputDef({1, 3, 4, 4}, false, -10.0f, 10.0f), // Weights + TestInputDef(), // Bias + {1, 1}, // Strides + {0, 0, 0, 0}, // Pads + {1, 1}, // Dilations + "NOTSET", + ExpectedEPNodeAssignment::All); } // Test that dynamic weights with default bias works for ConvTranspose. This was previously not working // on older versions of QNN sdk. 
-TEST_F(QnnHTPBackendTests, ConvTransposeU8S32_DynamicWeight_NoBias) { - RunHTPConvOpTest("ConvTranspose", - TestInputDef({1, 3, 32, 32}, false, -10.0f, 10.0f), // Random dynamic input - TestInputDef({3, 1, 4, 4}, false, -10.0f, 10.0f), // Random dynamic weights - TestInputDef(), // Default bias - {1, 1}, // Strides - {0, 0, 0, 0}, // Pads - {1, 1}, // Dilations - "NOTSET", - ExpectedEPNodeAssignment::All); +TEST_F(QnnHTPBackendTests, ConvTransposeU8U8S32_DynamicWeight_NoBias) { + RunHTPConvOpTest("ConvTranspose", + TestInputDef({1, 3, 32, 32}, false, -10.0f, 10.0f), // Input + TestInputDef({3, 1, 4, 4}, false, -10.0f, 10.0f), // Weights + TestInputDef(), // Bias + {1, 1}, // Strides + {0, 0, 0, 0}, // Pads + {1, 1}, // Dilations + "NOTSET", + ExpectedEPNodeAssignment::All); } // Check that QNN compiles DQ -> Conv -> Q as a single unit. // Tests bias as an initializer. TEST_F(QnnHTPBackendTests, ConvU8U8S32_bias_initializer) { - RunHTPConvOpTest("Conv", - TestInputDef({1, 1, 5, 5}, false, 0.0f, 10.0f), // Random dynamic input - TestInputDef({1, 1, 3, 3}, true, -10.0f, 10.0f), // Random static weight - TestInputDef({1}, true, {2.0f}), // Initializer bias = 2.0f - {1, 1}, // Strides - {0, 0, 0, 0}, // Pads - {1, 1}, // Dilations - "NOTSET", - ExpectedEPNodeAssignment::All); + RunHTPConvOpTest("Conv", + TestInputDef({1, 1, 5, 5}, false, 0.0f, 10.0f), // Random dynamic input + TestInputDef({1, 1, 3, 3}, true, -10.0f, 10.0f), // Random static weight + TestInputDef({1}, true, {2.0f}), // Initializer bias + {1, 1}, // Strides + {0, 0, 0, 0}, // Pads + {1, 1}, // Dilations + "NOTSET", + ExpectedEPNodeAssignment::All); } // Tests 1D Conv with bias as an initializer. 
-TEST_F(QnnHTPBackendTests, Conv1DU8S32_bias_initializer) { - RunHTPConvOpTest("Conv", - TestInputDef({1, 2, 4}, false, {0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f}), // Dynamic input - TestInputDef({1, 2, 2}, true, {1.f, 2.f, 3.f, 4.f}), // Static weight - TestInputDef({1}, true, {1.0f}), // Initializer bias = 1.0f - {1}, // strides - {0, 0}, // pads - {1}, // dilations - "NOTSET", - ExpectedEPNodeAssignment::All); +TEST_F(QnnHTPBackendTests, Conv1DU8U8S32_bias_initializer) { + std::vector input_data = {0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f}; + RunHTPConvOpTest("Conv", + TestInputDef({1, 2, 4}, false, input_data), // Dynamic input + TestInputDef({1, 2, 2}, true, {1.f, 2.f, 3.f, 4.f}), // Static weight + TestInputDef({1}, true, {1.0f}), // Initializer bias + {1}, // strides + {0, 0}, // pads + {1}, // dilations + "NOTSET", + ExpectedEPNodeAssignment::All); } // Tests 1D ConvTranspose with bias as an initializer. -TEST_F(QnnHTPBackendTests, ConvTranspose1DU8S32_bias_initializer) { - RunHTPConvOpTest("ConvTranspose", - TestInputDef({1, 2, 4}, false, {0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f}), // Dynamic input - TestInputDef({2, 1, 2}, true, {1.f, 2.f, 3.f, 4.f}), // Static weight - TestInputDef({1}, true, {1.0f}), // Initializer bias = 1.0f - {1}, // strides - {0, 0}, // pads - {1}, // dilations - "NOTSET", - ExpectedEPNodeAssignment::All); +TEST_F(QnnHTPBackendTests, ConvTranspose1DU8U8S32_bias_initializer) { + std::vector input_data = {0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f}; + RunHTPConvOpTest("ConvTranspose", + TestInputDef({1, 2, 4}, false, input_data), // Dynamic input + TestInputDef({2, 1, 2}, true, {1.f, 2.f, 3.f, 4.f}), // Static weight + TestInputDef({1}, true, {1.0f}), // Initializer bias + {1}, // strides + {0, 0}, // pads + {1}, // dilations + "NOTSET", + ExpectedEPNodeAssignment::All); } // Tests auto_pad value "SAME_UPPER" on HTP backend (compares to CPU EP). 
-TEST_F(QnnHTPBackendTests, ConvU8S32_AutoPadUpper) { - RunHTPConvOpTest("Conv", - TestInputDef({1, 1, 5, 5}, false, 0.f, 10.f), // Dynamic input - TestInputDef({1, 1, 4, 4}, true, -1.f, 1.f), // Static weights - TestInputDef({1}, true, {1.0f}), // Initializer bias = 1.0f - {1, 1}, // strides - {}, // pads - {1, 1}, // dilations - "SAME_UPPER", // auto_pad - ExpectedEPNodeAssignment::All, - 13); +TEST_F(QnnHTPBackendTests, ConvU8U8S32_AutoPadUpper) { + RunHTPConvOpTest("Conv", + TestInputDef({1, 1, 5, 5}, false, 0.f, 10.f), // Dynamic input + TestInputDef({1, 1, 4, 4}, true, -1.f, 1.f), // Static weights + TestInputDef({1}, true, {1.0f}), // Initializer bias + {1, 1}, // strides + {}, // pads + {1, 1}, // dilations + "SAME_UPPER", // auto_pad + ExpectedEPNodeAssignment::All, + false, // use_contrib_qdq + 13); } // Tests Conv1d auto_pad value "SAME_UPPER" on HTP backend (compares to CPU EP). TEST_F(QnnHTPBackendTests, Conv1DU8U8S32_AutoPadUpper) { - RunHTPConvOpTest("Conv", - TestInputDef({1, 2, 4}, false, {0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f}), // Dynamic input - TestInputDef({1, 2, 2}, true, {1.f, 2.f, 3.f, 4.f}), // Static weight - TestInputDef({1}, true, {1.0f}), // Initializer bias = 1.0f - {1}, // strides - {0}, // pads - {1}, // dilations - "SAME_UPPER", // auto_pad - ExpectedEPNodeAssignment::All, - 13); + std::vector input_data = {0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f}; + RunHTPConvOpTest("Conv", + TestInputDef({1, 2, 4}, false, input_data), // Dynamic input + TestInputDef({1, 2, 2}, true, {1.f, 2.f, 3.f, 4.f}), // Static weight + TestInputDef({1}, true, {1.0f}), // Initializer bias + {1}, // strides + {0}, // pads + {1}, // dilations + "SAME_UPPER", // auto_pad + ExpectedEPNodeAssignment::All, + false, // use_contrib_qdq + 13); } // Tests TransposeConv1d auto_pad value "SAME_UPPER" on HTP backend (compares to CPU EP). 
TEST_F(QnnHTPBackendTests, ConvTranspose1DU8U8S32_AutoPadUpper) { - RunHTPConvOpTest("ConvTranspose", - TestInputDef({1, 2, 4}, false, {0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f}), // Dynamic input - TestInputDef({2, 1, 2}, true, {1.f, 2.f, 3.f, 4.f}), // Static weight - TestInputDef({1}, true, {1.0f}), // Initializer bias = 1.0f - {1}, // strides - {0}, // pads - {1}, // dilations - "SAME_UPPER", // auto_pad - ExpectedEPNodeAssignment::All, - 13); + std::vector input_data = {0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f}; + RunHTPConvOpTest("ConvTranspose", + TestInputDef({1, 2, 4}, false, input_data), // Dynamic input + TestInputDef({2, 1, 2}, true, {1.f, 2.f, 3.f, 4.f}), // Static weight + TestInputDef({1}, true, {1.0f}), // Initializer bias + {1}, // strides + {0}, // pads + {1}, // dilations + "SAME_UPPER", // auto_pad + ExpectedEPNodeAssignment::All, + false, // use_contrib_qdq + 13); } // Tests Conv's auto_pad value "SAME_LOWER" on HTP backend (compares to CPU EP). TEST_F(QnnHTPBackendTests, ConvU8U8S32_AutoPadLower) { - RunHTPConvOpTest("Conv", - TestInputDef({1, 1, 5, 5}, false, 0.f, 10.f), // Dynamic input - TestInputDef({1, 1, 4, 4}, true, -1.f, 1.f), // Static weights - TestInputDef({1}, true, {1.0f}), // Initializer bias = 1.0f - {1, 1}, // strides - {}, // pads - {1, 1}, // dilations - "SAME_LOWER", // auto_pad - ExpectedEPNodeAssignment::All, - 13); + RunHTPConvOpTest("Conv", + TestInputDef({1, 1, 5, 5}, false, 0.f, 10.f), // Dynamic input + TestInputDef({1, 1, 4, 4}, true, -1.f, 1.f), // Static weights + TestInputDef({1}, true, {1.0f}), // Initializer bias + {1, 1}, // strides + {}, // pads + {1, 1}, // dilations + "SAME_LOWER", // auto_pad + ExpectedEPNodeAssignment::All, + false, // use_contrib_qdq + 13); } // Tests ConvTranspose's auto_pad value "SAME_LOWER" on HTP backend (compares to CPU EP). 
TEST_F(QnnHTPBackendTests, ConvTransposeU8U8S32_AutoPadLower) { - RunHTPConvOpTest("ConvTranspose", - TestInputDef({1, 1, 5, 5}, false, 0.f, 10.f), // Dynamic input - TestInputDef({1, 1, 4, 4}, true, -1.f, 1.f), // Static weights - TestInputDef({1}, true, {1.0f}), // Initializer bias = 1.0f - {1, 1}, // strides - {}, // pads - {1, 1}, // dilations - "SAME_LOWER", // auto_pad - ExpectedEPNodeAssignment::All, - 13); + RunHTPConvOpTest("ConvTranspose", + TestInputDef({1, 1, 5, 5}, false, 0.f, 10.f), // Dynamic input + TestInputDef({1, 1, 4, 4}, true, -1.f, 1.f), // Static weights + TestInputDef({1}, true, {1.0f}), // Initializer bias + {1, 1}, // strides + {}, // pads + {1, 1}, // dilations + "SAME_LOWER", // auto_pad + ExpectedEPNodeAssignment::All, + false, // use_contrib_qdq + 13); } // Tests Conv1d auto_pad value "SAME_LOWER" on HTP backend (compares to CPU EP). TEST_F(QnnHTPBackendTests, Conv1DU8U8S32_AutoPadLower) { - RunHTPConvOpTest("Conv", - TestInputDef({1, 2, 4}, false, {0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f}), // Dynamic input - TestInputDef({1, 2, 2}, true, {1.f, 2.f, 3.f, 4.f}), // Static weight - TestInputDef({1}, true, {1.0f}), // Initializer bias = 1.0f - {1}, // strides - {0}, // pads - {1}, // dilations - "SAME_LOWER", // auto_pad - ExpectedEPNodeAssignment::All, - 13); + std::vector input_data = {0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f}; + RunHTPConvOpTest("Conv", + TestInputDef({1, 2, 4}, false, input_data), // Dynamic input + TestInputDef({1, 2, 2}, true, {1.f, 2.f, 3.f, 4.f}), // Static weight + TestInputDef({1}, true, {1.0f}), // Initializer bias + {1}, // strides + {0}, // pads + {1}, // dilations + "SAME_LOWER", // auto_pad + ExpectedEPNodeAssignment::All, + false, // use_contrib_qdq + 13); } // Tests ConvTranspose 1d auto_pad value "SAME_LOWER" on HTP backend (compares to CPU EP). 
TEST_F(QnnHTPBackendTests, ConvTranspose1DU8U8S32_AutoPadLower) { - RunHTPConvOpTest("ConvTranspose", - TestInputDef({1, 2, 4}, false, {0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f}), // Dynamic input - TestInputDef({2, 1, 2}, true, {1.f, 2.f, 3.f, 4.f}), // Static weight - TestInputDef({1}, true, {1.0f}), // Initializer bias = 1.0f - {1}, // strides - {0}, // pads - {1}, // dilations - "SAME_LOWER", // auto_pad - ExpectedEPNodeAssignment::All, - 13); + std::vector input_data = {0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f}; + RunHTPConvOpTest("ConvTranspose", + TestInputDef({1, 2, 4}, false, input_data), // Dynamic input + TestInputDef({2, 1, 2}, true, {1.f, 2.f, 3.f, 4.f}), // Static weight + TestInputDef({1}, true, {1.0f}), // Initializer bias + {1}, // strides + {0}, // pads + {1}, // dilations + "SAME_LOWER", // auto_pad + ExpectedEPNodeAssignment::All, + false, // use_contrib_qdq + 13); } TEST_F(QnnHTPBackendTests, ConvU8U8S32_large_input1_padding_bias_initializer) { - RunHTPConvOpTest("Conv", - TestInputDef({1, 3, 60, 452}, false, 0.f, 10.f), // Dynamic input - TestInputDef({16, 3, 3, 3}, true, -1.f, 1.f), // Static weights - TestInputDef({16}, true, std::vector(16, 1.f)), // Initializer bias = 1.f, 1.f, ... 
- {1, 1}, - {1, 1, 1, 1}, - {1, 1}, - "NOTSET", - ExpectedEPNodeAssignment::All); -} - -TEST_F(QnnHTPBackendTests, ConvU8S32_large_input2_bias_initializer) { - RunHTPConvOpTest("Conv", - TestInputDef({1, 128, 8, 56}, false, 0.f, 10.f), // Dynamic input - TestInputDef({32, 128, 1, 1}, true, -1.f, 1.f), // Random static weights - TestInputDef({32}, true, -1.f, 1.f), // Random initializer bias - {1, 1}, - {0, 0, 0, 0}, - {1, 1}, - "NOTSET", - ExpectedEPNodeAssignment::All); + RunHTPConvOpTest("Conv", + TestInputDef({1, 3, 60, 452}, false, 0.f, 10.f), // Dynamic input + TestInputDef({16, 3, 3, 3}, true, -1.f, 1.f), // Static weights + TestInputDef({16}, true, std::vector(16, 1.f)), // Initializer bias + {1, 1}, + {1, 1, 1, 1}, + {1, 1}, + "NOTSET", + ExpectedEPNodeAssignment::All); +} + +TEST_F(QnnHTPBackendTests, ConvU8U8S32_large_input2_bias_initializer) { + RunHTPConvOpTest("Conv", + TestInputDef({1, 128, 8, 56}, false, 0.f, 10.f), // Dynamic input + TestInputDef({32, 128, 1, 1}, true, -1.f, 1.f), // Random static weights + TestInputDef({32}, true, -1.f, 1.f), // Random initializer bias + {1, 1}, + {0, 0, 0, 0}, + {1, 1}, + "NOTSET", + ExpectedEPNodeAssignment::All); } TEST_F(QnnHTPBackendTests, ConvU8U8S32_LargeInput_Dilations_Pads) { - RunHTPConvOpTest("Conv", - TestInputDef({1, 3, 768, 1152}, false, 0.f, 10.f), // Dynamic input - TestInputDef({64, 3, 7, 7}, true, -1.f, 1.f), // Random static weights - TestInputDef({64}, true, -1.f, 1.f), // Random initializer bias - {2, 2}, // strides - {3, 3, 3, 3}, // pads - {1, 1}, // dilations - "NOTSET", // auto_pad - ExpectedEPNodeAssignment::All); + RunHTPConvOpTest("Conv", + TestInputDef({1, 3, 768, 1152}, false, 0.f, 10.f), // Dynamic input + TestInputDef({64, 3, 7, 7}, true, -1.f, 1.f), // Static weights + TestInputDef({64}, true, -1.f, 1.f), // Initializer bias + {2, 2}, // strides + {3, 3, 3, 3}, // pads + {1, 1}, // dilations + "NOTSET", // auto_pad + ExpectedEPNodeAssignment::All); } #endif // defined(__aarch64__) 
|| defined(_M_ARM64) || defined(__linux__) diff --git a/onnxruntime/test/providers/qnn/matmul_test.cpp b/onnxruntime/test/providers/qnn/matmul_test.cpp index 6edb6ecdcfb1a..e721ccbcb45a9 100644 --- a/onnxruntime/test/providers/qnn/matmul_test.cpp +++ b/onnxruntime/test/providers/qnn/matmul_test.cpp @@ -27,28 +27,31 @@ static GetTestModelFn BuildMatMulOpTestCase(const TestInputDef& input1_de } // Returns a function that creates a graph with a QDQ MatMul operator. -template -static GetTestQDQModelFn BuildMatMulOpQDQTestCase(const TestInputDef& input1_def, - const TestInputDef& input2_def) { - return [input1_def, input2_def](ModelTestBuilder& builder, - std::vector>& output_qparams) { +template +static GetTestQDQModelFn BuildMatMulOpQDQTestCase(const TestInputDef& input1_def, + const TestInputDef& input2_def, + bool use_contrib_qdq) { + return [input1_def, input2_def, use_contrib_qdq](ModelTestBuilder& builder, + std::vector>& output_qparams) { // input1 -> Q -> DQ -> NodeArg* input1 = MakeTestInput(builder, input1_def); - QuantParams input1_qparams = GetTestInputQuantParams(input1_def); - auto* input1_qdq = AddQDQNodePair(builder, input1, input1_qparams.scale, input1_qparams.zero_point); + QuantParams input1_qparams = GetTestInputQuantParams(input1_def); + auto* input1_qdq = AddQDQNodePair(builder, input1, input1_qparams.scale, input1_qparams.zero_point, + use_contrib_qdq); // input2 -> Q -> DQ -> NodeArg* input2 = MakeTestInput(builder, input2_def); - QuantParams input2_qparams = GetTestInputQuantParams(input2_def); - auto* input2_qdq = AddQDQNodePair(builder, input2, input2_qparams.scale, input2_qparams.zero_point); + QuantParams input2_qparams = GetTestInputQuantParams(input2_def); + auto* input2_qdq = AddQDQNodePair(builder, input2, input2_qparams.scale, input2_qparams.zero_point, + use_contrib_qdq); // MatMul auto* op_output = builder.MakeIntermediate(); builder.AddNode("MatMul", {input1_qdq, input2_qdq}, {op_output}); // op_output -> Q -> DQ -> output - 
AddQDQNodePairWithOutputAsGraphOutput(builder, op_output, output_qparams[0].scale, - output_qparams[0].zero_point); + AddQDQNodePairWithOutputAsGraphOutput(builder, op_output, output_qparams[0].scale, + output_qparams[0].zero_point, use_contrib_qdq); }; } @@ -75,11 +78,13 @@ static void RunMatMulOpOpTest(const TestInputDef& input1_def, // Runs a QDQ MatMul model on the QNN HTP backend. Checks the graph node assignment, and that the // QDQ model is accurate on QNN EP (compared to CPU EP). -template +template static void RunQDQMatMulOpOpTest(const TestInputDef& input1_def, const TestInputDef& input2_def, ExpectedEPNodeAssignment expected_ep_assignment, - int opset = 18) { + int opset = 18, + bool use_contrib_qdq = false, + float fp32_abs_err = 1e-4f) { ProviderOptions provider_options; #if defined(_WIN32) provider_options["backend_path"] = "QnnHtp.dll"; @@ -88,11 +93,12 @@ static void RunQDQMatMulOpOpTest(const TestInputDef& input1_def, #endif TestQDQModelAccuracy(BuildMatMulOpTestCase(input1_def, input2_def), - BuildMatMulOpQDQTestCase(input1_def, input2_def), + BuildMatMulOpQDQTestCase(input1_def, input2_def, + use_contrib_qdq), provider_options, opset, expected_ep_assignment, - 1e-5f); + fp32_abs_err); } // @@ -127,16 +133,68 @@ TEST_F(QnnCPUBackendTests, MatMulOp_Broadcast) { // TEST_F(QnnHTPBackendTests, MatMulOp_HTP_u8) { - RunQDQMatMulOpOpTest(TestInputDef({2, 3}, false, {-10.0f, -4.0f, -2.0f, 0.0f, 5.0f, 10.0f}), - TestInputDef({3, 2}, false, {-10.0f, -6.0f, -1.0f, 0.0f, 3.0f, 10.0f}), - ExpectedEPNodeAssignment::All, 18); + std::vector input0_data = {-10.0f, -4.0f, -2.0f, 0.0f, 5.0f, 10.0f}; + std::vector input1_data = {-10.0f, -6.0f, -1.0f, 0.0f, 3.0f, 10.0f}; + RunQDQMatMulOpOpTest(TestInputDef({2, 3}, false, input0_data), + TestInputDef({3, 2}, false, input1_data), + ExpectedEPNodeAssignment::All, 18); } -// Test MatMul broadcasting +// Test QDQ MatMul with 16-bit act, 8-bit weights (static) +// TODO: (SLIGHT) Inaccuracy detected for output 'output', 
element 0. +// Output quant params: scale=0.0015259021893143654, zero_point=0. +// Expected val: 98 +// QNN QDQ val: 97.720298767089844 (err 0.27970123291015625) +// CPU QDQ val: 97.726402282714844 (err 0.27359771728515625) +TEST_F(QnnHTPBackendTests, MatMulOp_HTP_A16_W8Static) { + std::vector input0_data = {-10.0f, -4.0f, -2.0f, 0.0f, 5.0f, 10.0f}; + std::vector input1_data = {-10.0f, -6.0f, -1.0f, 0.0f, 3.0f, 10.0f}; + RunQDQMatMulOpOpTest(TestInputDef({2, 3}, false, input0_data), + TestInputDef({3, 2}, true, input1_data), + ExpectedEPNodeAssignment::All, + 18, + true, // Use com.microsoft Q/DQ ops + 7e-3f); +} + +// Test 16-bit QDQ MatMul with static weights +// TODO: Inaccuracy detected for output 'output', element 0. +// Output quant params: scale=0.0015259021893143654, zero_point=0. +// Expected val: 98 +// QNN QDQ val: 0.65461206436157227 (err 97.345390319824219) +// CPU QDQ val: 98.002593994140625 (err 0.002593994140625) +TEST_F(QnnHTPBackendTests, DISABLED_MatMulOp_HTP_A16_W16) { + std::vector input0_data = {-10.0f, -4.0f, -2.0f, 0.0f, 5.0f, 10.0f}; + std::vector input1_data = {-10.0f, -6.0f, -1.0f, 0.0f, 3.0f, 10.0f}; + RunQDQMatMulOpOpTest(TestInputDef({2, 3}, false, input0_data), + TestInputDef({3, 2}, true, input1_data), + ExpectedEPNodeAssignment::All, + 18, + true); // Use com.microsoft Q/DQ ops +} + +// Test 8-bit QDQ MatMul broadcasting TEST_F(QnnHTPBackendTests, MatMulOp_Broadcast) { - RunQDQMatMulOpOpTest(TestInputDef({28, 1, 64}, false, -10.0f, 10.0f), - TestInputDef({64, 32}, false, -10.0f, 10.0f), - ExpectedEPNodeAssignment::All, 18); + RunQDQMatMulOpOpTest(TestInputDef({28, 1, 64}, false, -10.0f, 10.0f), + TestInputDef({64, 32}, false, -10.0f, 10.0f), + ExpectedEPNodeAssignment::All, 18); +} + +// Test 16-bit QDQ MatMul broadcasting +// TODO: Inaccuracy detected for output 'output', element 0. +// Output quant params: scale=0.0028538699261844158, zero_point=6050. 
+// Expected val: 169.76341247558594 +// QNN QDQ val: -16.675161361694336 (err 186.43856811523438) +// CPU QDQ val: 169.762451171875 (err 0.0009613037109375) +TEST_F(QnnHTPBackendTests, DISABLED_MatMulOp_Broadcast_A16_W16) { + std::vector input_a = GetFloatDataInRange(-10.0f, 10.0f, 28 * 64); + std::vector input_b = GetFloatDataInRange(-10.0f, 10.0f, 64 * 32); + + RunQDQMatMulOpOpTest(TestInputDef({28, 1, 64}, false, input_a), + TestInputDef({64, 32}, true, input_b), + ExpectedEPNodeAssignment::All, + 18, + true); // Use com.microsoft Q/DQ ops } #endif // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__) diff --git a/onnxruntime/test/providers/qnn/qnn_basic_test.cc b/onnxruntime/test/providers/qnn/qnn_basic_test.cc index 80b929e9dafbe..a441e828c0cc6 100644 --- a/onnxruntime/test/providers/qnn/qnn_basic_test.cc +++ b/onnxruntime/test/providers/qnn/qnn_basic_test.cc @@ -260,8 +260,8 @@ TEST_F(QnnCPUBackendTests, TestNHWCResizeShapeInference_sizes_opset18) { TEST_F(QnnHTPBackendTests, TestNHWCResizeShapeInference_qdq_sizes_opset18) { RunNHWCResizeModel(ORT_MODEL_FOLDER "nhwc_resize_sizes_opset18.quant.onnx", true); } -#endif // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__) +#endif // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__) #endif // !defined(ORT_MINIMAL_BUILD) } // namespace test diff --git a/onnxruntime/test/providers/qnn/qnn_test_utils.cc b/onnxruntime/test/providers/qnn/qnn_test_utils.cc index 548f80675a622..724e9a11cd781 100644 --- a/onnxruntime/test/providers/qnn/qnn_test_utils.cc +++ b/onnxruntime/test/providers/qnn/qnn_test_utils.cc @@ -116,7 +116,8 @@ void InferenceModel(const std::string& model_data, const char* log_id, ASSERT_STATUS_OK(session_object.Run(run_options, feeds, output_names, &output_vals)); } -NodeArg* MakeTestQDQBiasInput(ModelTestBuilder& builder, const TestInputDef& bias_def, float bias_scale) { +NodeArg* MakeTestQDQBiasInput(ModelTestBuilder& builder, const TestInputDef& bias_def, float 
bias_scale, + bool use_contrib_qdq) { NodeArg* bias_int32 = nullptr; // Bias must be int32 to be detected as a QDQ node unit. @@ -124,7 +125,8 @@ NodeArg* MakeTestQDQBiasInput(ModelTestBuilder& builder, const TestInputDef bias_int32_def(bias_def.GetShape(), bias_def.IsInitializer(), static_cast(rand_info.min / bias_scale), + TestInputDef bias_int32_def(bias_def.GetShape(), bias_def.IsInitializer(), + static_cast(rand_info.min / bias_scale), static_cast(rand_info.max / bias_scale)); bias_int32 = MakeTestInput(builder, bias_int32_def); } else { @@ -143,7 +145,7 @@ NodeArg* MakeTestQDQBiasInput(ModelTestBuilder& builder, const TestInputDef(bias_int32, bias_scale, 0, bias); + builder.AddDequantizeLinearNode(bias_int32, bias_scale, 0, bias, use_contrib_qdq); return bias; } diff --git a/onnxruntime/test/providers/qnn/qnn_test_utils.h b/onnxruntime/test/providers/qnn/qnn_test_utils.h index 1b0b85319918f..fd572fa17f2b1 100644 --- a/onnxruntime/test/providers/qnn/qnn_test_utils.h +++ b/onnxruntime/test/providers/qnn/qnn_test_utils.h @@ -266,6 +266,8 @@ inline void TestQDQModelAccuracy(const GetTestModelFn& f32_model_fn, const GetTe std::vector output_names; InferenceModel(f32_model_data, "f32_model_logger", nullptr, ExpectedEPNodeAssignment::All, f32_helper.feeds_, output_names, cpu_f32_outputs); + ASSERT_FALSE(cpu_f32_outputs.empty()); + const size_t num_outputs = cpu_f32_outputs.size(); // Compute output range(s) and quantization params. @@ -432,7 +434,8 @@ inline NodeArg* MakeTestInput(ModelTestBuilder& builder, const TestInputDef manual quantization (int32) => DQ => final float bias -NodeArg* MakeTestQDQBiasInput(ModelTestBuilder& builder, const TestInputDef& bias_def, float bias_scale); +NodeArg* MakeTestQDQBiasInput(ModelTestBuilder& builder, const TestInputDef& bias_def, float bias_scale, + bool use_contrib_qdq = false); /** * Returns a function that builds a model with a single operator with N inputs of the same element type. 
@@ -479,9 +482,10 @@ template inline GetTestQDQModelFn BuildQDQOpTestCase(const std::string& op_type, const std::vector>& input_defs, const std::vector& attrs, - const std::string& op_domain = kOnnxDomain) { - return [op_type, input_defs, attrs, op_domain](ModelTestBuilder& builder, - std::vector>& output_qparams) { + const std::string& op_domain = kOnnxDomain, + bool use_contrib_qdq = false) { + return [op_type, input_defs, attrs, op_domain, + use_contrib_qdq](ModelTestBuilder& builder, std::vector>& output_qparams) { std::vector op_inputs; op_inputs.reserve(input_defs.size()); @@ -489,7 +493,7 @@ inline GetTestQDQModelFn BuildQDQOpTestCase(const std::string& op_ty NodeArg* input = MakeTestInput(builder, input_def); QuantParams input_qparams = GetTestInputQuantParams(input_def); NodeArg* input_after_qdq = AddQDQNodePair(builder, input, input_qparams.scale, - input_qparams.zero_point); + input_qparams.zero_point, use_contrib_qdq); op_inputs.push_back(input_after_qdq); } @@ -503,7 +507,7 @@ inline GetTestQDQModelFn BuildQDQOpTestCase(const std::string& op_ty // op_output -> Q -> DQ -> output AddQDQNodePairWithOutputAsGraphOutput(builder, op_output, output_qparams[0].scale, - output_qparams[0].zero_point); + output_qparams[0].zero_point, use_contrib_qdq); }; } @@ -563,4 +567,4 @@ bool ReduceOpHasAxesInput(const std::string& op_type, int opset_version); } // namespace test } // namespace onnxruntime -#endif // !defined(ORT_MINIMAL_BUILD) \ No newline at end of file +#endif // !defined(ORT_MINIMAL_BUILD) diff --git a/onnxruntime/test/providers/qnn/reduce_op_test.cc b/onnxruntime/test/providers/qnn/reduce_op_test.cc index c3c2b578a1bd0..57252f93492e5 100644 --- a/onnxruntime/test/providers/qnn/reduce_op_test.cc +++ b/onnxruntime/test/providers/qnn/reduce_op_test.cc @@ -648,4 +648,4 @@ TEST_F(QnnHTPBackendTests, ReduceMeanS8Opset18) { } // namespace test } // namespace onnxruntime -#endif \ No newline at end of file +#endif diff --git 
a/onnxruntime/test/providers/qnn/simple_op_htp_test.cc b/onnxruntime/test/providers/qnn/simple_op_htp_test.cc index eed12af3c703c..63498982930f5 100644 --- a/onnxruntime/test/providers/qnn/simple_op_htp_test.cc +++ b/onnxruntime/test/providers/qnn/simple_op_htp_test.cc @@ -104,6 +104,7 @@ static void RunQDQOpTest(const std::string& op_type, int opset_version, ExpectedEPNodeAssignment expected_ep_assignment, const std::string& op_domain = kOnnxDomain, + bool use_contrib_qdq = false, float fp32_abs_err = 1e-4f) { ProviderOptions provider_options; #if defined(_WIN32) @@ -113,7 +114,7 @@ static void RunQDQOpTest(const std::string& op_type, #endif TestQDQModelAccuracy(BuildOpTestCase(op_type, input_defs, attrs, op_domain), - BuildQDQOpTestCase(op_type, input_defs, attrs, op_domain), + BuildQDQOpTestCase(op_type, input_defs, attrs, op_domain, use_contrib_qdq), provider_options, opset_version, expected_ep_assignment, @@ -151,6 +152,17 @@ TEST_F(QnnHTPBackendTests, UnaryOp_Sigmoid) { ExpectedEPNodeAssignment::All); } +// Tests accuracy of 16-bit QDQ Sigmoid. +TEST_F(QnnHTPBackendTests, UnaryOp_Sigmoid_U16) { + RunQDQOpTest("Sigmoid", + {TestInputDef({1, 2, 3}, false, GetFloatDataInRange(-10.0f, 10.0f, 6))}, + {}, + 13, + ExpectedEPNodeAssignment::All, + kOnnxDomain, + true); // Use MS domain Q/DQ ops +} + // Test the accuracy of QDQ Tanh. TEST_F(QnnHTPBackendTests, UnaryOp_Tanh) { RunQDQOpTest("Tanh", @@ -160,6 +172,17 @@ TEST_F(QnnHTPBackendTests, UnaryOp_Tanh) { ExpectedEPNodeAssignment::All); } +// Tests accuracy of 16-bit QDQ Tanh. +TEST_F(QnnHTPBackendTests, UnaryOp_Tanh_U16) { + RunQDQOpTest("Tanh", + {TestInputDef({1, 2, 3}, false, GetFloatDataInRange(-10.0f, 10.0f, 6))}, + {}, + 13, + ExpectedEPNodeAssignment::All, + kOnnxDomain, + true); // Use MS domain Q/DQ ops +} + // Check that QNN compiles DQ -> Gelu -> Q as a single unit. // Use an input of rank 3. 
TEST_F(QnnHTPBackendTests, UnaryOp_Gelu) { @@ -171,6 +194,24 @@ TEST_F(QnnHTPBackendTests, UnaryOp_Gelu) { kMSDomain); // GeLu is a contrib op. } +// Tests accuracy of 16-bit QDQ GeLu. +// TODO(adrianlizarraga): Inaccuracy detected for output 'output', element 5. +// Output quant params: scale=0.00015259021893143654, zero_point=0. +// Expected val: 10 +// QNN QDQ val: 9.997406005859375 (err 0.002593994140625) +// CPU QDQ val: 9.999847412109375 (err 0.000152587890625) +TEST_F(QnnHTPBackendTests, UnaryOp_Gelu_U16) { + const std::vector input_data = {-10.0f, -8.4f, 0.0f, 4.3f, 7.1f, 10.0f}; + RunQDQOpTest("Gelu", + {TestInputDef({1, 2, 3}, false, input_data)}, + {}, + 11, + ExpectedEPNodeAssignment::All, + kMSDomain, // GeLu is a contrib op. + true, // Use MS domain Q/DQ ops. + 0.0025f); // TODO(adrianlizarraga): Accuracy +} + // Check that QNN compiles DQ -> Elu -> Q as a single unit. // Use an input of rank 3. TEST_F(QnnHTPBackendTests, UnaryOp_Elu) { @@ -181,6 +222,23 @@ TEST_F(QnnHTPBackendTests, UnaryOp_Elu) { ExpectedEPNodeAssignment::All); } +// Tests accuracy of 16-bit QDQ Elu. +// TODO(adrianlizarraga): Re-enable. This works on QNN SDK 2.14.1! +// Inaccuracy detected for output 'output', element 1. +// Output quant params: scale=0.00011093531065853313, zero_point=8992. +// Expected val: -0.99751651287078857 +// QNN QDQ val: 6.2726154327392578 (err 7.2701320648193359) +// CPU QDQ val: -0.99753034114837646 (err 1.3828277587890625e-05) +TEST_F(QnnHTPBackendTests, DISABLED_UnaryOp_Elu_U16) { + RunQDQOpTest("Elu", + {TestInputDef({1, 2, 3}, false, GetFloatDataInRange(-10.0f, 10.0f, 6))}, + {}, + 11, + ExpectedEPNodeAssignment::All, + kOnnxDomain, + true); +} + // Tests accuracy of QDQ Relu // TODO: Relu does not set negative values to zero! // Could be due to ORT's ReluQuantFusion! 
@@ -208,6 +266,24 @@ TEST_F(QnnHTPBackendTests, UnaryOp_HardSwish) { ExpectedEPNodeAssignment::All); } +// Tests accuracy of 16-bit QDQ HardSwish +// TODO(adrianlizarraga): Inaccuracy detected for output 'output', element 5. +// Output quant params: scale=0.00015259021893143654, zero_point=0. +// Expected val: 10 +// QNN QDQ val: 9.999237060546875 (err 0.000762939453125) +// CPU QDQ val: 9.999847412109375 (err 0.000152587890625) +TEST_F(QnnHTPBackendTests, UnaryOp_HardSwish_U16) { + const std::vector input_data = {-10.0f, -8.4f, 0.0f, 4.3f, 7.1f, 10.0f}; + RunQDQOpTest("HardSwish", + {TestInputDef({1, 2, 3}, false, input_data)}, + {}, + 14, + ExpectedEPNodeAssignment::All, + kOnnxDomain, + true, + 0.001f); // TODO(adrianlizarraga): Remove additional tolerance needed for inaccuracy +} + // Check that QNN compiles DQ -> Atan -> Q as a single unit. // Use an input of rank 3. TEST_F(QnnHTPBackendTests, UnaryOp_Atan) { @@ -218,6 +294,24 @@ TEST_F(QnnHTPBackendTests, UnaryOp_Atan) { ExpectedEPNodeAssignment::All); } +// Tests accuracy of 16-bit QDQ Atan +// TODO(adrianlizarraga): Inaccuracy detected for output 'output', element 1. +// Output quant params: scale=4.4895936298416927e-05, zero_point=32768. +// Expected val: -1.4219063520431519 +// QNN QDQ val: -1.4220787286758423 (err 0.00017237663269042969) +// CPU QDQ val: -1.4218991994857788 (err 7.152557373046875e-06) +TEST_F(QnnHTPBackendTests, UnaryOp_Atan_U16) { + const std::vector input_data = GetFloatDataInRange(-10.0f, 10.0f, 6); + RunQDQOpTest("Atan", + {TestInputDef({1, 2, 3}, false, input_data)}, + {}, + 14, + ExpectedEPNodeAssignment::All, + kOnnxDomain, // Atan domain + true, // Q/DQ op domain is com.microsoft + 1.8e-4f); +} + // Check that QNN compiles DQ -> Asin -> Q as a single unit. // Use an input of rank 3. 
TEST_F(QnnHTPBackendTests, UnaryOp_Asin) { @@ -238,6 +332,18 @@ TEST_F(QnnHTPBackendTests, UnaryOp_Sign) { ExpectedEPNodeAssignment::All); } +// Tests accuracy of 16-bit QDQ Sign +TEST_F(QnnHTPBackendTests, UnaryOp_Sign_U16) { + const std::vector input_data = GetFloatDataInRange(-10.0f, 10.0f, 6); + RunQDQOpTest("Sign", + {TestInputDef({1, 2, 3}, false, input_data)}, + {}, + 13, + ExpectedEPNodeAssignment::All, + kOnnxDomain, // Sign op domain + true); // Use com.microsoft Q/DQ op domains +} + // Check that QNN compiles DQ -> Sin -> Q as a single unit. // Use an input of rank 3. TEST_F(QnnHTPBackendTests, UnaryOp_Sin) { @@ -260,7 +366,7 @@ TEST_F(QnnHTPBackendTests, UnaryOp_Cos) { // Check that QNN compiles DQ -> Cos -> Q as a single unit. // Use an input of rank 3. -TEST_F(QnnHTPBackendTests, UnaryOp_Cos_Inaccurate) { +TEST_F(QnnHTPBackendTests, UnaryOp_Cos_InaccurateFixed) { RunQDQOpTest("Cos", {TestInputDef({1, 2, 3}, false, {-3.14159f, -1.88436f, -0.542863f, 0.0f, 1.05622f, 3.14159f})}, {}, @@ -326,6 +432,18 @@ TEST_F(QnnHTPBackendTests, UnaryOp_Round) { ExpectedEPNodeAssignment::All); } +// Tests accuracy of 16-bit QDQ Log +TEST_F(QnnHTPBackendTests, UnaryOp_Log_U16) { + const std::vector input_data = GetFloatDataInRange(1.0f, 128.0f, 6); + RunQDQOpTest("Log", + {TestInputDef({1, 2, 3}, false, input_data)}, + {}, + 11, + ExpectedEPNodeAssignment::All, + kOnnxDomain, // Log op domain + true); // Use com.microsoft domain for Q/DQ ops +} + // Check that QNN compiles DQ -> Softmax -> Q as a single unit. // Test that the default axis (-1) for SoftMax opset 13 works. 
TEST_F(QnnHTPBackendTests, UnaryOp_Softmax13_DefaultAxis) { @@ -336,6 +454,18 @@ TEST_F(QnnHTPBackendTests, UnaryOp_Softmax13_DefaultAxis) { ExpectedEPNodeAssignment::All); } +// Tests accuracy of 16-bit QDQ Softmax (opset 13) with default axis +TEST_F(QnnHTPBackendTests, UnaryOp_Softmax13_U16_DefaultAxis) { + const std::vector input_data = GetFloatDataInRange(-5.0f, 5.0f, 6); + RunQDQOpTest("Softmax", + {TestInputDef({1, 2, 3}, false, input_data)}, + {}, // Uses default axis of -1 for opset 13 + 13, + ExpectedEPNodeAssignment::All, + kOnnxDomain, // Sofmax's domain + true); // Use com.microsoft domain for Q/DQ ops +} + // Check that QNN compiles DQ -> Softmax -> Q as a single unit. // Test that an axis != -1 is not supported. TEST_F(QnnHTPBackendTests, UnaryOp_Softmax13_UnsupportedAxis) { @@ -410,7 +540,7 @@ TEST_F(QnnHTPBackendTests, UnaryOp_LogSoftmax11_SetValidAxis) { ExpectedEPNodeAssignment::All); } -// Test QDQ Abs op. +// Test accuracy of QDQ Abs op. TEST_F(QnnHTPBackendTests, UnaryOp_Abs) { RunQDQOpTest("Abs", {TestInputDef({1, 2, 3}, false, GetFloatDataInRange(-10.0f, 10.0f, 6))}, @@ -419,7 +549,19 @@ TEST_F(QnnHTPBackendTests, UnaryOp_Abs) { ExpectedEPNodeAssignment::All); } -// Test QDQ Ceil op. +// Test accuracy of 16-bit QDQ Abs op. +TEST_F(QnnHTPBackendTests, UnaryOp_Abs_U16) { + const std::vector input_data = GetFloatDataInRange(-10.0f, 10.0f, 6); + RunQDQOpTest("Abs", + {TestInputDef({1, 2, 3}, false, input_data)}, + {}, + 13, + ExpectedEPNodeAssignment::All, + kOnnxDomain, // Abs op's domain + true); // Use com.microsoft domain for Q/DQ ops +} + +// Test accuracy of QDQ Ceil op. TEST_F(QnnHTPBackendTests, UnaryOp_Ceil) { const std::vector input_data = GetFloatDataInRange(-12.0f, 12.0f, 6); RunQDQOpTest("Ceil", @@ -429,6 +571,18 @@ TEST_F(QnnHTPBackendTests, UnaryOp_Ceil) { ExpectedEPNodeAssignment::All); } +// Test accuracy of 16-bit QDQ Ceil op. 
+TEST_F(QnnHTPBackendTests, UnaryOp_Ceil_U16) { + const std::vector input_data = GetFloatDataInRange(-12.0f, 12.0f, 6); + RunQDQOpTest("Ceil", + {TestInputDef({1, 2, 3}, false, input_data)}, + {}, + 13, + ExpectedEPNodeAssignment::All, + kOnnxDomain, // Ceil op's domain + true); // Use com.microsoft domain for Q/DQ ops +} + // Test QDQ Floor op. TEST_F(QnnHTPBackendTests, UnaryOp_Floor) { const std::vector input_data = GetFloatDataInRange(-12.0f, 12.0f, 6); @@ -457,6 +611,26 @@ TEST_F(QnnHTPBackendTests, DepthToSpaceOp_CRD) { ExpectedEPNodeAssignment::All); } +// Test 16-bit QDQ DepthToSpace. +TEST_F(QnnHTPBackendTests, DepthToSpaceOp_U16_CRD) { + const std::vector X = {0., 1., 2., + 3., 4., 5., + 9., 10., 11., + 12., 13., 14., + 18., 19., 20., + 21., 22., 23., + 27., 28., 29., + 30., 31., 32.}; + RunQDQOpTest("DepthToSpace", + {TestInputDef({1, 4, 2, 3}, false, X)}, + {utils::MakeAttribute("blocksize", static_cast(2)), + utils::MakeAttribute("mode", "CRD")}, + 11, + ExpectedEPNodeAssignment::All, + kOnnxDomain, // Op's domain + true); // Use com.microsoft domain for Q/DQ ops +} + // Test QDQ DepthToSpace. TEST_F(QnnHTPBackendTests, DepthToSpaceOp_DCR) { const std::vector X = {0., 1., 2., @@ -489,6 +663,22 @@ TEST_F(QnnHTPBackendTests, SpaceToDepthOp) { ExpectedEPNodeAssignment::All); } +// Test 16-bit QDQ SpaceToDepth. 
+TEST_F(QnnHTPBackendTests, SpaceToDepthOp_U16) { + const std::vector X = {0.0f, 0.1f, 0.2f, 0.3f, + 1.0f, 1.1f, 1.2f, 1.3f, + + 2.0f, 2.1f, 2.2f, 2.3f, + 3.0f, 3.1f, 3.2f, 3.3f}; + RunQDQOpTest("SpaceToDepth", + {TestInputDef({1, 2, 2, 4}, false, X)}, + {utils::MakeAttribute("blocksize", static_cast(2))}, + 11, + ExpectedEPNodeAssignment::All, + kOnnxDomain, // Op's domain + true); // Use com.microsoft domain for Q/DQ ops +} + // Run QDQ model on HTP twice // 1st run will generate the Qnn context cache binary file // 2nd run will load and run from Qnn context cache binary file @@ -561,7 +751,7 @@ TEST_F(QnnHTPBackendTests, QuantAccuracyTest) { ExpectedEPNodeAssignment::All); } -// Test QDQ Add +// Test 8-bit QDQ Add TEST_F(QnnHTPBackendTests, BinaryOp_Add4D) { RunQDQOpTest("Add", {TestInputDef({1, 2, 2, 2}, false, -10.0f, 10.0f), @@ -571,7 +761,20 @@ TEST_F(QnnHTPBackendTests, BinaryOp_Add4D) { ExpectedEPNodeAssignment::All); } -// Test QDQ Sub +// Test 16-bit QDQ Add +TEST_F(QnnHTPBackendTests, BinaryOp_Add4D_U16) { + std::vector input_data = GetFloatDataInRange(-10.0f, 10.0f, 8); + RunQDQOpTest("Add", + {TestInputDef({1, 2, 2, 2}, false, input_data), + TestInputDef({1, 2, 2, 2}, false, input_data)}, + {}, + 17, + ExpectedEPNodeAssignment::All, + kOnnxDomain, + true); // Use com.microsoft Q/DQ ops +} + +// Test 8-bit QDQ Sub TEST_F(QnnHTPBackendTests, BinaryOp_Sub4D) { RunQDQOpTest("Sub", {TestInputDef({1, 3, 8, 8}, false, -10.0f, 10.0f), @@ -581,6 +784,20 @@ TEST_F(QnnHTPBackendTests, BinaryOp_Sub4D) { ExpectedEPNodeAssignment::All); } +// Test 16-bit QDQ Sub +TEST_F(QnnHTPBackendTests, BinaryOp_Sub4D_U16) { + std::vector input0_data = GetFloatDataInRange(-10.0f, 10.0f, 8); + std::vector input1_data = GetFloatDataInRange(0.0f, 20.0f, 8); + RunQDQOpTest("Sub", + {TestInputDef({1, 2, 2, 2}, false, input0_data), + TestInputDef({1, 2, 2, 2}, false, input1_data)}, + {}, + 17, + ExpectedEPNodeAssignment::All, + kOnnxDomain, + true); // Use com.microsoft Q/DQ ops +} + 
TEST_F(QnnHTPBackendTests, BinaryOp_Sub4D_LargeInputs) { RunQDQOpTest("Sub", {TestInputDef({1, 3, 768, 1152}, false, -1.0f, 1.0f), @@ -656,6 +873,20 @@ TEST_F(QnnHTPBackendTests, BinaryOp_Div4D_SmallInputs) { ExpectedEPNodeAssignment::All); } +// Test 16-bit QDQ Sub with small input values. +TEST_F(QnnHTPBackendTests, BinaryOp_Div4D_U16_SmallInputs) { + std::vector input0_data = {-10.0f, -8.0f, -1.0f, 0.0f, 1.0f, 2.1f, 8.0f, 10.0f}; + std::vector input1_data = {5.0f, 4.0f, 1.0f, 1.0f, 1.0f, 4.0f, 4.0f, 5.0f}; + RunQDQOpTest("Div", + {TestInputDef({1, 2, 2, 2}, false, input0_data), + TestInputDef({1, 2, 2, 2}, false, input1_data)}, + {}, + 17, + ExpectedEPNodeAssignment::All, + kOnnxDomain, + true); // Use com.microsoft Q/DQ ops +} + // TODO: Enable when this is fixed. // QNN v2.13: Inaccuracy detected for output 'output', element 2551923. // Output quant params: scale=4100.92626953125, zero_point=126. @@ -680,7 +911,7 @@ TEST_F(QnnHTPBackendTests, BinaryOp_Div4D_Broadcast) { ExpectedEPNodeAssignment::All); } -// Test QDQ Mul +// Test 8-bit QDQ Mul TEST_F(QnnHTPBackendTests, BinaryOp_Mul4D) { std::vector input_data = GetFloatDataInRange(-10.0, 10.0f, 8); RunQDQOpTest("Mul", @@ -691,6 +922,19 @@ TEST_F(QnnHTPBackendTests, BinaryOp_Mul4D) { ExpectedEPNodeAssignment::All); } +// Test 16-bit QDQ Mul +TEST_F(QnnHTPBackendTests, BinaryOp_Mul4D_U16) { + std::vector input_data = GetFloatDataInRange(-10.0f, 10.0f, 8); + RunQDQOpTest("Mul", + {TestInputDef({1, 2, 2, 2}, false, input_data), + TestInputDef({1, 2, 2, 2}, false, input_data)}, + {}, + 17, + ExpectedEPNodeAssignment::All, + kOnnxDomain, + true); // Use com.microsoft Q/DQ ops +} + // Test And TEST_F(QnnHTPBackendTests, BinaryOp_And4D) { RunOpTest("And", @@ -711,7 +955,7 @@ TEST_F(QnnHTPBackendTests, BinaryOp_HTP_Or_Unsupported) { ExpectedEPNodeAssignment::None); } -// Test QDQ GridSample with bilinear +// Test 8-bit QDQ GridSample with bilinear TEST_F(QnnHTPBackendTests, GridSample_Bilinear) { 
RunQDQOpTest("GridSample", {TestInputDef({1, 1, 3, 2}, false, GetFloatDataInRange(-10.0f, 10.0f, 6)), @@ -723,7 +967,21 @@ TEST_F(QnnHTPBackendTests, GridSample_Bilinear) { ExpectedEPNodeAssignment::All); } -// Test QDQ GridSample with align corners +// Test 16-bit QDQ GridSample with bilinear +TEST_F(QnnHTPBackendTests, GridSample_U16_Bilinear) { + RunQDQOpTest("GridSample", + {TestInputDef({1, 1, 3, 2}, false, GetFloatDataInRange(-10.0f, 10.0f, 6)), + TestInputDef({1, 2, 4, 2}, false, GetFloatDataInRange(-10.0f, 10.0f, 16))}, + {utils::MakeAttribute("align_corners", static_cast(0)), + utils::MakeAttribute("mode", "bilinear"), + utils::MakeAttribute("padding_mode", "zeros")}, + 17, + ExpectedEPNodeAssignment::All, + kOnnxDomain, + true); // Use com.microsoft Q/DQ ops +} + +// Test 8-bit QDQ GridSample with align corners TEST_F(QnnHTPBackendTests, GridSample_AlignCorners) { RunQDQOpTest("GridSample", {TestInputDef({1, 1, 3, 2}, false, GetFloatDataInRange(-10.0f, 10.0f, 6)), @@ -735,6 +993,20 @@ TEST_F(QnnHTPBackendTests, GridSample_AlignCorners) { ExpectedEPNodeAssignment::All); } +// Test 16-bit QDQ GridSample with align corners +TEST_F(QnnHTPBackendTests, GridSample_U16_AlignCorners) { + RunQDQOpTest("GridSample", + {TestInputDef({1, 1, 3, 2}, false, GetFloatDataInRange(-10.0f, 10.0f, 6)), + TestInputDef({1, 2, 4, 2}, false, GetFloatDataInRange(-10.0f, 10.0f, 16))}, + {utils::MakeAttribute("align_corners", static_cast(1)), + utils::MakeAttribute("mode", "bilinear"), + utils::MakeAttribute("padding_mode", "zeros")}, + 17, + ExpectedEPNodeAssignment::All, + kOnnxDomain, + true); // Use com.microsoft Q/DQ ops +} + // Test QDQ GridSample with padding mode: border // Inaccuracy detected for output 'output', element 0. // Output quant params: scale=0.046370312571525574, zero_point=129. 
@@ -751,7 +1023,7 @@ TEST_F(QnnHTPBackendTests, DISABLED_GridSample_BorderPadding) { ExpectedEPNodeAssignment::All); } -// Test QDQ GridSample with nearest mode +// Test 8-bit QDQ GridSample with nearest mode TEST_F(QnnHTPBackendTests, GridSample_Nearest) { RunQDQOpTest("GridSample", {TestInputDef({1, 1, 3, 2}, false, GetFloatDataInRange(-10.0f, 10.0f, 6)), @@ -761,6 +1033,18 @@ TEST_F(QnnHTPBackendTests, GridSample_Nearest) { ExpectedEPNodeAssignment::All); } +// Test 16-bit QDQ GridSample with nearest mode +TEST_F(QnnHTPBackendTests, GridSample_U16_Nearest) { + RunQDQOpTest("GridSample", + {TestInputDef({1, 1, 3, 2}, false, GetFloatDataInRange(-10.0f, 10.0f, 6)), + TestInputDef({1, 2, 4, 2}, false, GetFloatDataInRange(-10.0f, 10.0f, 16))}, + {utils::MakeAttribute("mode", "nearest")}, + 17, + ExpectedEPNodeAssignment::All, + kOnnxDomain, + true); +} + // Test QDQ GridSample with reflection padding mode // Inaccuracy detected for output 'output', element 2. // Output quant params: scale=0.024269860237836838, zero_point=0. 
@@ -801,4 +1085,4 @@ TEST_F(QnnHTPBackendTests, VariadicOp_Concat_2Inputs_2ndAxis) { } // namespace test } // namespace onnxruntime -#endif \ No newline at end of file +#endif diff --git a/onnxruntime/test/python/quantization/test_qdq.py b/onnxruntime/test/python/quantization/test_qdq.py index 3c5f516af4846..5c2db435d7fb5 100644 --- a/onnxruntime/test/python/quantization/test_qdq.py +++ b/onnxruntime/test/python/quantization/test_qdq.py @@ -566,28 +566,30 @@ def construct_model_conv_relu(self, output_model_path, input_shape, weight_shape onnx.save(model, output_model_path) - def verify(self, per_channel, is_quant_type_int8): + def verify_qdq(self, per_channel, activation_type, weight_type, extra_options=None): np.random.seed(1) model_fp32_path = str(Path(self._tmp_model_dir.name) / f"conv_relu_fp32.{per_channel}.onnx") - model_int8_qdq_path = str(Path(self._tmp_model_dir.name) / f"conv_relu_quant_qdq.{per_channel}.onnx") - model_int8_qop_path = str(Path(self._tmp_model_dir.name) / f"conv_relu_quant_qop.{per_channel}.onnx") + model_qdq_path = str( + Path(self._tmp_model_dir.name) / f"conv_relu_quant_qdq.{activation_type}.{weight_type}.{per_channel}.onnx" + ) data_reader = self.input_feeds(1, {"input": [1, 8, 33, 33]}) self.construct_model_conv_relu(model_fp32_path, [1, 8, 33, 33], [16, 8, 3, 3], [1, 16, 31, 31]) quantize_static( model_fp32_path, - model_int8_qdq_path, + model_qdq_path, data_reader, quant_format=QuantFormat.QDQ, per_channel=per_channel, reduce_range=per_channel, - activation_type=QuantType.QInt8 if is_quant_type_int8 else QuantType.QUInt8, - weight_type=QuantType.QInt8 if is_quant_type_int8 else QuantType.QUInt8, + activation_type=activation_type, + weight_type=weight_type, + extra_options=extra_options, ) data_reader.rewind() # topo sort check check_op_type_order( self, - model_int8_qdq_path, + model_qdq_path, [ "DequantizeLinear", "QuantizeLinear", @@ -597,9 +599,15 @@ def verify(self, per_channel, is_quant_type_int8): "DequantizeLinear", ], ) - 
check_model_correctness(self, model_fp32_path, model_int8_qdq_path, data_reader.get_next()) + check_model_correctness(self, model_fp32_path, model_qdq_path, data_reader.get_next()) + + def verify_qop(self, per_channel, is_quant_type_int8): + np.random.seed(1) + model_fp32_path = str(Path(self._tmp_model_dir.name) / f"conv_relu_fp32.{per_channel}.onnx") + model_int8_qop_path = str(Path(self._tmp_model_dir.name) / f"conv_relu_quant_qop.{per_channel}.onnx") + data_reader = self.input_feeds(1, {"input": [1, 8, 33, 33]}) + self.construct_model_conv_relu(model_fp32_path, [1, 8, 33, 33], [16, 8, 3, 3], [1, 16, 31, 31]) - data_reader.rewind() quantize_static( model_fp32_path, model_int8_qop_path, @@ -617,10 +625,25 @@ def verify(self, per_channel, is_quant_type_int8): def test_quantize_conv_without_bias(self): # only test cases per_channel=True and reduce_range=True to avoid saturation on avx2 and avx512 for weight type int8 - self.verify(True, True) # per_channel:False, is_quant_type_int8:True + self.verify_qdq(True, QuantType.QInt8, QuantType.QInt8) # per_channel:True + self.verify_qop(True, True) # per_channel:True, is_quant_type_int8:True - self.verify(False, False) # per_channel:False, is_quant_type_int8:False - self.verify(True, False) # per_channel:True, is_quant_type_int8:False + self.verify_qdq(False, QuantType.QUInt8, QuantType.QUInt8) # per_channel:False + self.verify_qop(False, False) # per_channel:False, is_quant_type_int8:False + + self.verify_qdq(True, QuantType.QUInt8, QuantType.QUInt8) # per_channel:True + self.verify_qop(True, False) # per_channel:True, is_quant_type_int8:False + + # 16-bit QDQ via contrib ops + self.verify_qdq(False, QuantType.QUInt16, QuantType.QUInt16, {"UseQDQContribOps": True}) + self.verify_qdq(False, QuantType.QInt16, QuantType.QInt16, {"UseQDQContribOps": True}) + self.verify_qdq(False, QuantType.QUInt16, QuantType.QUInt8, {"UseQDQContribOps": True}) + self.verify_qdq(False, QuantType.QInt16, QuantType.QInt8, {"UseQDQContribOps": 
True}) + + self.verify_qdq(True, QuantType.QUInt16, QuantType.QUInt16, {"UseQDQContribOps": True}) + self.verify_qdq(True, QuantType.QInt16, QuantType.QInt16, {"UseQDQContribOps": True}) + self.verify_qdq(True, QuantType.QUInt16, QuantType.QUInt8, {"UseQDQContribOps": True}) + self.verify_qdq(True, QuantType.QInt16, QuantType.QInt8, {"UseQDQContribOps": True}) def test_quantize_relu_conv(self): float_model_path = str(Path(self._tmp_model_dir.name) / "float_relu_convs_model.onnx") diff --git a/onnxruntime/test/testdata/transform/convert_qdq_ops_to_ms_domain.py b/onnxruntime/test/testdata/transform/convert_qdq_ops_to_ms_domain.py index 3df127f5d356d..f74342403f4c3 100644 --- a/onnxruntime/test/testdata/transform/convert_qdq_ops_to_ms_domain.py +++ b/onnxruntime/test/testdata/transform/convert_qdq_ops_to_ms_domain.py @@ -1,59 +1,154 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- """ Loads a model and updates the domain of QuantizeLinear and DequantizeLinear nodes to 'com.microsoft'. +Optionally updates zero-points to 16bit data types. + This is used to create models for testing QDQ transformations with the contrib QDQ ops. 
-Usage: python3 convert_qdq_ops_to_ms_domain.py +Usage: +python3 convert_qdq_ops_to_ms_domain.py --input_model --output_model --use_16bit_qdq Models created with this script: - qdq_with_multi_consumer_dq_nodes.fixed.qdq_contrib.onnx +- qdq_with_multi_consumer_dq_nodes.fixed.qdq16_contrib.onnx - fusion/constant_folding_dequantizelinear.qdq_contrib.onnx +- fusion/constant_folding_dequantizelinear.qdq16_contrib.onnx - fusion/constant_folding_qdq_node_unit.qdq_contrib.onnx +- fusion/constant_folding_qdq_node_unit.qdq16_contrib.onnx - fusion/constant_folding_qdq_node_unit.graph_output.qdq_contrib.onnx +- fusion/constant_folding_qdq_node_unit.graph_output.qdq16_contrib.onnx """ +from __future__ import annotations + +import argparse import os +import struct import sys import onnx +from onnx import shape_inference QDQ_OPS = ("QuantizeLinear", "DequantizeLinear") - - -def print_usage(prog_name: str): - """ - Prints the program's command-line arguments and usage. - """ - - print(f"Usage: {prog_name} ") - - -def update_qdq_node_domains(graph): - """ - Updates the domain of all QuantizeLinear and DequantizeLinear nodes - in a graph to 'com.microsoft'. - """ +QDQ_CONVERT_TYPES = {onnx.TensorProto.UINT8: onnx.TensorProto.UINT16, onnx.TensorProto.INT8: onnx.TensorProto.INT16} +TYPE_TO_STRUCT_LABEL = { + onnx.TensorProto.UINT8: "B", + onnx.TensorProto.INT8: "b", + onnx.TensorProto.UINT16: "H", + onnx.TensorProto.INT16: "h", +} + + +def convert_initializer_to_16bits(initializer: onnx.TensorProto, target_type: onnx.TensorProto.DataType): + byte_order = ">" if sys.byteorder == "big" else "<" + byte_label = TYPE_TO_STRUCT_LABEL[initializer.data_type] + short_label = TYPE_TO_STRUCT_LABEL[target_type] + + # Do not support external data + if initializer.HasField("data_location") and initializer.data_location == onnx.TensorProto.EXTERNAL: + raise Exception("Do not support initializers with external data") + + # Need to convert raw_data bytes to 16-bit values. 
+ # NOTE: For tensors that use .int32_data instead of .raw_data, we don't need any special handling + # other than updating the data type. This is because the upper 24 bits are already cleared to zero. + if initializer.HasField("raw_data"): + num_byte_vals = len(initializer.raw_data) + + # Extract 8-bit values as int32s + int32_vals = struct.unpack(f"{byte_order}{num_byte_vals}{byte_label}", initializer.raw_data) + + # Repack int32 values as 16-bit values + initializer.raw_data = struct.pack(f"{byte_order}{num_byte_vals}{short_label}", *int32_vals) + + initializer.data_type = target_type + + +def convert_qdq_op_to_16bit( + name_to_initializer: dict[str, onnx.TensorProto], + name_to_values: dict[str, onnx.ValueInfoProto], + name_to_inputs: dict[str, onnx.ValueInfoProto], + name_to_outputs: dict[str, onnx.ValueInfoProto], + node: onnx.NodeProto, +): + zp_input = node.input[2] if len(node.input) > 2 else None + + if zp_input in name_to_initializer: + zp_initializer = name_to_initializer[zp_input] + + zp_target_type = QDQ_CONVERT_TYPES.get(zp_initializer.data_type) + if zp_target_type: + convert_initializer_to_16bits(zp_initializer, zp_target_type) + + if node.op_type == "DequantizeLinear": + input0 = node.input[0] + + if input0 in name_to_initializer: + input_initializer = name_to_initializer[input0] + input_target_type = QDQ_CONVERT_TYPES.get(input_initializer.data_type) + if input_target_type: + convert_initializer_to_16bits(input_initializer, input_target_type) + elif input0 in name_to_values: + input_val = name_to_values[input0] + input_target_type = QDQ_CONVERT_TYPES.get(input_val.type.tensor_type.elem_type) + if input_target_type: + input_val.type.tensor_type.elem_type = input_target_type + elif input0 in name_to_inputs: + input_val = name_to_inputs[input0] + input_target_type = QDQ_CONVERT_TYPES.get(input_val.type.tensor_type.elem_type) + if input_target_type: + input_val.type.tensor_type.elem_type = input_target_type + else: + # QuantizeLinear + output0 = 
node.output[0] + + if output0 in name_to_values: + output_val = name_to_values[output0] + output_target_type = QDQ_CONVERT_TYPES.get(output_val.type.tensor_type.elem_type) + if output_target_type: + output_val.type.tensor_type.elem_type = output_target_type + elif output0 in name_to_outputs: + output_val = name_to_outputs[output0] + output_target_type = QDQ_CONVERT_TYPES.get(output_val.type.tensor_type.elem_type) + if output_target_type: + output_val.type.tensor_type.elem_type = output_target_type + else: + raise Exception("Only support Q/DQ ops with explicit zero-point inputs") + + +def update_qdq_node_domains(graph: onnx.GraphProto, use_16bit_qdq: bool): + name_to_initializer = {initializer.name: initializer for initializer in graph.initializer} + name_to_values = {value.name: value for value in graph.value_info} + name_to_inputs = {g_input.name: g_input for g_input in graph.input} + name_to_outputs = {g_output.name: g_output for g_output in graph.output} for node in graph.node: # Handle subgraphs: for attr in node.attribute: if attr.type == onnx.AttributeProto.GRAPH: - update_qdq_node_domains(attr.g) + update_qdq_node_domains(attr.g, use_16bit_qdq) elif attr.type == onnx.AttributeProto.GRAPHS: for subgraph in attr.graphs: - update_qdq_node_domains(subgraph) + update_qdq_node_domains(subgraph, use_16bit_qdq) # Update Q/DQ domains if node.op_type in QDQ_OPS: node.domain = "com.microsoft" + if use_16bit_qdq: + convert_qdq_op_to_16bit(name_to_initializer, name_to_values, name_to_inputs, name_to_outputs, node) + def main(): - prog_name, *argv = sys.argv + parser = argparse.ArgumentParser(description="Convert Q/DQ ops to com.microsoft domain (or 16-bit)") + parser.add_argument("--input_model", type=str, required=True, help="Input onnx model path") + parser.add_argument("--output_model", type=str, required=False, help="Output onnx model path") + parser.add_argument("--use_16bit_qdq", required=False, action="store_true", help="Convert to 16-bit QDQ") - if len(argv) != 
1: - print_usage(prog_name) - sys.exit(1) + args = parser.parse_args() - model = onnx.load(argv[0]) + model = onnx.load(args.input_model) has_ms_domain = False for opset in model.opset_import: @@ -64,10 +159,18 @@ def main(): if not has_ms_domain: model.opset_import.extend([onnx.helper.make_opsetid("com.microsoft", 1)]) - update_qdq_node_domains(model.graph) + update_qdq_node_domains(model.graph, args.use_16bit_qdq) + model = shape_inference.infer_shapes(model) onnx.checker.check_model(model, True) - base_model_name = os.path.splitext(argv[0])[0] - onnx.save_model(model, base_model_name + ".qdq_contrib.onnx") + + output_model_path = args.output_model + if not output_model_path: + base_model_name = os.path.splitext(args.input_model)[0] + suffix = ".qdq16_contrib" if args.use_16bit_qdq else ".qdq_contrib" + output_model_path = base_model_name + suffix + ".onnx" + + onnx.save_model(model, output_model_path) + print(f"[INFO] Saved model: {output_model_path}") if __name__ == "__main__": diff --git a/onnxruntime/test/testdata/transform/fusion/constant_folding_dequantizelinear.qdq16_contrib.onnx b/onnxruntime/test/testdata/transform/fusion/constant_folding_dequantizelinear.qdq16_contrib.onnx new file mode 100644 index 0000000000000000000000000000000000000000..8fc884024b00f22f1b7d95cc7003004177cf784e GIT binary patch literal 3938 zcmb7{ZBSJA6~@mZnC(qXHydLtIqQHUm1O^i)5+P8GlerP{*+Rn7onSSl3&h+KO_IEF8Rz=&)&fUHD z|3BwB&w0){ckeYnVO`|Yv^raD*rA0yF*P$|wJpcCdi^`gl`fO=jMJw@>=hnI(0g>!GBfX;+Bd3qF^W8z>Ih_69$d+OQb~8z zd4fw#6Q;EFi<~~M&FgUnd_iATNI7p(o;ARts>kIH%})qA_SMRgRJ)iO@Y(%7j~22n zPhEd7c;;hwKO@;gCMCn8`NN@9aaJ4TEXKM$h1TM^mEt=km{TWn-qoz9?zeiZVnZfn zUXcHX(~>z(4~g`M_0K3zv+@WW<*`LjK5Z=PLSHz9zL|4Nny*sLO5Z@YW`ADEa0Pr} zztKe5&njj|I79eSB_X(Sz#J*8#cNSC-c_REy~QGO(KAZ?MT+~7DQTjhYv za71Mnm^qxqVcY@7dNS#f6L1^>=_fKk4#(u@a#Y5_@{!#HYwgmHU9J3DZgV)vteR{a zxH=|Y`3TE{G3Za({^RkGmckw3#`2i7C_`sEF=vNkEN${M+d7F#$UBA;QIM$H*< 
zkWVKOPr`nX&p3YDxc-{#l{}f1F%WJ_4^}}r%i0kXdI>~b?IcmkL3#C%4dQ)bRWr(aN!FBga;_7m)2m)SPC&4(nF5a?9J;Cgfe}xl_ux z9+G#!+(i`gGgPb{jHto53H6$YvK!=1Y%AdO23hyXDi9{In}*dc^z)fNUbypI5nri-(%wDDgqvT&lv}sUp@rlR}(IOWX zV^sW?RI*Wi3->V0R>9>kXHEEM#P5gUO?Xz&FJU@$2$fxA5vL>8f!HRmz%VX*=#Cp8 z3!V)I(rx1&HweoiFdO0D2a6%H*~7gsNKPYYc>}DA_;<78f@=%-F&NbITaU_4tWTlv zZg4epchNPkVSR|)+UTwi$$JOLZCI+ruLU813d88T8^1I7&!YbZIB8V!0{6K_kjGFb zj)t{p(#?Dq<1>Q#qp<%yb<(5s!lZ_39Kv@yT@hiwoxPppGA38$W8$`wNt9_mN8Azm zYlK)IGhbD7UxEAv>jCEfO*FRSooiqnKszVB=V4mf={$jJm(GDFQoj!}_Gc<*6W zs$i67=zA8lLb6^*{Tfi;fm#D(!MFR*fgDXS%-e$VM7 zhl5N(50S%I^rA{GJYq0EPsS>Fc(CfE0(!T0a@T8Mmd9Zir54-KWFPAuBiaV^uLS7^ zD<{dh089OT-Hv80%)&7+6XZ5R^z~F|3Z|`mH_!u(MC{@Ic>!KQ`0JBZhu0Whe?XPP z==NPQTtuG%UnBAJOaBJ7;FOXAT|00kw?k2kG(d!dO-e44Ucnv9Gpw|yodDx zSPwAgJw&OcQwFGYF^b1fvlfm=s6(9370!2&`43QM0@X*zWFx(+-=7kAC0IR<$93*m zz3`84tf%(t$ZZDP&0M>oj)1LWMF^=w)x5-)jC12dUjQI(zO3%2&1~H}wh1cuvoL{?239Bc9i@OU)L`!umRee|3As(zJDi^|1t}+Ao4D2e`oEhiiqhS z$z=92v&mvg*GQ*le;|lr*!6!_4tz z{iyhoa_9N<0Sn!7g??1q=6^Of?9(4ctmds|{zvi$BHZIpz2@-`(_)OYPquX?y5haMm%cNma_Y_a{r4X4oj8AM%Bd)nL&2w7wn)^D6CC=iaIR3&*TLD*ylh literal 0 HcmV?d00001 diff --git a/onnxruntime/test/testdata/transform/fusion/constant_folding_qdq_node_unit.graph_output.qdq16_contrib.onnx b/onnxruntime/test/testdata/transform/fusion/constant_folding_qdq_node_unit.graph_output.qdq16_contrib.onnx new file mode 100644 index 0000000000000000000000000000000000000000..b9cae7f59f8e8bbcb9a1e33e1b7ba6ab1c7f0447 GIT binary patch literal 1433 zcmbVM&2G~`5cZ}ZsZdM&j* za^MxX@)SG-kHDRm-~#KE7_xDQ6j>v!XXodeZ)V4`oE^^CMK3ENCg~M%Zv?%5FF5)E z@Et*i3z@%<1c`Bg`|6vU#c*4{$|up3fUB#oh~PNolu75Qes@gf@m^6zB7si|nHKVK z7H7DSbeiM1NO9l>>l}3t&(=d9?M!&mOXx)KoS#Yfj9^nZPAlcP#0ATH()n#LqYIJ`i~$-nzMy7!=S3WoxJy}AiiqVZ8tL*>(nRqu;41_O3Vux;LK}{|8)Pl4dMNlEGI%LjHj2~@$vYz;AwOoNy^!q67d-q2@yJ_ z-w-Ti-30pnEJw-89O%I{&{;`w=DMp4;%vF`8y1s6lQK1nIDq!w>3gD{Kme`7@DTqq z*G~DrNTFM!(`nZE`-q^%Ykm6NGQB?UZmo8i9v#F9)H=etrZ|?Gr5RstQ;jzc`v=~Z 
zIymiW#vdg-CR6{Og=}-Udb5lghT-qqXjs2tMkb~7neV9>|JgO8#xKL~s33atM&^T` zpPu`B%3W(?nfKk@s_o7m>{`gNpUdR$zLFe-kh#u_2yiDOG2 zSG-u``tO?4mQGag#@hW~pPoAV(p-Ttjr-lq-uJ&e|7U-m|MP#(6)$|z*Y+%5KJeWuR?S(ya?!eZSAEqhzWbW*e#NWz{mPZI zzI5rV*Sxvsu0>1dEuXjYhvqJr_pZ0y^745Dy>ph&oi}&(-95`!&wk|--(UW^e0$~V zXWzAGPXFwwvF6Tt-E- zZwd)FFIqltPTz~KeEOAFf9>5p%f5TrqPzQg`g`WDob@M{&ia9_GxhfMES`7w%4flL zcStYT{|R)z>A68M`#gMKcQHd<^+Ola>mo2d1NOf5xv}?!yU&oly9{z3y1TLWbR%_T z@98>S1hQwq-q*hTg6uh~8+&)@KSTEJHp+SU?#ABJ4%L;tr|oqS7@q-qU-#VD`@-F4 z$lhHBIS<|4*n7H>y0Z6loh|~|GhpxQo+EpA=|4mE?l#JK`0mEu(+<^@y{GMU5g7mE zC9}RcE=d2%+4Fmr&Yj)UyK?sI>t6Ay^PG=wn(lVF6yGx4t@D=t(&_F$-GW~=qgLH5 z|FkQ)=6f!`E@oUDrN4cjeZFS282+$|z4???tZURo7jRCb|7= zG|8UkLR#b5XL=XiQl_R-5`JrA?~V|Pyz4K! z{xVI!Vbn$h)hnO0{%HP0UiC%Yv#C=F zlqa2pXZQ?PeI*8YPOSI@*G{owbA9_i+&OCoZqZYSdG;lCwXQQ6WQK0hjD|W-YPvZS zGaBz{;`Tg@*-cJo;1*rPO8zkuuA7~i(Q==z2d;WvM(wV|c6GO#7qy;Q{~&(Bi(cB* zY|fcG_o^ASdhuniojZ5d&P!*_sP(L?XEO5>s^jG|YI~k;=!;a zk!Kn&YnpszR{~A9we_Tb{;C=H=T8p6-+WF^(Tx1_fb^sfoX=15&s{lBn@su9E&uF>N&_&P7sNI#=A79cnuXR!Dne`R1?62tRfM5Ia_!(Ut@V3>A9q_hwSNExH zHA4ry+MXx;-5k2=@HFYZm`>fqcLomlbg(?X*y-k~6-3Vvp^NL*T}Ec$fd9kesGEFE zkE8$34)~0QdFB7iIJ~B79mo`*5J$>|J&nu}P;KW4&p3qNMG+-K;b=VjFHN^Ax}%|)%}D`S4I=^E5r zH>04YZPf-fFM8q1`LA@g=IX1iyyV4~|IFw# zQvY7f;MFfN#ufj3;`?uSW?5@m)?n|{=)_gvdF-|?rN&cFG)PG`@5>vTTy+nvrI z{a&ZD?hiVhfiHAAXMd~HnVWuZcuCWIF@0Y0;ZEnnuW6dk{!yp%-`?Cb@4T{UzW;YS zoqIC=pMPJ|Z2jh@dF+2}n)m;Fr*riTSJH(&vXTJQ`RJuh^UHbvS9u=_wBHp*`Otsw zbWY~|%lW<}t6Y%p|2?1I7&f{+uNPm^H0yHz_j2tg@_K((_}BUV%URJE{%fc6OKHFQ zvZk4ndt37Us;_ODEopOE`hCw|cRDZ3eEZXWh78$-(yb8Qc==~;dHGBQ=c%pRuPpC8 zr8A}-HyFPu*tkAedBrz2&0GGa(|J)a_3ioo>VMKS-y950eqGc2Wib9Xp~mkAOYh3- z`h1@ASDnto!S0{@ZKw17d9BH{eYyAh>HF2`_p-cRpVypV_tCFznmt!E&0GIVr}Khf zd}XfvO2&WV3ucV*C%0{;UB{FP_|ePX`0A@?ru@3_;?#&Q`JKOc_0{jY{)%tCV)G?e zXkdvmfGY<7h)$Mbo2Gu)hLCYyogP_wkz zl>5hZv$wgu?X|JlpF4Zf??CfN`W?xqA8GDM|FLE$*AL{=_GWXlBR!7f z|5(0FWQ6|aR6dP0XVPL@bFdk0K9un{HEWtxX}LeM|3b4qeecQDzFgUrJ{tqsU|LQD 
zvYzH>dhO4(1I_%*vpW!u<$YgXn*!s8jI+79HT{leT|+G#hnr=&zBw)SXO!{G`=P+F zEq#xs-&o#v=Y2ANhXe0;Ub`|E%Y}aYb zJ&u4R^K-Y(v!^xvZ@oo%aP`j zX#-;)ZI*<_Kaw^#hDxW>{=I?h4Z+dTaLRb|Q1H1TQ1pj?&ZNhJ^u9AKMl;^F=4H8a zJkV|mz19S_GlAg^;fdbRY;oG|%-;j~HrhNIhzGNdBk8{^kUXCDTY|?$;m?uqPH*P^ z@ys|o*In%mg+JKb*W8gg$C`hg84l+Cr^9Kxnwy(n&8WxH>P$FnRk#IJCWGlM&G$45 z(`#FJ>u7pD7VNBT-j;i(TJAZS)g5Wx9n6nqwZqL@GS1%ozqOf@Rcz1b`0!Nv;n49w zdLUQG+xF1oM00&sw5OSyb~x=w)5*;5$XrXavSq>U)UwZX-*K=F7pFZ$-ujB$)@R{GqJ(Et!2=#y!}4B)vWmZrK^U?atVrNS}dr zoV8i)j`SP}4JVtAg*v;Np9_W8XY8ZF%KH327arS`@lOPfErEJGu#AS^MnZx8`F6N@ zAdsI9HfW=f;Ho!RAt^)Yb8qN48R@$>ux!fz$+S3>(U%1S`!d4v+#3odKA15M<{qgW z%=^L2zAX@M4d(6(tZOpo+Vt+v=i?b+e|oLV`_VwWDpwBX&cRk%`m@p_fod?XN7L)E z^!l;%N7u2)2Q9WEpC;3PQ6LzPBz_<>q2sdj*_n|KWehrDSK8bY3J&N0ucgONhC0Jp z>E>|K_Q10zJ?ZIn>ANL&_GPx^;nm~e`GMTooKJ_d!oiIBVCb_XvcqP)E7X0krQJ|k z91T#aYe%@C@kZW)-{AXJ77Z&%6V{ z{=PteAoGyf`*L?A>sc6{+>`HXL-mpTKNOA_%-_kh+LO5+iEN!pyRnSPPHfNpiF_Vw z-k82SGvBVjKbD?b({6k2Y)h|wfni(jEY4r_(R)Y6I~bhN00%>ny@B?0*0w*rwzb$L zPpsrb@W^%?2{uR5>Tu|~E!Sv#d`q@=wYWZzzsDje$Fn|Mz}^*qo(pBh(~8~d&)<>U ze<+f)IZ%&>;-mRQe{Rg*QyFnjJMR8mJ)Is8wfwcM<%;E@{hm;AJW_g~xh?Rl4|cyZ zaI%oY=`oQ}ZV$h03J!KeZD_P!{yN9FG$D3aaKOPA`-=F#S zXY~8izO3=KU}keTd%VT%@!T5bN_H4+mtokm$i>%lzqVs zUBf@XQ6OHM*GTX{Q{$LrX?r}gvP`$OGo4Ah^})`*%!D6s$cDUI?ZNaJ%`AIze_cLr zPH!6MTzhYSaC<6a?+p}ut-XPXHQJg!JJWVB)Lxy@2Xk*l;CU$j+2hHKvOVK3$owmF zZ!}okmHF4@&QNBVZ1rbfdOX%*t#sW)=zJoxvv$L|K9V`;0^Z=3^dHK|+cMwE+?mLW z^`6KqTXS`!g>6f|t;p)=-=(>SqsaQcKzKNGa`zTa`Y?gT0A- z+L&vkWLy4kPyY{RW|;cESjN4XZ*%7P`Cw#K*0nY>@d^i&zM z(aFQ71NFSp-qNka21GGqHXT1eOV~}Y0Pl^J%2yIJGD@W`61hi56Kcrf4A2kKM#%O0=D)jjz}Le2yi>#~-I0`X|pD`q?t z&V5Jbn#|aTLWkaP)$z!e$ga1A`MW~*-J!;@=KBNp+cMVvz$3;Q3)hYX;v-q}nS5Ro z+-=UR_vh37(V%NH((-(MF!VYb$VWr3eJ!uxzdPEsJQ7Yjmp(t2wl`kgRL{^ zc`ST>HrSb)84d*YM*`<)^S_7gWOs2^u{>ie%E~u|s$=;+89(KN`L-jpqj3i^+s5FL z&$uRQ=*_)jv1A(p>0sbH*h2VV+MJ4PZA!}rLoE``54Fa7^ZlXpq=C=n+PZuer}A;Y zem38KBiu3)OizYV!&%FM;BYjOzBe$_Jf{Q82ZLRH|DND|B=8Id;>q;e5ZoTi`;I^) zc3K~L9tmW7Tdv`M-0dZLnQ?fO2iiL$`8<(cw6b`fUSJgtrtj9^ 
z^hD-7lFtXz?s!^0kbBm4Apb>PhjWicJD!$&Co%1sz`HGYKa+PH$~wtC@asgQG7!fz z)4p8W6)f|8jUcuzJdI|o{@mG{xpwC7K)$VNX~X|`Z^qe{*TKxbAtQ?9hSPd~up^rO z@m!gmE9IAwWj^7_tmiDIy=t=sVJOYxLB?d>3CW z3|G5G5>G~ww4{kiveB<^U|iLz_*=~ydsBjJ}5;UDnIZQPou^F*|5e_-2?nOUDZ z1J6hsjmjLY3m#5qyjNx=JJa(-sIoPq4MfAAY-Lyc!iEgx|2q$iT6OqTq^7nO#*fs@+OClNl!PmoyD|xVMVgv8Z^}V?&6Lfblb~f{@jOAIFRlYA8 z^H}bl3N(8H!|BM-8zSFB;p3yt7h9Zc$>+nlySl|D&umMys0@Yt4E)i#pk?3IW_H;h zF_}1OZK%8|ck$pz-uVeUdhxrwP35HA5xMbm#@vr zDkG4&Bl$-A3}mkNg)(c>dwXUk^&^>+kIX;Nf?ht5Z>*`6_l5h;WGs0vSwC4Xd9*FL zwl*+sYS%&si=WP>?TOqw7rO5WM;s08EQzR@S1Us!PdS;-<7qz}$k(Uu!B`Rzp;2)i zk9}|E`dF?k&P(oRZARipPo($O+})7tMey^-I^{Qs%+oXkA( z66-Vjy0n_i`@zgPlJ^IL_pMopv0&pwS`>uhPw@cxT^%}aZ`aAjZizIZO>w?#50}WA_$(ibBbEkU5e!+81-&pSy*B6!*#`@A77 z@62_X8oVU4+LzWM6TJB=`ONPh3v43Z;mo}|axL#Ci?J@FFGw5MIgn9BD z;nnHCHL%HLA5Kda0;e6%S`H>x@j!ILeSu_MMi((33@mGdyH)xAfz0w)dJngdkLUW` z(Sqc6V~h8rX-fhJGma?vBhf!+a_7Fl!P}!j-``x*d~fD|W3J+#eWBJw?84^U?~m3w z9nWA(Am1K8si*m^eBT`kug-V=@ZnavRs>hFNhI-}@Z8R5WYHKq+84Q8l0IvrQ{_Tg z3|Q#P40i>Z@r?V^8Bx@FD(mDy$lCmLKG9lhf*&>*zwXcLOs-63w28pBB3?cZZA-8P z0@;m)X~S;uTg852ZygQR>j{#n7HZAF$eFN>p1`C{M% zbHU29km0wN@xJ^uyNK5~MzaRd!*FQ%!Nk7XqbFB~cD#x=g%aC>=iTW~lZYihn01^D zWp)LJq=LWn&b;pp7W+a!S!Uh@eKOg6uGMJz%NidHZ?KtL!wW0I8TV#3oU$RF@~Mn} zHhtmkSX$G^_heK#{f&X+NH}?M#(W?=bVuNLB$V#U^)10%Us~}0@Pr+hrCGz__#m5d zb$_^OM|gjGIN)He9L*J0Ttsj-qmSp>f$%>|Y}ZAW;K8haG%(?>!L&dhdm(3n4PMk_ zypFfExa-LbAmiKc5%)zdSEmP0mB+dx-}(Ne^YMJ!5*ol9-E?oX)rvr}IlX!^-&Rk z!T4vq$d6_2eeoeHM(`<>+ST(`N(qhS2p$R!zUJPHX-t3pN-mjt17#!N#$y zU?_h6(tLVr+R=IUr1k#5EA#(wpt(JF9}cbGm(jLlh32gt#S?*BtZ{#4rBB4vyFz)n z<=(X07aGgz9tbQWu{6iC;*Vv-_hhxJL#=xPyPZ*cM<;`c^;zfrx%WWedw*u#7kl(r zM(Yne^U{MibXRC`Abn0Xzn_?PbD$C-tq)v3nmOb&>;@eUL<=LEYoc#opVkkB59~-z zWRyAKrQ5RFiO_#CGY1zhmWM);pTVK_TJ2aA8!jpM&Fjv-W%MC0$v+_+8@50n>8E= ztrvv1w?ry8=KG?oo>w*&{t)L@PM4KFm054eHM^H~Zhs(nuoFCxu~1fSU#4L&u*+ZA zv-?11*_K&(2qgS$`q?cWZ9j|n?JdC9%3PD1vlqHI-)OI)tnA+O-4_~@S^ZdjzL#w2 zv3%MbYRa$6kH9)FmwvuKW8D*IXbK!6g58<%WFUtEp?FwEi4EBpjF-1`EPb{FCiZfD 
zM&N@ErQMIDuXSt=MdWW~tBq(^W30LQlb5GjAeBAO?McixZod5Cl&-wy|LJtT{2w}< z?|ebi{4duu%^#(};$PT4f*~Jf7a>zUHbmr=Q^DmFKe1dQ^4`l`F>Nzx;g!>%Usu|_~UaK>re_gK9z!> zKS<%no4=IOo80?@+&h`#jmOe=a|%ALOrKj{n*yL*TabC)o_YRiR@2;+em|A+nz#H# zr}HO)AWg!?@zyfk-0vec5l!1&jj|jU92d=G~Tcd?|%2X@3BrZ8nV;|~VDH)pJYf7|K2 zGX1`s@lO6q*@Ic}!eHpvQ?B#xLyhkY^xu|i?+%s@ z1dE>zhSp_ufBc7?&KG{I(|Jewen;B=a8~;Ld4E|T{+@8@AE(WG!&UFfSf9@7e=l?W zSuk`}u&^cbq-p0*GVT@mygOt3tF*l{6u&9$KbMvF=Kfy=#>X=M7ed8}jJqc7Cvtyt z?tMOR{B7oZP2d_$Y1_Bw`lA^K)<2Qgn#?&A_+J`Ke=!(cnF(JInEpEJ4ADE6GO-rX7dH*#%X+FTYm|2*^E9Xu%(`?-AHllPy^9KV}6ZqMA`k@Y;3c6VfqRk?pI zFdq&)M?=j88S`_2;kLm3@6zT=f#F+lwtJcD(tKZ^Hop}Zzbe-z)9(jD{dZ>k-wO;s zo_ohK{;B8jJz-(op-F`iNeU4)mn9_LYu0!+Hu3@Q!kg|+Dis* z_zFMU@aneA_Ofd}-o4CrMo&$wGTWII#dcf$Gm2toYCbMpg&3i~CJcOEnBg1p{+D5l zt@*4j?{~tOuMM+KhOv9|`qePi4~O~Q9o2u+pQo}iO!+Noe>(3s<^76(-|2k)FGe`S zyeHH4V`13U8ROfd5WW;fd>~`o7NPiJ81d`F$bT5-eR-~}2;;ses`4F~_tzsZJsInf z2-re=@K#ZRhg-N15yY4vxMx^ZeI*{=2kMFL^`8x+CAu z^`4b+fQYjD?)>K1D!ef{1<8em+7}JYkN5J#RF(M8S@3XM>${p zZk6g2mvoh;r&#pa4+Nbj{8ich=R1Dl)zehA=c#)o9c`KX&~#<;Y4Q})U;XIExJ#+9 zyzGk0rY)~b^Yh9pKX$ojymr*p7mUWk?2E^(p5V?zwcDc?MFaMyH^$?W8<0<3pKoLF zIv2*Hv#0K^{go~0S-Tx-(kkaP5+8C$>w#{K_cxdp18Fbv98KSBiRU9d2Rb|H>t-%5}Lw(fOuaIUC4i=Qrltct({4ku4p{Y;ey% zR*A*a-=3Aq8o0Zr{Vp#c4=gWj1{vgy`L9yK-klu}ch&klkZLyBS@l}t5%=LpegSPn z1?v(;%LU6!tGD96t7B0`V8r!lyCK)@Te~(dan94!%EE1_C1*;f~?}^-T6p26Gn$MNjfZ z>k|{L3S25$%#TO#njFd$5BFs(++|;-FMI{tKI3H>7dv#QfmXi5FdT~k$qCxv22{i& z_*funaP)>COZmY$0fS>kN8Qx#-4+h=(@ zl~O9{VNpn(LjT@-&Y4BdfK(s!H7CYI7ca&TX_JLX0QyZb(slQXd!dFGrjC8kr#L~ z*SfSvf9qCFgHlFN(@#sXFLF|2nH#6qo}L}V?a@ziv-o#Y>YG#;>`G5Nd!YaJgj~gFz;3b=sh)Iy)IOHim@LAoyNAoouXH z2o~af$tmG>Rdq(+k_;8g)|;{9w(QEGyEaC!%RU?nVpnfvTFTw{KO9*m`7ipYls zkRIO5()6>BiBf1k)SS*Nn=`_Je5RRdKgC{&dJR&tKlxSK&CbSqlk2J-F>=AGFAg+4 zS?$lH?O^5`NCZ1KBfTX(HU}1ZO0E06RJol@Myo%Zr_y5}y=q^%w9<)IGS&rF8WAjZ zCG4-S$Q5`YA$MfVJ<-540Kettw7VzS&)4OuYWKZuzI`B=+sBp%WmQzWkT8DTyIU&P9yZKPq}QCx zxI8b?$WpD(HJRT>!UJTMKA4}GU{lSGs)vU%i;7j6<#5{6KK*d0_1W}X(!#Bda6#bx zOvc8SRPEFwL 
z?Jmulv{I+HB3iB||8Gi*HJQUclC0?bz(_{yJ{^rdr=Mr%Guh^Os9vr5y#0X!U$co` zRW&-49`-WkBD`vm9OrWcSo;-mxY$Gr~OxFJlY9XX+D*=R!zS6!L%=p08?~eX#l5Z zXcS%(OKDGmFTu9+C{cyS$D_d;a?WK z>;ni?$DuKg6$SCf_RL-$qw@t;M>~SJtOTugS6Z{V_>H$Pn0OG}_+l~<3O#hV-6g0< zg0|)@js)I;Kw=jTr--w_4F=I4%2>l&-|!xA5q#CR^1)O| z6vaV>eyR?5qvab046wk0X;c0d&QX6& zhIrVumtK~O6&(tM?~0T!jL)ySjqZX&F%9m=GrRyY&le!iI|B8}+=l^FH(yy@Pq1CG^L`_(q z@}hXNdRhUgB`NN!g(h*Tu=}E;=m=M6yRv}rs`jJm=G6GB>jynAodtwtenOp>aGpTD zC5>dAv>OcqV?1CKLOJ?;ZLAjDEzWAitN2f3jnnYIS>!iZTULNvikDGJWgX9l9J16b z7_C0h{^yayH$A;NSmM)@-{H);Cf|p$;)=g`YbdoM*YKlWCtA5uv4#`SlWyCP`}mE1 zVtYwiU!X=Ya)cs$OgsDhE|!CBQ}JgUGR10ch-DTN^MdR!(`_OZTFYIu_LrnO?F#0K zJ*}6Q&FA6yiDlRm+M}{4;tkpxr-8~zin4>SgpT~M^6|Bz{nUrE+p4T-V7zx%T6qTv zK5!D!8qw2PCwx0U*I`5CCvwEQ_zZ4TRiog-e5d)XeSP{ZjRz_Qs;i*Ek*q#Slc|D= zM4$|wRY`XsuoVVr5gcdTq?FYw9-tY@GODH<4J|&Hma0*zS_$9rL@W9zd$1$Ah1W>C zXk~t$JWkmX-)(I)XkTC^!JEP*qI$U#Q5x=a4wp{ArKEvIz?V+P;STx5C2ebb&jyGn zNzRJgTOA%)o*rU*X9ZVfWFCHh`lx@~7C+ee*psmbB7Z*OrdUvZBdzN!)TxY4gQ?H0 z$n-?ZIcM@+)v9Vcbt6sC3$mnq1E7c8bf^LtR))bd)1iga>Pq&_FSQntD2Rh z$gSZzT&2HW{A`>i3V_SXc(7_GG8djCdvJ;G{2$1eDoYn;<*x~amgaR&OWC?x+5*1R zX048|CE~LS-yhm63#Yy**ZI9?vZmQ-wKsoh-lc(n%^1r{s=kY~^v8?t3B;;URRXC{ z8qJjrsZm%LsAQekL;4YK(bBRB3v-u_-kP_uD>8{|Grrci zqO=)~r7L9qRPFH_O4`gK3xK2OIsVwj@Pph6X=k13W03<+rK4C>HSs(?x)c>jL)ot3 zw2<8_$*>wLqxoIqFPW2U$kMC{CdAMDMN){0vc1}?xUL!ROG^;)a>#FQJf-3YHkkZZ z49`}8n4MQ!L9W!2J(WR_@d6EOfrDpT_O(uCIvECUD&^E&xF`2RzO1!8IbIDf${Nci z&?Iy%uTovd`rKziiqLORvEI|3r-`Z-T6{msSS1s= z4O!&sPs5ZH6)f~1n^$A90PtRGE}uoasnNh?ZJ@%9@KS5Fj^cf=fQuBC1i5FPdN&gq z6rS|fl1(zp)T+^h^cDWVSJ(hQOygGbg9oLx^WU5-hBXw$iiS9Q3U*M{8VdTU6-+@m zMXBM8s;!mQ$nL;2%#-1gD_32q89-rvd`@O%kj+5D(DJJzeI(FnA-;qxxHy$x%aZ~b zi(T7PGV#F;!4J85i}DA>7B8k;}KHxomwZ0)D3E7~#CdD4+~A~%IDr0%wi zAg)}K5#F2?tVnhaeDo-;=cz9VX6;GAi2XHZ1}oc&UUcfZZ40|p6Is1Tgij;0#E$LGEF0oI8AXm@ zG%>>WSsJR`K%%@+R1ZR zb_Tz!+{*Ch*34>eifqYe;$=FE#h+-`r4E{YQNesmuyrn@EzCW$lS6qCT#e(!|EKfG zKFN`GHqoFg!_H)n@c^Ao^6bxn2`^M^LnDaZODbWutQtL5)_ZOsJ{A1(PRSDqA-5}A 
zdY8>{F4lTZrk6OX@{^)QE33#wJSf&+!%^NCqA)T-2Jph1wg*lwo$qAWL<!@nCU4pAPU2`FoW|&F?IoY5i~rR+!RTnUtxz22Cl_v zi>g~glggvW&+zBV!jbuvX;GC3AZ2@KSbHCA5S=&{9P&hIOj%=lI#y3-!^E+?Zw$Yw z7GPKCUz*8zYF>je&mGs@j&7;vd^7=AjsipCVsNg7szOn9odp8Uy5EJQC1`8V&|40!Y(^ySlAG; zZ;qa@qsm9cLo_%okKSc>;g+o`f0@05GgS=u%PQnjRJ@d?6|Ef3j9cOXJe2uxkaGs& z{B4m6`voi@*zvZLC6)VRg{lHV54l(6IRzL1l&}1 zQ;bwn#sCD0vWI{pjkhE?wFH0Hvz!(H?VIVu?i)s#(V zL5vPVcu)oaf8Y(44@H~~mj~i8XibN(6SOT`QBi>%VgF@B@S^-YI7OD_C(tu23N2qg ztqKTy&o-7Gr}s!V`N3iIo?INAOJl*G|MVL9u9&eR0JNO)xd(z_ye6i_E5DKF$T^kXi z)3L0mc05e`59p5CcH-qHV2am3XH>44EY+1o8EY_gO`g7H=ZmECZaljUE}X_9jn=6` z1ijJHzQ()KiX4kmRMt#}f9}d>xsReTYiAdjUV>%XNTz^~=-edvQ`sbThHri8(jUxq zPs-tMxq~_=Cxb;vtl$LBQ3)W;2;l88lo@$$!tqezJipqfGz2<4mFLFHN9l^%1m(iORmF0yR681;OoTh8SZ z-NfF}4};-n7?4r6N_;E|EvXvL9hr5KD)QxZYRBq(RbHSxKVE7@@n#o8@h^;N<;D>y zla(oqqcR&}FO3&PQ@O(KitgP95uAt`(1teM7>Uw)OIR!Xfrg(#Z=6am z&&phqD>S3bwH>iXT3A#k@fFl8tMe|OLHuG2`EIzAkx*x`xIM9Ge0(G8Ko5yn=^GfZ zGJ3E4m>^>@`otU($Ihy>U&|0JEQLv6ahjh>4fC96__f37f)3uqJRv z#}x0oi~C6<9LU?lFmF)}sJMU*E05X}>%{*&lo|y;pc2o_h_x57FH!!UT*rA<@2LU! zvFt-t{mCofC@=XZys|$!r9T*E!ATY=7qi&O8P0dre{=H*4M`WtqLcBey(0D<*QxLp zyRc^^rM(%&GY&z3f9O+IiEoa7owQZ^fQF(7*2q&d3qnr|;aPb~L0|6D^V+F_IF^~o zoHee<6>&E!OQTc-f!-z6=FxLq_7M3|r9;pHFUUpklh6;2?HI8{D9irSBv!6oQ-*VE z_*4a9S#R*rP3ExX>L(^w$6odCI|424t=s|*G~!saqEo`O5D&Tbx6D9#SPPs6TDrjb zRL|gDo&UE74m)h}30BUt@pMHa(66)sX!t5*1Ba{+jXbAQ-cw=;V;`tWPNAi8?9)=mp*xi_3$6ds+FCeE2O*+#Bx^ZbatdH+0}j zcxHzzGV4+iI{m=wASrBGSp^b9R?0_r0-8?XGl*wZ&U&&HZa23T$lFcz_MBzDAo(f( zlzpY&$TE2)wP2D_109* z$o^Pfo^pkvs7=QCY&E}UvpQ?;%Hgz=2U3-}CVk-uT)3<_TOasy9k<{VyuyFx*RqZ* zg_r>cl;4hvaFv`T38W!p-FBy?Crs(<#0T#4x$?(66uJKz9kub5vE+y3GEs$>S2Yp# zs}E!ao)AN`^N$zj{-LY|R`_aGN`^h#*UmLAA^Y@aW&cD42Q#m{%-YBd?XAkBqIsNN z85LO*x`_7!(v8uNxJv9sl6gHix8fL$ zC<#Gfw&}5UCbG!pl8~W{DeClJyui=CFZV&g+aSB1jK=3X7D#9vxS^LyH{%&&k)+}! 
z62+UcbKtY=mH(c91eZJ~eaNP~lX!yE*eg?2jd~({T!cqGcb6SO;r{gR4>W8yJy1C& zPb^iPT6waiX|X!b=)mVH6=@52z(IU7tLIzF45I*DUl5_ScnfspL9UKYR{tq4;R-n} z4Nprt`=+M*u3WE7P5ERjrV%Pm0=Kxf^e8`<)OgCHI(c3i9)J&;nRK!0_C@)To@r&@ z4fhov;tLvt%z$1*02)>d6zae$o`4T~X5<`A*m_YHU%hrDjl^$|vlbiENY<}~>+%ig zD#E4t(XRe_q7RIybLJbdo1&AkV8n{q2;8C;kzbCVa5~;sX-x~-AtQro)Q2-J4P7=% zU)*NJB?s>G<#SorvessXKlAX|X9lO!kdufL`>Jp8nz;nR|Xl@geA}lw_b|`D#`S z^YBgv%Clluc@6l0l=Ek-tuz0{^FXzCqBrDFHxt+b$!^8$Wp0IwPi zge~C7R=hLE~onHHFem6s$ zUMR6Ec1(x&7Q_bf}lt5$Nz>2J#U8o3sZ+7BPpM6-JYy_%S6-Vu>lTSQ>f9V(% z;q;<4h(c&***+B=wP(jyEGeMla4$)ORUAt$%vIW&KWu%lst@>i#lIC43HZsL%e0D?smju=v)_wBPm_%F1QkNIzZzAsMSU zWiWH!8?Kb~pc&BA^S1UxC%!Aa>=1b>+ap;y8D~rA4*T0Mm9}Ep54F~P zS>`XBxHz!P3BB%0j0$2k+IQyfnZTpgf<94&;ukZjRvAWcz3gZ0Z(CXYh6i53ad6<7 zh6AZ?v`d3x{G&O!TTd#kOsl6-P35ZLna_(FY?wV38Q*R3ZP0&VKG``}C%v$RizY!? zltUYymC;B*Puif$9g#>i#(W3#I+&|6GcswV!_2?~5e1_&j$W3(xMU!>xHA~q)1EWp zZL(;zD!CCg+6$30*%3Oba={zJ`3xf);?q>jOqZ)S!U0uJ&(~g%S@^g(o&ILWMJ$z< ztjbKe7}(?uERB>o6E{EC)Uj*7E^x@;h)C#9+{<6Y2P6`vXlPj*+-)9l3r!{8Tsxb* zfBAJMCiqqS16c_+pQaTXEYAEeKRctzY=}{o=2Pu_;>5CUXuxN$eH6BZT-0xZ(A@l> zC4obhPAi!!=MjA7Emf``pZG-;Q6u|OkpNlsY+8Ehy}^icb<6Wjd@Z6DA=zQ$$<-4K z*5}looVTnDn#q2z&d4l;`g|4cc3|wVlhm=u6)XBcB)}7>ooOKPBgxi*l~$gzqK7g{ zNrI@6FX?0_K9qB_Upku8dLkod%SATiT?WrDb@4=0l2qlCtP;-Uv8WN6pZmNICsD=4 zmB%Bq>=C-LEh0C(K|5=W0<4g+S#i82R+M4oo51)x(yvbQs7k=C$kaZt~TgQlB~J1*Jw-wI)Cpk|B(e3OW1jQN=H`qoOLK4Mb^NM5lha; zQVWFso}gPk9(#>9m*&O)i|2_B&>`?Cf%0%w9}ZS*`lMY4Hka#VIwE20sKuo+P4f&^Q(8+vR7^MAoqGUdbQsffJ_y?5OZ1aJq_Sdkr9` z6Y&s9VO=X;=EK1GiQIKkMr8(kgN-hwcb|%6a2OtQwWv^f6)d~~BUMI$zH!dWuOn{B zoYpNe2M2vwlEBxJkE7MqOxQu->#>MP&D{j6=7{%-~7698_@9RE-}xdv>qqO>a&91$L^Un@$&3sx74z>|pWzWaK@g zm2Q#u-4^f1h&}mhjG}%;3H$<8y7Ed^sYZcsh|csG8u~8Fq*BJ-pNvWAE_MkNbedcp z|I$9FCl=!)zKT)Qvx|JmA)jnxo{x)*?7)!*Sl^YNWMCjYWgU0K>yurOfj}j2dGdhu zpex-;7Z)Fk9(nVdWAkfg159ic8N-n{P+bqI{9^8u9YkYr@!(N`MlFAgT^`AL#J9ME z?V+3T6Pp3*cc%wG32*bAJQH8MAZEgY{GEr=o;1P&9_M??rm;^nE&cLn+Bjh^IzpE{ zi5=+Qs{O9mRi&%!nV#y+{cfI`7f(Vidq_5Tr}E8aGpBtz6f{%4^U{pS7UOkKbiv(h 
zJ>1e474wrecxTW2#sNPW?%_g2>k18tUp)tZZ?qkJpeZanEdes}S2-4SH}2C!Xj}S% z*WlMq;G#4DZjzbvTpn<(%5_|5PjX%;)faDmNv_dmxV+-qMfuDcl=k+k88n7^FuDz% z-xF_FG{zH_Em#o>7HzE^y*$Ot+NNxE<#Ng&1D|!+v4l-oo#DU*T2G?KQEElhKYRYP zYzmlF)8LsUS-thKCj6TE?XBUK?^##U;S>|uvtD^Yzd;3ud>T18wE`$^-DoJo<3wTY zL(%1a84oYB*|d}$1^EVXDLFzRe$B3MCJiHM7|y%Dl?N>CXtmBx(kQCs$T_E18YG+q`@g2YqnDS z#Gk|A##xi=d~dvEG&QKKB)?as__k!R=tI=Ou|@`aQF2b&^DAGQzBoxtfKI$6Yp_Op zbmC=CKeLY_V&V-|MyBGZin;2ya#&ov?Y%ygEt6d%zjA0ei-v)zs$aFcSy;AeI4qf| ztj2Jz;9k6>8ij>5kI`HugS@h`jWniN{Q4`e3FhQ^EAIq@A`$n=b6HT3Ssnhs z?Yv+7;pugBJo$zhTI~J&!khd#N}^k5}BUi-_h)6vD3`Z25 ztpfb!7i$$qky~B>PlaWw^C{@bSEAQ>;35o=ZHiyX)3Be#pOd|}iqSv{6XwDZ;!@mU zr8pKgXj8IA;?aUcu%X&kore+RH>?t*@QRA=cm}(4M$eR3W1vRs=CyNK=C z5xvCRvJrB0aI0dG-V%@76{VHfH@rs|vFrS*^4V|@YWe2;+TyHnQ)u>RuE}bY7V!MK z#Tf;?K|zzy2YeWwDmpFBxcqQy!F8f%`Cia2%)AGq%~&2;7#=w647U~SIDdOAD?PP4 zYhTjJ*uvb04^Lmf;doCZyf|0(G@3~d>{`5`<@bE?^zG^7rQe%KeI3G(Vjl?dtdx! zULUwUkrn?==EalqtIUTp!h*E)426Y(#P72CRh9W^aV+0(nBT~HPkQ;qaZf4PpBX(J z(_DT*Z&jq>WUejDI5PHVQ4x%+v)mr;$6MQ?p;hy!pz&-i*_Ye$?QlNhdRiAJ@D=55 zop(ImBl9-}P`JH-@ zYqIEiNh^xOwr~E@>Qx6)Tx@)@P*DVJE#jnc$QTVp$7;*gSsOV*F(XZ#!J}1-j`z(- z3f!$x`I}(F5oR(&;fO|}d;E6={2&6e&t*f+UzWhTnXQoz!8W;D_-2{$Rb^00E?Gx9 z8YE?>%jQ}$-AG4`hd;{3fSRn7C9Rl>e?ueCNvxi^3(oQ_3*V&Q%!NVYls=^I$*9?Q ziKV&elA@&crQ_jDTl57Ld>hj~3lH2FS`;PGpnL#hRV4x~qW9)_?R4Mq#9W?r$b-Bw zt5}+LpU!u`p)DG<$3XYUchMwvRQdL|L>Az3d$a=2|EI%~%OV?27A($Hn$a)l_zj|+ z8Bf%*uH6yvRHX90>2gv-kD$cy{5=uxn@ni6BAR1yX8cs{tquQgYJMT(spy@Xdn)do zEA&K{!D!IM>EqOjeO145Lne7ae%((!1I&v<@#H%rV=y)+Yg2va*P+A;huiyTxGnl& zURuz|hjO(vhDsjKc$Dz~resgZ#bdwFK;3v8l98d7!p!)bjecv%=|4`n{B7DwLq zMAt0LJ=W9j1MW{w`7Mtym--h$@(z_&Gflg-Dt=s)JE~9EK|3h*RK)VY_?7&LO~F9%|A)13T@FYpSNp7_2=nSiZG% zu(PDNh6gMkCwp}?SfH87S@~3=Fp>{StC2&lN@B6*RPv-?64Fd!13@^dV@?l0Y@*%Ez$+NIDWMDc0y?e`8qT! zAKnZy$|~K=i;s98)(Eq*jJ&8txyS!NPu5_4GOXx<+Yd$3coKdskAK4pu-~Z`(Kv8s z_lF%*we6fCob%j_%D)AnClGGT93RcvYoAICTlocEJ&IK%P?434AYahE4bd(! 
z85OqGb>eEVp(=lz2@hym^p>0C8Nm=8M4sm6osVBUEsw^7g^P;4#i%GuAF?z!9}QsJ zs4_LQsjF%caGsb6X3LYt1^i#t6xR*(z&VtcSE z@)qTaUU9Z(M~IJ$)-(*i7gv)*xSWU<<=u$1K}o_vhTBD+*0n3vQl3zR3)}XLkL0^M zI8t;&55f|7#SwDDkK`MR%rfAw(d>eW<#-}h=U~?mZS488=+^4l!SL+lkSAtrjdidh z(V3hmd9(J?YsS+0ed$^8Z0+#ykHI8{n$o*GPZULexZqomS(JuJoN_#4(Wo+tYK=Ur ztfZKZP+LOd!ah!(@-yL%A6=vI>&xRI;jlt4kdM;MtPzjT?f^SYj`5p}I#04<)~b#? z)b^^HY%rp%7*9L?hLgkckQII588DH9nh*VXgCggOM%jVVuH`xNHQ`}HxJCX4FR2(*pfSB4ZHv-aW~Ssa+;Vcn3sEQg$zPvTTsNFE4Q;J>s8 zDHEBy%Zo9B79|^M35tfG(!!dou%HCn>9naQao{o6`Bdf`4&0@uH#v+0Tp3Az)`N20>y%#;`AMpZ7C5J4es2Bgqm9tLH!>Q}W6}%cdAm+E9 zkN02?1;oI1T0KDmL{+`zS)1h6xhVhZe47&to=|SidJc5uk?kTGdr!RWEwM&8Xd)gS z3RIjRZ;e-ALk-!^$i?Bzjx*}{x|7k5B8BqAM3VOPJ&|7Bqj8K$llr8moQ%B;(9pt# zH6zkvFlYq0v7v=hK%RW*gwyD(Fs9 zT7Duu0Sn}AY5EVRc5Ghmqb!d@-p*6&)d$kYY>%F>iw<%we2V_aDakUfTnxQ`NB%l{ zh(pMkoG6~8)zqSn#WSY4z(D5Yzfg)-Wfga3)RoQLcuFKplq3p30lK9sRMb7l$%!D@ z8L~(I#dNeJoy4nVFL*@m4~5h5v!}AxQ>R&Od}MbTfAULf7I8_{HCQd*W@V^^VjFZe%ra+I{g8Kn2sagX7s!YS2k0XlUUxfkQ3`Woral zusq?}ZHtNUsj4a0XnNJ;;;luYnv)g%8Hs;EuV_hswMQ>2Pui`WwK^2&_J&3a zLLoQ@FHG`z;Ti|s+K$e<9Zx)I57g}L$;#0fBjJD>a<-B_mxVi>yYiBLtG-SO;GxPW zki+uy$`7-H#UF+X`*){XiouJ@Dt;_o?R-vU+Wn5t@nFLK9$YO>E9)g`Wl?xUWJz|0 z#0*A%s&ix~=`5LS+~mYFAB&_p^QF$xQw5giUQez(l-JJC<#0w>m0tb9v>fU!;Z&!E z?7%q{sNTlTKw%G6JP&ng@F!ECLeBp~Ee-1FL}X+~o0UN~XXWnB`_klI?`qrQbNj*8 zLr0Tb{KeOj%dAs0vbH2(VeSrRUGU1oqMKw6-dgrkyAow7^` zYs>c1cRVlJrA}YTXQ-4$S3W6Q%NnxIcnt4*)+;HX0qhTo8&Bs&lk)uRll)ZkO}YM1 za1P@#8Zd?{QLawT_XaX@CR%5GWRuumc9HK=R+|n*Tb>i_&@4Pf^2vu2ztM|RzNT56 zT@`V&NLGk{7v@F!t-#21A#Fj|dWIM;TJeBQ+ghA@N*)=eZ%6?9p%qM;$!h3PI|n%RBioBdD{5k$%da9; zd{{aUpM&~WBX@i}mW`hCOgYb=#=W8=e0p2fQqM2Kd*Wm@9b^D+u}FS1Ob z1+Q|6PW+b!SNF9)*-qHT5&UR9So5-9c+zLOhUDt$)P`J;@##&gV?ku9wJkr%DSf|l z&+pPtM#htXSyi6uL^R&9tV}(&S@F$Og)A;YL+4tH4|zOzhFwJ&^mgW`Y%%*#zf>UC zcr-QwUTNSHSqsl@e!FV*^Oe1!XDSBgH-QYMWjRDWr7QeuAq$D;=rIypSzi`h@2U#| zt&FsYX)ylYn}So7PGGD(G}M-hVkOAk-oT|^Z$bXzSN8p(NVA&sqb;q-A>RxaqbqL! 
z-g$6%oCd<5{5a>{!AQPw3!k!ht#p>(_;yx6CIvUJZ$|^WSb;Bw!z<5)!^x?5L5-R^ zUpyf;#Iq!SQRc%>6OoF)$=#E&6mWr0c-m^(kL50#%2Od*?44Lo#)FkOoH6}I32wG? zdrxeV$lj{x9N7xlTt2qgttP$*2=`4F;yRaXKMC$RnEJPuV2QfI2ygv0$_Q(xN4 z=<;~%aTtv^NAqvZx~xbov3W#TwUf^a2QfJDrHHer!(LP*P`hpTUj4A=VDYC*lWfeD z%HGLEmd4;)$^-Juc|}WuE4e5!HUCvDrkXBVl*Xz&iBovE&vQo*4c?bZJ3-)8#O80Lm@u4=j+mJX0iVv`vo(LV*SSg&5fDQoD~JoY;Xc1 zrjkU zDKDJvE>BvUMQKT=@NVP}Ws2Bo5!lhdI-I}mk_tJI<+-vnKC2vo8W_~Uv#`N48VpCG zsb5e)OR_D`PLkzp3(vHhSc%6V3ug@aZGH4G??+FOHlL0aKpB{4*Vq}}g^UpD@~jJU z_=6T>ac|2#zre{pv3xs|d=|DdBWt`!B=fSOtH29Snn1|Qx`DVTaS$$=;%crh0zI-!0 zBYI$O$vGb%NAM$QBob2j5!CQ3Bo>Mu=JmlXeJeiD1J#a)rntxXV>^msJKu3K2`m2e z%nEh2Y?hqE`_kK!8tH8sb#o|KT*J4AZ)xPahjNBtJ6Mw1m1_N(}}DNJ^8}2GxLKnTCZx&OFr?iClk;n zMKh3!0_=nFGo3QQ!!pQqQb4rqGtMk~x;k8^>P2n>7nCnv`b=(A{zueq)r-RcveK*{ zDyRggc#T#wFK=-;W7GAOORMK3!6S?7nc;Yb6xXhf_3=8`W;~0sWgGY|tft%rkDvcf z!^p>ZqJx@q`UHh&5xLFMbFh!vPSC4ef-hJ`EBoBM6A=2f(Scx^W~*0a&1y%ud=B~% zH^D8M;U9N!k2!IM-vwh8_+{lAgVcH|--0jKgZe(pp6qtjo+Lh_Yb*N$Ui3m?r@zUm9C~G!<|l7O zM_V5VNe6x`%MS+hBxk%u9#UmEoWBPVYl1UrS>9Ulg_Sy0$JWxtqyfgsC5gc8?&&37 zL05RPT5U^8jpkcD=d>gejP;aDuANe0^dxY)eqnUfuIN-M(yY|z4`$M`ZRxG->4c2nhmxLZ|V>3uv+ z@;#3Zto&&sktUHPXxR}sWi8Za$?Bj+`88tR;zYR#7!eoJYv3-O!w2H!qXhmc?8$bQ zhv+nbHSyhx*XfwjlxTofu*mzPt6>X{?Oc!$SjXQyL=iFWuH3zF=L*qK9JS_mf0&r_nAa2+nx4~}4%yr?IX znSssH@|FMLE%2Pph)?)*G=S&{?6{j=G%HM~43VvL;(bnw6LuCSJBNm|Wj=5!ezGE# zUR<&>HeGhMCs*x^!^?_bvGz8~TN;g2^4H)8FO{zbV=#@|<;%_n3c3^>#F4C$GiRP& zvMK#HM&rA8EGOW0{=1;i18& zw)l?L6eHmhIOn^eN!6j_+-=#RqnC`JwflSwW0s`T<>gsu%|Dc9e<0(rYcN1lsjm_f zl7Zqfd_{MaKB3#?ZA+^g2Y1T&@?B^Ux{Mx_Vf71Yu;aBcv(ZSEPvyfOYI&7SpW4OS z9t@TalW{3KGMQ2ED!+}4i4IA0<%aNPc{HR>hS7}9VbCp=HQ~#b_J=w8sPdNOhm%CU zJ`CWl;%d004?H8Q^n|;^8AslSG}#BoGw|XQTw6aKv!uO>$F{aBT9s@eEg_#vC&?14 z`qG{~E_&XV(P?s#!qmFR7arhKqEPV__)EXgk$l?HV{{~W6Y z!;#>I6V&k{6BR4t1<|WkurE)fo)|}}@~1b%lvazXC@jbAT!|`1aFJLumrq=J#hfzm zAQrW)2!43+D>AyZ*YiPfn-#MeAgyduMO9>=o_l0I8lyBXTGVfJ(t^(T@v{7y4_n3x zqLDV{Fg{w96;l=N_vY$6ea&4SQPCEU!2}7S<9Vay>&SYSXIAsO4r2B7m1)C+yu0!; 
zX&m%7!qI3EeuH!TPAT`N4KD((m%U~qtk5%r)UTD-YbB-k@S=zS^*~29Y0io~D>gY@LTP+(9^VstR~n*lc0b*QisocE-doQWMx@zvQ@bR z9=CkFJc6eZdKQiP*V=_6U240m`g9<-US1U)%R(E4JgiEe-gtD!gYUi392Gf;RY}B@ z6+&TIU_8kZv*qYjQ5Sp78lnLHEtzE{cno%iJXz9u;SWTjZ5Z-YVG*rrVSW7qf%AM~ z32U=DTn5W9!(Nqqsi@|2v83)9)2<@A`7TFFONh^1K|`7ap37I(3Jk0(sYH{S0}q&q zo+IPzvKh*sE!@$}Wx1`3g|lk&f`?AwWB2FXeqmXr(vIk2K72*$**U#@WrgEFL^LK?$#KbpcfCs9OON{u9;+aop4kX%TH-4aF-%k) zlf5%s#GV+<>R{Ix{u>1rXtU~xQ)xEqtq8a{wWx)^U{Wty+Rg=8LvO3X>oPVnpJ-DW zz-aup;t^12TU^Mh*dwFC)j7Z>I-nR!Zl(1X=kdgSqmkXgNic3*R<%2l;dHXBczIPI zC_P)Wg9Ubi{FqHHK$f1)C!eKvK!ZZH>o~>36%kvx6`)If9?thVqsXrDpv7K=+wwF^ z>R83HxwX?l_lY?~>Yl?P7B4^7ex58hPE%FX8+^(f%ZbRY_GcdT*q-W2MsZQ`D*htL zr7_`_7Q&w>rv_Lqh&F^ln1cs<(zu&{!q=1;vpWC5W-r9kGDS62(ZoA5I$l%%MSl5< z=C|trrruZ+TxK3mfZ-{ZCD*Hb7|&|3TR<|vEQ_}?dT`38him&r@K$4?22Ty2psDf2 z*Kq8+ysikB_eFaa=Np^W)t8rL&1I|Tx#A`s-efXD^e+o_AnW4+&d)bH4OSuNqcYj9 zQ)P}wqw}r}Rr73vaGzDH| zgRBN`x<3^a?TBY@f zW@h#U=E)3RJCPaJhF`^*uBoKj+m6Fhh`2mcuB<`%x}^(cF=>AOKTph_xX8yI>FVIY zldR|DZ*eXi23PX%_`oOC1AIvPM|3!i$d0nGWlwA8^kC{m#J8*=8*fEq)EdYNyD7^l zbA&6@$c%(ub21`W$%P6YBj5p?2FHGd=X5(3zGZ_*w9G*15xC%0fLT@y=b|<4(4#22 zAoEvE4=lqFOK8VnZ8*m%|9bY&y2L4-f>(JR`qi&z@)r(dG+q(h;hLJ)H%}oun>*m; zS<|TUHr7mIOzBp#j=uO+e9c!xJrrFTXzY~13?G9vm8ZuGFi7TTRNO$Xpi}wk{vOVK ztAbs=qa2-BgU>?q;C`7g`FS1}obZxCL0@>1ohbTjzPXCpq?hkqI0=0Dmz9$)s; zna7TdiYVTq%CSZ1Au3QkB%kh?8rFv+wj_IscSsN|R@FIjYx1}JtL=%At@*B;XvZOJ z9yrJ{t&1YFlbu_bt2`7rG&oQty*;nRxmt6HEA8#l@8TD`3@VgVEr=K3jrQD5xV0{Os1JrmSWP_PnVFstg|B%Ao;pR-@wrG9X<8R( z>x^?*JsiyzI5moEX)3zNPRFtQMcKWP0=lO+7~++(NVm80*_%5%)5FuP9?f4p-!nDL~YL!_h&p1@wX$Q3YUcOLYVrOzUKds7=9t>1G@CB_lm+ki} zhify#iQvF4Ep1EJ;TrbT&bgK0cG*<*2sBLPfyt96-IJKI-lU1=L_@HvD(2`Kcg6W~ zkSFt5yz059G}-DFPj)!%%F>+UZOtuyCanjt%AsrmOP!_Z-BAg#k*g6>Gx?$m`nB)h)+0dXN2LHUXeDn@}tHiln8i;2u= zz~Zp-Tv?s6ycM<4{P>wPsjIKL0a~M;LCKpa%PVt4`cx;1w0P1apFc_qz@4lZI-3{;F)XHM+}hHwQ|B9E2i(@?B60ml&n|6`mbSLo3o}vIBD3d&B*_eb$$+1?HX> ze!DFoW&g-7n+GfW1Dsdhzwv6fWPK~q;KZG9$QF}WeiV4hY8peF4F^-Y*-BBH6$3Y{ 
zSr?hd*CZbn@IMYLU0wNXxf3(GN5A9775Th0`p1a@r!QGoy94w{<$lrHeC0`sBw>)f zvujqe<=N4F8O85cu8CJ87bN3dej3l+TJVo*O|x4iyU7z{MMybVjXj#*7bCB#r+393 zY>#%P+vQY?m)NdlEmSAcs=OI11KrY$Qanj}<6g3+NA4*vSKaj@%@Udz_^N! zd-500Ra6b9c#nM*J<%8}k=kXEGFhg5cs3-^sPw&OVYmw~r7L=x9|-S}I1-_^>I$o# z^72?Batp(FkS6e(PAFUO;7L9TPRAv*!LGdja7IF5-hei08fXTzC@qKbWDd1u%{<%X zt!ZhmmCuEbco^(2DWd0m;w==ODxc?f;pt(m;RB6mJaUg3d}PmyTNTH^|^7G&1If{;|>z#r~4^S~41po3m=rpCjYMX~Y7 zCEBuwCA}ai>qv{(=FKz8<2|@0l+VP;i;T-CtX2i**Y3olxS7jn~j{6hafvvI~yxoiL+{+Kwnnr z*B-^x=|!wcmx83k;11I1S) z0sphQ#^O=(LP4r6X`7!`GE`+3X%pEj{LSA-PrAFPYOQu5)mj?=Xuk7`i(~%>Yj+mp zS9+d@y&3a@cTp}}Wjk@ovMswrB_b(Pwj3=MM<$2jY#d+)#9(VQx*NUk*tfybzyO17 z2Kx@oa2axj!=bo{B5gUeWm&dtmn>IeJ5HR7l&exnDwRrAu96Gq`JIOy^MX~$MGDpE zf1m$3-|{ZcyL|8WonoOVk>oWZthbHsXbU?U!F5HnXduULj(kS%l_qaR3l!ud(8_vK zV*l4ieh@yt7O8F2yPi9~82&dVBl)mN1w7kRsY7X)!(VBfZx%Agd@;S_=hdA5OjjoJ zSMkkB@N=2VRQ`6pt19D(JmE|gHly^Zl~n~ue)!VPLWV8IuW$aWT^K53aJK8y{6dn% z-WrKq98I+@imNb1%qcfUa(Fgh4Q_SKvECq#HM78;RswE?Kk`YuL6vY527T^xIheNw{_)`zXGi zSW~^KahjR^J|YThh$yK#L`=*lpBZ?IdVE{{HV+W`@WkOYYl(Z!XtVDJ(-Pjbb=8+L zF)RSPz@}TDBrSBF^{_|#OUctTDzR&r6)r#6zMgtvM+R&WA@|HhCnee~$9`IE@5-{h z5WHOelhuBf2`}ZzRx&nm_l>lny&J($dsCJ&(?j{zI*J_U`OxSa(M2a3JxMm>PDa#` z(;k?D+kC0D@NzA!k*&KqWr7UTQ(n0>-0lB=hjECr;43}j`HN!U z8_VRB4yQeRHzRe*5lVKSL~ClHKsAO==%cvF^Q)e?(zskNGM_jqivVNb4*H1xn*VD> zjeFs4k+&5#yb~UYecft%_)-v%cHp;|TqaS*2~F7qatV<-7CvR~f}k+EnSWTKFIJjt zuux&|+3qJkkiq9Xxb&_8B(QzQ)8|PmpQl7xPcVizLfz+zWw?(=~XH zGIR_c*tc$_Kb{3l!xrHV-9=IxVZJI$HzdN;ohM&i7MNSrC^ zsqV59{RgEQ|M`Y&kyjQ%zr6KkB#Mc(9PF3~axr(@8D;{1%Dd)@P)TOe`Q1D$`A+zZ z*RotR2GZLPd^C};{VtF4N!HA4Rx@(v%{PBU#$mPpwSg+=R!QX9_|Cc>{8gLP{DFSR zKOebgx2bfLQ)-1h@5at7u`F5Qm5U{ry%2ZAfN8$o25`reFu9cM*)NJ5Wk!Ra6e<%mv%_2wG(-V?hb*% zR{kz#rexa=bMb)4?s2~J9!_$OljNvA1DWBE+9AeTo2xv4KG_f4=8?{291kM*POM`^ z?&N>EYA+|Bo9%@4hazYHAR{2%Xyh&{gl4pqdwBcorf7pVL6@E1bUcx!^#X0N8uq?z zPYdh>&Ep%$rl?M3UyQ#tznL=UvU{)1{_K9mv*OxnuB|1j&q{U&rfSYd`SwX#ygRhc zfhsox)F_`H1p} zs9lUkCB6i7GXuNz`xkzTyF3kZd9(8Q)_yLA?r_xpGIdzK>^R{meI*LA8nO20x 
z9W|^hy_mh6_4%`DkCe;X;293FY@U-HFfKJn&8Fh8@yLmz2AVY2Rv!mP1Eor$3PR0eFo3*KQ z$f?Q})MD}X(ZZbS1DP>-CPVxTRDk8=gapBMPxrd{v3ih>eR5SxW{W~RQvRoZ&6xNG z#kE+wuf6&-B(G0uk)>Q8?(x|y#q6w|yIAG=i=HqCazf77t*)EGcD>4pz+1d#ZE#aQ z%{L^*H8ruto#ub~)*^GDPo=Z!Bcol<^Hw#jhp>r8z~bZd8__s3L`_mrj|FEc5tTo+ z(>yhmCM=?eu|BrgpqVfGzvy0f|Mm1JI4v(u?qqZ9JJS!3NXAC2!80&Uwwxr$L%5@P zi&|0G4`)4xt}LX#d~TeCo@}xfX*qP_A(93)o0sE-$T!)mLE}(`hCw*?!cTME-hRLn zUkrZP-T3*)vJ)G4ylkwxXzOG1xrc6`HtnMY=0?AqCDZo!i;bu4DG*PPJ1ePnZki9j zuc#-uI zY~!}i;?!_DC?5hni3_4B$Ff6Kn|viw3rVfA*9sY_F+a$U8Sr+DSB4XR=^IN$ zPM#aaN@97QtWZB04HT6d-;!%Itoy#u{>ETCyq%R~9}m@e?@oy%D~ow{HG8f}igCbd z`b;~}yAmvaYJ{v64~ti#0*n6BdYL17F`E&y7BCYY(FwdZTS!2Lpt?DmnSWXp4?Dj| z|D;(wNgn<48&2^dco;k|(qr!^U4e1>cS0jsg1gY(7@eMH{h&TMOw^Y#vMxLqYre1K z9^5uYu@L<1-Z1{Tsw4OXyGUu*mw1LENH{}Jov+m^=!pKfA~$LMfppZ)lO9)SoLaHB z6J_!X_o+1BlQ*`^bmKV-?4b&sKFyLUo?P2=A<&9*}vvP%$9$}PVdTR za;!F|tG&(hi*ej1mjJ)SYKystJi*s`d?Gt|bTKP*5Sc}5tsqURAHYsR5bIq=0VT*L zoWC{9Q4HALP6QLgQ-9I5ekO^T-Y&Qr$7YS`iIZMwx+ zxhz)fy|me}c=L{o)~gUp`wqZ618B=Z>9>~na%SVVP(<^9jmNYrw%wX z*7*}l=@9%{5ZGHC8nJZ$Zx6%;g!H55IO zD1I+%uU>ECtyQR2duM&y$e@AvAA;=;YF7qell>OAGIH1sfz<0A4;RD->-llI9!RQLB#x4o-<~eDUz&Zs&%Iq zJdm!+;+-0&ONlFI^MrFdW#Z)-ojuO4=b;+C{@6cl!VH$cE@WBYS~JM;D0i`V?ET@` zR(e3oRVdgU>;1eJQXhd5%lY41hA)r&jlomsTSwtm{U9xosXL*(Qx@>5^|~;!Z*(;~ zv9{AfMJ)+JihH-qSu= zuup@$z`yg!JRJ;Pe|zZRR=$^CQE^6^n=g}-wel?!LhjK*EhI0oD_>n*+K5jei7X6G zlNfuw(ccL42^UBUoS;?L@|%>vXZ-TR_IgGphxbOx#kKrezBmfPOO~A{y+30?Z}gRa7nU9B}U5Pz}h!HB#;DWYNuCUZ>J(4Uv2_-l0X!wpsDK6Uxq|4~WW&d6$hE z%nc9ic<3%Kv=sg2{iz?i7K$zA895vgoVnsD3yV(dhs?%wR&&lon$>}zfpa?XAC4CV z79&aD&7J=7p;#m`N`{RQHd`~B$|ttc`nk5)5_4ijYkSbl99gffNn01A9dJrzk*q0X zLPt>zPmb+C6+H2OV;+pfJ7ynI-rrj6l@7`kc zV*2DgS=Hfnz*_RbYY<<;KL=jog9=Q=bH|WTQqvzqSAMXCa+D56#ey{1>aos?nWFr4i86 zdak^=qO=*}A5mB5blX2cVYlh17toRC#X$YR9p1C;7tFb4~p0|pA9Sv*abNvM)*JC}0 zR_~aPu{I}T%%qFFvEnFB=H25rFCAaC3w=crW@qG$mOSTMeHoof+_phZR3D=S6f58T zFWU_J@Uk8dn@`*D%O{vozZq>dV&O_UtZ*++g|t_)R`%J9 zp#Fb789qKAsaY>NFCPxQWJ~bH^D6hrmbi@v<1E*XO5f-qh9Lz;3)wx#9{3Hzoaiqb 
zAm0riNFX%f`86hy!RH5W`4LW`EY7O3LOog_?W^)?RpN`ETHFzxB zW%Ocqdc(`Z9r8l+%-NcmwLTbVMUmdtvm+g>)%tM1kv~cs)c|}pXB_vvXCN6bna^(A zVqx*GToo?4m)A0#zSRodi2qBY>%W>a?89U8WU2IF1xT-|GWaN}2iYpIyVk{P>IAe? z{A^}P9mnNZ2UhoHo~(trlHTnUptC9;c{8%~#Q|E#s+>qPK{9x1w}%zt8F`mc4(VAIzu}&0ADT`RRFATw zBoYT}8Jc%;-QS*MQRHTPHUj;L$ZJti8x2`6GU7827O(2N+Nn@$D$X)bz0(ic(zhU3 z7hqk;y*G^Td}~qQH_fJH(Bxt6dNVQ(DS9@t+Q&=DN2zPRH^`H@YMD+#R^FIA6RTsq z(Bw{jTb+Ut;sVIBAzh@|eBCdm$H&s1MBprH70IlJdajv%bkh!+Rb%kQthHDfF`{~- zJf7yGyWY^SlCr zFF3)TxXb!F-pLcl;PAPtH{^kbAY#UM7-Tga4zn*byIzTQi}~)PDU}`=(3aT^RPNvK zg=82Z=|Er9?MWuEt7viZ|#TMX6uA=t=*u zV+67rmlJ71YxNp*sA%AScb998OYxYl$c@`qQ;!_2*)vGcoCWz*w<5P=uOWNS&!ClR zhuL`+jBH#<9+!`7$BHb1>s@IrEog~)3Gb2SAIp`nk#Gt&wSKT85pRh|$f}Y1pPy1& z$p*qgnF!y>MnKla9Yw_ERB0+HG$Ou(Jd=7R_sU50v{P?x)l)rE*%Ek;g79614j++> z7^R(jOPQw%32z5`kTD!fU6YkUJB^K`wNmy9$Yb8m(?-F^6Bpp0m0f#Ed`p65EZF1e z{3WYMUfF2&nzdn%aMH>)dDD~i98xIbRIkC^H1KMqP|gbiiGXVxMAY=pnaOfXs@(Pt zRGuGtYs7+cxTSnS~%K=NE1$s|sY+=dw!;`MMMmg5 zjihgI!JIt>+syA=-hRm{(>%xsE3W2uZ52U&=SEeRHI6*&Xz%{+cnj(c?ZrAIUaPYPa=MFPMZ9?*`hN3%c*+*M{B6adc6vA z$(ukWeT(SG7yAo4MQ~8Xx#==ps#HiS-Lk)6f5zn0t;PJ7DP`G2C{|k=kB|=%iGE@& zBjWe4`TE8~_gL{ENqy@VyiSA z4?f9fh^Nro>SS{ll@;i&%A^@sD240jh8JiOKbjrFS_{pSQ!rY1 z3NIi$U0TT8j^(N4TqCDva^=?lQR#9|x=tB<9H?+p5B2U*WPvnZVgq!J! z-6f+RgopMbm_PeLn)#rpPybv00j;c&(kVD?PXf!%d$WV#UfN_Iy#Inw0}K0nq z>>H}e%~>;T&SCU8y)RoJA%4m!2 zSf#%EP6on1qjcSz_`Kp{Bf>$JjHQC>A}&1Pee!Hy&Dk1ku}GqGsCQ2f*>U}!&N?h? 
zsWzbxNs@`>ouM@E&ib2i^Axnia}oXX3}7&6f#JL~nK*n#JN~$NLMeIqxxord1ZnCU zSgSE})(G$6GMmNbnYB^WbLIb#16WaeA+q4v^U{w+-+O|oQ)5Y;r(m-)Bq2JUi8%3@ zB{SC3sbYc+w2PH73V9t;W!|((+bE4*e13P|&u{jZcc`uIaD=_aPpes^`ljvIH}I48 z<45JN_O0s)>VglX<=H&zy#wZ6&k#Q$Yj-J3>?A=^CocpImo664@vIc-w8B1lR2Rg{bTXkAx=Zhg?ESGFJ+d7--uo#VPVq zPZ}jf&U_m>uO;`0v{7G=^cI)Uv%bICqH3c^wDKHj_jJ7@`oeb6Hd4R?{rA2g8LDs3 z_c1z>&RX+Y&6vmOIeLy~{xx$*e%AAumn9NqM`^LOEB@d=$|tqu_=i?bIdFfoBzTM7 zkowvspFt3Us2qVk-zmLtivs9%9m2t$m5b%;0y32`38NjeDe>dQax)oOREOR zh&5wK;oU1YBVG1G{$QwGVR_Mq$IgG|OL&5I9EmSsWOyhSVsiLvcpcZg7^ny)i z2dioi2We*0?7-uF^4Qoba=es#d2PldD!Cdo;WP1U>Lnb?t|RzBYkBBKPX6>yuTkUG z)E0_Q$h;LY+#pBI3X%oB$U^Rwn}GL?Q&}@Du&*$yr;KZ-+@nEDxr1EN*ydXMEZ^4WT^D1+(VM|Vm=23s`9XS6(rR-@dU0n9#u6% znrWrc*fHB&R&#-H=yvu>^So#z?V_9Q1B%tAIeWxsss^Iy;8OqN>^znnCN{ zNcM|_pa$IGV_UsucN;O+PlaCigUV>m`|*$5vl>V83}leI5#Oqj*&j-~CKIeS*UGi~ zSz7#OO}?9Ne4;&QAl5Sr@?-2!>G{y#XvsxACDMiuGB`3NaDe9FzN!zH^dvK|&qYim z()T7(JF1P>+BNCgJA6+@dB3om_kxCv1;h-GGTQ6$c%2T=)uC^tFA+<mlw$L#t&h5eoCVc)&0YRq2G0im7-*WQ^6U4Uvy{eaP>USf6PnaM~S7fzIB`Q%zKXrY%-XEORc>R?R1mq(~)?tgu$}4cmJ%q_(f!LXVYzFKeqf|1k0eye`I|VyazPO|i$mlOr8?5}p&#Qw zZQ)Ub>bFEE#QE3nXEu<2hUl zmFTF*xOBOnd*}-pSkM3RMi4=cdN~@v1GQJ$swFQ){`FR@rTEx7p;|`O*uOFKL6=3? 
zo>E;zPt~oEhDYhgz6>!5oa*~sdBFO{%`=(9bo2@8v$xiePA9X)j?;bn`B=MG^67BC z)ykRJg_(RW#%UXT5HSd!L=6EXxSi)-9Oh;95!Q>r`&K;X{&p47yW?6#N_N#OUmbRK zoXM!v9z&D+=?^#MXT8;CJ=~F#gdno+J#`WitHFi))@vuz^6B6s+pPL|A!D;DWj~qs z>he-#t$BPfTjU_uBo5?{LUkG2*K&yBcmBw!9*YE7!zR?O zED^a!GrG(BIG9snc-wq~>Yn#n+m&)EK8-iEd_B)BWNa+Ld`3T?C)GAQ3?=&>1(*ay zP;bLZZ0I8`lCdEJ)@14<@;aMcgKs!`J>$QVd)0lgypWnqLsOUntEZw5PQSswCqs>v z*7?73ANGnmXKFcghHT!%`ED@qW`3H#hxNR#dLYH >Qkb*Vfcd&tU2)CW)BgXpo zab+{h-b})li)+NbYEo_w(g2B?>$o0ns~ka{o}=C!2FB9Uztqv%^TjuD4dST4_jVZg z@L=Rik-=M;@6GU#eO!nvuH*@Cut6mf_f~kpzmN$)QEO=`V5T!JT1N}4$(bQtWa-HS zyD8q|XOV|SUS=UYpTu7U@jor#W9+qegmr>D4!e0wl<#qgc+W?Q6 zjes<*=}_xpkMJkbn+yj(iB#Rp{PpOqG^9$71K*6k*|YKyk8qO5#h*qIcGZmd$D*Fv zS*?(8b1)zLYy@*@c;{wkiwf|>nmDABV-xXM6VXC3hW5AGl>H^y(ENO`j#O(wg`hp* z{8ekDI7gmQ7_FVip#@raD_0n^Jy^6WA{P6HvHhg+8k|^1tAY~t8ots)zd|_ zgSnH3O*_%xaO6~!ZXBe6w_ZP{b3-HkiQIz7Np4L1g-d9oHP(T5&L`yevsm>7%)%!t zKGtq-GO1Ceo7hc1~nPYM0qZ??R#)`s%v#T?C1i+WraJD4jggo}O#JGxY;I0*}7`*v^KZR`rG#A%gVxJ);Um(;5*ZoI_<$1j@2 zdv>OdR!Ee*1dj5JV2L=zVb!PE4Hg^9md>nGBlP-_st2s&sq63t6m{YDEs}!T zL>Ognwg?sW8EbdxT+K{Lt#MWp%pc|Ww$|KGz{&Ew6;Whug}Yf_nu{iEu2%KvlruU? 
zce--b+!w4SOJ+$k$tY`WJ}B5&mc-+~e17RmyPCE6V(bQ;NBOzv6dwV_{ZE>CAtDHT zAbrrW@eYen-C<=bHANCyd_Jvh&5scs&TMaGHV~+~sRfeyc97gI2$D7xf2JNdN;?;h zEqg6^`OTm`4aSMNPy+tgJE#iC8CCF*oQVTom?aH;sH9zy1~T_%Sp#%HSH;o^O5mVEg>7sH#Cbw@fWs) zODn;~mHdq+*Fy*IVftErtgU%J!A$OvJ&_5(H=|N{`EGERv}>7VFKX~Yc@*v!m6Jw3 z3_O*8dneT5cf()^O(ytk8+@jZk8&UW$k3?x;77G_)jREb_mUCh~dJ-V5rq`sE7NRJ-Y84pf7&pD6fgZqMc11o2Za{N0 zClxZI%}mBiNXQv_k0ieVLpuP^ucK8o5MKWxj_~4uy=+v;gi&^TB(Hu zPTxTnh=n3NOI&mgI*(4hg#8+0`Oa!Q&azow&y$bSChe$>UkZm|y}A)nC{t&xr&V#R zo%y-O=IBd?q&fL|bRxXJp$H)-s@5RQic(2tmnwwl5-=V3AEL_)%hQu z#yh7}ux`*;CIou3b?iPbn}?0Fb_Y#mc3KlJ+kIlyubN$ZBl@J{vdH_>#>4#8RXv)Z zHzTJ#stcMcx$2}x*2wVL_zPmR)gZBOwfr7~$nU>28!X=Y6eaL_1W4^Irz z@NPY4?xf1N{p~4zp@;dmWxgfo_mrVA>1&*TMzD;}M7mi@J>V9ZHCLQ>Hu%kfOZW^)(1%uvQjL%&p#so* zJ>)v%twdV<+O;98)@;&LdOkJsojkz;;T@0i@W{W*H7jNG{b1&0ZBs<*4azE8@wF8~ zjZ1l1jnQxw)|kK3bLsy~{uedU1(EA|;xgI1R*KSX{*ipYvrNv$qj)b*pU({N6-Bq@ z$GJ(OBO`!ydNQt4kxVod5u>0Lh6kaKObTkyv#t$a3(DO~FSM4YAn%0J;z71bl%$gI zX0E~lXO+<9CmA6$$6Xc`il{X(AKr<0dsFT-irM_8Su#;_aALD!9m?YoWPt>{U6RfY zp%%|d?E&8g?vpz!J@pu7lXIeHR+Goli>y+$ABEovmrsPMb`O)M3&TpWGpu+$k8}5W zxQS~I@)XU14(K3%KsL;uw4$pq**kMI{p=j%dT-jVmwzKxMJ+t;^F>9_57UPVAS+b% zS6s~-#ON0ZSc#v=bDYUH2lCl$WFDR4PnPL0^q?dB1{9Wi<;|mS*8<&lD?gWW`WY(l z#YALIHWtgZ#sK1wG}TaAVnf(m5+(j*-_4vnv@bmEx&5bdju{VGmc}|iB$bKg-N-NE zt&XvUHiChn4a8iM+3VDKF@DAF(olj0~m5s5xMdk$e;iR6S&qA)C zG-`+_%u>||56L||{)Jqtly2xZoAbuN7xzGL{u7j>QL3v|9(zN`TK=zei5_Ve?_Jdh ziF=U$%@Ah8I~g*c>6-Z5?C=vBu$;v_nn|xoF@J>xt{2Q&(pSjg>^s;CJxM$Lhp4I$ zctQSeE=OQ8&&qFPaJg+PbKXk8X@4P88)?-UeTZ zCYd$lHEvgFsP^iC)kD{^JcE1i3`Wvm{-gQ|b1z@4Me2t()Pt+6;U>PIzwv1UMK`1# z_l+5fm?dt~&Sp@`cTe(====s|^3L_3t#a0!T!Ze$4asPYG1oS`(p(ZNiu>f;Dj>gB zUns8!Sshf?G89B@h-7S9?w!5bNXm1@?R&_fRrMlfPxI^f>CuRc6L>5_Vd87o=!xf4(DeB*(K_l6}xGy2Dd5}O+yLtGj`meE_ydTT=UP1|uy3#5Q$lUC*?Fq z-sSwZe)=R=uBKy74gPd!`f zeW`MC5QQJ8J$@|S@^j>9_eZkKrYJz?p(0fB4CzR=k8WwW&N zd}b!H9eYgw#=IR}P(;Z1AFoaBn87&&ga4t385qi7Tf6j|W6?7nu3vk(y)&~xaqo~LD3$pvbl zpi@Er^&m0q0kk3W?37uvKH5VKcx)`5;QP?_IdOC?3PoXQVuokkMPjt%_vL|;( 
z6GSmQ4Y?|tWfi&;S;~J^34yDaj|dIc<{JdYSc1#_~)%-BH7)Ee9>Bc$oX8sU-pr_ z;x}5j+j|x84)?4m$?>6kR{~T@Ear)3L&!M)K&@3V$vWV{%zrFb#Y!?Z@Cyw#JQZ;t z3%)1M(97bSF{rYl$#^avDVJok$b&qkOp2b!W^vovruqpQY8T(}V9l+3E+UeZ^-z*z z;15jl2}enTd?2~z1JG*Hc|OuM8EVhv>}%(}%X*1y*k0$7kx~+%Mya(&wHVMAQj1~O zH?nMX-_CLNLT-icf-k(G%D=n`I?NBhaZa_C*m--Na8elDpq)H{<^}9^yqEj#CD(I3 zW3~=rhBVa~p}aJ4DEVnNLd4n~1lE)0^K2`pWd897O`jx-ax0$YcalN5JY)jtVo?$X8Eb7bj47{iMplzv ztf%*859BsjUr5>Lh}?$Br6KZV-LgzijU05+zGZ^QUQ$-P7c(vd!aQt;Iw|QI^>#MB%uK z6V{rqQj7ZGFRMY&)6bQc2@&UX3_bPQ|Wob zKdg1GKW;`e6Q9W-#Ix?CuAz zw7Q+YhT86yQ!qnOBY#~F-3bVTS{-(G;0}(_a7YcK#CJwdH@iAmJFh($gc@Yl7_HFy zZ53R*@_zes&HEfwfXcMWkFh@!xyyX~)1zz~kF~f4$yxhaNS147#(gkohBEc7NO;$X zTH~Z16q?!5LmprgF9YtG3B=-S&@BE#*BtDKmLXIj*!&?GAVtM{_ZPda#hMrY@e7vJ zhbN=vzHTAhx(`7!$2zv@k@rn0U#Dz_=kY)|Z zv`CvsAiNcO)u}jPj{M;(Z71p4sOMfx5o1&r*+?;~ zIu2+pzf`Y>_v9X^r%zG?Ye}3j@^RJIipb22$HIDvq^#?~rJkm46$YxSHbUFE{H)d>zoGBiRrE1a#nGgH@gr49T- zWj@b=;F{_Q*USzdWow~GD{9K=QjjlqJ^#)m+Ocjx$E+F2-!#skRWuNi)fe&?oYKOk z{BP~KsH2wGT2RlivbIH{?qse`Osd|P9h5O*F(dl~kN9cz9S%lkVZvm*QF#m1Hg;ic zU?gc#$IKJBoBL#K_WpBG-+EH+*6gd4P*u8IWV54mBnc#tZtVKJIJ0SB_Hlj#Wv#|H#gizL#b2Q0n# z1MNu~OeIHJ;o<4?a%7RVt!B2od+Ug|GxGUdvuXrE&P3+Cq0pW=PtZAWGSudMxO#P< z8cDQoLl%r|oXayP%LkXEfRGRt57lb2g58D5H-@}u#n!?U+Qyerk0yWQ?RU6L6X_C~ z!+Gx^QMWgdD=K`nBI+S4sM*R(>zCuXTECu$yb&d7r&NTu_1E+Om5WRpKAUO~{o0F5 zuSh%!7!m9zbuS7Ez&9{z`ysP8#i&fjAV zI?6_&<_D3XWW5{c!Gf;jA0O!H$O}56iIKN@kX1u1dO?nH+Wh4zjjWM#v53T|-aeEu zk$tkkH$Y>W0dergnGjl_F{n@4pA3J=fzcv8+|Ov)i`pYqCS8qyw6wvKyeAsYTC;2y zq6bAs*Kp!c-dad%c)R4vb21OC2XsRdIL`M&8GjossYfkzko!@yy`DQ+7yg`keMj~; z{6?7)wz*f#`b6HsV6?1-A90d;26NDs47~g)uDibxFnu8(df-d44n}D=u@U-&L+B~G zBu!c#8{`L#y|YU`lEo*pM(Dn?v0PfmXEDAL5xV37A zu-w;vb+>qKCgg^PRSH3QR#psxK5&er$qVuuo1uZMq^ugM4Rm6S*&ueW2um_OXC%gE zZs^jL{0%FEOZuujh&AOn{WIkCJ!zZ+LB8E?L z^gObjeMDbzjm+`&Skjxh6HRbizD?gahcYCB2faT!1NY3DwxBu5mG{AY+-}Yoc8R>8 zuAMNb4zs)?h&-y~>)mJ5CTigw*?280dz1NWuN-c>+bTo#+#QhWZfr*7z!;2%9e2O~ z#cldsNo%Z>T&U;Su4)eKS1&r9Z=7LSR4`Yc*bl4!bkC@b3jKJxWO-(g-qwV$hAL$A 
zXq*s&@8aYz@@!skQqEpXWYnrFSyp}zv@ixJqX!i_Fx~%+d3u_Z|55jB5F6#3yM-5( z2lOYeqCc`>PZMp1`m~oGv2Hl^(f{P9KK#)SjXd=yH%-p(A07R~FZ>rDA3ZX;x-`1H zG_$zxqkrb7KJ@25d}ZswUw!JQe(Z*|@v-^Ik8a76|H0i4J@x!^OM7PKCl@F8eR^VQ z@}F1YL7e4lx{cDpG|IeRW z{>qQ{xmlkV{;w^+_SA3w|Iu;>`xi92_(wnYD^LCD3~FuP=-#FIiP5FyeWRnF{P-`9 z_NKpNMyEdX)PMSuZuv2)y#MPT`EwsR`Js>G-%ow`sXzOZ?q1nHwzzNR$mGQ5PX4+| z%;vpFoOc?$A36DQB*6KAJ;7orc9QpaAt%(^4=1ms5{zy# zU(Z|6*uzKpp(QZkZtl1{@|Pp=Ytg4$Bfpe>to)0I)LQ>Hk z&6GLh-CWLWttOH`=m%|R6dhn08(-28SO_U$DA^Vns*a;w-c41T;{-n6$~lp2z8l?! znfhR5?bVb&g$&x4|0`0ls(cu@#}AQFX&lxQ8X$e6GO=3oevJmY+D&_{-}#G|(5x>n zRxYPmQPuV|c~9O_`stjA^uwIhaXAs?-u(6T&~-L+S8Xpty%x^S2JgIe^eg%2w1k=D zwzsC`**rC#ofB}=p3^*b=Q8Sr^!iFh z^iH_>C_gV`wDvOdxYXV(XQkkuqz!TBw=$B8k-@#0{oNq8(;9ZC^@&jAR-W*Viwk+P z3F%T9vLkc2lql>;X7oGZ^uHPT>$!eEPskDC+~v&c+qv`h$j|4VyW!EhdE#)M_@`;{ zAEnJtr^PdA!}`+mp}9=_yLtLZbnju>dXVwG96E?Hujltv{@Rz!%2GylX5>%jNuB^r z`ir59+>W^1sS?xS(v^{)$z4BQd|I9#`9gXh&-kWt=f917e=Ti% zFWj@+jBk(X@;Gm0WbdV}!pY39ga@aFd%S(rx!QA~>P$X;HS@LRL}z3sn>BhqGd+^g zKFR!lCq4dr#(FgE*gLI$(0Iu?Z&x19xv=~4>BaEwK&W>&{o7Bym^%*UJ1Yb*pm{4^ z$auJ`N=BWsDv+tnlK+1+cOMPqZicpMZhtR#S=kzo2JFZv-%GD*_a0?jyVLu{;HLbc zGXq}A&s1joO#V6)eNsPgG&FlB?K~Q0w^`N_xK>efa&f!J=;pZ|J9% zyI22O06-yUrdcOPoJ&Z zoOWo`%E(CWmSxrhYx-ZjBG&*mOHy_{Lozeo9W zG1@hm&#G>`S!pJ^F+1`yp}>$dGrg>z?IMgzi>$1wez4B((9ve zxBT@+T75Q| zJ-gDKqx@c;g89=K<8Q~K_-xwbkA5}3@8z4fqaRKmni=`qX?atk^o7vE-b3drsG+-; z{^_2#U4JoG-_Iww`)WpUH{)E+6&7MVv$D(KO2(?jZ!AAD^fyA882x^^-#vGj|7?TXlZ-t_J z)83izUfw~BYR|3+K+cwCMG5*_uCeAOP%gm32H!G0^k?V`m(W@DcQ*9oGUiUMW^&rIG3?Fxgzq`_cQ@hV) z+&??WmN$<=2vWHoc@aIcEHWc1)=}@v`Ms7AObyb?I}=Ys!CSe<{$Kl^}%q-W}+r#lFAi*vQB7=~Q@jCDf3EITwFz zPo8@>G=luM((2v({;$H9-^yI}Mbkc?`<@Q3F2ssl3WwCx{xHw{QSkqTNUv4AxAOaD zaDQ(`^B_;%i)0>%W<$@VaN5Z;PPRFnu|3SQ@?x`j>ZgO6XL8@)j&(Z~-XG4mWuY#I zqfe(58IMcR37KQ{AhV$fw1Jb_R4u8-i=4igS?|nTPvw3w!dG(dDOz${Fp0o5A*k%`NV^B?iue>9{)*xKNDKK9{Qio z%Yf@ur|Aq9nlwSB^>b!R3 z+UIkRcX``i_K?Ux{X03ptci^YQt{aPI2)Sjg3k{7O)#S$#Gi1-}{!|97F=tvr7> 
zRCG@3)6p{8>pWw%-*2YnUyr3^-MlH~EBX6We8i=U>1v{~?`GciQ+xM>jLTwZV#frz zujH~|!9ZOZfC&F?$82ai;%IsZTgnWV9@+jJ|z2PB=7Mr}a4xcRY8`=j!Po=(hB6KY!oN->jB3etySv=yV~L+^Wj; zcrnWv*Rf=Hys7(qJb-JVr`im=t1qQr`&7L7fTZz>Z{^x+@xx>!`CB{l_jo={=Kg!R z`um}fH67;=yqVEVq@}&#H5Bs3^si)ecmRivMZ13^&uz-{w<3Y+sb(`mnHp>3H`3zu zjCvunkQ>zEfwbpIW1ma^R>AiTXuK8;QO#jz{?no9gGk8T*!`~$R@yo{AM%lP%*jxXq}n=fV7&!!(EkbitSy^3Ndqb&>RnFo6gRU;6g@c(Vzs4yzY@PVU z+U6P?+Jfl~8`QPQ){_o@7OlAEY9a z^llGl_&6Kyqgc%R79*opxF$0KIJ1x^j6jZg-!KZY`}28zF|si}NG(Khp3qvPa?5}l zqF46iWb{ML;AHw_twq|$GfP(5Dwq{g$OxOey2YMY$?EEBcjU;8$oYtF+%H;?WxkQW zAv}HPIy&#c->QcA5>^$|u(XQCdhJ~PMo%`wx;ig~KWzohnhC6?&sN&H9$-BJhRd$H zekPyJ=BMkqdNeMhv-SXeHmYnm%J#e&IiLU zS%*WRGT+P&F6+)tdT~<2+2EZwCO*vfd!rY$%=^vwe&#imk-&EqNHUAwAlQ|37;FEL zigmuF)kbSmu>ExAwUV~1Gs)b_G8_#Z_^|w8h$xpQ*J?$lRZ^Z{p^Xv$&6`B34U(1E zm%dQdDvP;TUtP*GGU_Y@AJ)39)uNgFWd}NMw6zbac2$iY4KAQiS9$!6&+3V9hKl?Z z^Mm%sqrvLtn$2is5vp2)wKImN%o9W_+Cb0QZ7s|2pB(u!p|0v6*}6YS?;nH$cQZCC z>@Q_>TXO#&4%~FsIO*J_XekFIZUT>w9PeliKxaShtiD<3C|90^1 z6B*41=}8?=_rty)p7ZGbW^~c|+hS;TCB4cuT};n7emNPwtC_>5jKLZ6FN8iP)7JNM z%}z~ugOBE}$|+tt9mF|2A?`8|sNdnd@dH7z-@^lH#wq-$5ul~{#e4g$+;-p-TOg5;iF zPaWV}X+xHBUq(f;e>>Lx`OwPB`k7!Vi~J-b-xXiu#XRGU=1z${5L)ieILX7IL>j0q zw>}$6iIlwQTcl-;!*$umoAG?*;pFDMx#!r(U&)x?%d_V)S1Ysk^Xy`5>aC3Lsn8iV z|3N;BY`rs)5Am&V?8U)KUr%qM$=i{bx6<0rg*FGH>3=Dd-IMXFW!)9*he?O>+^%S( z9b%6&e<$|Ngi11iUyL0<;mw23BzOELGyi*e-uWa4<5|9vF&U-m-_3~^AoaC;zd3lb z7S8NR3;!Y`Ih;HHaYjNab`CAG=(|F3C+1!X#SUcEu^#6{=<<8{{Kee29_l}rKEId0 zSwd@Vto<92!E+gdNcU)Dfu|{gV!@&6foKQcX)>H1%biY_li9Yjwb_*JS9+2W*psxJ zxwBf=lci!MC(<_yA{ylTosC3z2cKM<_kf9Sv;Q=zk(g4sN=nE#!5b1YZ9t79RU zusM<;*5&bgv$O29_jEc#S8rmD7xUdjKFf)#;FoV-&i|vi=A6dq^mQdq?9G$nE0V{0 z!)O-A$u&;!ogR9b%6*e*aW4N)=eL~7M84mh>nlM(Eg#CyOy;wcKG*WexjD1>SsU7N zM$~vl=035eRayDZ`Lrod$~P0$Pvsbqlv zMMl0i-yF#m)N)$T^e~^rw7PX@$K89<a!6(*c9VU<1C-O6lSHTUEIDz4 zqM1z&G@Q$*CiCr7?lNOul2zErT-ly`CqpMZ_iniPe745DKX;N#92?8CDp`AarI&LMQ0hLpRcCLC-xu=Rs)adQ zT`Z^R)ci0bx-^zCkLEA`Pv)Ps%lXV=C7&UvifTNSdp(@HCWbL>&G+MZ)(F(4%%@Kh 
zP6n)OtPJBpQ95BBj>6*>GViH=uD>I@&&ZTqW0(xr$kDPjjMzWjqcOg2tH&U=W-%RAm zSAy6R18T3vLzoY(p35D(@*7{b<_f8rjx23Y3#-X*puYNn-Fb=zuBLaRw#K_HW7v_a zd(yf*FRYtQ`<2|?`NT_vQ7`8De4arIanV|AzrJXKItZs_@-Ea=^IM%g+w3h|SkBns zz*10)lpF0*KDmOT@C-tZ=ig{7Pc?Qvz4U!7H***5*_+lOl~dPe@|#w}go(^#AwH-IA#JcqKS$wRb7s!*y@J@y{EJ{o6P6 zF`K#1r9YY|C$*CHH)LLP4YP~{hVrywm298CoiVaE_w(-eq+b+yCgT^87KP|G%w`wJ zOh>RiG;m@gWH4Io(k#7LtD$4nMYzKfq0)G+!UWRPeC)$uB;xH9@>4!{{tf_owwkjm(!g4eSy1-fTzY@6<1)&QRr`yx{A=k~ALD1#! z2tsw9aDd0lvJ@X7DhY*5`ebR*wsuC=jTHKBJU_VZ4!Y{xG|~;dr}B@k;W|7LSr)zc zGpZBzMrTwCK8OcsmUQkjgQfmVqAKHG$hYW*r)(2+hIm_s*2nWnA2PhFk?yGDlfS?$e^-}{bd<8=l9;A8NF49rGE&s zDP!|C@wI&3nf7*NRQ`u5{3ffvTk|}0oF4k3pF0OivIRzAZB6chdC`?e>Aj3V8-+S`++5EMbo_1!=?D_Hhh5>TS^n(@W9gZgM_(u95fzRf* z(_*Y!?@I3|vX<*>85>IS{#Yqmvt^*??zCesowY-BA?KuME?5oCctX3=2Rn=oq={S` zvo!^nCm%q!Rx^KpnP6h)m8Mam~62EW{&)J^s`5S*^kJa3gNE+N(kOlK5q4GjjDdF1&*75x6 zKThVUL*a$hN#pSzd6i@^#3y(T<*j+|%Wv_bdOG^bJ6O$CcDWeK5k2B#uOvcT66uTo0{0$O6o=-B7c4j*(gvU<; z;4&JK5LO%|x97LDG#>Phw87sngV6yKcz9^CE7y&q;}i8+wS;|;ixpMBCz`Bx45_tV zl)L8Gz^VUSSM<&D+d^$2Tmr+4qQX#tlxwhvURt{F<8XF4v zX^DMGG^_qCv|;JzlkZs`XyloN%+HGOwqXB4Muo$8JD%~;33`dg=Fc;MYUCUj_hz)b z$M(uU@jOd}u6Aj&0WxjkZF#$Z;o$~yLzp&#;Q#4Ou0c}{O+W_{o|$(P;6BO}|HIgl{XAf$nrFXwKS z7t+@ks2^6ZpAY3tonOdz>?suIxh;i%bQM1M2F39~-%A-APK~7p7L7%RrrsP(v;6Hj zvQLxc#z?#vfp5&SqUo#oYb^cqI7qZHi*xUCaku83m=#dL<-qBbLPp(=|Lp&Wsc%= zz6>h=YUV1=Cm-z5E4d~@gllt=BzJGlCmBw0SfezOPNHS(*uIxz3s&=)r%^4K9Z(E9 zz#@G?M6|5;zz;V|mWR|p6Zxu5xz9-l+TqiX999ACoEJ$ijFZUpw&H{*GlOrwJlHUorvXv6!W*`gV>q2vk{ zX;JR4{s%te6nsJ#v%<;pQDs$8jbx#LZ}e&u^~P|F*1-U>Y)<~o#%|Dp=hC+tANfie zU#U75E*dW@D)Z=n8jhZ#=GBauu8_FKKu*R(0Z3$xQk97l$gHQ0<*xc5=;{tFi1tX+ z=hHHDfSlSKP20w2rexabLcWK^bVVD*1=kMeXC`-iD%THZEIgC?(8gL{0Iz1#uF9Hv zsP!>b;GiFxY|FReFZA1%URFaF7=)s&=iIOob`X&#@<(5wD1V0^!Wubeg+B6<>El>N z1;K6w0cgy0s3OjLIqkfWUUua!J~H~Q<=2#L47n|imwnGdb29#FNX!S$c~Y+F#cjg z3auIZtp7_fr=ty_xGQ-d2 z(^{e{m`_jZ;j%^M2hC}$tk0&Q2N*RQ4vVtrHYBR&I~_lwa%JXxR$Bj3DB_(os6CoC 
zwS`yh$*zO>=m})w6U#;mK@GR8tj-Ox7AGcwD!`P?LO+Lzr*%7vdF1?f%>>`9!ujOj9W_&-|tXCRk^v+n~Gs_^cnQS3DL9&d4^x~q? z>RoJ52{C&9G%BiCkcnca%mW@<`Dh#|ug@CS4;2fsDG)$RByuf3^`=#FLOzb>)8~h| z*XLdxv|9X4|IGlOVHWD@+b+`kX=6|7KA;+nl^cPZWR$#E3lj<3f#kX$78N?zCqPqm zEX}$YI~kNU+@Cf+ohOW!)z%(4(1(~&d}bC_mulgR+u}F7nYNK_s@2Y#JKUta_}d_i}z}tH*|xWl7bDS8L_<)SbWx zZ9`95t6IujDi8Gs8@JLVJ}Iu&I=ibV*lZuKQKTrl0`W^{agn&+Uq!^uNp&LJ!$Daz zGskiK;Ag=UQbb0IbZia(49e7ES*O08zxigKVypOV^;lp8xng;(K+?LlUNo%Kl2#{5 zS$S$asA5u^v<80DqDCXog2%$I5fj2>EBpAvw#ZWzb5I7Fs9)rZ$=ztL@c~-KggPb>OVXZ+BJ( zp9z)6qaU(@ERj79Ea|R{)AO>HD6U@~q#23n7E+xgX0W=7Yob3bkOq?KtfSTVBx*hQ zU^GTFhm%F&g;+bh_(VL$`UbKGDj;|h{7UkO@h-}g`J}qy`4dBtb()+mfH6 zrMcK6st4bYw5~PFyNLy~qefFkft7#(8@`)2VAC-ig#32#(p=-jPrjBmr}7&Q;g`G# z{?f6Tj8AK97Eb|9w&jY-SQd&VG-nI5AeMh5y#9Ez7BGpf&;eXzwb7-PhV`n&r4Rf% z_lgBb|E{#eKJXD5+v2H=2buO=<3?7oF-?4zl#kQ%$$f(1q}H5B2x{LOMh!v7BE7J^_K5`ZUd&ZIP6BBfOW{fU zAW20m76#S%=cJU6Qf#lKawkdPWx-k_K)o09yDO^jhV+WOSWj{!Hj^RV@UN^BHV4Hj zH`dVBj`V8>>GSziq~p6lZKHcR-)zra^xG&Z+q^8_s&G+NBFBunuoFfYD-JjBBXjJt zJa47VOn6S@ioKKn70EQO$)lG=7*E^irDa;ZB|p|PNrV+hb*JcTY<}Ys+e(9tmNf7% z$iCT#2aTF9;cnOQ0LIWY)IdMT{)Isrt2d}Y(qM=G-DOnu1w;y+J6>3YgcoM)DKpmR zTu_odc_#ZU>|i%8RtLweE8@EmkX_k&T4fapN{O#gm{iyUX_h_T%$Y3ba$Q>@TGDBs z4lQ@Tte5k3B4}e_L7Fvg{uZTJ7ydm=wp-RtbeaV{#31BZ-jaNd4l?6T7IJs` zB@tgpE4+O2#53m0uqg*}rFqo8Rad0W8e1vj(^`X6#YOk6Mbb^uEUQ177GzPyA*>06 zLZ9j=jzLD1*;bS%BgN`Ztre*cLScJ2WHV)t&;FZK%;DNZjdE3)WN3)1lN-+XKM& zf_d=XJb8Z$x$n7*?wLsHL^wL0`JlR9)R@Rt;u31qUnY6W0|!Qf80+EIXt3zTJgGJB zDMVqmkZmq$!qwD>}NX|)kg<;wQ7Hx`=Msi+2Nds-qrunn52k%He= zROL2g&iNNcB;QSvx95{ORQW;_QY%Pe(VkrK=1?4(vdMVK7DEXcCEtpRRhGdAx)0Im z_s+D;kDE=s^3Hq@d$cNorO9U0IFk>-qi$>P2^Co2<9VJ3!Bc2d-?biSry2o1ia8r! 
zluD4Pwz|1SvoR9rrg!J8Kq+;wB-A&JLP$M*=Vh{vyajc1aEp$0ygUFXjAH1tBlmaa zX3wS>UG=F^o?1hAF7M-aDJVn0@~LUzv6>OIRloOQct&Qoq%YFNBd2-xEx}lL1tnwv z_2*v*&a(tCS6-B;xT7_BFj_?Y|1q#W=zx3MtNSN z$1gsS$|C2evXV!`o6qEXaZB^bIBR@Rgy-xH@Z`%(8nvrhuufX5NP22Td4#k<{=(`+ zy`bU`>xK^eBAy>9Guq}eD=F1Y)YKpEiF`K$0?)0-tVI&G=MGs@qwDNx8~S%`VKPW- z9PBv$qXY^VzghA&c)%n?oFdjlL;Iodh|dI>%$rw3N?Q#BFX;&RYdsi`kEL=yJme+m zsg_dD5Xx_IfFJA?PO|T!7C&0`d|Mxlj^hV!K&`1+ivwl2%#s9MOEPuFO z9&ZJ$^@n4A!~I%T9EJ2+>z-)#&unP7es}`(e`EE8lr>@`BdFN+p@%v1XYs)7<=Iut zz^lg0=3Nw7{07L>lq$k8A3p><|SyNn5C zjczi1Ytua88qH=M%nKqk0^#jzM}N3T;$&cGR96L^qR$VnU(k3@oM+XV6g6fs+Qyom zQ$s0RPli|ll}9R~U?IN|4v=>qMbVbE;sw4Ct*l1t5kL6njb6=!zW^Ee$wnzlc{n#`tZir-Eq zUXAw93;ZKRqDme+P0^nST8z|+0amFkU+%0GW65nL%Q&7l&uZGuGwM z)d{5N4XLd=yqx~|dvdBM!*ip3Vk7_1iQkI0bbABawV@~L5yaC=*+656 zsXsoTDlWOydMV;PnvJ65i&XG3RX*@fH|O~i@j=8rPNuToOJwZ1mxtX1J7X_+kWL|m z(yZg2^k5%=7|q^i{)3nS@>O=kPU_!efL_Qm`h7lE_GC`9-41BdKA#!#w?3Cj+p8Jb z_Kb;s+0|p+va6WTn)kFTScZ@CoMaHzL16y8n2}GqDeYS^M^E~?lG!yP+L1n?6fT#S z<^%6`r$wA(1MH??<8EgJ-jL3JlJ{1jZG14<%Cd3rOz1!Fg;%+hQvj_IeIcXe$J&J= zziEZVykEtE3caN7wyM$2gy)<>^jNkbJPYqdvh!!o=I!8RBp0)f-7l> zC3Pajp^SDudzSgU+tat1iWJB=>pz#6{1-!^lle4}@!rdGC`)E&fluU{K9CmwL|mRm zLz3+}+m63-sU))2o8EO**BPDsB7BGFT>+~%$AZv?$;^jlXqzQye{-Rgy1dpW&{Lxp z*|PR%PI~xz##r4UP3G8mmOK z5$Gx^v}IJ~7g^6`KU7lEc6LoP?|ZXu#V#AghNH5cOB1$VKiVw~$Ui>u7HKi`FmDt` zBmc|ztBq-8f)ff*oi;#g@>~xKUKo9X zQmvyF1F-$P4&%WWYf=aE*+|XSTFAyMJxNXuMfZ4(?BHZ(LAt*Xi50QQyBK3L?s8kQ zS@c01(aKEN#-@=(D=mVIu;b6pJF6F zMkPwNt7tV3FKP zdt;&NhmoPpnZf4twlf@{%jB&2vD)9}>O}E4ZTcGJle*buqxE!M8!5qA3fDm(Y#>Lw~ec zFQY4nDni7*`lN4gTg?$n;%(R`!=9pj(W|>TyqOgt{nEu^qo`wN)nQiAx7Ehx)F@j}oeSMCcGe=EnM^ECY{Qe!TlA%#uGW z!^ZQ&M||c7kxc#^?^x`H%Cv}wPu8I=Y_u=Rsl!%_A(1_BYz^FEztD>psWsHXg}Hbm z;^&X0HF`0g&)V#IFw};p=&hEHeuyoN2p5eQzvw_S{Csg(p%?l*8QnUVamiP9BFFaGu6-mvi+6?Ppzi19;aFCAhNPK2tRkFChD>}x8S|poZxC;(!AO42y ztcUN&-&p!@t^|E~Wk%Rco3HR-r_Tbeu%{WLFk!G5e0;bXs7mAt|I8 zk2YzWoZyok4zI*_-W40hH>wuc7jD12h|W5~Y*x1~riVAfX?o07KRfiQ1^6avcs6&4 
z{k;8`Kf0Rx=?kB}c6%=0p*#NiMh=#inRlZ-@^c|0G!B^}80Jh*wz4(lU1dC*Z%CWm zCn=^sEGX)shkUGS|2J!Q`s3$%-UWPuDp3gpK`Jk(XlaT-YA8ypHX+TPljbC6vpwTw zypQ+Y9v`pc@r-Bj%y?{%?Q?7&J7-Bw5}M@Xge=|ZLMv2Cg%F?u5>f@K5HFAr;ve8e z`F@^neobHWB^u4lZ+V{kx$kTFUf=7w@8^DGh+6mhT*fKe!4!JGiRw6>kD-avEOK}3 zl}#7{TM;E%DG{-t4#~04=7hBzxtmo#WbHWhF8tcHJ32AFW2ZGekW1-{yV9{mjOn?I z6%uq^8aBp{XiBw2yuFfrVb!0S>#$?9Z!B6y+0~(T&iQP5O&;oN5Zk(TGb`k#mn<5h zkULMKSI9#XaL-tKXN*3=WHAFP##G5%uUP5b^tVjFIjszM#m3-{p4-b&hRB+IqOZ=u zQ(yuQm>B+R**L5qvU5H^rajsybEk3FZXaDm#3ge*G{eOT9M!Df%ul5#qP6~@T^8jS z+!%@&FYkqMEK@(RIhIrYp>I5kT*-p%V-UvA-^^p06hOl~LQ@L*owJ5A-G2d9LY+S7-xlf60wmxNqdJcb} z8O!3uu)k|VwTvC|$?|GFK9QY!TFBGHD2;# zGT?>KkLIxf@zDGvl)(>N!(W!*Q?_A8Cov3?v%F?O5SL!91#x!g>WtA0gjH@hL+8k# zGWn`Y!4$s3YMsrxAc_B^A(NSRJ8yCFhi{Z6EqGT35mLihg9yLRkT1)ob>r z@?`$0a~d#`-I>#-J-G;bQfs0^ksW*Stb@PSf3F(Z$Eh(F>7iR*18 z#lrdHt>OEV={bFqvR^uqUBW&+bl)-Sa~%jtCeWwMj+L`>@fUmCmuO;+?Wc0g&Gd>) zp_{rRPlIR1=$UIjmbOo%j!{2g?<}6&p#T2((4DM&V!4>0T@J-yJJzVksbb12S-bPt zhdW=b(-n&=X9&t3mvaQZUd*OVy-+cLyLchY=2;k}(ZM_QBiV^uraJ=Aw!Bw%R(>aI zt;cvmj(HTbK)cg69ttCF4qmAZHe=5RxveTrv0!(;SxEjubNYc%-EXoxR{F{K(&WG|Mhq8s+0q0*`5pr@I0`B`oh!m<^1a$~5B*dGFqSMi?7d3K~xi{N|3EjlG%(zfz}Z9+S? 
z&c4j$@IYCqwHxypsWsWED!T_6&F4yUVMVPv^25tnOG6LPAM=5-swL(hwL6}EZ%x%& zEKmU(VVhMgW)x4Q(*C&||C8B)V|x%#&D-wV_TBqTu5PbC)g`tv5o{Sx8=sF9;iWnC z$ylA;3{OPvlNs^uoMSHCJvr=NVxEAdc1E4O&?-a`(`sLL zqNyv{llM=nZgr*2{aWrVx#w1zRXHV$F?(o~5#s^R3_RnoDEB zMa`SEUR%DeHB}ueJd2ceF(xe<+!c-u-+v)z$|1?gI9dADw1au!MxMuZC)c(|wVfcX z@l(01Z^@f@DBr@{PNWy^(z0657+n{>81U~Hv4k`^rXEkFS^=K;W|qk7rJt4El+Ja!M_UU(couIUEd?8QSs1=opu|toXxPMFjQ&0Yn0_5cf%{ zdpnSiOb&xw%}%WDm*TgwV|v(=W1)`T!c=uKo=VH&JY3aY*KqL+dZa&BgIHpwS$h1S z{LL)vfxN~mcoUmf|Nd`-bjNcIYiw2eJtZ8v%oY6L3(XMpq}M)?EL&tY z+igX(ReIagW7e!!;^(QP+J?Yy%NnYCa5A|t7YPnfKGUq6@E)4@Z?N~&ZF zajpBJr%vLPwCod4Ref15Txo7VTl8|b(Z5jvY&*WbAu zGcnYdJr@wOz&6;8QB=y~!>D%;ZSvGs2U`_$OtWg4FlO8ehYZH-vz?tF4eXOKx2nJ% zjLTXuxhE`B4TE?h4X&!M_=?u8l*Jl~t5$(&iMH7QJJlk~Z8oT4eku%U%g}CTd}(<$ z-d&DPt}FqQZ#9Q4V4UZI8+t2K#n0rvA|VNz)w3GdE^4mcnmMHB<=7;fwl9JG!AL?z zT5Q0;Wmv5+t9e2Xvl2d&9lL}7SgyXF*Q0qF^4$7;8Il-l5i!b}$%G7_jY{MKARGk zRUfP*?#S^6-xx&GfZafF1Bb zmI+f9awRSG&H^?-+bl*l#V14t(H<(wn4y_Ii&l_9T&GDoe^Cbb0E2cn1pW_;i*_R$*Bow zpZ$An<`ll04dn06;5TH`nMqX+NU46>*e%CveU+a%n#9PdnV@LW2qGGr(dPAHWLXd9 z0_$LMxu<%$UNnDXpU$^#nYD|A_|JNJ#m9K5%)YrKe`0m`m$*(w&3oVmZ0HPsBdX}2 z4U(-N@kv)Slc6P1%_pqbnhry z9V^fe&xo)KDqB<|b$9pa_?6vFTJgXRTJxhhE!s5#2X3XBhpDWQ@keu}d$`P&k&xUB zLa{rTV+LQuGI|`U+H(z8|gjTk|PEW&zn_EWsEHV1F)S_&v?PYf9) zdCIlQy=W2>fgDEUX=0w`1lQ!sBugqr25IR|jl>#Uw%Y6-ck6TP+#jQls?X%)cvgHo z%s{rY9Pa1t9Mwl&p!$dNvH|vB2Z=r{!ww+4UyNky_5+pYvjFpI+IB~$*4>FT%BAf0 zL>uf8ALOZI;(JjLvayOXR8ii2IEM%MiPR7rpApIZrGIQ$d}d>E$*yf^?oiIj|3!*s zA#AI>O{8TZ_U);D*c45&Uh9S6U=ar9!$mb%jOc1)bvUff^|s}6_v+vnWNZx=4o8ZZ zy{nM4QNdL5=s3+azGTEn zbq1g1Iq<#qX@1Y$JnK0Jm$uuTyr1sa5Ol+$yXaRY(>xVlr@!I6>;uTeHK z76qHF9LxTy_70gD&V)u*kFYP=a9^03$W;8cnLmvgP496k6NWcdutvIlU&=n0UW6O6rQjQJ(bI2I$DSF zR+HffSRjUeBUkdh+7fHVR^5ZRGp{ghvhcGf5kbT-GHlFF_ubhIg0X3KA`jMg5|YtE zSaxrZBWGIoq!QygDjwoDKj57>48(yqG9G!GXB=7AvwdJb>WM5ALaHmMyp}b(|4Pom zn*D_!N784OZ=I}Ow34HyYK9(K;!(V_jGa$fHMujtWQSE-XDHf2JrWWj?T6;6F`hYq 
zhpH}NRidv|YOr|A(C%`^rQU(5zzBC-)3;nv<&&MLNvKt}Zl{ktS)btKY_vL;9JV_M#>Msw-qFxEiOcB4;=UY7hFy~|nNOaLg_99(MsjwZ zwhj?yRwL@X^60WMy%QrE?_kO4XbLVY<%r%9*pV3_bT(EkTPylZj==%!ugBKqQY=%i z#ZSHueO;NHCv)t0j^(3z;A-YZWRBG)2_8#ZJ97rB#l6^--oisMQ6}TF&cj1udjhEr zL36BA6vYL_Ke}cS^yU6TF@QE&<meR=zH}ck7M8Z)co8X$#S#i1fMtfbLR`P{Y0 zhUPFy>|PcOMQMpvc!M6Q|6@h^38%v*F6e?#SdDRRvO6njh`L%mDsX~+Jl|(F8)CbaQ zc3F(+JS3DY3!xJ>tlCTxWK`Zj_V_)|RP%N<_rbt_y@xT4^%zXLUxVI*gsD zd+V$GRz2UG3)?ItG>zGH>{fVvQP4&QFcI%dPjFYD~D^gL?d{GJB^?zPt}U`)W#$V_v$PKHzR#f ztTW~OLqun3WK@=}=lop!hZdE6QAr()4H;wCIql6+{L8G4-jXpdCkN5LQCY7@wfF9g zWXonpEAYi(c+k4K`%}o2-`1`nx3;y!!o^Ko9LHlwVYP6*XCuSm3(fcGgqK#zjaYoCwMzCm(kA315mgw7!4pV@ZH)4m zdawO0cjRbmL(MzLAn;08Ze72Y$*byPYC$4JN7|@N{+Nl+NB&#y=EKHKH&&;KJFQSV zvTK)$@J8Cm!xBl!f0FWAT#~JR@sR7FfuwlA? zDR(N7Bq>3DQK2iFjJYfHU(P&ovr`e-Tnx|9&*oWHZsBRg9Vl8Im|N$~G7~eWX2?Yo zdf@||kFMRr0DVv=@603{R#(HQFatalv+>#YazzZZSp~N1eg-xTj~ky?(;nH;I9r!# z)Ouht3*j43(5#wsc#<9#Vxhs7EH`ftMK-Y&73$ zbe&h^QR*=4V{f!5x4?g`6i{hn8|2(8(7WTiC&GCCw<^FIFm-#8+q+ptgM*7z*VCf) z&!RLu>nbAgA3NbI(AEx_)iF_HqiBVk954-alf`^r--Y+0?4AK5_yFrt z%U~69GkPOWRXkZk(SohX`&dCe5mLY)9su*OghmYd;is&H)}gVZRh3~ttNiLO^;Z^S zHG-V1Pt)ldFV8~6;K@Q&{%)#JutdHj`qH=hhjy?pQDQ!f$X%?mrF|L6xG&ftah_RsWRpxf(=foqA(zWN6ef3TRbEMFx&04d|y& z?%+}ZG4n$wFcg}RrI=|(95R}df~KyZkAC4HZx?ylGwbO(f!)b1WSC|i^_M-vELfx$ zyjRVnSqHYEjhW2ppNaL#1MmeFA`_Hhz#dXpQ88-L)~3;@r$9{z;WHePb;2x?#F6kJ z`sj)z(s#FA??|I>wqX^ek&B$47?d*hw-x2U=U{v_pxo`8v6dt4&qo68wiWec z@i>f1FuP-8aFInqE;CakXRgihYAZM;lr~#UW8|%x(rl<%T3cx>Vl6xaM}lLb17-~i zaV~ z%BE$B>|1n`z4Kq=5)aL@@-6sGF5VfBsL`4qB$BCF7ih2fwS4Y;wu~RNim6?9HmY2z z(OYNb{yupGPhsuz`TXh-+5yby1B>pWKh;|WPnw$6jy@ordw+0zJwBBy#4R5VNJD%MZXg;Pd9At?(fO9EP>9PE`i;%PaY|HsxFu zLYnGY#QFzpsa>#6=U1bo1O;55?>W!7`H`OaygtPec}{VPUQ`3wPFW^7!Zz|Rs_9Gb zydia0vCgI$S`kBRlu?tS^)S%I%z&r6znqmK?md~$Ur_;O@NKQ**Q|#vJEK{d9*ZGH zKr%*(h0~{KFdA9;11n|my-%Z9TYRP^zjOmnSyiptYZ?~I#m(xU=2$$5*khR#?AJ=4 z{nZor;+`c{tmpH$c3~{;gzd~6riw8@LicO&Z#Zj&WCbhesPos*spDtO@Wo13^All#bUjFNm{1RiJYnK6@Z>$}=+O-Uu+v)V#vl z34Kt})@%0DS|c8` 
z5!l_q$k{wNY1Ml@^GUoc`e52cC|1v|{9Z|xri#V0S(aD;foYNjv1HP%ZPHe=5cX3! zTASBdJ*yy$3O8p%MXQdzR&1vZW7Z=qU`JL>v-uy`hx?7N9hS=~S*_N3mvt-PdJBV~ zzvrR*bRz9~G9g}y>w99mI3_oOMr=t`gDUjvNgPKqi(%ZYRM7sVj79CgRnYRx_JWn* zqijjfnzKSb7Ih})Y5!RIu{)nVmsfWZk9P-WNz6(Q)&%9O;MIaPRmMd=^vKed^LeXn zdI`zM%MXjk7epzs+FDKC#oF~(B+xf@AX}41Y6HUZ_p|Abo%z96H&l?1tBjpvhbnC-=)MSUP)w zt*0_?NLTRIzLl!mJfPJ}b_+>5LUXz*#F*K~@+rHu(n=rQl`x*O_-Ny*v(zKZG1umc zr{y@JBn@|Fl*O|^^ISYlJ7pMH@Kj`kTfs2ie_`yOL=Wea#W&A`rW?6FTTwr9%)T&W z`8NIWX_iYbVrp?hZYDDGD1MCb+9icdSw4M|ygg;idTEQ!VQ~3_Cn-R9kx>iIm*ZFo z*gRdNb}tW}KUzb;g3E`?L_}SfBA+zBW3?8}s}?RA;nwO3{0yV~WPULmdvY_hfEz5Q48MgM6TuV$n80;!U^ zR@E#>!Td9=$hX;-c6aBzji9}r`q`Wj^nMoAGMStiR~$+GkWaD&yGWUZ;Z0EKX5?@x zXfI;V%sT_M5G%Vh%w1s_wYze3F-U%X zDEEY7(7U4(b}kz*uS2?K=H#uidG^ibmu3m_Znl78SobxbR-OiM?3QuE;<6kzLc1LIc(l~IF)JqNw$k6U>mM6o8Fy_ zOrMRFdQ!9JSr*&urDZK*S0dnNT5QC>k`{TOD~Of$tSQH3J>rc%(kGO(OQN=#A*r^& zM!i@0rTHR|8Do~s->>IWRTy}!-<=8S$n7n`&YNxUliAb`PbW&-W!BmW{T3f(_0S4_ z<1_By5ue#PjbNaSMp*3ZFbdL?!IW8959L^Xr0(TfR>#fec{jenQ}qt+w9?E=w&xe3 z@Ou&vL)fTe`6d0hKHahj$W#`~E^p*p^WJQQ51q=@*n-_=T3NEXruPJ~R6G+}k_3s{ z_szRsI4Z+X#S|C%w1SRN z?|DP36*!lk^iEByUs<~Kz-kn};W0EMi?c?KrE5cVpm_oI;>np}pH+!vjd+Tx4tr#$ z^<8Y!6?h?gv~tm|P}_1OZThS3tI}g^EME_rQ_^qoPz|>}2A>+kSfFF;HLWPHWG#?B zH1jKmg)ctASMYLL^9)8TiJVACtYZme0t39)7Cz|O@}4yyB54a&yMaj{d__|njOQe~-hLHb67ExD2`pp1z9v7oD|wY$C< zNc!bTlX)!yJBOFpdkE`L$5KCYRaJ5MqO-&-t7%|2zh;ZByd&S}hnfr~3=7$$mgRK& zbF~L)L4}5{n!U~CbDzKod5ZjnAK*xOfrDPj_^q=bF}Sa`$5uxcAm^4RVjOC3*0Qh& zwQ_xgr+jxjpEk~`f|4)9Yn8a`EZn=Sc8KwjvRR8wX;;h5CADK+7P%U2@f}j?8zhi< zdrk_5uC8RXEa7mZ?#@}9sxhY-3frsYLm7D)uZ8mPk$)H`>w*#1yFnV7VO?aRLfiSF zW<&>qnrb)Wxgs9SNA;HVlA^nu$QF)i%e)}nkq%ABhvbr7KeRWk@#FE~T4bs=%5(LP zow6I1cJ*X=57d`8VC?UteMrVvU?;C*jVo#AdSa0I!RPb0&v$YLJB4A!B{G&r(F8`bJJMewonVR53O z^CmNzsbuli3geSv8H8;9+ZkeaJmHR3meM?CM?U-G-Slrh$8!wHW+AVAqkZVglHs>l zMCQ$}>9Dg$@~H!PlAw63=dDEV9s8~H)L6`k(yo7PPZA4_=V&<-{B) zfMpspBwb01v|a3A>wE-Y1pcZK#43CrLl+nmtaXFSi0=?6}85|jD6;tj=575P5qhrz4nur+8~w4*0E7K!P@t2yFicCWQ2 
zS>Jn2mRP=vvyu`dA{)MB9AZ?f2cyXAs-qos-mAbW3 zRzn^T>E!_nT9tLC>K~mp>u?lXcdU2_CG_!P(9qfX!$)|kywy8@&Fmi!Lef8!7+HYz zC3LC(`p3R~TQ7$(@UGvp95#s|J5J2+EN`nce2Hg@EaG*s$o0i;|2xKJD#q6JPOinf z^+C;wRgxNw(2uj|L(AAPpt+bzRBCm5Va>uwv-hnJGsrPMG+er^74C4Kow&h*@KIMrGKkGh<5)iU8VO{*k} zppcr(=+C_=Y!M5^m5QXKOn)lm{1TFg7_yPx2`i^I^JFBCa~+koB9y#fCD-8Ds>6C> zta<@6>eJ%3PmBaNhcEW=>N*7afF7-*HO}d;b84%Q0$%YpI_q_u!}@)~Hy{kICfDjX zVE~NAf@oST3rn4gBvb}loAaG0pzrk^Xo^#^8J-Ex;2Q6tUt?r{-aF5kyr);uXD!Jg z*kf_pIebi?oCgD(i>dH3uYBM8kKidgc4cQ8u^+AZ&o|T?I*Ub@Dp(kCYc70AMq-Y_ zPWG}2S8{beZVV%DZ!Gc&o(RucztwkFQn_Fuq#=VbTE~)d`(&K-02`b|pVa}4@z2_O zEr4|Ty&TOhBT$b|urCsZYsR5fuhjUo-JbU<^xz!hX!Zb|DtkSETmCmHR1Z9h&D6W- z)%6@R%9OOH)(wr=M91k<)?gHD$yLaj1lcPo@oO!PY!knhyLVM>+o|pxL3(^me@0$Q zmOZ<9%SdV@SqW>!F+4>Jdc^Zs3Kmp`<5)*j(`;?zA3i71%BjcEcgII&KI>J?h>@{* zSaDPx!z>7nFFNo~5-PIlBM;X$Ov3yQN3)`)oX(Ch?3?EGnkD#@cExf2B);fjqfA+u zzSUQp(|A<7g$R!BstNqAKB`xuf_||wj04`6zd&zS)+Tw?C&|}3aS_~VGFAB23FXk( zZRgE)Wkkk?-J0hjF{6RRKD8DKhlNBmBl_1mT(j53J1d7u%UzSIH{&@6H-?q?*y$j= zF>C)~5DS~D-I6VS?_?c+56CSJAX%GYD%nG zrfH0=|A}1nY<4%9+}YC`)ho4VEuvYWNGkbSKjeFnJ1P!vMYPcjl8OJ@1h_Sqmz|kM%Zey&LZ+) zEw;8%ET(NzkrkHdn?db*S32TpeIhdU#a3XEz4S049>*$QP9C^8;Dj~eq6xIcN@}0Z z(r0&rl56Ky&L>V`iq%CWc_{nQ?T>z~CKnHAl?4}{S(Dz2(rOQ_l*4;GTrGlcley}x zI>^KP)=Oa)R@vO9-Xm7?TXj1*?e(-{763jQXYIPZXqI4K?a5rP>za5qhUl7VGvr}x z>N(cLd^l}+j~7EWnQigzZ0@pH%z5s*-kIaK1qmuOJhBnRT1wGX7Q*Lv_zOyiuXl&>rc$SxeLT-*_^c`>KI1TTo04v!*YGKJ^KN! 
z=FC1*f(4r;P=zpi*I{!| zY;$BbopW8GBl6eW7tX#rdSrL9AGJj3FHV^hE#*~ecTzjrU-YDBAJHs9`5267HurCpa)XW-NtH@U5 zCt|WXy=yuPpAuuByNnN7+TTqD6;6^fEbS^9cj5ZB`+ks0<7Oj{_@Y9mvZ}(#V_1f3 zvmgjscH4POVL^LMO@ z zLiT-K3?AJ~?~4(>v9Fz{-ogyEYKTHkt;U$IfW2%Heu?D#q0FAAlB_kRd*X#>(gI7x z6ZwJp_4%P+)*4tvemT#-xRWzBGLz#>^N?1=shG1hOy~2FvZ&-r7!7sjNf&CppLjb@6170AQ&N*#@BEVINw~xY@Gew zmJ=)4=Tht)Dq~pIm_SKS5!xQcfyvti**&_B;1PCqGiydB@XDH7y)AbWeGiQN<9u%B z3=VG3wWi{i=8QyEyh&tnr=5NO;5HjU?ea_4B7L){ z#-SYu-DqhKBs|Ss$;YEj{M%YV>kL@9nAcXs*Hgg*^HL-PuQ6&~0A(lhy?Yvcw}zm$6!w|sDGu8%*!36(wU!5R%&8LxJvxd?U)!&HH5 zU&k`zV||PGwI}xOxjSOLd_Z=^7x^Y;Be(Yse)V%cycp{?GrB7p^z=7(4${Jztl=c9 z&AIAY*s^2RhR>lpmcY(oFkhe{IXbLm8SF#!RonN=>x`mo0%M;UV#keW!I-ZPb^*g# zFux*eTmgd97N65HE|voOwQ`tkelW%Wq(>I`k8$1Nw1

sm!x=S+$rWmSG!k&NrtcE1B!&cRtzYP5dcCc9iS#RDHnv%-7I>x}10` z?wRpWg%UgJXQCSJDvNNg_GU9KD}Y-S=LdGKd6-_@32)(-Ja9`akfobn;%Ruhm3Nqw z-s32;O1TXCGY(!srd_*OUa&3RbTHOv29KVuL?T$J9m((?GCz^nD(0`{TfG;ttWB2H zvP`qYwL`nh(uat#o?rJmT0PJ9M1N<)A%2R-Xo;1JnfhuyH)&cEN1|{ZFP7ixhx0uv zsElA𝔡xwxPGOHqya8T0y}}$y9d6du1iiyb%*GVN2FG;D%<4MJ-s;`OU83G9x8A zG?y9ARrf?kz0ZKw&5*%}?P*#3H4;}?$(2=kSRnRL4H!uo=8M836(tJd~Ho`+TQuK#JoP29hrQ5{Y+6Lbid9@URp1O;ne5_mo z7ODc21zO>9Hs^?~@&>CxMSS<6T0;gaoFQW@YlUub=)-a_xmoJqbcglw0;ueI*uZq~}(&|*H{H-~Dc z2CTuDU=vMSbKZJb-eG*5Z*9*v=X=s5G5c$= zGT-nK)?yrVr3S7Q^7IsmpUbh3p0AL`@%XGW;fzro4=ReJ;#mLlPh3JYp-ovg-huyk z%?~{dw)+xSJC15KOq+N$Tlvh`??#qJ>3&Q*p0LZZH~uG5E~Fh90^RUZR;CuqHeDT> zSp$xSp#ozASgyR@{<1L%{) zNQLBiggTXHB-kmD*NPX`=pBeJ7o%VsA1t@*tS)IB%lPl*eAhKzD+SEg!!!35iK?{D zj-eXtk?)d%r#m!?U^TF!z9_ys#{PkD4ptZ+tb=W>Kw)whWApYqQRyIMmPxXzzO4#4 z-hGLr1m~^qlS6(nS7R?Y3GbT9bF>@F`HHGoWlmGB>>8Lm>+N~~QitWRu(jf{tHqqn z2aJ{N@)tRsPxu*@_}RRsD>Bu(tXGsJDX~)J@aFIht0==UDt*;I6mB9dppx&@3;_G*^?+rR?rzAQ~xo7 z)>FumjS+*0f>&8PSxiFcY#9aaz-4*{y2vHr}n*#UGlox?3bMdhudc><3=EZ^C_ro~1&s zori>AB&lM+*Ksm@iQkZY{j_{g79*DR4g)yEe=!eML^otay5=wRQ8j)spR)n1N))y~ zfYy6woVXAB$shkP4l@m+ktzVoq6?O5tni$5!a{PhimZ`=Y}L8FbEF+MSTQNnBh)gF z3WtoZ@@I#|+}p!3Wa=!vZH!=TYMMUPn{owL;yt`f&(w*`9e7=>tAv_GP(##v`8JEH z-@!4~T00dBosF%zu4gb{j=N)@bm-14HG3nruZDShGORy&zZ`b$8s#YLbT;cVIJE0wNa3$@U!^E3ZhU%ME*42V&nr}^|mk>uU?Y#~8 z`2e|N?nO-Tbw{+OD%O~<#@oD=bsGm>=ok8m1EP?e^rf@|&2Tap4{7C?;(ec$P*%c+ zcv+E{S5-zZyt4ohjjt7PS)S{Vn4?t%tRyTG;uChRDW`JkWb9MgAP@mc=hC_4Lm#Sy+LL zf%085Uxx>+lw0xwjJIvElQMGFq&C#Nj=XSvcs{}o|IMiIEf(*7Ah>Kr6G>qhxbCIY zht(fl$!aJzK&o(r1~F z`{!$wgpsjto-6K@H?WP)x9hWL#u~09^2z~OpqT+#?3R2dDb-OCvNJQNK2Rn1ZLMLW_UApxKj8 zT%9K2EB3KIco{oo4=}{Beov%|@~yORAfpqJx~hV-U6F<8?R4s#&_~pRunYN-ZPKsU zO4H6MqOm19Fk@EVB2%(c$AZ`Ghsayb$34p$*Z?ZTEKJfAK%qjfe1vBhGTNitxubn z&p2pR3}K0`EQZ#TSy&N9Em*vIJ}o!Lab0=qr-#qAVEsRf>^!Evsi(mn42a(psq`L0 zGY*<&VMfzABYq%KySu-f&H0XT52n8930)c?{NZz|8oUu|X}9?3bH0vmkbUi^-c}UD z0oc+)#?>F5s4pz3uS6%&!IN0Z!AOJn@+pi`x$c>EFwj@gj78`P$+wS1hvJaWn$_`F 
zvDEmCi3Xs9d{GaJ>Y|61^^0!h#3W~Sn@_+^EBEY4tOa#;j8&YMVIRon&9334Oij%g zx4)Y+p#`oiQ`3qFra#rMI=-k=L?vl693qM5UtuTu>-k5t#Ov>+4WC*!BKwBiYRf9> zcCEGYRc0m!P(2_E?Lwl-yf3b)FRM$sk~_2O%OpRM@t38VJ(u;lv&<*(h}=k&Cpyy# zb|bJJZ%^_{eH5`oD>eo(dB1sIwyau{&*y)~YQ4!@+3cEokve+_-^yF-t7NZAqn^TFSWurH$dVx+7AihsNybz@gc%h1 z@Iuz!_(}uz|Is5@<5^bvP8NIesd;wo^SbID?y+RYm$S;VR03GO{8_{%aq->z=H*>; zs9pLcP1tXIl{-5$L*gn$PAs7{57-H>jSu4RF8GA0h~7mm+7X|48w?Ps@O|fyY5A(w z=~sND4Y800TP>k0Gt&z>%D#NJ^Z9Z#$#VGBQeL^AiB`l<{bN`DdUi7uY#r1}B|D>; zU7SkfGiNS>w_0I^4&BJM*;wtA2J1be8L;B8sHmrqy){B((tDa>*^P-t1a*v^%o_Lf ztrFwQ{2MRBe4Gi1#IM%aNkhb*A8f*yNJ~5B>KiBNTbtqtUdMl3p^V+0QF>i}(w1wp z7U!wz!31Y$-C6ErH;>4|)qbjLE%LUru|qrWsl{QFX4*wa+>fo31IuAo^)d(r)p1j( z>pjW%g^Z4b!{(GA4IM%nwew;mhSW^F@fVMxAw9-f^{i~39YZE-_{eT#iK4B08+K>J zs#~&3Qn&sb4v;6w$>hZ`*EM4Gm1ZZdQT?(tW2Pe(!tdAzc1E`KdvT<3x^pULQkUp6 zrko*iu_B(^-W#DcXL6eAi!ZVA8}YZnraWF&C!SkOREt`4>!YUv9((ko_Z1-#Xv z)=;(P*c{Qy!SSGoqkY0+=TcvQL{^?+rZ|kbO;+IhvWnL2c(658eEaK>qZJl3LdRwi zS-Tlhyh5g8jexnwatD0ao)feL%gtblH&`;?t9^RSqI!>TGufBYrkKOxV3YM%>__ei z+vQg<)U2KQgK?}R+CnFpCa)sNFXUBMYLP#7XzZk26ynv+FP7A*4vf8<)_3Ixx2Sg2 zCs-7#!T3cu`nOAFwJ1t?EiSPHQPmui90f+V7QUKyq*c2Y-N=u$iiGt2$w9WO`Gkcy z^3!>RS@9RqWj?R==I^x}>FTUZ8p}M&?#y-;=Sjjk38+%eD5k5Wo11`H>Ov~5G8$DMev5tE zxxv1CVkk`^5PcU*d$Irx~Ju6UE zPhMxml-Nd}IEiCf6YRxbjF)ZLIfdRtUs2>(xNR}SJhPSHJA(B*b9>Gn&$TAU{wz|m zzS><%wRv$sZ09Gi-}+?n9izo6#d3df^;U;j^-{j&b5^Zb6L&7{znH(YWZf#O#th-@ zg;X)%-1e-d(a$I8gC1*nJR>pE&Xmx@^Led8zcpjFvgLC6x|rkVhfI+icvtJ}n6I-* z03yR&8J3ySy?NjHG+8JN=BHwV+yLV2PPIx-U;e?mt@@Q0*+)f1PZY#_%&1s3R`zY> zCuEmXY)gw%X-94${$Z;0IvE+dCNH%=5!Ay#Ow8fspOZD`%vG?UwSK5(|`6u(a z5X0Q)!(5SM%qhta@MN(@O_dHhuVlA(`vY&vE*#J0Q};iZ!J%C%*?J!Y)@qE_;~E+J z?V2>Bbyq%ZJj&mL(X}XV9gbZRq3rp5&tH0%hqWCzzbuLMo2xmQnH}D%W+ToSf%+B9 zf-)1ird5sd7QRkuEEmgQuT~6@?zvo1^u*;L73AnWf|D5w?=>$b^58eTa%WyGBx0(x z;i|mXPFve@RC&EMdeEG;yTeC>qMLoO7`1sddvP8LpG$vf$n2@(&Zp1#zgYlx=dj1t zDp+OLIhg5pgucn=WdiKJJX}RYgoXHKQ`EraMp`m<98ZN+e*b#(EMt_1bxnev;FKbI zG1UwUxx=_^d8MK#dvY~C*f`3^u$igB2D*Y03xs5Ma!xtDyw0<9%>t1wjo?PcA;!{V 
z_b$1YPvuGGec(B1Vsm0&d)FCl(y7M894M||{8@$6>cj73tR!c)!%VRm1+h$=(RZr~ ztQj;)@r;k%&bf}TdQczndGUqC@fLoixBk;Ilr~Z;waMNLb;l)p^j$RqZTN{1sK~Ml z{bo0fj>hPDE}g-K&8&Ak?cu8Q48!f?Dd+X%DmI4M?~l*8%Ml}P7KLS&BhkIHcIH^4 zX%$ADt=vRpmou_*e$w9Z4&U=c{bOVJl}xyEOtebMsx(OB2^c!_x; zK8u-@73&%8(BzR|%bnp2UPbck(ebo{|MY$voYkJIa8Xt&Bh!jDT6MxI8{aWX^EKAf z;j^8oVrh1Zf`aa(lS{}P@DTS+LXDzlt7Py`#1eZ*h#W|iAK(FI1J~1zJ(OIikpIQ6JY5^pNd~ zUygfJQOCIQ`SE|%Vy}N zTphk*-(*38;x~j7k*}mL`|`?bctV~$o9H!Ajrp};gN>v!eV z<@g(KBQ1OEipraFESXfMn9*9!z(rU&J;7H;7`GgNzpRF_nVV(7+Gj&@0CPpEWum{8 z#IV9|SFp>^TC>n|tz$)AkvbGmt(_aj$r|t!OuJ~ugItMUvJ|WJ%7*;y%1N4XRlQ=p zWg#R+m+s%x<7ygH70Ei|%?IVpGK1oust#*Kd$Od6KpA{9Vk(6|g@{md4%pVW;2`X1nvQvwX5i zk<;;AH2@LHu8SL1jfsM6%80Q$^<+Jw6ZtOv>OUQKEpgYTsw|S2%+PV7P5EzEw~BEj z3E8!bAwXYs1=)xznipax>f4Js{$eb(>nioyE^TUS&M!+LC3x2H0 zc~O^5sd%uY+EeSu@;%iwXj^|ZQ)pE|xux;!8Ek-U>ZOrFI=d3lCC`QaVn41Rin}^b zWL0nzI*^K0?rfX9_>l~}Rza%=23Z=5y9N0sX5bz?&pYKyW%HzpE6Wf_fS(yH-yHe= zOKIKRk$A~+zK1EUTxNkUn2%va(2g$DXo{>V0*=%sd(#Uo!_V0q%WhR?e4=896&a(r z#Gl>M>KmwrwL(g3+UZU8z(`m{?+jt*vI=Lm_xK&Tu>qXqUZU)5syB8-fxV5j5JsfX zDu3WPcn4l4Ho+?`_HMiC+;Lj2Hy05}x9dw-l-}z*Evow3s~L}ArDP&gXuiV6;3Lgi zN6ky>$#P}+1U@59k-4+Tve5?`^9pCtj4@%RYD9dMXK9t4sKIG@6lKgS;gdxHvxV-x z(-*Z0*ANrjflkIS7&$Hyp;GwSGUuS&oNk@W9;O~n~#OZ znQ6y&LbzGG_;Vbnbd%9RPwW%qvwJh8~-LLz#i9hl$Gh+~e z&R{zZ00D6yR?oU2sf-iKACBDA<6%2qDi>!H;#_l2l6B=GFF*6l5Wd9!)rYhwtL>a& zSp}b(%P~B$&x12#%O2#r{9-a~c$SFT8mu$lq@p&FqjqQ9@;AK@JIGQ##&&qNxfgMw zb2YrD%$g*-^1L2y?BZhQ89cp%2Cu{t%+2l0RXX3JMQzDn>4f~S>FJ?=5KC6UADVHi zqtR|zylMg&%gZp^uorhQzo=U9I~9slgZ+AaF|}~ zC+_}y(k?>F>UnAJC==_U9p>#mMGV?HLG>YP`QR!%;YF2)9CBCc@OD;kr->tYl>M+( z_pHvG&)ne3*x$^EHgX=%CRxnGm2m-h!8SZK%!)optBOhL*1Q)_;De)BH4d*b2n&i= zEJki8{=55A79pClX6?YM6N7isR<+c*t^M)JEji+9e%+(AGvCq(Okf3FnN``^<3O+3 z$Q$`TH}=0XW|d4)vB=!4iWjq1sQ9_GiCIHk($hMhfSbIDx47qzgpG-{9}9o>niaqv zb}Yj%n}zdvijJKEK-T8GWb0;!b=tskPz&c0{#*2dc$9^L3_`jB5j`f?(1l6ke zundHp^oOnJb#oI)#5zQj$N9^`9or`gSnuok7b^V_qt|!7@u^CmO2$DZ#*EqPCExZ9 zME8lE*INPNDud8na`Rdb<=7CIr?hTkP6KLnWfJVNGR?jIFm+4bW4v(pZq5@G 
z#Ssyo1=0=wYVL-sbT*Gi>LadnB(t!eN&QqDJ)N1%7vcjn$fn&DB(E})R2jeMkMndi@@h#o)2eU2ew`=*14~$k$sY&xv>(gL6-!o_B z2y3Cc2NLhH(#w3m`WIedoKOfO5{KY96w|tQVl0b;501l_Sx@bwIYW0pGb=?paxOE^ zTk`8^4=h%M;8n2R54-WBMb#TP0I}3sp!}hXgGKq|eqQM(du)}!dMDK|44UR~2p&Wu zn7dhZ>)pD`34M^8BMydNSVQeDDN;~5WTEiiU1U$ws;6r6dATfZ&&TONwGSeYi}_sr z_VaR>iDx++3{Np54DVFE_6@oa(`MpXt2ysU=0slm{wUgGxX>)fs>*#ilckB%u(2_M zHPv=RLjAxw>1}ju5t+Y;D0~Dz=CS2>q}X~1Z*wk-6%9zn$j$Vq;fRR(;4}P%-|{%V zt*`7!zRpIvQb}8WwTg>*CYP&102(EEUX5vK*>CSn7K@C_NJJI*=E@}M88DrTaCd^t z>QZEgIpAIRoxa#BK?KJhuxj5}k7N|&z;EPp#=#f-Hx6e(3jJ=>hmv@xBOr~aM$46r z)|}C*a_`G*F2Xl_N7f=Dd0`wPfNS$2l7WlOpTtH{2MTL*)Fax{H(bNftjxF0D&n$q z5+((ab#gei%$7Fn4WbHU?;d(4WphvBBOcOGcf}o>XAvUCv+IQ1morE23dE?xYe>l-iVYFXxEn^mxEz z`g}HV&GlKnh}wD?rcOSr3Pxct#YVH+Bm>*@hThDWVqLXRdZy*}Ror1q#%sjnVKxTIi$+B3Ox|(}^)I`iJ+fzpT?^8yOMDFHhMbN!z$0%mP@Lg#p`nM`t z9QIURxd~>3QF*%Hwj3*h@_l$={+Flj%Gr27udrJWMyMx7mikzR>rQ0rJ?ll2W-(nIzK$2?Ql6+fGE?bT)d z({uUscD_>`C=-zf_%#%l2^;W=FA=vxcRRLVXL0-c7uv z0Z&ORBeQM-7R!Jy$7XxaG5>1~86NQFSM%(onH*cD`_Wj7$i>1T2rpyR&9sUX`b@?} zL>ABA&d0yZC_sc({$&ObSGjTvO7$E#(wL?%t|8Vmx_Ce0X{~DNA3ttxLrze>`K6Ki)e{y+I?Y#%UR)sq zdlL4#y_4G7%+{rfq1708q~S)_#zE+zk`F)RIF5EdO7YqZkU0x{s{XGU1?#8dORK|oDof<)XH-i*kT&f> z^T`}}G(O4y=F$h1(q?UDh|GoZEY@E+o0&W@_S-|uWpT!+?bmXy7--afpb$^?E0gzm z<;pYm=UWKt{uq`)E*FAMo}xIB?=Pfxl_5_fvCbu!-#3#2<$G@cM6i#ic<9b6GGH|; zIZs@I|8U6qx3+>Q6$kF54d+kh6V^rQBCjgC=x-inIe&Qt%MpLfomtz)28{tCT3y$7 zbS#%${HXTc1$X+ zp?y)!HDMj(7kTx~)#{aAkrORBzI<9w^wc}&<0!>E$2Zrltc*d#WixmW&uGhJLz-nA z&KvP0-?^J$IdAR&{oH+HTWV9`K)ZSzNQx)qx^T$2E5o)CvsIbg2l0Iky!;o zwo~cT4TeJ!;t1ZgX+S2nS7BOZ>cuY{rSruLtt$xT#ujBBaFSa&_1R?H!i z{Q05o>=DCs(?FGJUK)gt(A8V92!))75T(t zHu-8=e{t-)LC=pwLPxUl)X43l&9^Xgt4<&fy_S`_x(Ebs#RazD2$>s87EyXHxABsr zN*hciSF=L=)L1cCyCxsXnW|dm<8UHBqnQLN@9imvUsNkPjkldr$Kg`ts>FhSy{?%1XPta(O73+heEtvJyhBu`RfPy}8RyhDGwQ zi@eM<@Z_DDS(BmgUfIv3oGT`?T%1CVvNaWWSkw9lBvLEINZ6qowYj48;VOEEgTRG% z^Is;(QsCUN)UjbSB)^d}%+H-j>nd07#4!JQVwkrgJ^8*myS}y(r;aNlK9sYU@`{Dv 
zc`6ZJ(<)!0i7UzIa7#Kgi)1y)bGZ`B#7oRb+gI(tpgGrIhq9WYX?@p`We3G4cy}`M z1@0{1m3F05GvEz02dh{(v}j~(9_;(h>+sA&^#Y?damQV6ngtei*YfXT~Kw$l> zchgSoji=-0Mx+LhVYnJZlht-LK=aDVPK^tO(2h~b5Y=nQo^-@h_e2<9*#dcsB)%2P z^~S1yQCI(2Ien=rh#wslBp1i|1NqCSkHo+90v@`HLq@e_c+a1BmAqnm+MrYOTkfIQ zm2<2TaL=i{iQKK(#)kC5zL(8*Xppa|j2(zh&BTdV)^74d6+7{vPpVMO=^Y&G+m64G z>od8c`(pSFmU%K)G|TJtVm{@;?vb%N4+gN{iy1qetI~^-DjQ-fyXAElCR|ffQ*$vl z3-?~jxkhZ}b~2wFisW6l)~j}ItvUI!Y1L*DaKFDC4hyVDsoWbgPqz#D<@8W2r~T7G z(95~M1F5glMQd3feGFvp7<$?9H(T*{x6wjrV@)Xx>_GIk4a|}(Jfs*sE zb@7kYnh_%jyODe*Z5#=5y3fIEQSU9l?dVmXm-9P{O{^UIBSX4`-x!fk_$-WJ>7;Zk zuP{t_R=u(`k=yQ7BIc$+!manP5EXUW;}_-q?9R@Jt)H=TK0$B%2>VfG6`xvFo*nq4 z>YFMr8^ia_pKr~(o=c`IQ#GXCtcoEfTZceOd>dOMS$t8=QXUDhy4s7ULk&5&JMEe! z$qwyHYpKkou}RUjD`r()^s%`E`!zOgj@D=BQ!u$*}Lh@c82HC6rnT(4K zs?u~;vb;f5gWUYZ`Se6$WKnAsJ=r-c>NB|N?83h>OI@m6L&ZBrs}IP zMcqc*dQ672%G-DeRz&Y!%WmkM=2hjf4IWiFHFqQ%dNCe7aV_$3#bO$6?CN^sdvy(C z_0E4$%!nHMXq%MUrt9+UD?yh=?M7yDX~sFKrP7EPK%4sMYGSee;BCfB%lM(Qu^hb? z6RIuTx;;^uCco2(^P6*&-5ki7NAly=9nG8T-Xsa!+kx=xu;j2 zV-%txyBH60h*2<8j57N{Rv3#4g+A~Q_C!B4*O(y(BrE=|;*F)^*N~W|Fhc$-e(UF< zFii+jcG^{-GCOxciQ;4^PHU;2f_1aAuBL)Uo?ub5#CDszSiPls)_Vn-8JP!mo~!Zz zvc*VME%dfJx;uY@NN!bo`v=~x5cPO|zc%jZ9? zXv}m2t6Z5^=?`SK2B$0iARODpI8-`1g9432XU}Dnr)te;wF2|>LtFcDluBJQ?N$zo z27IimA^4SXll=&BI!|0oVZZ7->__{&Ob$Mo-tY|e+u2ojUl$*1zr~Q&@Zbx71=aE<}R0Rg!RTG_A3GwH_Rhl&g*H=?k1QUs9i&$vfFYBde%#C^l$@ z-dMy1_XXQYPo}M>6T{W4=jWg7N>Aq^x#P*Z<@|O5tHfjw+NlMzcKTu~td559KlV-& z>}_ZMlBE?3{X|=`Z7W-$ynGP8LBz&XuXu*Lz)9DfsCES+X$oRS1Y(KDUt`T zD))m9eANAxMZ3mGy{B_6^L3fmL~=WPkL{_OlL|&eQmu{9>q;zw&5)s-q!p)Hf{FkP zWIINGC^@Alu_@Ak65Lwd&wa2VROPmcYS$hP>& z3)Kenmo*wGR_@t{Y^wJvz6Va$7e=#P<*K)OhPl*9^Pnu8%7cplm=fy2BcK#2^ z>ajcWo;3L@Z*xCD5u9JOcIh5(ISot4s$7ShcqzM+-Fse8*U9J??b;#4%Ch#Am$@U- zjBT?neQRxxw#gg%{-6R36^w<~(x(14GRdL%Z{Pb1RYymNFy@K)4G(K8Sgn5X?e5=J zzGQce+L~U_b)$nTJ723<62GEd_MjJL?aT`lP8G?;2)|@fZ25Ygm1)8d5GOW-jD-wMS8|KKfG7 zR-L3PRko(3oAFcFs>k+RypbQ=LX8xs#%k>VUtZVBi)hV1aS1E>)hpx$Y(ek&!@e+? 
z^?ZZlm}@3~o-I0Kv2xR$Lp*XXnAXVJ+z4M~6K0jz&!^K?c~|4I)@h&D%V?ctJ&GFJ z@gUFL;i@0bJ&r|3+O`(z@1a z+rVr%Zciq$SCsEMIPE|;h$7>Fxju340^AYBF?YxCL|2B{G+jQ(tNE3_m__C_^iDRS zf;c^@pyLnbZYnQcr*$o8xADUG&!p~eT?uc1Tx2WSblym%RvnQi`h>l~u}hgh*p{Qp zC-^wre<{~%bniXqFae8fCCv^G`pGhl2lqId70>RgfYBHd9UF}bjMsFg}|vPpyqxGvMBS{gbO4$d9>WM~N@yPF8bea^MYz^Vx~4 z#f1@NhPa934!69&8pUJ#bImB9aS5FaT_<-14+jhqg>#A))a!*!m-iEWNoiv*z zS6rxGrbUvnrb&JBNDxt-!+uP)z2lL3ZFIEbmV3M)2sGJ<^!8>xLoM9eOZ$V_UF2v` z(ds-5{zjyCEIL*Z=X*s8o{39ohlP`f9Lv6U#T>qXN5OPF#8X(TFOdi1G|izJsY!tc z@Du3CZ=rzrXIPH4+t(Z<-JDBHlQm#8ngJ1;rdKAVeJYJ-^i;Y znU~$0|0>eW`0y$#cwAeJjNh4Chi|+_rHoYQLyU%FVt}2%unjTh{eSnpKlJ|hj6L(W zCyt)EJT>*RANFg!-oJ3b#lr{B9DV`Qp(dfBtsI&;7+~ZgtJSv1RcS&ph}4qvdw??`w4F-~Hmp zp7}fJ)Wu6v$EMF5nVLR-X=>`{Klt-gJ?I~Pe?&k2mmTsKr1I@g{qRrx@Zx)ZI6v?G zp=W;VFFX3>%LmV0Iyrmv$n%Sz+KAmOB|qDonXR?>?rQw+dj8+er=IS&mOBI<#2bAx zpXUcG#OiJ*=ed~wH*?-h-a$pr_Fai*ujiH3_mA@KQS`W$|7&C4i^a@kX6QlASkD#Q zkG`B1Hgfc(vHuvWSx&{?(}~w&S?g(YN3MB0?{B0vyPIHXYk9X6v|Y~M)gZ=t?!R)M z)_2pZg|TnPW^d)Fuccp0vG=8T)zh@{Fg;kw)$gZ2%lTc%_i7$id+W{he72gl_vD># zE~c0G>q5>l!@inNJ+H%_8=mB3)YsD5YOb*O=SSYp)$EJ6njhm>&%25AaW1d!q+d%J z-_2a*N?^sN8SahvF0cPo zzC9XryO#FeO<%S9G-tjO6#K2n{%)}LagOth!e7eg-x>S0NatRDUk_5eGU)E+*e}Nl zelsn5M##yaDa1UQXH;zD8VAxZxyF^Sd-${~5~lsP$G(~4z8mX!5bUxB>S(0zF7Y>V zmU{FDGxlFfEZUW;Jc)E~=Pc;_)tvKtVS7jNe=V)N9cis)tZ(L+i|Hx6G{fj_k{1UY zJD$o(# z9^`qOv+2=dKDm_f%RttH@LwAHmBGKhnCm`>hVEzFZ|Ai<_&{X!t>|hwqdJrEY(y)! 
zBh{-p@(b~*ley9tqPef6y+27yzmt)?o8z#~rRZcw&izM`;UDGQ?Hu!^XbT&BF%r}2 z*|Gne{@%^E7ostDMPfu>%Q0`~DsSfaFQ%2Rr@c4Ro9VItm2a*`VsC{5i7MaDc?;3n zVx%;i-GcmE+HSR4CiY4v)dTDm z{#JUfVz`i2XL8oV^z(Mkc$7X}$*~(b@@dX{7@0gtyRQx3LBR8Q2VWlL|J8hJ_T_1M zvXCp?N>BKrJ2YO-QF{3Gyrz-cY3D}zb~wTQZIShS`m>RrFXYv|wD^a)!p-#cX|8rWEl%b7hx5(twDyg`Kb&EdnC49S zyb&DG(yOtf-^|&+GWMg<*?M|4mCwc_w}<(3Ef!_xh{cQ^9^cO2`FPjujPvXuJ$ADg ztKhX)bL6Y}#{I3MaI z>fVi)2lLs5v{V~nQ;X5eujG4C`#aJ5cXPzGXn5b)Kac&qp8xyvbl{~JGj zH@*9IMtm>!wwiIlJKi-NTX`pKsRYVY9t7#}MsoO2Y~x?1#W(W)cO&`nT+5zbk8`$_ z)1vv01f%D2wEHT)o8z8j#IS37aP@5Z^VMk5YNW&I$&;YjMl97#t=R}w%)9xy6x-UB z>-|WsA}6>UOTLmtQVc8ow59_#{W7VLp#wzUCv>5HYo@4J0 zKD8A8cr~B#bH2|TuZ{hBdUPR2u(ka;&NC0*N$hx%Ud`odi?QpKSoQtEXK95$UyJT8 z=M4O0Jx91}<3iqv4v+KxX|8ZDpIyoC$($(Je6LkweU$#o1LyJ?F2wHH`?b7U&F3P!Csp2$miML& zdpwIT^Eq~B{!ZriaNgTPzz*g&({mhYIl5J;dYbPp=2KPljf_D1q;n_sW9I?77AVKe#DL8oW;mwBUk1#>|7+v|H1=UmTa)R&?) zQ47<0Gp*Wx`9|zhe;sY_iO1>Rqe$XIdG%NG+3RCJo*v!LckiZ!zm->{a3^QKlGWlD z#{Ng-XXg2O@Z`_Zw&$&zJ-?G9?dWnLG3d*=+Uq&uTe156x%y;!x+(n^?=Ht4S?;Te zl8>U%{}da!l5-#CJNYh8Q(0We|C__89uGG9zFg_Gv_2D0`0ebZY5!nan#d7HT+eUSFP zm^~6Onb~;9ALN@m(SwX|GA%#NnBK?;50Cw1`Z^zfdXyjbMc&(U_S=yjIi1W|c+1K3 z}(-e13N=XIvgyvo`DcAa^LZ zklyrJ18j4Ckj!lQu#)5H^hVmfI$Yfj&7zKHxr(sM@q)I`r!VHZd_eqi)PwYHBmY@9 zzj-BpMcR$%MAl-wo}&CD61tjmm(nf^hgnZE9;iAW*)Qc=IUkn$W`1ww+_@a3PW~j{ z&&9TJ%iqp8?#1Gs#2(&A%lC7HxcAL``gZ!|?%gI>Hq>s<%3hxAkPNu!rzRw!|bv`4XYXdf{ z=6ieCs3ypzUdY*N>E-((3t1el_N_46f1jQ_$a!kpZ)H^SGczv_^Z#mEkZ-B*+>bXu z&G)kz{?hibP*>oj_;(6JGrJcspeZ@sr*;e!NK;YeYSls zy||a7FrO3o-pJH2#eRKWPBe4GoxC#d{7(ML8DYo6j132Zov+7oHipq&%y;*LLyz;V zU4PWPS>>DgtA8Z*YWi?5M_{*K&uHFGZG$|T+jhP|HeAEd`Cx#pc*<4`0nQ(DhotG({#FW=cnd+Hw~gMG0Uybe3FD+~X7 zl8YM(Q2ni7> z$`y(fAt5ff;r~1T`IM1}1hT69)%nhOmuLMy&-=XZc_}|v)2`THEo1rP@byu?|C`~& z*gj zZ>Lwe+`F+SALTx4Ct}o7X?YG;I4-$GXcQ1txyy2C6f_`DC>G;jhWX_wp*Ewn1d6sRWKi{2*RYFR7M>^q_7(X?ij?og-?RjW~!kA`J| zR%Mp8S&}iIJJl?TGB4+^FXZo~{IjYdezX_oLGHUgNTXcD^*p_nxn0XWXnH@-%OQxQ z#Q87gvup-GE!Q@ckzEPm;oVNBpX>P_4iW>f_eLyBaU*|2wGgbl(RwH*&u7-^c~}65 
zh1YcI=Hq;xi{Fv2^vhbc=kXO8(=g60H=AaeG`+8ciUVtvLhdn7;hSeE)Q1;VYXX*Abn(bWa z8HLcV*ls4H6??oHse?iA?smo@W|grz6$-G@x6;pKG;1YtMfQwWPEC#S-HgSoU=6Vj zyml*}MHA@FQ}En)#iz;2h{i<7n`z-##>EQS<$5EMB#&VqS(88pO#|JfAzSM5d1hFRbUgi|P4|SiKMO+1adi z<*z5R`6N#+g{m{bOJYp%#xKQFpUOC2P3xCpsm_IF{L^-R-WdBt{(lnAypW#elDjhJ zN16Rjp7?yOS_|9{QkIvRPG1`t*ON%}!?Z&Sj)yMS2Ws!9@6C+oVdl4x`=P+4+$+ku zmVU-FE>sl(Kh6KY5J|a}tMJ$zj)XRz4xLXi}sr{{>VQp zW=wAkR9ACGUP<{Mq*ron_wt=&Ue?nKiMy8X?5ukwG$Q+~>g(Z!%(fVZx1|@qoo_bs z-C9QWTCTu#WEHkoYq=U7lEZjCb9_GETuvU8MRc6`7p zoLSN)Pw)snH3pTboXn_T01>*@Z{~mhw0kXMavi!6t-8ZMG;4dJYN&@l_N=kNqk5wC z9iw)OkJ^IPIxjvuVH9W6 z&-IL0?sGYJjpu2Zi>|2g+cFnog}Z5;71;?IS&gpXp==QAstVnDjhqsnBqRQ-p(30L znTzje*N@X0o&LGZUhF)bd;%$1O%G=J zXZfy(e=qm{dMw*aB;bRLKot6w*d2U+Be?EHsQ4hQzL-eqlLNKIy7G|E<-2ovQWe7U ziK*;=_*BO8ne_Bwdih?)^>;H{Iq!+g;nmD#F8uzv;b~T5K74x=3arF0)Q7BRTn|I1 zuck-m4#-NtE~xTqT0fn!vI`e8*4J`}T18oUxZz*t_iJhI?OffCZU3D-CEpKCPo+(D zIDb91MdssvDEWt>&0_lf1Vy{=z>hqC~W{K6ou4b;AndfAp`rioWZ|3`tGZu38uhRRK{JxnM-_49q zMXRq3_Waj_^#5D%<_GCo$oI&E8-R4Xo@`eX6< z;;jn<)vSEn88rHS{(mJS`gES!9Vm1r@qwLYA0?Wh14YQ9ViIsS?L$&$GV2E!)A@|e znj{<{68l=lC(kacd?(jfBXQYm=EfE-=9+wlY=Qiz_-rPlQrmGePu$HYtmRIoHFjHm z!Q0LjGB@~4r0KhFq#f3n$KY2kyDE$&DK&ElB=w4%x8qZpE0fG zN!~$@i+3QmB1P}nyfyaYpHNpY`9q)lTmR2`hL>J_^#@BK{*J1J%`boSv!DF&|F1`X zpw{8W@Ba6n{tIdye(DoHSnDwJL!bP=YaMdS57aum|J@%^>hRZo^oM`+hyTK2hur=H z#SY*75xb)2q7S>F1HWqJ2=X8KN#2=m^N#8fSZW@3E_7vO=JW4%o)Y!Vj1cmJMBoe= z)JxF|zT9YTgiceT(sbI{iw%e6J{-Q`_04K&pqc22*I&#?RNx)SZxJECAa1=G{nn3n z_3>f-*YyItqiwQgGHKSh#X7L??$9%jC@z}Lh#^Z+$XtH3D?Wt&D1(AH$)~FChi^0!jISr`*Z^ zdY-&OW9}(O^|X-BXEU#6L8mgdW9bE!w`RSS*5xYn^en#@GkU8BxAWcY+zaQ)+RUeq zYk8vRS6)kg%5UQ&A2XrAa_*jwx2lww8T#Ih{X;u*6JZ$TYq?^i3+dgu(X~Mq=mpJy zw0#F};{_f<%%Ti^i|8)o2Y$w7Bji!gjs%O}kPOhz9xAqp_s~#ujVfqHIeh~F!oEihb)U3&}2JTr!zKLf*W~?RU|>? 
z!(Vph8+l4*vb9kvjM%4pxmWvh8JRWaRzzrvc38DgSwT`{T_GzqX8Ix1FfNa&hQ?eT zhAvmbtACn%jY6IpUSK({=Po>FyKs+Rt**%b+|QGja+ftmvJIolj#x3J;q=Gq&aL$5 zFOg~OpIMU{ktuoZIQe3$lp;8CMcznnYt*jhX>EwLHixM9*W$Is0%pQOs0U)P$@_GE ztR+}E=TFcKP00;APYzjE?N8^~O5Bay&*wld;tSE1u{NhiLRkUcsiQ|ZGUR_Psad@f zy}KRy^Ea2%mYLG=+ATRKbn*{2kE7es57zZEs~;XUp!_KmyVxptqd zxXNP5qgtUhI@XCjGFBcE*QPTH6k!8c8+5r|;-NYcJ#;Gc+D}XeTXO6Hy6+@~j6L?ua~hJv6A4u>53KUd}in z5m*_o+4A~^xwMF`q(GH}*4$D1Ze(~b2ZrL1ebE>F5#y8l{!<{Rge#tPuri7|tiD_|dK$UbX)O65%TRz^0%bFcjvdekDjU;!W>>U9hS&4?{9u8g50qmQ_#WC#&aG0Zia3ctT+@WTX&&tkMa~bw7#WQ zmM-y-dY{P^6*y#^wG>YkJBtf=2Qs~$?;&mL%rqLFVxxB=`)owfh%ukfTu_%^C4D4K z20{cU=3LFU`##Cba3up9M4p7cEn1eM5$;;DJjE!=9< z!^*Hoyc{nA1sE5@AyT#aQG5-m5!sdXUQFlRAjh2v_@8np4j0L=W1=$?^(x_@8E^m z(duLMgluo7RbG-b@zv1Cp^z`3WLkv2|E?emsa3$hXpt zontC#pcpoZeA5Q2H#kl=**W;m>IrPl^P{u6N%Rz@lKA;#LyzS<=hfWHnC*msxzLps zl0eps6}Q4kAGE;o&*Xj_qQBG0RX)vkyZPUHx$I(8HR}xx@&MNBuf#JxOPi0wXI4j? z`<3|0)nsMAm^OBDeJisgSH?gG%*?Jh>z&PXL5*ysbz7EBlx!_t#+mJB$LzBCR&wq) zL%k2umc4UUj2q30yoxm6%bnIWn$u}4DAURwlRpuQS_{&1K35l`=t)xFEM(Ug(Q33pr^Z4wC1#*~FSUXlXE&i_C38cP|7&u zCh3K_lOHuuMc1_%>@Z6T0g_1^)H8e!2cjLz44ripqX^TC<)dg=qZD(HOVXZE!CZ@J z1wUC`{DY1}f#_}4s67`cvoDCP>`ASn9KOQ%i?g*Lz9KV4&a9_aQOP~1*E&&Hsz`#| z$bI3z`Wb!G#9~l?lU4D$^}LO=V#fCF!+5t-&!Udyf_>S=@hSDi#M8%$5@PpIl+vy@Kz{(JhU+v{KfZ1n&)!Wt`?Z# zYU~aFAxFLx4O0jCY?uSQjAzy)-66JNhpdJ?$W^prr6*!1RG*V7{(u(oQ;%cMWJuxD zt>~tm{roLo0*%-Qe>D~SR;AgwThVUoru;0_W!DXA+Ve-c!~!d!-r?N)pE3&3f_(7P zP;Wl1i&+~%i(Kp@^9)Jwjv4bnVKRc(_Lc19FT8<7c}!JjB4ie!+~gslD6P1*oe?cH!D*UT z&22o!mdX&yy~-2FY>NreoEG~H`#~ls0k_c`cC3hzKV!!sFTRd;kq}SXw}={AbC+w{ zrn|KkwIOtl&aivdoX8?n3603D!8RgOwhpdlL(#;z$f3T`!6zdqHZ)t5@LRj$BO}+7 z@7zbv+@mG(N&|6}g)F^%>ps8D%bJ1Nv1_8va-}N{W@*&-a*w<;4DTKpB{I|8P^}a% zOv2r-4VJ^m%*#0aGk2CmRHI+$b2=PV_XPP*=V^1cqpEg`uhvF=l6KiP<0@rj^T;&a zW$`_ym9~tcG^r7SZ$&Nc$90n8@8kKcVu`&`$Jg4!=BG%|WM(e1gB0LUILba-HpU!S zJ*WUza2mE{qYmUbGgiy0{!L|(I$j)Hie^7eJI>KPI}$ueb1$SdtIBJ`J!;?f z@|177hhQ?_j1S{=E(|HBkvL%8cp&`N%V%@ 
z*itf6FV3>TZz`S9L&jh|Z8w%L4s>trKCO9@`(I95+i6G42L;ev*#ojX9Z#m(_npkb zN&L{!N?Jy%#=J&bPebmyCT1TO|H4+X&#d;*Accj<7%6~BNO|R^zD&>R*w`@@Am03= z*5*K-ektSYI-{{`Q{VOXtnPNM*b!Oe$`g?VIGKNk(;sGJxWv{Nu`G;LR}m(BP`?RH z6}dKM$_|TerYwxl^+k5RYY#dtHa2>3Cs|=P7bE3tk|#Z5{JW8XMq+BG`5c}? zzLz|~7goho|I0hF6Z}Ul1>Y$TKnjeO#PC}vYG0UHs;Y&hjY0m6T`q!Q|L`9c<-lNdZm^HXPM$M)0!P&b>c>C1R=d{N zJ$&Lr$$VuCH=WmFADo;UKV|M#6`BWZTuHCGP654;-eO4OA&IQ5Q)ez@9O@KFB{{T` zWX|FS)bIK$t5WPG4puqECybbAxE){hTv{eOq?7C$A3mCuiX82*Fm{pPjV#)FtgE?`C0PcRRpy{sFZmw5;4axU zZhHi*c3O>K(djVsh+3Xf+pRM4Td@TE(5c*^1s1`mj7D3q)U$Z~T0iv`a_@Fbt7q;= zn;nJS*bh?LtSkTT(~W_`H)Ex{h7Uul2&M~ca-~IOw=4}C#yg?2Yp)Cl1cH`lP1rK3R!xqqu=Vpka}5`Z>lZuIohT2WWf>e+CKG>|HpI6Ng^^hQjkP4j%3 zR%NTn7dz(;v|xiy<=%yO5tK0sE%E>3iNAPI6(r`)10KpJ++a&L2TG~aF($STRc3P9 zLGg_I4lj>4WQ>JW6AIP8mv5|i&@(Z(ctK?ft#G%(zK3s;jQdlTUtRI0Z^A^O2*Ssj<#5vT5_T9 z-n}w%Ow$8}`Bff|%vmuZU7k^YE0$$VSXI`UEfED)8st#y3H#Ob1H0%8ht+@47PCwK)leuOGxF}(3&_~E_K6JS{aOtJA_HORq%2nEVEq_%@piC!XT??q#4UamC=7HHRXWPKm)|JWhQm$Er z{>Un|tj`r4W}}6MQ@RwwLFc@zCaGMlU;Cx~biRJFOL79c+wMP8RWQe%nPL zVmDuVgEun@-@_PA+w*2IClbk}Lxp64EjA(%0 zBb&6^en{sYuqAe+TuCo1DV%|>I|IF+OADl|aSv@G0p&TFu{!=JS78*ERUY9&=&Zg+ zmQc1A@0)jQv~N`BGk55Kyvo5XBvK)Cm7Y+NHETE2{ zvzljBFpxf|5O>VZe&cuI&kp9f)%38Fe&j7)h&*oQ4l{Q)vV0l*#U7gjWTke|+9e*7 zOWL5Kl813J-9Jw?`fukXGy4hcd^HoOLdI9df-ROYw||-~5&t;tb~&RF1(1*j;g;+Z zq{E`I$DfKXdzzgkU(IOws-5(6Fui>t_{I4@_wvk@j2ewtS2*H9s zNBMsu_e_TVwKWhNIhDPu_kn{UAeB+RnaY?y%6Bl9TpNz?QT%q%oha3+U{OBQN(a@` z$<4q{s<&8Rl^7y7n9%wM+f__e-(zMpM?8y)ycz3)?#H^nljlFZsV#p|YVg#4cV>iF9kx zoWTNL7$n5(^@qa08ijH7@K9eow?c5>% zZhi0b87qr>f7prU?Z5va`?V*diJ!}?*`Tf5Z4^DxMlGM+kf^Fk*I8pSiR2qcWdL?E zgWHkRgM&WU)o53W^CtH4G`k{0N?v)ZW)kdWRq<hHFiMC?mgih-m1x4LSv=Ehu6!e-gxu^)HI7ST|06a4|Fo_W%=qu52hwBb zEI+y*X~V+>Q2>aw*{{t7}klK(}3GWLh_EY$3r8fRO;&)#D6yTjjn0^bbr ziTTK&dJbm8Z`kk3*iWQa=gkzsz;9p5b=;s=B-#2CZtx^i z@EEKaO}5HK-bEAg)*FehPv^?{jQG9u`+5*G6sN8Lo`J7;596B+S0AUPFO2>B`~~-0 zd2p`Q&Jg{vCgP4?iMIXc_~MC-1a@z&GY`cV&>skoZT-qHbGBnGquq;S**C`auuZaS 
ztS3Y%P7&GMOJ84#42qffO=#E|7yfGY?Zu4yHzF+)d4`m2XP%z)+xnps2vC;C;xkdS zKC1f%`6_qvKB(Irb7-I{Xex+R&Zic?*?Q-Jk{{L2qj-y#l%sHx%zApi9O^okwAn|Q zWOX3sh}PE7yc?OXHxp-=!Z-`zk7ruH1s{sGp#UQ^PL*-24_^W$K;!#qja5T6YuZ;s zGh-HsJ8Ouqk>_EtRXxI{WJKP2gfPf8R3oP#x1qQB=!+5aWp=VdM>Mq>j67$ctXevm0pxpLs91V?V!nCVuUBp5G0J&_UkGX?3zUB#vy8#jcOM zGF<0toWRDru;+3c;v4i6-J%r>E53nj|3>a!%2;`QSEqvk@E6{;{zmF~IPVr=qs31$ zsBi!v{?zw1(Seh-j?y4%y3A^sLLxH_zXGvoBc&IvW&Y@8vdoq{SYTevL0(gB0 zn;t{OGzZ0DDmH;UsE%Zlc>{S$k@RNzgX(cZoY|UowvVjP33^_PNh)x#nl8duC14d; zZ(R#AV?FSt^?GcNmixUikpsSvrI%CVr;HZgs>vi60+@;39Lty*|H+fPL`B8Cl@H@ zN92lcp5b-q9=nL1c;h?vlAO|aXS_Ly^I85PY5X)9@eC`EKF$V^5BO*KyOB7b3uC`C zT!--RyQ_4@tUp=>hsimxHRfwB z$`GM(!OP7?-Hs)KG+xNaMa+D(XrCONOdnb;uk9ycz2Ul1o=4m4HF`Y_<(-h<+6YKb z-o!Y?8e~q++ia|v;+@$yb`Ztrhwtw{+KChGT`$H3k)m|#>L6(G3~sVnqTjAK`0aTb zT|drh@~l>EQKl`44MZd;MhSbI7)f7fSW8R4-RuAIw{lVsCH}5{!AxTFuK3X$G&73IoK;~XY;_H9 zZ#<+Nf~6PwtGC$BQ{+tK-wa_hG;D^|F(V;Sbb_8g3YDF;2yf6|ew!Vrlp06p#a5U* zTGJu&EMg|f?p@FG{0?NS<#K|As8&N$9*K2<9auk9vZ7(MALQF+H?6*s4V+<-w52lj zRAOp9^m=0v(nkx#_W5s_{u*PKU;sdDs4uU)aFi|(pDeHdZ0P0WCkQqd_#JjOw;kwYoEk>W~DXb zQhx{e;Y#({8{&8<_=`S}6V?ZfY2pZzqIx-Hr9>ow$MixhM=!{jacRk{iYzKovM6`~ zJIN=InM$)yy+=L`&15=A$4K9tK#DuGrm}e|#TICacg9ZgSa4J_Us_(t zK^OTvc$aTtjo6_5{N*hNdiFdE=iFyen7PY{@B^E<1_hPE*D$9EpCz%a*Bp@XglMp)WpwKzVTYx(ow6^ zHF!Ne9j?Iztw->b69*6H3K>;D@Kd>GX|T9ZBHiX~^$IhJwpbcEB%YE1ED}HiK8ifE z9jM}+57u`0ciD~3zVa^8V?9@bT6ssUOhxA9TK3Z;%!V3t?{;KF?4~NoP8PlR7EPcv zJFTE>cIs$;a7K%r6X}I#Sx+%uC0g9ok(xtyRG-TgRk(5^+C(`t*bBY2rA^p^9T6$e zLNj3T*erBqGvwQ?F}b%hBOheZY`P*_zL7VYOj`I8rw?1_>OCZY~?l(^8Urq})W0N7kW?)B%y&3o-LT&K`?u0jRywAc+ zwI-~X`a{`J^Wn*~#apnpsGwJ#C`I9lxT9KML(tH+ETx$i=h|!#dKu0k+#E3$Tx5<9vAscJ!)vmc{j+N(^@|-=%&Lw7%)g03+GlQO3PfwEA+81)b;xzW| zephre2fAIvVb2=WLQ;#>#c9=z1>|#E%t}1>jq4LpWSIgSw1)NqQx5B=N zK;_vE=Pp%9R#|ZCwLDh~WYvajI_p<9TP=(^TdQyiAAcrCiTn0qb+vvqS6KXS#}_?H zi*%vHJS-Uxt0X35Cd49k?-`o)dnn# zy27FtpS9UO(VldW$&Lt@jgsf*8|j_B!lGId&T^u&I!t_}Pj)|}zUn4E+n7XbRy64q 
z?Uj+CgD7llo)BrkfvVToc@hFU;D`LYdzy75$*92liesuBvL1Af%w0%dPKI0^C1bEI%l;h>RV-oco8U=C8725C~{7G1{n~^<4$+FS#9I@RQ6hb5L>aVCqpYM)$&+q zjBBDEKA6PIUA~&@e86t*(#NgHB)oPc_ps0;;kAs0Z8y`NpvZ>d1^E_-(^4L2Kfg;S zF$$?tUBbel+p+vF?>E{FYkdqxH!~VcbDQ6zX}pv9eKI{bZR}BI_+Xd^`Ks?IKCEZ+ zM!UvGe5j0@RTS2UZBq5qtPtz~%Qkv!X0MX}I5V9K2KwIcJQU2D(tCALwf5Q)O^LP| zGj?at!^ljl$D2K2U&woPr!oJPSW)t9Olk)Ba&}#fnAi|?R6n3)pM}r2^A=Gn%eB)` z*?2Ug`sO|o0}1e7MoY)=23o~8nH>2!Qj4mrv#KkjXH`lc8it#Dxd*ZkA<6DOOtpl$ zkb3o3s!ZO=lQdk5J%g)!#eM+AKHon-<9nJZRUOhHy&`jRyTqMEXdOlX|nVPq*RgfOu z8QtY7>g`mk(CB{>s)-8lk*?U~s}-ZRhATUGC_m)`PXYPxuc8Zc#3yg?sU4#;u-WC< z7Nf15KuHooQ=n!PB(YuThDk)g>K|UnldPQhi*&H!C<7(46;>F{3l;EJ&PPUuPwtwj z+|Gz%u$9~{Zu-Z&p={&Oy|hMWU<}C4*){memsshw;&CK@(T8K810YAm?r8tY}f;Q9PCvU0c66a99Uf;uSL-4j3>r&t4`m7;3(bVokKD5`qdU*kjNu0*i5`e#{#^RBxhUr z)M}?#jI|`0@^`YHsuoqM$-!@=6>kwGmoT6R2oFm!wW4x~XNH;DdC3;jeDf$3*)^Ve zH@%P!b0t6gsGKROaCVKihLHXOlb!UZp*c_vjC#t1{bLh9a1NH}Zj83oBM^>-0jV};Wb9!Q9_}duB4(a73`TS}v zE6TGhrxL@nTGkV7XHz?8D^M_#-K+M>$%Cl0S3^Y$tQ8(jv~VNm2EG`o-3d;v_rRmt zRFYUZ@}6-XQ^t{vV2|jh{2y7U{CSEbK^Hum-0&!7!#8@bN{W7{=E4&-%32}`{6DEE z4n;|4DmVk;c;-b;%o7KlF@tKXJgY?}cJnuW`ByI@yU8-C!RV~UzfT|ZuAe0pVo92U z*77QRfEG|3n%~Y?%?4-4zk6ABTHyr9{XVQEQiH9m3zKE~0$Z{iFgL2OK-w@gQdYia1E_!PzIdmx;+VEsSDcd7k&__F-^?sThpd~Jly=ZAngClEL-Q8CgW7&2 zBUq1&+cjzx-Fi66yqb3Cx)G{PQO)5By2{zpKpuwOvsQZ`^SPfENVxZ9+T-o?j%T@p z+|pT@5$$QWbuKWYy-@TN?^!|7G7l@lN84{@oTq}+a2W!v|01Kmo;+u7zI7L_7Vwb%9*k|`c?nh6O)Hxi7Wa{Tp2 z&eN2AZl1wAZFtIyxzC${jgP5fLPXMWu~LmkA`QRj0@4y39^l&uqclX7s%nW5&jab9}8u_ zmYoYmyOke^gZ85+nHg0mJ&51qso5*moCOlIY02A<_&I06(L469s}(Clq`*shi~WAS zg_2Y?L;l8T?l1tKr{*8xl%r{WS9S?XCl$0$oMyL=Q$0yfb^6WRV;_lpj+HLj%satH z{?Vm((x%-IFJ_$Z4+)^xZ~!{+1!ulbC#P(YOf*6X@2%!*A>)_R|6+culHi@ulN#qz zjQk$&0oS1dZEBsB+3JUE+CS?Qfy<#p5fjYFLat@>d}ytSUZFALVTo{?Znt`aZs18H zCr@h&mDpk2!K<#^ko%K^oFPBD0l8R=$-4_T{mMGL7WA2-9i*GE+EZu56*Kzjkn$L>(K@#PNKl z0iq9;JMysIVPY0+w=5|XE_$>##(K-ce3qTHlk#9%hF{2_A7l5s9#`c%EbL8e&IW`R ztPRVK(z33Hu$}g&G?xk4;8ih;nR-9AlN?px$Y6_H*(-aaAq!_{vZ{PB)NS>)>vJee 
z=Xqgk9H=pomdLakdAn6)NZ6&GWGML8E1|*TT@nSNA?2^4o>XzU{H?cyl<-iX9D=Rq(b4XYk%iyY`+n%h<>kjoeA5X$+a9%_NFNY^IK;o4wJ) z-QsT8t!13Y~`_#Ld$D)aD<(beOeZ`-=D<0DpQnsBdZ-j2- zxzW7#{iSz)z@2rj z7)dc6eR~5Q)PtV93<~i0I0+T_WY*2D)J|*J-}>O!*N47O<*w434>B8L(;jZhV8e4f zZ1YfL#EA3&%c%RKIcNp%+GA#{s-7Wowox5CnSvnMF?NVNh!t_rKR%Vt8HMlQ{E6Ht zHfOi>;anUZlcb?34TZ9;Lc@G)ruvIJ(T!FKp{iJzl^12guk1{{ytNn^P4=MCY_mgJ zquDH(C)j23ZEXnF=3V?WzC0g9I?4q&(^B<}*g;zh;X0f1X6!89kVC8WRt{B28gJLD z;V5TNpb-5f|0?tKZG>VQR5cf~rV*sZdI0}{LaX^--O|zYTnokri7U49G)W;>;!%~B zJUN|2S@yMgb)Vg)2NXw%b` zXqwb|T6qz=N@98aUT1}Q6BS4EL;WbpL@zuL)2cHk@pGXJD%lgqE*mc^Z-&Lw?1M}@ zi{$;oSF?UiPplZ&)n(ORE~|CtlaY?1O8ZsudL`AxeCt|zqOV1PunK!V7mCUcoyw=R zSOVUFO}Lqzwkju`?vB&fvM2Jx(3@1SJ@}2T?q;{gbKU;pk8+RG5>CaVUynyo>$RT! z$-f@CuGFc^ft_I=a>=W`n%~~IY^7X%Pv2#RSH9=7x{sEPgtYC_axQFl<*Dcvk#1#8 zVLDRrUT9zq+qlH)s#EPJmaTV^t@Cbp{_(W$jhheC-gy2#oU5el^w@9Z>O|UX6zI&k z7t)q>isg*e&cpMWlarUdjS4MBXiZK*{O6?0n^`@VhvlocG6OY$FNM}!Cv>N^+<%mI zRM72ZMphKO%K@q;*Rpp;hPj&xOJ)A<4L#eeSo@9!Xn^)W`@H zrSppR;xA-$-hh2Lvw1ypKa_v61K$4rMxK$wzZeAVyw%U8MflWO%toZeF2`?VY~D@F zTdVE5llIil@q+RYyu;MUV!m`#qWej0LmaPA?Kh5Q+XkwMvR~mOY zk}UuDd!fZ*MtU-%malu9xr??A@JROT z+dXtORuuw7Ew=SqcrF8oKF$Pz^XZd#&?sv&c~lmLO{aIPh1?FSYYhA#t(OIWaOYBc z=hV-yjg4j0r*k^GoI0Niy`mJKvy^oKh~Vqtv3IH+&*z>?q85ma5Ce+5AU=@~?@f2? 
z*lxb2_R`z^_<1ukYS|uWxOl{KY6ze++_dYak&P?O-8J@xI>>1Cil<{eSw<+PSdO2= zIhj%xty!_YIieVoZMO^6X=BEMa@y~@DD7fz#Y^`?>DEWG^1M?sr``ANTTI#w{bw zKSL}k!kseKEDcP=roc21NO2X)p#e^^1+ZUp{Jbscp*b?ED$Srq_?(@XjfShxrn$xq zP0^3e#Y&hng6-gO;zQSlw{y>4WW?z70za59J%buZg6pF^jIkD7vxhi`@=n|{B6%?O zPpsu>t4M4DFX(*$5SuEW>b?=E2ee9v`@Ayh(}>DCB*vTLHf+N?I7z~uS1)cGlY<9_8~^e04E^JU$umM(z=Mr z3=NvHakRmSxq4?A+=GTz%IKQ8u&a;(-r`{;UQAB*dY|eZ^sQG?Hzvz=ZS2*7K5RJJ z&{mRV--@`S(!)<_htAqN@L=E;gb&+TcR^K@WKU{~>{DO~j7wwzdp-Mp59r8`Iw1y^ zS%sx=VJf-M7jm`xl$JAZvx7UxJsHM-(oBc^Wgifo-wW?+p;gk8*^hID&UEZfML(Qx z*bRN7^M&+fm5*J&IB>9gw9STn6IGcL3Dg(Qg{Q_;9kp`J@{=VzfmH0)y_S1by%Z=r9?I8H0$E)UcbR zfE{8FyP~LH@<(s#4d5*@jxS<#yzpGtb$FiI3(>77%)UESmxI?jK5e0HGX!FA{i|2O z9oNlWwXQjjkVYvWJKZB$4AokaKLb0to z*_iqG|JjV^U*;Y@$T?AbgZF|~M`)IjkPu^nOdvCH1iVc0*eZTq{#sTP6}&}|Ej1!L z4#>P5r+rqg@inIg8na%!8(*ScoHoPxY%6d})Z=@Uon zeua!q#y7EUt~ei!e$W;;3fJVW9|z;}Zg98Lru=^@-{LL5;CxaN%39EslkrL-WZCdL zxzkxiG8Hrvu5q8eQSvA3ZnL}W0-a)4D^GS=trxQh56_Cp@PEtY=)Zu9j-9-NE5)P|yVJE(n-%Eop zJ`f4Ce!rdB*x|t+!zE69g?BnnEz*`&r?ElCt^SLzw6Y7K8wJF`idWP1S~SJC8i!t; zo!Z@Q;y%4}HGy42UwIK8o9@zD|C^PFm(GuHjTu1&ye_O|&5Yz|q3fA4F7(fpo<7}N zBmAWL2MVG)+GIVAT1+o9$r|#htsO^Av|t&UDKJxZOAEBoZ}f4+`Y=Bv3*;`k#dB8Q z^llY~-Ga*5eWR?U)RsPY%25P|uH?6oD%{)ZRD8u5cE?Hz-(`%}!B|4;C@hy1zMf|Y z*>_a%sYqNuY>l?)!}c(8-e`oqSaaH?e-Uu`tsnEyJ}!!>%)&o1fYK;pUjB#u^Z_N% zLnCrBLyg2B;>th0yH(e*oHpNf3+_S!e2RsSSAToj; z#VO(db7b{-AnQk3v18ZsyU|^zp30Bzg`N|k;HT4`_aj&@Z@nS(Y^OYH?_FqO=tF4* zmA2C^zXkg^`;Ogg-LGC*URIEX;XIjzlkE23Y1s)_l~h9EbXh)&?S+ihuzNd1V{ZLg z`{J*=qmHDJWal@dq5f2UsW`DBg7$~fj~uU>LyWwSy8fiEuHg%GQA6pBvC1Qq zXAj-=#ARYA^2grVtLh9?5{v`v3)x3^awLzWeJBnN)b@jXI}^+VzoEQqWZqTw$o0w4 zXD)XCNj~!fyrC6vp35$DYd{Y&=k?sF_JCf}N7#bZ!8bB@BD1`aal@s?Y~)1@jfJ$n z7WBA~75wc%PWcyfuQldTpe?%sRdbf3;s|chGIR^~;1$P@HwreaB1F167-Hyps|E8iH_V&R3S940VS_ioe zEA1>6G}GAAxw0ZMyDM3FI#_?8?yeOz_3T&DuI#bwL4B(o_iFpxS37Uk{>ej4r+2FsVIoDY>t+M1Q;6@eC#>`5woH$Qc zP>we7jUskF&?-DxAlowo=a$p=FX!`4?&Ra;O09I@BdceI`1*dHZZ?LUfHABgvejfs 
zyv_bV0{Dp2D8(+B7f;d(8~Bk1K-b+7d^-J@Gszb@PQ`MotfBR&ATp3GuKa3=hsAZ( z0Okz;(r4qTwNw{+JsM;La$hVo9C3Ewgt*cgm63FZDt%JJhr+*|`_)v!W(V?VCEh}e zS1ZwdWU|(Dm90R_n~`d>VgZe&9?(FPf*#>tS4@*W)?2=|HmZQ-;oNukN zXpPS{KUSVMV-H9;-k=_B!~+t+s~M@PN0A7dgIBOF8?4V_BKP1_Gn4XMtOHAFMA~FQ z%tPGsMyOyVi*|mH5wHP#3S_N9zKGp^W7c**|2rGl-OxH(+{u0JG-^AV?JL#p3(m(6DEarvKPtBkT5<2hn7Pb&>fe1YfrLiuo zK+iED7o^_^jR&H~J-(dP5dFe%JiP06SBwuaqZ*R#uCc2^T>)tqTeKn(!lC!%k%rl2 zLV9E*M3l~qlxaOQpdqzd-YN3taHn=E*T!q@Kt9hHwWTkSA@9SI+u84Y;L~Xh&#k2% z$mn)*FK=rMMnZBPP zhu&gb`=OVWey1Ii1Gy@7`YHhIbda@O&(pjGp9On6t4r1m z70IvlB|Xq}cAI6khQ-$#pV(ZiwV!rXRvt@RM(W+9PRX{`AI0}vh7_$GHvvM50!IvN`Q(4dAf~UqoJ&H zy^mQLkN&LA+8x95Ii<|%z;;Q&tzv{brS zC%!%Qv(ffj$=-R>x!OneL_N%<#3D{(ej)eY&9x)B@_I%DiK(Kd5oCz&&>Ip%>btrK zjk!ye8gI^e$Ty;z_YaUdc7qgkbO$0QJiMp}h2=3J#czE-*A2y}*(EFT%iVFzyO3qq z(2hlxjhD&5BWUMvT5V+s{_;~0JwEUz^zLTptJ-!u@hRMl6IU~5bT!hV6RrCe`ebd; z5-O1)REf=xzdz98)lg1`Y&Ta$`esA(_(u8azQI>C)2~las3&Vtbrf+4$ z&=EJ~cG*YuQY1%gS8Ur1geS!%FhDb3R?1|P8yD2Vm)4{oZnAq!zLkxqRp#1=&<=e4 zLP&DM_7BM2$&f$=%_KEbZ}#}-J1s!c7eg)Hko_k|wP7ehW98eO;Ng7?XA=R*zIKJT zzex-40ZWMANu`$1Mtsb^%HQA?U&7aQ=LZ=k%RIFe1l8tH5-P~is$#<(&+X>9lc9mk zwmHE3_y(Di4KbbTcF@R7$#>uhD<~eM#r1hK&$BSE)*&jW5e&Att~sxUWWcfN-&r=OMWiFk2D4W8pc88g z-R&i+lsB^P4+6JJJD!%DCHlS4L_7zz@kkeANzs?(_)gY0WoiC?^vSskvXp2=m&}Cc z;?r>Og|wy?@7}P7b31dR(QA>V?c9O;PF*cJLl0815K9dcu)Fe_xOZ^4Q$E5Q1KhWp zo@fQ%!P3EHPS3cIIW;eOJ+VV0F%c2&Ig^(3qcd9tDX|y!Ig8YIXHkZl8!KQJA}3Z{ z-pW&pd6t*=rnHG%!7;hUx5H;R{3nJMwsV!wkP-NU^aORtqyC$;^^aqpofXA$IpvR~ zKvj7@-b?Ef$pg|b7VJQtae~r^Y1!Dx5RE|>^3Fyq$IrRvr$Z?fMQp$7fW;uGB2Ure zURruNeai)ShZ9>$BN`Rp0aQ*(b_R;f@C*^Ui7vD@Lf;Tzcr?T}_T5`MR)S+t;TQGEt`BrfSHviqH~gilb1{Y=i8pm$;s z5`z*X4~ChF&$ZUNlTZ37z38ZsvybfGOfZ3W+>CMqER86iR+D*raoQeBPODJM8inzV z(7H8ctJN}CY?Ek+XJ1cSty@7^9-9vp4MH5|J=#aWR-+v3EV|`6*mfK`o@>tOQv+r8 zFj^xq{>muWD)B&LS{Y0BV=LdtC6lPi0-LVpseE%?9L%eV{OBypX7t5!Y&cC2!?Gr3 z#dhR!YO{5pv zfr=tc*c3L0L9S;GPeZL2(;|E2?Ke=nSX?~cj8XF^^wDa4YXL|&Z_UQjfA(`WW2(h4 zR(Kd1&>LCg)A@ap2mzD#Rv4%*Y`o3~iHx`M99+g?sPE?6RL9FnlO36ZQ<=NSU0aZz 
z`XV-2JV7ey0H4EZ(4+Uq{#X9KH};n^($ksaUUX3eeIhNZN@daUjz_egiJh8GzpUwn zpnaZ@hSNpu$#pbFzMJRZH*XqJe?fA+8}~+bN1HK@Obk~2*4Up6^RlxRM&dnqz*&2_;W#i}bQ(-s|*s zs)&!}|3m3Rdm<}YP-oVX8c5~k(8`JH|0R5rcRHJyK1dIn!#-A#H97x$di_S;SPScu zF}qWt?TK&X?%3u#>0d3ZnLrQdfugEN zSZX=6g;Y$_05pOv$tCLnpR%jy{c2`^EYum#h*Z!YOIy&&YU~E?%lDkhh$i#%O1?vb zsXXGd~L)gUGp^N!FU>tPj)q(R`@xz9dAySUUdhzEFOT((-bHCz|v(`H(ZL%3kq zruo{tNLyGuenU?xKWPYi%DcnQs(@Htb0gbkA&#O=^2zp{;(d2{tHqE7BNa#Ui48So zp3u118j)1%gVX@Q7W$e9chu_3Z?Kd+u3Qu!VD97uKUl`*ob+N;MdQZCZZ&tIN~Ss> zI)|zB?+R;7$N2m4w8?fA9~uX3Lou`CVVnS_Z!*H0$p?E|r3gucv6)YM*`oo)zMm^> zvN;{hQ>-b=^C(wtCohVIJPRG7Whleui>iyLT3;#;#qZLg*3pRbyK3e+_EFz>D(3XG zkuO9lWDez8pnC0;R_qmR6$OxD)R&d;jJrr5?(hpf^K$s~o8dIx8jalzR!w<%_psdj zt@V+%#2(Y8=W-{U4%6`WdMhqqdwJ3Nx2`SQVT)?kD&YzE32pG%KdoE2mZ@V|eB*cX z4G?=R9o)w!?BzQdI<;NwJxQk7xXjL(ZD&m9&BR|kde?r7{jCP_4`f#iDe7aZjh~g0 zD>G8MZ`FpkAT>|&1EIPq`zvA~dypfaQH+T1T84332dFppy6;w=j1^)J^~SH7Z+kL! z6fp|zR))$q`D&by3$XsAesw3bGxO%vSPD90=H!Dms$VN^G8Q<@TAXYXn$joW z<|fFJsxA72)9k*SXeAD6#Bcv9pCU5*YEMTtXb$bdJDFPi7pc)HF@szYoq|x=E|v&N z?&=BZq6UefWAcIWBx|YiYA%wD;eza3?UT>vGhR6~^3xd8=%KY{hftCPK(uG`NiRm{ zUOpL3c@CF`vVEYLvHvOh_(|#KlFIEL+X8kp}jmI@1_OY(yS-0hR}K(7^yT$qegL( zPz}c=^r11RN>apU*JHOUi8PXYKvQ^+2ksnYK5H+`l6-f5K0$!>aqOl(&7LQP!?a+& z)i+TFD`zCohf^V7PhO~bdi~RxYM#hb1YhvEFz&=UI(ci_y)v~)4qLb(OKIN4AJ<>lOA=3Oo3&#Xn& z*SoW8&mc}A=>b{<$|i$MhmjaGIo5U=R|Vi(TA~{y#fW8sd`P-E3`sP zu8-=dEV|6;T)0tPA*Il%XqqfTM?Q;sjms6)`_A7nYO)}zDfnE`CApR$j0MGSVyw%J9!L z^h~Z^jTL0U=#A&$W(ZEy&M#X#d^YSMR?kCvXv2d%*Q!E!EY-$xx{wSVC4I7B&eHcu zmWFJ5vv#e#U5AJBgxFBl6n-XuEXkYsTy2pL)*H@}E8JlvMJ^&=s2>u5HGE>FpoJnC zYnNurpLnuofoqHI$Ty$O9da$W?H#Rlk{rultyCbzEU{6MF1q2r}N*ZB>yeK9!!+?PEfD^POdvet0Ep1agbY`x5jGYPL|hCBJ|TI}O{nF$*u<8nMLZ|8fhz*CYn-Ge-JE!1GW>58Z6u-KG- z;jFl%XD(HOU?&LwgS4Y!_hNp_j9-l8qahiA*4XxD`DqlpZwHV3E6gBs1YKGk;e)Kf zl7;Gt@yIZY$7aZFtz|qaUc5sMBKHJc6~Cx|kokF#zpXLQE%7PqQ8{EYPv&pWvH$o7 zQIas4PHtE+Bd1H`L~hw!RJk3?Rmg?@h%34?3%x}XWZPN?ZKs$1W|^9$XXmtPW~>-b zK*Q(*k6TaANe)_dpZ4eLPRfb+GeV?s*050nOowWi@HGC!N(G@;{Q_^a^#2 
z%X&;vw{gkO;(hpmG4mlL9RlaY;fG>(Z|SxkVMb(E6~D?l_43-|m91jZ>t;;NUwtvF zOCw1Oe+oNTk76m!3(^y3tGlOP&WBTjXT<|8j8`s_B#SeQ&|TKGS^e&S^tn;s-GQ#6 zE!Kez^R%AeRZ+OO4W40nc|!lIrD<=p!TLF#k~clSohOzeKh4&V8`8qB(?Yy~nPlI| zCr``nd(RD2V5Bkur}HhpL)YCQpP*kSoxmwHhrbmwq6Djmh9dX-u`NbTy3dDGP?ZK$h#7R0y4NAJ`m`G$QUe{%n$tf9ZysP0+Q zg7}X;R&CP=8IQ%$G=OYssrbh%^-Y(mb)IKuj1eBkDM(n$qAK(yW7?4$H5)o*l>?tg z`By#yjbKH&tFr^GSPHh%NZ^R-4*Mg+0O62yUc5FAUZof0*{QN}Tl5)q@Q@!>9j9O3 zz!e#7wN3bhEwSb-(%H-3jd1);BGDRk%#CG&hm3|j zwCXe!yYGE;_ER|vNW6fu`o}*bA+!2tN9d|6B!a~Ggyv{3lQx~to`Zj70F8h@@~@Zz zPtA?h;M=UA;+|FBv(X-_W~cJJ8iuZ9?gR^n@|{Gqm5KmuKFE*E@XcU=yOGw{#{Ot{ z&iakfJAISRsRltGnq?oVcTCY67ToCZN8k8sPpin@svEvr(P#TTu(0$U(vvtBBWMBQ3{dl#%%b{{N{&hB)q=v-u|28vzL4*To`YkRtj{pd9NY6mPY ziiiAQPl}dRwts|A5%1i#!fFx;es~bu^vT!0lcKASaY-m>n z$sCKKj?e1X7cwsP5_iyEO%v%+mB5bEqGo&bb}4h>Q^+HH>3$Zg{;_tW2*+D;nlB-% zEH&FeztK*_sh>s?H0)u<>s?SMbG^PAf+NG#d-lrPmd!~7ENZ|N>owjQrf=H9zCbeQ zi7w=GFW+7a&E))L8_#5HGdVx*w{l%IPBV&lLISLhS(`v_nhxE-kd5R0FWM$aW<+1f zDvI*yE16p(2ePd)ULU*$&KeK<-?t^v5l_((&zBSKv9}(A(x-@tHNT%box;~h2@H1AoQ>+9+Fa-e4yTJnb##|qqf79-dIyLV&QnK$_eG}7 zk63SF0gD#u6>5D%_G|zfV}xq|c%qA$3yWoc)A`sJb@j8US>gfwe2}M%=Tt_D8zdVi zoy10F=teQ9=%0MDCSQmIbwv8GE{Wpx3Z#zD*bXXCmrH9Pvt|dHU1V3SI5zW#PUggC zLn~qzm7UN+tF9|stbjUz(;3sfM9n=_*Se5c-^pobLOs(JGg&4{n10&A2l)?c^<#Ie~PS=dUDMk z^o%#v!eS-T`TX#$HBGij>+l(`M*n#-<6t}JTUWGX%B;eu*k_^Tfr^JO#20uQ&#`DZ zi6E<+`KtIHAHWd+aCiD^w>6RL0xlMaUR`cU* zMzz-{2XR1G-a&27bP8(k88zk)hp>{UT$x7giL}A{S#z*XA@3)b#|qI>s2*xk=_Nx% zn|T;HI^IXzEP9*EygmC`eyayw55Gj{b|13oG+IrHk=VPjnHFI9qv>TSHF!LadO0;f zFxPwGg){MGV`tc-^_wO6Oo=l0hh9|m zqY-*rm9ei~jC3LLq3YSQYzFHm2IraTEwv_o=sL8_iB|$KUwrCIi zX9MY=_YQievb_7@%mCF%I7)f@om?|~LXynj?XmxwK95Ca`CslVu_3!-(wHD1#8YkQ0PWYKG;~9+|dd1_F$SxZOgOfs^I?EZkY$uJwu0z&8H`bw-S4%wkpev!%NL(#qD zmgx`FNa7%XiL_u$)~QvLc|y%xk6{1EZ0>p)MDFNW|T&^`a|}lzRI`F(a{Z>Vdgw2T@x|mPygY7CuxAl z&&f5SyhhjV-irjQe5({|YcGGXU9h_~vYW{Bdoc~)zgES*c3J6B*#cyCpYvQ8m&*^ zv+BPZ+mN5F;XYQcu^~OUnASwK-ZJMZtfrjO3@S#F8`U$z0(l^*~yn 
z`^)ircB((i9mjK3kL)ok!kT!-Dfallrl_J}KOqmbTjZ*GAolLLNwCB9&|E~RenKt< zU+oJ~(JT+qI;yUY?8ZJ?FX9okEIxTX|3mV|;~IIl&km+zulYkg`=+|dTF@$2$UiTx z=CA&RRmXc(jPw-pZ0@Z2?yiiG6_TVDlIEZjRA#*5Wj({g-BF0ksEE_7Wn*EI)03*< z-+E0a=&G}Vi;M&%;3D_7evi!AkEn|LaYjlGFGc=o+QN_(W(CF7MgisM5ly^F+*z+q zP;GQ2Z|UCFmC!Edypg-gpIM4m(Syv<6>aiPye}z+!ssak$|A8FDr>dNl61b-muVQO zr}y|l`q-`~xs$*4z1(WocWT4U9;ejfu^~?Kr&ZQX(45XVYaJ)$kkOb0w2n!F>7!^& zR5=@}vDn>J0(FT>p%?ro0sky;Zb=me<{lKKd>BG|0s7}&-tla#ARx&4%G5$u%lS#>hw{ZG!OF#9HrK zJ-fELCJU?h2Hug?GzuONej~}UptTCH3(xI52dI_Z758f!NBDAClw0|pL|H%bzBT;V z$owCs&&$L5j~$-#v22I)Y_o1E_~9&j|IpX>yilzxp8{){S@XAWo_U$AH3AU??S+2t zMGlQc?$(1C*(0Fk2gP4PSF zWtT}iOMuRB0ZFjJ!3sM?X*N;U$g8}W`|J-SVWJRgz#@Oqja&abem+$$cRmw~crzFh`R8~H@_7TnDV!rxfV*d;IjmwZA34V}oQHxGD8$*}tG1-_=Hjo`M13rN? zxC2GS9rS-MKcp1B<%eNGwxV8)P4F%~GDccxIvL;t^h_?Dd9s~vXr1*_k_pL~10RF7 zd^!U4;q_9o8mdv7@1zIhK|9)p&~V$R&6TG!6Mjpxjv{>;0Pdin}`Le}VuA9RJzJ$pRAX*Qmy zM!+-ZpffFo$7S(^r;07_XGG4CL7U6@v=H6i$|n|-Y*-h9m5qy4q!Uh^QOiRn_%t%# zRStSVZsczkgVbnkEf(*7HFFm&@dfk>0uphNEYewf2*>h5^tD-}@|-^Xh=Wo>BG=4XmBHbc<_gh6;%UI2k51bA? 
z>5FP+vcHo#Oh)T)fyYth=Co2d>-J_pIDH$f(VJH*P>dI)oeoAqXaf{Tf8^8Ez*O&L zoOx+~Cb$*?HVE{yH1K$Bsak&e*Un!oTwaqjN{j$IH+h zUPI0apV_6_hxN48NCGX$HZS6S`sQx^^H7ky*wDyiaoLF{`J1I-A)WNan&Ubxrnd{x zdD$emM&^iI?&o(khX+#IOMh7G`qXB&8%t`5#uSaqjac_p#n%-S`4=)o)|~5jB;P=H zG*RZy82}`KopVM6#j&E41>h zv~+6td$enZAH@6mB4bBA5@E!$5VTm!&crh!(xrDEja;flRn4MjKb~}5T;ht^I6(;F zD9-o$az@-b5A(uD=Us|RTCa@$I4o1q{1gt#Ht>xiMwEj3MFihX|0iT#H-Ve7~?L?Onc%Y16% zMf(LFYPzG)qBZ=Egw!kh*sFMeRJn^56mkB?ZzpiDREZlbPk^SyFtx%DscR&1o?h@&V zyZP;%P)(&Lt(zRyNmbPTd|Llz?kH#J5Z~T4dmaf+uo}nKU(I@_eYPt(Cqv%c83GT6 zQQr=QSS&S%;#IY&@P0{2g zs6~m|1Efuqq-~m{Nna%^+R8Hz9r(d5kcz2Ezue5!4)%gb6qH!(Ck*ym)m3uDT{@l7U47*l2;;5__>Ovw+VLYv#Nq_MK`a9T) zY5K(z*^K#siZT!oH}hs*vVkSjxeLm$B3Zjt5>|yhmQ0pM;IQ72#=Fg;)vC39v96Or zggxmQzpSc|-c`m6%~YMN$y1Sosc?ui`MNl56wr$$!FVxH4=?2TSE5N(Be;l%`;X66 zL}Wjw(yPBuzwA@7p8l_L#Yj#>ES^`H#)-5*w$OoRs3RXu+dfqT5#e~Vw-2xoe9trB zCahs6(~&=m`%(I#4gRvWz8 zoGU5tEIkl?$gR$-qA^}m=up{|NKlNYu$vd~L8k>->CXmOjMInMfZDOqvSpcEFsaW$tM*g<1mm>hq$b)|aCp=grTRIgc>8nYgI zzIdf=c&j(|@rnV)LuPm`$(~CqX0UN^k;NJSoLel1Zmxmt@(SAYjv9==_zS7Xq{Ri? 
z+2^!?HHtudj!ofctj*ph_oy83YTUuNF>}3=5nGFfmC6Spm^I5V-B{HXF`7NOvb0?c z*+~#|DMG^}EJ(ebean7TlAS&3?Qq(LZ*0as0_R$=O6Xy|p1qoKBP`lTtlkrUFu|Hd zuURy9Puemmvq<=`+zvbQ$=;Ui&6OKfu|K1Q3-H-27Y@e4*&>Nd=1x4LSf4mzMUU*1 zkFX?c&y{m>R$V6-{d*#D!mP#FWWEmvxvXXIog(R zoT9jptnK;44QDDoo~WR5QTY&E;XgOiu61gBb~c}3oof6t#hKip()z!-P9`#%Z_R38 zxmq^EE}yRRuboi^&E&D-lennHWgbfn<(2$r2M;86Vz;rgr^*)OoUqv%XLT!Q|6n@u zEizOMWD&H6OV{6cp;|4};4iohuOqLjkKuF1y(@@xBMf0Sn&fk$VA&}jm`#5Q8TAhz zqYGULH6ST+WyE&;*J@$i?nHimgB5&^lvL`gw(47GJC*C8ISkdV zw+?yZ!a%O96_$GKnIr(=#k1M8bTB+r6Ucm1n+begfB% zgRo~SUbOsTEa-3;$*$Z9^Qtl|!a_&v9^aCsnUCikD%IZz19k0S>#qm%DaHX`<$YvF z&Q?%86HkO%s$^o6p6gr9yniPa$Rkycy~7oc6^KR{2}DNVf+4XT*zP7fzDQkG^N5nhQKsS>LbkT@@x=Pf6%Z1mA|u!)K3_=ta$Oa-M>D3I$;iy<{y5({b4;8X z%pGb~5R!k1CiIJUS=01HDt(_$+wv6=zuquF#*m-Y!%seB<7$;w; zx(M%A5ygj}h;`|kj9kUI5DC{{GxV$fK8Mt&@|#>(-4U7;`(XPxgI<#w>%%s!=Z9CG zT(X2p<^RrjpeOiq%!oyTrM9A9XV(gE%PGWp8$j|z`9~h^Czx#5fGi>!z9m8o{Vh9{Lmh4QX|Fb*` zs;Rs|h9h~yg34#ja)`=P(KW5$#1(~An~QhC88s1=3ONi*dN$X?roV~IPUL&1`ik&3 zT4S5TX=^(V-}QxDQSuzlC=Tb2U*@XvW__<%%Qm2dH%ZAmKNdVQb7Zy>*L)#%E>5xm zXQ;E#??-m(gFK$^9Sk4CAI&qstLJhCQ>{IaLA(&J@fprV8)6!JtDM$nl1DN9g^7LC6X`M?t+{B{u1+RBZR-r{%os0&z z_6a5_7qY&bz2Nl~muXyl5u5aRELWb2jDC=3R9qHmWqdd#gz~$w;DvO|lPe~P%A^2$ zjs@}TSi6~#h}bNLuUR8kb5zh9BcgfPsQo8qq>8V|xcR^Og}H{^t>@68h$>Uj4-%LA z!S0z@8Xk(pO+|Mi4lET5yc-^`BP}S+t6`vbtjY_Gy>bVmekpB2g*t2V#Yoxryp(oW zBRy5VU7iAqUds1m1*xz=5@ui8RYS7F%bZu)937QCR&R`&RLcSjWwk}VRiQR6tU`6V zFw7{5VSBTWJ%RMYWAFp>=aqf1Yxw|AB0-2+d2p?F;z_E2Z0u%!$GyZUeqh}ZJrx(c zoZoQ6ISxjL(Eg2#teD}Y_=u_)6pn<(w7R++ao51?&zxvG0P+Y(-oSe*nSGU4HR z$>vC1E5)bH6v^4Nq9TMbd?BBl%!)glQf#dGO*4IL10tDC6P;vho`s)ddLp6x)!1K5 zOWrMF24cLNKYkhpyyvYYH(9YZ%O5i}ao?FPGKqNvr%2@3Njj1s#IHO`h#-aka9SAXpRI+2RU#?bvFm@xH^naC-LYWk z{dpES$Xu+ty+0`NVp>rv!AV6>?Ixu*SkcXYyDhBR(3%Kc_d) zsji@Zc`JN?ToAubq$@A1HwYg~uVDl4k?E-6TyIGU-cRN`b4zkebM3X8?CDri)m&Mk z6B{9sJI#Ku7y7X87YdXYhy^kVdNP-%Drva4bPR`KP%;rI@fY$W%>cl~2uwZpNbtXipFRI4S~$dmCUxsXv{4*d3VA_YDpD|B*`vrw#A zI2POzV=EJfY~mPSzm%UamtVPp+*z*5rV)~^-DgH-?U(s|cKS@~>Bp1F@C8UfLv?qt 
zI%uF?j|Y$pOi*jPp7vjkKk56GNP~qyW&bw@byl{IYk}l0?Rdl^MO~ zutaa|GOJrx(-ymxCz%njKE}6KS{XWv#}rgmjz)4uAcw8lBryj{l$OasWG6A7=&{!7 zIaLEw3H0uU{jEL~T44fn++1IA$Y)X$6+`@8u|?fi@1gW;#!pJnllM3`Lyg@jT=vz# z)MC_+e{0vY}PP;8vNh$wK>v&9X#O`;VYwNO!<#$g*T zF-o{qa={?A2qAm|4|xmEQBzfGs~U!#i47;{|M{E}*JWpHo2~?o@@;C8DeDoJ<@>i|1r`f_pZ)d^s zjNjcX+E1ARTV$CStyRXARXPn9va<`dAN3NFvm%jJc|gtY?92T$!Yj!MtAUMdNWC8u zSL<~b)@^ikx{)#IlNp|id7d)@=m%0j44fA>u`%An9__W^bIuvEM^k3xowPC=3{SkO zk@+d7C_iRzq*HUI{xUK$q!A;~0>3jx(lisvLqsEUez;UIP?}ZIEkrNLtDBM>#s;nU zxd_kVVLT4?Y#4!Ez2t(!)cENi&!IQ-vermo*?fRs!e*M#e@`X_81;1U$8*XnSf^?+ z3so)T0c?7AuCHFyyUtJDj-2L4SVR4z9lrm3SeD&VSd$u(HlTE^0T5@*7s{@@69o@} zQCIU^oP||s^;CGygZbxtfG?)Co$-M^iHuJLAOH5Q2QsFgr`0D~S>A zKg&;6fvKnxA4n^zaNhXj?Of026oqf)4(rO~xsnT+jno+3tCrMWBD9nXEw1H@GGS!X|kb-<*5j z4Nj=Phtp${sr=A-Iz1OvVVIaI|HZad3A6+OUeCWX$%<5CpbvRCQ_-0>P}6&9jz$B< zA}*AdIiulh&=3Zh#~z4HICt&ua-ChdC&O0`r1Eq+r(2j?w3-34@>JXv>Qtp31{_M; zawGG#wSt`A@p7j$IB(Otq@AL1Dm{5ApO`xnS40miQEqHCN!4{`@}1KGVFDeQ!%(TW zvgvqwV}*uy*{Zra0YXLVe9Lak>3bVUtt$2uXF=QwBXWiHmR9F{J=eUJG0I|85iX=< zmM=>fYspRJ;z+C==7~7RA`j;n{by=sPFwt5Z0&Sx**XC=F1p&D+VjKd)f>5EBA=Pl zbRL=g{jOpWW^OPpEKPMub|(JwShJEd`6(-{7!Di6p~{?#wTg^pS&!xw59BXCG;;_?-xBBfq?}%sL z>|X|pnb5wgL_SYo8+a!>sMQ^+cs!%tbih}h4fu?RScegIsf>i`lu&~!>X`YWSdVFvtjSW z!OVr4ZxaJth4Hu#7qB}E&fLh=a<91&=qZoKWQuRrsK~h(BD*rP&TqY|%cru|xwvJP z2oJuP?`cDe6(0+Y{7x!#QWycBtR;c(6)kY2{jmYjQ_sl7epR()nS^;YtH%$u@aU=d z1?kg|H`JJO{m1--e6;#{?ove%U&z6ZWcX)?zWkjOcoezPr>IXq7;5R&=NGM5tjih<8rMVX0Gu4pdb#cj+%TuhIgM?=S+1{L|Z2CgMKh+}5GT4%bi(`1d@km^|szH3rAV%mTaiOe?qJua)bteAmii z&9+z_bU0N^Ok8%UCW=Ri?R>_lrt{qkktu1BeqmSnYDIE=;?Hu3Iy;3XFeP$foBRt0 zRMFX&zgc2&z~YVCgj3Lv2l5g+Ghe~y<-(Q!>5;0)<=pQZT2&+Bmnu#*PCFi)tLuBH zKo8`mdR)7ra3U5`Ya!@f#ZnEY);QNFWeufYBZcey$(efkpzqd=IPXU+wfcjNUC(ca z!?noEIIPvux2iQ+m!)dLI=+|RjN3D;fWMS`d5^we^!(FvEB_OrR9RF!*&wv`)X2%i>;NKP2boIDf+&03FyHut!~v(;(c%#7up%VeS9|g- zEwQ$jVxPFJ&(&y1$a4#AD%Uz6J<4WCgxqLa2EZ;vYW6Jx`oH_e!VJ&mdY`ybT(AGD zFkltNs&>ZX3$x9rn#rO+-fd6guCUpw@s`i$cl*n3Ue&9M;Gs%@iwZY3oJL?xzmBb-d 
z`(o^>s&ZAk)g#`=c1njvgwg4R{Qy-3buDRFbAuT|RaQ@4o`dIlcI_B7yLu=n3Wro(Se@+FIQ8GiCq`F7o*Dm9nw22sUJejP)n{R^D>uVq{os0Zr zTJvvYUTkg)6opK!mJ zVD4Dm<6>Gea`JSZ)3L;5npSD|RGNZNY#**-$0BJ})8GO9o{rwhi|2}O5Z!D7d1~JX zc#aln;YcEoGoJc$;=6uc&2zoO`JvqNgVr-4IT@E0RGsm!ioNif9%TaFE2rXNU7j~| zn*A{&Hxx;Tb9je22cBSdYUcLnoyqo4t`~D;O!m&n0=!dhGXLj0Y@5`@a`gjRwg!e} z!wRfUrF}eOBtfxE9z?bzRC34MARJ7xS{ts=y!l5Gqh-jfDxe*6a%K#zgn+G9j>}u^ zQu>el{8WB-604aqY{woqbJqB+h&vn}^^-i4_T_}|5F>_D_@fNg3^EDgAVaAUjOVVI z*w4QFHIkp#(_4GKpq#2SW_Tiz`!91Rq=p82(?33E9sIfU0~_J(cJ8uX8Cfwc{zuN@ z9Au?ye$Q9R+Rx`%vS>3MkEI1Mtuhj5huc+FAP>NEU(S0#zMCBN|5C3xmp4jS1;wYX z=l?1TFau`6DfV87=>2+5#x8$1uV^2WEPpgV`Lek#wZq~?=85T8Jm77v!LPAwwkL}7 ze{!s~78y1LpLRH$3c|9J5c~yu z*K4cJ==!nTt4c@La&&LaCe31XH}e^uRjeF7sI?XLLpVOAE@BO|zQ}{|RW%?;L6ho8 zcE}zI=dXAp=iseyMIB4TuPDXC*&PJ+Zu7IT{AOh4tW@!)Sij1S=g6GQN8!U53mvMy zlZSk^>Rq1f`d4#}xC*_`r(brELKcW;iJU6-!u<`W_j~`L=PwBG_&&rK+IVFx?)B*RY#c zqFI`p%+Kh8nAwYnSQ6KQyj6Bl`>_`^y@@)I&utpRfJDjBuiCj3B73DHGJ ze#5%xkJVyQ`X=_ngj?y8N)XoUXH~suxwwcv@X2C=Jk{*SRQjT4ylW))>`eujesQcf zVhQRUKg?ZvVQt|^o;wxIRt-euR<1gm(Hpxb!4C}n$N9-eq2Jth=( z2`kxt)!x}ZY{er*auz(BCu5bj@)u+&#Br7Arq?7$UaVL3wsW_$QD zRJa)QJCZ%RH*=jxAomw-RnxUc9zGTAf1YpZ9J)e@T8rq(Ct_jNdRsl$%x@}VxPEaC z%wj5b!6FNDA%-?ZA?WPydJ>%S#Hz~T4wxn#7#m%lj?LM3D!vl6C+ga%->yw|UuximgC1H_>rsPaj8xeINi1sRTd-ASdv|6yL0hZg7 zP%++f*}qY!e8C%W!OB+IBZR>m*qI$cU&`O7;<+k1@&QqtG>p-xq3uLIV|D(r^5{zL za~(Wz5*v11-+6lZk~dLeP--jc{bs-DwCo&KS;1`mtNBP#6=SmNkx#+uVtLT1Y|S~A zg<4K;u!>oHfH&I2>dkG6rLMB)(v#_4{GeBn$GJ3>H>)wx*YUK*J7iWeG0zpPa5l5O zq^{yYOa8-YM4}tH24?%o2AmL7S(w#jtVh(u;i1^YjMh3h`?0jEhvZB8WbCQx>@Yyx zPXv+A%e!R8uBsI}P*c36BWs~BMDx=sXA1;(i2vxZ85cG{mBKZh74@e_hjd9RTTSIoi}NQoTnQdi^2+Ey_2_Lir$M< z#z*2OA^{q3{TvFZ=+-LX--ZxcYbu&l@a5u<`zTSFKsyxclB!`S;QM z+v?vVf6(|?m*zM24es2z=>1=5Z0d6V2KNtquDN%5aBOPmVDn2uW6i3~y|FWbxZOJai&$Obq{ChK8vo5J2DNAe6N3PxY^Jl7_~3!?w{M1uYqsP_u@|)_jei5`gPqz`1bL1h$ijv ze6g{v`vaFP!;K5AxTUu0+Fv(5*ZrBx&H~y({KLk_ySKkE4QMyegBF(LuWj+p2eFNd z7;;IZ2Q92$z9ny5(vo}7!X4ydZ6Wrch4mfKq}@OdT3C{IchbU2eW8E(TG+HwU$_IZ 
z-xgvITIj^~+YR)fg(d0!Z_H!cyf19(;0x``dhmq}U9`|{to_UmzR_HFPTF|~|3*Gh5zhwH_ z9&B4CEA)pvaOw25!!x(7&>!;59@5uErPy5$d4T+N(H45p!;%SXV<9eW>);RV%ewnR zf42m--PpYDw{`G`_GR7uAVwdOj`cxUBms059@)w)6M3==9=GZyeIR0?{4+M zJ=^bI{hsX~YW&uY@zJ%TLpvtN_l@tGT06F5_x>HLS8ac9qcJ$Nc6@B?z}nG~)pu`y ze`EFB-&2!=Lt{f@kLIfT8XunfbYikOGt}I__D$cuukoI@G@cuNyYZ2?e6%(*lFiAX N(dO9H;K=G#{|{Q)