diff --git a/optimum/intel/openvino/modeling_decoder.py b/optimum/intel/openvino/modeling_decoder.py
index 05730ee6b3..14f8dbcafa 100644
--- a/optimum/intel/openvino/modeling_decoder.py
+++ b/optimum/intel/openvino/modeling_decoder.py
@@ -388,7 +388,7 @@ def forward(
             inputs["position_ids"] = position_ids
 
         # Run inference
-        self.request.start_async(inputs, shared_memory=True)
+        self.request.start_async(inputs, share_inputs=True)
         self.request.wait()
         logits = torch.from_numpy(self.request.get_tensor("logits").data).to(self.device)
 
diff --git a/optimum/intel/openvino/modeling_diffusion.py b/optimum/intel/openvino/modeling_diffusion.py
index 8c1681982b..fa48a5df68 100644
--- a/optimum/intel/openvino/modeling_diffusion.py
+++ b/optimum/intel/openvino/modeling_diffusion.py
@@ -570,7 +570,7 @@ def __call__(self, input_ids: np.ndarray):
         inputs = {
             "input_ids": input_ids,
         }
-        outputs = self.request(inputs, shared_memory=True)
+        outputs = self.request(inputs, share_inputs=True)
         return list(outputs.values())
 
 
@@ -604,7 +604,7 @@ def __call__(
         if timestep_cond is not None:
             inputs["timestep_cond"] = timestep_cond
 
-        outputs = self.request(inputs, shared_memory=True)
+        outputs = self.request(inputs, share_inputs=True)
         return list(outputs.values())
 
 
@@ -620,7 +620,7 @@ def __call__(self, latent_sample: np.ndarray):
         inputs = {
             "latent_sample": latent_sample,
         }
-        outputs = self.request(inputs, shared_memory=True)
+        outputs = self.request(inputs, share_inputs=True)
         return list(outputs.values())
 
     def _compile(self):
@@ -641,7 +641,7 @@ def __call__(self, sample: np.ndarray):
         inputs = {
             "sample": sample,
         }
-        outputs = self.request(inputs, shared_memory=True)
+        outputs = self.request(inputs, share_inputs=True)
         return list(outputs.values())
 
     def _compile(self):
diff --git a/optimum/intel/openvino/quantization.py b/optimum/intel/openvino/quantization.py
index acdfb4a324..afa5ff81dd 100644
--- a/optimum/intel/openvino/quantization.py
+++ b/optimum/intel/openvino/quantization.py
@@ -304,18 +304,18 @@ def __call__(self, *args, **kwargs):
                 data_cache.append(*args)
                 return self.request(*args, *kwargs)
 
-            def infer(self, inputs: Any = None, shared_memory: bool = False):
+            def infer(self, inputs: Any = None, share_inputs: bool = False):
                 data_cache.append(inputs)
-                return self.request.infer(inputs, shared_memory)
+                return self.request.infer(inputs, share_inputs)
 
             def start_async(
                 self,
                 inputs: Any = None,
                 userdata: Any = None,
-                shared_memory: bool = False,
+                share_inputs: bool = False,
             ):
                 data_cache.append(inputs)
-                self.request.infer(inputs, shared_memory)
+                self.request.infer(inputs, share_inputs)
 
             def wait(self):
                 pass