You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
[2024-08-08T07:17:44,731][WARN ][a.d.h.t.HuggingFaceTokenizer] [opensearch-ml-node] maxLength is not explicitly specified, use modelMaxLength: 512
[2024-08-08T07:17:44,737][ERROR][o.o.m.e.a.DLModel ] [opensearch-ml-node] Failed to deploy model rXu9MJEB07lc2P8C1MiR
ai.djl.translate.TranslateException: ai.djl.engine.EngineException: forward() Expected a value of type 'Tensor' for argument 'input_ids' but instead found type 'Dict[str, Tensor]'.
Position: 1
Declaration: forward(torch.embNormScript self, Tensor input_ids, Tensor attention_mask, Tensor token_type_ids) -> Dict(str, Tensor)
at ai.djl.inference.Predictor.batchPredict(Predictor.java:196) ~[api-0.28.0.jar:?]
at ai.djl.inference.Predictor.predict(Predictor.java:132) ~[api-0.28.0.jar:?]
at org.opensearch.ml.engine.algorithms.TextEmbeddingModel.warmUp(TextEmbeddingModel.java:103) ~[opensearch-ml-algorithms-2.16.0.0.jar:?]
at org.opensearch.ml.engine.algorithms.DLModel.doLoadModel(DLModel.java:223) ~[opensearch-ml-algorithms-2.16.0.0.jar:?]
at org.opensearch.ml.engine.algorithms.DLModel.lambda$loadModel$1(DLModel.java:286) [opensearch-ml-algorithms-2.16.0.0.jar:?]
at java.base/java.security.AccessController.doPrivileged(AccessController.java:571) [?:?]
at org.opensearch.ml.engine.algorithms.DLModel.loadModel(DLModel.java:252) [opensearch-ml-algorithms-2.16.0.0.jar:?]
at org.opensearch.ml.engine.algorithms.DLModel.initModel(DLModel.java:142) [opensearch-ml-algorithms-2.16.0.0.jar:?]
at org.opensearch.ml.engine.MLEngine.deploy(MLEngine.java:125) [opensearch-ml-algorithms-2.16.0.0.jar:?]
at org.opensearch.ml.model.MLModelManager.lambda$deployModel$52(MLModelManager.java:1083) [opensearch-ml-2.16.0.0.jar:2.16.0.0]
at org.opensearch.core.action.ActionListener$1.onResponse(ActionListener.java:82) [opensearch-core-2.16.0.jar:2.16.0]
at org.opensearch.ml.model.MLModelManager.lambda$retrieveModelChunks$73(MLModelManager.java:1703) [opensearch-ml-2.16.0.0.jar:2.16.0.0]
at org.opensearch.core.action.ActionListener$1.onResponse(ActionListener.java:82) [opensearch-core-2.16.0.jar:2.16.0]
at org.opensearch.action.support.ThreadedActionListener$1.doRun(ThreadedActionListener.java:78) [opensearch-2.16.0.jar:2.16.0]
at org.opensearch.common.util.concurrent.ThreadContext$ContextPreservingAbstractRunnable.doRun(ThreadContext.java:941) [opensearch-2.16.0.jar:2.16.0]
at org.opensearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:52) [opensearch-2.16.0.jar:2.16.0]
at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144) [?:?]
at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642) [?:?]
at java.base/java.lang.Thread.run(Thread.java:1583) [?:?]
Caused by: ai.djl.engine.EngineException: forward() Expected a value of type 'Tensor' for argument 'input_ids' but instead found type 'Dict[str, Tensor]'.
Position: 1
Declaration: forward(torch.embNormScript self, Tensor input_ids, Tensor attention_mask, Tensor token_type_ids) -> Dict(str, Tensor)
at ai.djl.pytorch.jni.PyTorchLibrary.moduleRunMethod(Native Method) ~[pytorch-engine-0.28.0.jar:?]
at ai.djl.pytorch.jni.IValueUtils.forward(IValueUtils.java:57) ~[pytorch-engine-0.28.0.jar:?]
at ai.djl.pytorch.engine.PtSymbolBlock.forwardInternal(PtSymbolBlock.java:146) ~[pytorch-engine-0.28.0.jar:?]
at ai.djl.nn.AbstractBaseBlock.forward(AbstractBaseBlock.java:79) ~[api-0.28.0.jar:?]
at ai.djl.nn.Block.forward(Block.java:127) ~[api-0.28.0.jar:?]
at ai.djl.inference.Predictor.predictInternal(Predictor.java:146) ~[api-0.28.0.jar:?]
at ai.djl.inference.Predictor.batchPredict(Predictor.java:187) ~[api-0.28.0.jar:?]
... 18 more
[2024-08-08T07:17:44,905][ERROR][o.o.m.m.MLModelManager ] [opensearch-ml-node] Failed to retrieve model rXu9MJEB07lc2P8C1MiR
org.opensearch.ml.common.exception.MLException: Failed to deploy model rXu9MJEB07lc2P8C1MiR
at org.opensearch.ml.engine.algorithms.DLModel.lambda$loadModel$1(DLModel.java:300) ~[?:?]
at java.base/java.security.AccessController.doPrivileged(AccessController.java:571) ~[?:?]
at org.opensearch.ml.engine.algorithms.DLModel.loadModel(DLModel.java:252) ~[?:?]
at org.opensearch.ml.engine.algorithms.DLModel.initModel(DLModel.java:142) ~[?:?]
at org.opensearch.ml.engine.MLEngine.deploy(MLEngine.java:125) ~[?:?]
at org.opensearch.ml.model.MLModelManager.lambda$deployModel$52(MLModelManager.java:1083) ~[?:?]
at org.opensearch.core.action.ActionListener$1.onResponse(ActionListener.java:82) [opensearch-core-2.16.0.jar:2.16.0]
at org.opensearch.ml.model.MLModelManager.lambda$retrieveModelChunks$73(MLModelManager.java:1703) [opensearch-ml-2.16.0.0.jar:2.16.0.0]
at org.opensearch.core.action.ActionListener$1.onResponse(ActionListener.java:82) [opensearch-core-2.16.0.jar:2.16.0]
at org.opensearch.action.support.ThreadedActionListener$1.doRun(ThreadedActionListener.java:78) [opensearch-2.16.0.jar:2.16.0]
at org.opensearch.common.util.concurrent.ThreadContext$ContextPreservingAbstractRunnable.doRun(ThreadContext.java:941) [opensearch-2.16.0.jar:2.16.0]
at org.opensearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:52) [opensearch-2.16.0.jar:2.16.0]
at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144) [?:?]
at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642) [?:?]
at java.base/java.lang.Thread.run(Thread.java:1583) [?:?]
Caused by: ai.djl.translate.TranslateException: ai.djl.engine.EngineException: forward() Expected a value of type 'Tensor' for argument 'input_ids' but instead found type 'Dict[str, Tensor]'.
Position: 1
Declaration: forward(torch.embNormScript self, Tensor input_ids, Tensor attention_mask, Tensor token_type_ids) -> Dict(str, Tensor)
at ai.djl.inference.Predictor.batchPredict(Predictor.java:196) ~[?:?]
at ai.djl.inference.Predictor.predict(Predictor.java:132) ~[?:?]
at org.opensearch.ml.engine.algorithms.TextEmbeddingModel.warmUp(TextEmbeddingModel.java:103) ~[?:?]
at org.opensearch.ml.engine.algorithms.DLModel.doLoadModel(DLModel.java:223) ~[?:?]
at org.opensearch.ml.engine.algorithms.DLModel.lambda$loadModel$1(DLModel.java:286) ~[?:?]
... 14 more
Caused by: ai.djl.engine.EngineException: forward() Expected a value of type 'Tensor' for argument 'input_ids' but instead found type 'Dict[str, Tensor]'.
Position: 1
Declaration: forward(torch.embNormScript self, Tensor input_ids, Tensor attention_mask, Tensor token_type_ids) -> Dict(str, Tensor)
at ai.djl.pytorch.jni.PyTorchLibrary.moduleRunMethod(Native Method) ~[?:?]
at ai.djl.pytorch.jni.IValueUtils.forward(IValueUtils.java:57) ~[?:?]
at ai.djl.pytorch.engine.PtSymbolBlock.forwardInternal(PtSymbolBlock.java:146) ~[?:?]
at ai.djl.nn.AbstractBaseBlock.forward(AbstractBaseBlock.java:79) ~[?:?]
at ai.djl.nn.Block.forward(Block.java:127) ~[?:?]
at ai.djl.inference.Predictor.predictInternal(Predictor.java:146) ~[?:?]
at ai.djl.inference.Predictor.batchPredict(Predictor.java:187) ~[?:?]
at ai.djl.inference.Predictor.predict(Predictor.java:132) ~[?:?]
at org.opensearch.ml.engine.algorithms.TextEmbeddingModel.warmUp(TextEmbeddingModel.java:103) ~[?:?]
at org.opensearch.ml.engine.algorithms.DLModel.doLoadModel(DLModel.java:223) ~[?:?]
at org.opensearch.ml.engine.algorithms.DLModel.lambda$loadModel$1(DLModel.java:286) ~[?:?]
... 14 more
[2024-08-08T07:17:44,911][INFO ][o.o.m.a.d.TransportDeployModelOnNodeAction] [opensearch-ml-node] deploy model task done DfPZMJEBHZBvgrvrfuHf
Describe the bug
[2024-08-08T07:17:44,731][WARN ][a.d.h.t.HuggingFaceTokenizer] [opensearch-ml-node] maxLength is not explicitly specified, use modelMaxLength: 512
[2024-08-08T07:17:44,737][ERROR][o.o.m.e.a.DLModel ] [opensearch-ml-node] Failed to deploy model rXu9MJEB07lc2P8C1MiR
ai.djl.translate.TranslateException: ai.djl.engine.EngineException: forward() Expected a value of type 'Tensor' for argument 'input_ids' but instead found type 'Dict[str, Tensor]'.
Position: 1
Declaration: forward(torch.embNormScript self, Tensor input_ids, Tensor attention_mask, Tensor token_type_ids) -> Dict(str, Tensor)
at ai.djl.inference.Predictor.batchPredict(Predictor.java:196) ~[api-0.28.0.jar:?]
at ai.djl.inference.Predictor.predict(Predictor.java:132) ~[api-0.28.0.jar:?]
at org.opensearch.ml.engine.algorithms.TextEmbeddingModel.warmUp(TextEmbeddingModel.java:103) ~[opensearch-ml-algorithms-2.16.0.0.jar:?]
at org.opensearch.ml.engine.algorithms.DLModel.doLoadModel(DLModel.java:223) ~[opensearch-ml-algorithms-2.16.0.0.jar:?]
at org.opensearch.ml.engine.algorithms.DLModel.lambda$loadModel$1(DLModel.java:286) [opensearch-ml-algorithms-2.16.0.0.jar:?]
at java.base/java.security.AccessController.doPrivileged(AccessController.java:571) [?:?]
at org.opensearch.ml.engine.algorithms.DLModel.loadModel(DLModel.java:252) [opensearch-ml-algorithms-2.16.0.0.jar:?]
at org.opensearch.ml.engine.algorithms.DLModel.initModel(DLModel.java:142) [opensearch-ml-algorithms-2.16.0.0.jar:?]
at org.opensearch.ml.engine.MLEngine.deploy(MLEngine.java:125) [opensearch-ml-algorithms-2.16.0.0.jar:?]
at org.opensearch.ml.model.MLModelManager.lambda$deployModel$52(MLModelManager.java:1083) [opensearch-ml-2.16.0.0.jar:2.16.0.0]
at org.opensearch.core.action.ActionListener$1.onResponse(ActionListener.java:82) [opensearch-core-2.16.0.jar:2.16.0]
at org.opensearch.ml.model.MLModelManager.lambda$retrieveModelChunks$73(MLModelManager.java:1703) [opensearch-ml-2.16.0.0.jar:2.16.0.0]
at org.opensearch.core.action.ActionListener$1.onResponse(ActionListener.java:82) [opensearch-core-2.16.0.jar:2.16.0]
at org.opensearch.action.support.ThreadedActionListener$1.doRun(ThreadedActionListener.java:78) [opensearch-2.16.0.jar:2.16.0]
at org.opensearch.common.util.concurrent.ThreadContext$ContextPreservingAbstractRunnable.doRun(ThreadContext.java:941) [opensearch-2.16.0.jar:2.16.0]
at org.opensearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:52) [opensearch-2.16.0.jar:2.16.0]
at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144) [?:?]
at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642) [?:?]
at java.base/java.lang.Thread.run(Thread.java:1583) [?:?]
Caused by: ai.djl.engine.EngineException: forward() Expected a value of type 'Tensor' for argument 'input_ids' but instead found type 'Dict[str, Tensor]'.
Position: 1
Declaration: forward(torch.embNormScript self, Tensor input_ids, Tensor attention_mask, Tensor token_type_ids) -> Dict(str, Tensor)
at ai.djl.pytorch.jni.PyTorchLibrary.moduleRunMethod(Native Method) ~[pytorch-engine-0.28.0.jar:?]
at ai.djl.pytorch.jni.IValueUtils.forward(IValueUtils.java:57) ~[pytorch-engine-0.28.0.jar:?]
at ai.djl.pytorch.engine.PtSymbolBlock.forwardInternal(PtSymbolBlock.java:146) ~[pytorch-engine-0.28.0.jar:?]
at ai.djl.nn.AbstractBaseBlock.forward(AbstractBaseBlock.java:79) ~[api-0.28.0.jar:?]
at ai.djl.nn.Block.forward(Block.java:127) ~[api-0.28.0.jar:?]
at ai.djl.inference.Predictor.predictInternal(Predictor.java:146) ~[api-0.28.0.jar:?]
at ai.djl.inference.Predictor.batchPredict(Predictor.java:187) ~[api-0.28.0.jar:?]
... 18 more
[2024-08-08T07:17:44,905][ERROR][o.o.m.m.MLModelManager ] [opensearch-ml-node] Failed to retrieve model rXu9MJEB07lc2P8C1MiR
org.opensearch.ml.common.exception.MLException: Failed to deploy model rXu9MJEB07lc2P8C1MiR
at org.opensearch.ml.engine.algorithms.DLModel.lambda$loadModel$1(DLModel.java:300) ~[?:?]
at java.base/java.security.AccessController.doPrivileged(AccessController.java:571) ~[?:?]
at org.opensearch.ml.engine.algorithms.DLModel.loadModel(DLModel.java:252) ~[?:?]
at org.opensearch.ml.engine.algorithms.DLModel.initModel(DLModel.java:142) ~[?:?]
at org.opensearch.ml.engine.MLEngine.deploy(MLEngine.java:125) ~[?:?]
at org.opensearch.ml.model.MLModelManager.lambda$deployModel$52(MLModelManager.java:1083) ~[?:?]
at org.opensearch.core.action.ActionListener$1.onResponse(ActionListener.java:82) [opensearch-core-2.16.0.jar:2.16.0]
at org.opensearch.ml.model.MLModelManager.lambda$retrieveModelChunks$73(MLModelManager.java:1703) [opensearch-ml-2.16.0.0.jar:2.16.0.0]
at org.opensearch.core.action.ActionListener$1.onResponse(ActionListener.java:82) [opensearch-core-2.16.0.jar:2.16.0]
at org.opensearch.action.support.ThreadedActionListener$1.doRun(ThreadedActionListener.java:78) [opensearch-2.16.0.jar:2.16.0]
at org.opensearch.common.util.concurrent.ThreadContext$ContextPreservingAbstractRunnable.doRun(ThreadContext.java:941) [opensearch-2.16.0.jar:2.16.0]
at org.opensearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:52) [opensearch-2.16.0.jar:2.16.0]
at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144) [?:?]
at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642) [?:?]
at java.base/java.lang.Thread.run(Thread.java:1583) [?:?]
Caused by: ai.djl.translate.TranslateException: ai.djl.engine.EngineException: forward() Expected a value of type 'Tensor' for argument 'input_ids' but instead found type 'Dict[str, Tensor]'.
Position: 1
Declaration: forward(torch.embNormScript self, Tensor input_ids, Tensor attention_mask, Tensor token_type_ids) -> Dict(str, Tensor)
at ai.djl.inference.Predictor.batchPredict(Predictor.java:196) ~[?:?]
at ai.djl.inference.Predictor.predict(Predictor.java:132) ~[?:?]
at org.opensearch.ml.engine.algorithms.TextEmbeddingModel.warmUp(TextEmbeddingModel.java:103) ~[?:?]
at org.opensearch.ml.engine.algorithms.DLModel.doLoadModel(DLModel.java:223) ~[?:?]
at org.opensearch.ml.engine.algorithms.DLModel.lambda$loadModel$1(DLModel.java:286) ~[?:?]
... 14 more
Caused by: ai.djl.engine.EngineException: forward() Expected a value of type 'Tensor' for argument 'input_ids' but instead found type 'Dict[str, Tensor]'.
Position: 1
Declaration: forward(torch.embNormScript self, Tensor input_ids, Tensor attention_mask, Tensor token_type_ids) -> Dict(str, Tensor)
at ai.djl.pytorch.jni.PyTorchLibrary.moduleRunMethod(Native Method) ~[?:?]
at ai.djl.pytorch.jni.IValueUtils.forward(IValueUtils.java:57) ~[?:?]
at ai.djl.pytorch.engine.PtSymbolBlock.forwardInternal(PtSymbolBlock.java:146) ~[?:?]
at ai.djl.nn.AbstractBaseBlock.forward(AbstractBaseBlock.java:79) ~[?:?]
at ai.djl.nn.Block.forward(Block.java:127) ~[?:?]
at ai.djl.inference.Predictor.predictInternal(Predictor.java:146) ~[?:?]
at ai.djl.inference.Predictor.batchPredict(Predictor.java:187) ~[?:?]
at ai.djl.inference.Predictor.predict(Predictor.java:132) ~[?:?]
at org.opensearch.ml.engine.algorithms.TextEmbeddingModel.warmUp(TextEmbeddingModel.java:103) ~[?:?]
at org.opensearch.ml.engine.algorithms.DLModel.doLoadModel(DLModel.java:223) ~[?:?]
at org.opensearch.ml.engine.algorithms.DLModel.lambda$loadModel$1(DLModel.java:286) ~[?:?]
... 14 more
[2024-08-08T07:17:44,911][INFO ][o.o.m.a.d.TransportDeployModelOnNodeAction] [opensearch-ml-node] deploy model task done DfPZMJEBHZBvgrvrfuHf
Related component
Plugins
To Reproduce
{
"name": "cre_0.3.1",
"version": "0.3.1",
"model_group_id": "BvODMJEBHZBvgrvrpOGf",
"description": "CRE",
"function_name": "TEXT_EMBEDDING",
"model_format": "TORCH_SCRIPT",
"model_content_hash_value": "b7205b31ba2ee229e6fc4d9ba02cae37288b21bacc798e445dad9123912c5be9",
"model_config": {
"model_type": "bert",
"embedding_dimension": 1024,
"framework_type": "SENTENCE_TRANSFORMERS"
},
"url": "http://openresty/download/cre_0.3.1.zip"
}
Expected behavior
Registering and deploying this model works correctly on 2.14.0, but fails with the error above on 2.15.0 and 2.16.0. I don't know what changed between these versions or how to resolve it.
Additional Details
Dockerfile:
ARG OPENSEARCH_VERSION=2.15.0
###########################################################
# Copy working directory to the actual release docker images
FROM zz/opensearch:${OPENSEARCH_VERSION}
USER root
# CUDA
RUN yum update -y && yum install -y wget gzip tar xz && \
    # yum install -y kernel-devel-$(uname -r) kernel-headers-$(uname -r) && \
    wget https://developer.download.nvidia.com/compute/cuda/11.7.1/local_installers/cuda_11.7.1_515.65.01_linux.run && \
    sh cuda_11.7.1_515.65.01_linux.run --silent --toolkit --override && \
    rm cuda_11.7.1_515.65.01_linux.run && \
    yum clean all
# cuDNN
RUN wget https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-8.9.7.29_cuda11-archive.tar.xz && \
    mkdir cudnn && tar -xvf cudnn-linux-x86_64-8.9.7.29_cuda11-archive.tar.xz -C cudnn --strip-components=1 && \
    cp -r cudnn/include/cudnn*.h /usr/local/cuda/include && \
    cp -r cudnn/lib/libcudnn* /usr/local/cuda/lib64 && \
    chmod a+r /usr/local/cuda/include/cudnn*.h /usr/local/cuda/lib64/libcudnn* && \
    rm -rf cudnn cudnn-linux-x86_64-8.9.7.29_cuda11-archive.tar.xz
RUN yum install -y python python-pip && \
    pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple && \
    pip install --no-cache-dir torch==1.13.1+cu117 torchvision==0.14.1+cu117 torchaudio==0.13.1 --extra-index-url https://download.pytorch.org/whl/cu117 && \
    yum clean all
USER opensearch
EXPOSE 9200 9300 9600 9650
The text was updated successfully, but these errors were encountered: