From 4d328273af84b150541cafd91a10584b9b78e296 Mon Sep 17 00:00:00 2001 From: Sun Xiaoxia Date: Thu, 5 Dec 2024 20:41:04 +0800 Subject: [PATCH 01/43] Resolve performance regression compared to using numactl on multi-socket SPR platform (#27735) ### Details: - *Resolve performance regression compared to using numactl on multi-socket SPR platform* - Issue: the latency performance on a multi-socket SPR platform is worse than when using "numactl -N". - Reason: `rtScratchPads` stores multiple scratch pads, each with its memory allocated on the corresponding NUMA node of a multi-socket platform. The call to `DnnlScratchPadPtr getScratchPad(int subStreamID = 0)` needs to be based on the NUMA node the current task is running on. If the input parameter is -1, it returns `rtScratchPads[0]`, whose scratch pad is on NUMA node 0, but the CPU streams executor uses NUMA node 1 by default, which leads to performance degradation. - Resolution: When `curNumaNode` is unknown, get the real `curNumaNode` from the CPU streams executor and pass it to `getScratchPad()`. When the caller does not pass an input parameter to `getScratchPad()`, `getScratchPad()` itself should obtain the real NUMA node of the current task. 
### Tickets: - *CVS-156370* --------- Co-authored-by: Wanglei Shen --- src/plugins/intel_cpu/src/graph_context.cpp | 1 + src/plugins/intel_cpu/src/graph_context.h | 9 +++------ src/plugins/intel_cpu/src/node.cpp | 2 +- src/plugins/intel_cpu/src/node.h | 2 +- .../nodes/executors/dnnl/dnnl_fullyconnected.hpp | 4 ++-- .../intel_cpu/src/nodes/executors/executor.hpp | 15 +++++++-------- 6 files changed, 15 insertions(+), 18 deletions(-) diff --git a/src/plugins/intel_cpu/src/graph_context.cpp b/src/plugins/intel_cpu/src/graph_context.cpp index e200766fa4791c..5b967ed58a7918 100644 --- a/src/plugins/intel_cpu/src/graph_context.cpp +++ b/src/plugins/intel_cpu/src/graph_context.cpp @@ -27,6 +27,7 @@ GraphContext::GraphContext(const Config& config, numNumaNodes = 1; if (streamExecutor) { cpuStreamExecutor = std::dynamic_pointer_cast(streamExecutor); + numaNodeId = cpuStreamExecutor ? cpuStreamExecutor->get_numa_node_id() : 0; auto nNumaNodes = get_num_numa_nodes(); if (numNumaNodes < nNumaNodes) numNumaNodes = nNumaNodes; diff --git a/src/plugins/intel_cpu/src/graph_context.h b/src/plugins/intel_cpu/src/graph_context.h index db2b126213978c..ce51af0c81b4bd 100644 --- a/src/plugins/intel_cpu/src/graph_context.h +++ b/src/plugins/intel_cpu/src/graph_context.h @@ -44,12 +44,8 @@ class GraphContext { return rtParamsCache; } - DnnlScratchPadPtr getScratchPad(int subStreamID = 0) const { - if (subStreamID < 0) - subStreamID = 0; - if (subStreamID >= numNumaNodes - 1) - subStreamID = numNumaNodes - 1; - return rtScratchPads[subStreamID]; + DnnlScratchPadPtr getScratchPad() const { + return rtScratchPads[numaNodeId]; } const std::vector& getScratchPads() const { @@ -101,6 +97,7 @@ class GraphContext { std::shared_ptr subMemoryManager; int numNumaNodes = 1; + int numaNodeId = 0; std::shared_ptr memoryStatesRegister; std::shared_ptr networkMemoryControl; diff --git a/src/plugins/intel_cpu/src/node.cpp b/src/plugins/intel_cpu/src/node.cpp index f4c2b0eb686df6..de5c53429138c4 100644 --- 
a/src/plugins/intel_cpu/src/node.cpp +++ b/src/plugins/intel_cpu/src/node.cpp @@ -1106,7 +1106,7 @@ void Node::toNumaNodeImpl(int numaNodeID) { // create scratch pad from specified numa node if (scratchpadMem) { - scratchpadMem = context->getScratchPad(numaNodeID)->createScratchPadMem(scratchpadMem->getDescPtr()); + scratchpadMem = context->getScratchPad()->createScratchPadMem(scratchpadMem->getDescPtr()); primArgs[DNNL_ARG_SCRATCHPAD] = scratchpadMem->getPrimitive(); } diff --git a/src/plugins/intel_cpu/src/node.h b/src/plugins/intel_cpu/src/node.h index 948bd6999ce27a..453b8323fe9e66 100644 --- a/src/plugins/intel_cpu/src/node.h +++ b/src/plugins/intel_cpu/src/node.h @@ -788,7 +788,7 @@ class Node { MemoryPtr getScratchPadMem(const MemoryDescPtr& desc) { if (!scratchpadMem || !scratchpadMem->getDesc().isCompatible(*desc)) { - scratchpadMem = context->getScratchPad(curNumaNode)->createScratchPadMem(desc); + scratchpadMem = context->getScratchPad()->createScratchPadMem(desc); } return scratchpadMem; } diff --git a/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected.hpp b/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected.hpp index 266e78b3d46c77..3266bf8965c37b 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected.hpp +++ b/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected.hpp @@ -73,7 +73,7 @@ class DnnlFCExecutor : public Executor { return; } const auto newPrimMemDesc = m_primitive->scratchPadDesc(); - m_scratchPadMemory = m_context->getScratchPad(numaNodeID)->createScratchPadMem(newPrimMemDesc); + m_scratchPadMemory = m_context->getScratchPad()->createScratchPadMem(newPrimMemDesc); m_primArgs[DNNL_ARG_SCRATCHPAD] = m_scratchPadMemory->getPrimitive(); if (m_primArgs.count(DNNL_ARG_WEIGHTS)) { @@ -139,7 +139,7 @@ class DnnlFCExecutor : public Executor { if (currentPrimitive && currentPrimitive->scratchPadDesc()->isCompatible(*newPrimMemDesc)) return; - m_scratchPadMemory = 
m_context->getScratchPad(curNumaNode)->createScratchPadMem(newPrimMemDesc); + m_scratchPadMemory = m_context->getScratchPad()->createScratchPadMem(newPrimMemDesc); m_primArgs[DNNL_ARG_SCRATCHPAD] = m_scratchPadMemory->getPrimitive(); } diff --git a/src/plugins/intel_cpu/src/nodes/executors/executor.hpp b/src/plugins/intel_cpu/src/nodes/executors/executor.hpp index 5b9df5a6e77a55..2016e8f5820dee 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/executor.hpp +++ b/src/plugins/intel_cpu/src/nodes/executors/executor.hpp @@ -112,8 +112,10 @@ class ExecutorContext { engine(graphContext->getEngine()), implPriorities(implPriorities), privateWeighCache(std::move(privateWeighCache)), - numNumaNodes(graphContext->getNumNumaNodes()) - {} + numNumaNodes(graphContext->getNumNumaNodes()) { + auto cpuStreamsExecutor = graphContext->getCPUStreamExecutor(); + curNumaNodeId = std::max(0, cpuStreamsExecutor ? cpuStreamsExecutor->get_numa_node_id() : curNumaNodeId); + } MultiCachePtr getRuntimeCache() const { auto runtimeCachePtr = runtimeCache.lock(); @@ -121,12 +123,8 @@ class ExecutorContext { return runtimeCachePtr; } - DnnlScratchPadPtr getScratchPad(int subStreamID = 0) const { - if (subStreamID < 0) - subStreamID = 0; - if (subStreamID >= numNumaNodes - 1) - subStreamID = numNumaNodes - 1; - return scratchPads[subStreamID]; + DnnlScratchPadPtr getScratchPad() const { + return scratchPads[curNumaNodeId]; } std::shared_ptr> getPrivateWeighCache() const { @@ -156,6 +154,7 @@ class ExecutorContext { // @todo remove after global cache is used exclusevly std::shared_ptr> privateWeighCache; int numNumaNodes; + int curNumaNodeId = -1; }; class ExecutorFactoryLegacy { From 6abe2e39391a06b89158c6a24e95f5ee390d1cbc Mon Sep 17 00:00:00 2001 From: Pavel Durandin Date: Thu, 5 Dec 2024 17:01:23 +0400 Subject: [PATCH 02/43] [GPU] fix onednn deconvolution (#27815) ### Details: Fix of 1D onednn deconvolution - test deconvolution_gpu_onednn.spatial_1d on LNL - *...* --- 
.../intel_gpu/src/graph/impls/onednn/deconvolution_onednn.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/deconvolution_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/deconvolution_onednn.cpp index 1d5707194c560d..a72f09207bd3a0 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/deconvolution_onednn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/deconvolution_onednn.cpp @@ -26,7 +26,7 @@ static std::shared_ptr get_deconvol auto output_layout = impl_params.get_output_layout(); dnnl::memory::dims stride(prim->stride.begin(), prim->stride.end()); - dnnl::memory::dims dilation(input_layout.get_spatial_rank(), 1); + dnnl::memory::dims dilation(stride.size(), 1); dnnl::memory::dims pad_l(prim->pad.begin(), prim->pad.end()); dnnl::memory::dims pad_r(prim->pad.begin(), prim->pad.end()); @@ -49,6 +49,7 @@ static std::shared_ptr get_deconvol int64_t insert_count = static_cast(output_md.get_dims().size()) - 2 - stride.size(); if (insert_count > 0) { stride.insert(stride.end(), insert_count, 1); + dilation.insert(dilation.end(), insert_count, 0); pad_l.insert(pad_l.end(), insert_count, 0); pad_r.insert(pad_r.end(), insert_count, 0); } From ef6c309cc26d02c1413f8bde546d278295f24f7a Mon Sep 17 00:00:00 2001 From: Wanglei Shen Date: Thu, 5 Dec 2024 22:48:56 +0800 Subject: [PATCH 03/43] by default, latency mode use one socket (#27850) ### Details: - *by default, latency mode use one socket* ### Tickets: - *CVS-157702* --- .../intel_cpu/src/cpu_streams_calculation.cpp | 26 ++++--- .../streams_info/streams_info_table_test.cpp | 70 ++++++++++++++----- 2 files changed, 67 insertions(+), 29 deletions(-) diff --git a/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp b/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp index 0ed64d49ea68dd..244adb7c40c23c 100644 --- a/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp +++ b/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp @@ 
-242,26 +242,32 @@ std::vector> get_streams_info_table(const int input_streams, n_threads_per_stream = proc_type_table[0][ALL_PROC]; } } else { - int numa_index = 1; + size_t socket_index = 0; + for (socket_index = 0; socket_index < proc_socket_table.size(); socket_index++) { + if (proc_socket_table[socket_index][PROC_SOCKET_ID] == current_socket_id) { + break; + } + } + const std::vector& current_socket_info = proc_socket_table[socket_index]; n_threads_per_stream = model_prefer_threads == 0 - ? proc_type_table[numa_index][ALL_PROC] - : std::min(proc_type_table[numa_index][ALL_PROC], model_prefer_threads); + ? current_socket_info[ALL_PROC] + : std::min(current_socket_info[ALL_PROC], model_prefer_threads); stream_info[THREADS_PER_STREAM] = n_threads_per_stream; - if (proc_type_table[numa_index][ALL_PROC] == proc_type_table[numa_index][MAIN_CORE_PROC]) { + if (current_socket_info[ALL_PROC] == current_socket_info[MAIN_CORE_PROC]) { stream_info[PROC_TYPE] = MAIN_CORE_PROC; - update_streams_per_node(MAIN_CORE_PROC, proc_type_table[numa_index]); - } else if (proc_type_table[numa_index][ALL_PROC] == proc_type_table[numa_index][EFFICIENT_CORE_PROC]) { + update_streams_per_node(MAIN_CORE_PROC, current_socket_info); + } else if (current_socket_info[ALL_PROC] == current_socket_info[EFFICIENT_CORE_PROC]) { stream_info[PROC_TYPE] = EFFICIENT_CORE_PROC; - update_streams_per_node(EFFICIENT_CORE_PROC, proc_type_table[numa_index]); + update_streams_per_node(EFFICIENT_CORE_PROC, current_socket_info); } else { stream_info[PROC_TYPE] = ALL_PROC; - update_mix_stream_info(proc_type_table[numa_index], - {proc_type_table[numa_index]}, + update_mix_stream_info(current_socket_info, + proc_type_table, n_threads_per_stream, IStreamsExecutor::Config::StreamsMode::SUB_STREAMS_NULL, ALL_PROC); } - update_ids_method(proc_type_table[numa_index]); + update_ids_method(current_socket_info); } } else { n_threads = diff --git a/src/plugins/intel_cpu/tests/unit/streams_info/streams_info_table_test.cpp 
b/src/plugins/intel_cpu/tests/unit/streams_info/streams_info_table_test.cpp index 908c8802981ab8..57f9f5c5d72a14 100644 --- a/src/plugins/intel_cpu/tests/unit/streams_info/streams_info_table_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/streams_info/streams_info_table_test.cpp @@ -158,7 +158,11 @@ StreamsCalculationTestCase _2sockets_104cores_latency_platform_3 = { {52, 26, 0, 26, 1, 0}, {52, 26, 0, 26, 2, 1}, {52, 26, 0, 26, 3, 1}}, - {{1, ALL_PROC, 52, 0, 0}, {0, MAIN_CORE_PROC, 26, 0, 0}, {0, HYPER_THREADING_PROC, 26, 0, 0}}, + {{1, ALL_PROC, 104, -1, 0}, + {0, MAIN_CORE_PROC, 26, 0, 0}, + {0, MAIN_CORE_PROC, 26, 1, 0}, + {0, HYPER_THREADING_PROC, 26, 0, 0}, + {0, HYPER_THREADING_PROC, 26, 1, 0}}, }; StreamsCalculationTestCase _2sockets_104cores_latency_platform_4 = { 1, @@ -170,7 +174,7 @@ StreamsCalculationTestCase _2sockets_104cores_latency_platform_4 = { "LATENCY", {}, {{104, 104, 0, 0, -1, -1}, {26, 26, 0, 0, 0, 0}, {26, 26, 0, 0, 1, 0}, {26, 26, 0, 0, 2, 1}, {26, 26, 0, 0, 3, 1}}, - {{1, MAIN_CORE_PROC, 26, 0, 0}}, + {{1, ALL_PROC, 52, -1, 0}, {0, MAIN_CORE_PROC, 26, 0, 0}, {0, MAIN_CORE_PROC, 26, 1, 0}}, }; StreamsCalculationTestCase _2sockets_104cores_latency_socket_1 = { 1, @@ -210,7 +214,11 @@ StreamsCalculationTestCase _2sockets_104cores_latency_socket_3 = { {52, 26, 0, 26, 1, 0}, {52, 26, 0, 26, 2, 1}, {52, 26, 0, 26, 3, 1}}, - {{1, ALL_PROC, 52, 0, 0}, {0, MAIN_CORE_PROC, 26, 0, 0}, {0, HYPER_THREADING_PROC, 26, 0, 0}}, + {{1, ALL_PROC, 104, -1, 0}, + {0, MAIN_CORE_PROC, 26, 0, 0}, + {0, MAIN_CORE_PROC, 26, 1, 0}, + {0, HYPER_THREADING_PROC, 26, 0, 0}, + {0, HYPER_THREADING_PROC, 26, 1, 0}}, }; StreamsCalculationTestCase _2sockets_104cores_latency_socket_4 = { 1, @@ -222,7 +230,7 @@ StreamsCalculationTestCase _2sockets_104cores_latency_socket_4 = { "LATENCY", {}, {{104, 104, 0, 0, -1, -1}, {26, 26, 0, 0, 0, 0}, {26, 26, 0, 0, 1, 0}, {26, 26, 0, 0, 2, 1}, {26, 26, 0, 0, 3, 1}}, - {{1, MAIN_CORE_PROC, 26, 0, 0}}, + {{1, ALL_PROC, 52, -1, 0}, {0, 
MAIN_CORE_PROC, 26, 0, 0}, {0, MAIN_CORE_PROC, 26, 1, 0}}, }; StreamsCalculationTestCase _2sockets_104cores_latency_socket_5 = { 1, @@ -234,7 +242,7 @@ StreamsCalculationTestCase _2sockets_104cores_latency_socket_5 = { "LATENCY", {}, {{60, 60, 0, 0, -1, -1}, {10, 10, 0, 0, 0, 0}, {10, 10, 0, 0, 1, 0}, {20, 20, 0, 0, 2, 1}, {20, 20, 0, 0, 3, 1}}, - {{1, MAIN_CORE_PROC, 10, 0, 0}}, + {{1, ALL_PROC, 20, -1, 0}, {0, MAIN_CORE_PROC, 10, 0, 0}, {0, MAIN_CORE_PROC, 10, 1, 0}}, }; StreamsCalculationTestCase _2sockets_104cores_latency_socket_6 = { 1, @@ -246,7 +254,7 @@ StreamsCalculationTestCase _2sockets_104cores_latency_socket_6 = { "LATENCY", {}, {{60, 60, 0, 0, -1, -1}, {10, 10, 0, 0, 0, 0}, {20, 20, 0, 0, 1, 1}, {10, 10, 0, 0, 2, 0}, {20, 20, 0, 0, 3, 1}}, - {{1, MAIN_CORE_PROC, 10, 0, 0}}, + {{1, ALL_PROC, 20, -1, 0}, {0, MAIN_CORE_PROC, 10, 0, 0}, {0, MAIN_CORE_PROC, 10, 2, 0}}, }; StreamsCalculationTestCase _2sockets_104cores_latency_socket_7 = { 1, @@ -258,7 +266,7 @@ StreamsCalculationTestCase _2sockets_104cores_latency_socket_7 = { "LATENCY", {}, {{104, 104, 0, 0, -1, -1}, {26, 26, 0, 0, 0, 0}, {26, 26, 0, 0, 1, 0}, {26, 26, 0, 0, 2, 1}, {26, 26, 0, 0, 3, 1}}, - {{1, MAIN_CORE_PROC, 26, 0, 0}}, + {{1, ALL_PROC, 52, -1, 0}, {0, MAIN_CORE_PROC, 26, 0, 0}, {0, MAIN_CORE_PROC, 26, 1, 0}}, }; StreamsCalculationTestCase _2sockets_104cores_latency_socket_8 = { 1, @@ -2349,7 +2357,11 @@ StreamsCalculationTestCase _2sockets_mock_latency_26 = { {60, 30, 0, 30, 1, 0}, {40, 20, 0, 20, 2, 1}, {20, 10, 0, 10, 3, 1}}, - {{1, ALL_PROC, 80, 0, 0}, {0, MAIN_CORE_PROC, 40, 0, 0}, {0, HYPER_THREADING_PROC, 40, 0, 0}}, + {{1, ALL_PROC, 140, -1, 0}, + {0, MAIN_CORE_PROC, 40, 0, 0}, + {0, MAIN_CORE_PROC, 30, 1, 0}, + {0, HYPER_THREADING_PROC, 40, 0, 0}, + {0, HYPER_THREADING_PROC, 30, 1, 0}}, }; StreamsCalculationTestCase _2sockets_mock_latency_27 = { 1, @@ -2365,7 +2377,11 @@ StreamsCalculationTestCase _2sockets_mock_latency_27 = { {60, 30, 0, 30, 1, 0}, {40, 20, 0, 20, 2, 1}, {20, 
10, 0, 10, 3, 1}}, - {{1, ALL_PROC, 80, 0, 0}, {0, MAIN_CORE_PROC, 40, 0, 0}, {0, HYPER_THREADING_PROC, 40, 0, 0}}, + {{1, ALL_PROC, 140, -1, 0}, + {0, MAIN_CORE_PROC, 40, 0, 0}, + {0, MAIN_CORE_PROC, 30, 1, 0}, + {0, HYPER_THREADING_PROC, 40, 0, 0}, + {0, HYPER_THREADING_PROC, 30, 1, 0}}, }; StreamsCalculationTestCase _2sockets_mock_latency_28 = { 1, @@ -2635,7 +2651,7 @@ StreamsCalculationTestCase _2sockets_mock_latency_39 = { "LATENCY", {}, {{104, 104, 0, 0, -1, -1}, {26, 26, 0, 0, 0, 0}, {26, 26, 0, 0, 1, 0}, {26, 26, 0, 0, 2, 1}, {26, 26, 0, 0, 3, 1}}, - {{1, MAIN_CORE_PROC, 26, 0, 0}}, + {{1, ALL_PROC, 52, -1, 0}, {0, MAIN_CORE_PROC, 26, 0, 0}, {0, MAIN_CORE_PROC, 26, 1, 0}}, }; StreamsCalculationTestCase _2sockets_mock_latency_40 = { 1, @@ -2647,7 +2663,7 @@ StreamsCalculationTestCase _2sockets_mock_latency_40 = { "LATENCY", {}, {{104, 104, 0, 0, -1, -1}, {26, 26, 0, 0, 1, 0}, {26, 26, 0, 0, 2, 1}, {26, 26, 0, 0, 3, 1}, {26, 26, 0, 0, 0, 0}}, - {{1, MAIN_CORE_PROC, 26, 1, 0}}, + {{1, ALL_PROC, 52, -1, 0}, {0, MAIN_CORE_PROC, 26, 1, 0}, {0, MAIN_CORE_PROC, 26, 0, 0}}, }; StreamsCalculationTestCase _2sockets_mock_latency_41 = { 1, @@ -2659,7 +2675,7 @@ StreamsCalculationTestCase _2sockets_mock_latency_41 = { "LATENCY", {}, {{104, 104, 0, 0, -1, -1}, {26, 26, 0, 0, 2, 1}, {26, 26, 0, 0, 3, 1}, {26, 26, 0, 0, 0, 0}, {26, 26, 0, 0, 1, 0}}, - {{1, MAIN_CORE_PROC, 26, 2, 1}}, + {{1, ALL_PROC, 52, -1, 1}, {0, MAIN_CORE_PROC, 26, 2, 1}, {0, MAIN_CORE_PROC, 26, 3, 1}}, }; StreamsCalculationTestCase _2sockets_mock_latency_42 = { 1, @@ -2671,7 +2687,7 @@ StreamsCalculationTestCase _2sockets_mock_latency_42 = { "LATENCY", {}, {{104, 104, 0, 0, -1, -1}, {26, 26, 0, 0, 3, 1}, {26, 26, 0, 0, 0, 0}, {26, 26, 0, 0, 1, 0}, {26, 26, 0, 0, 2, 1}}, - {{1, MAIN_CORE_PROC, 26, 3, 1}}, + {{1, ALL_PROC, 52, -1, 1}, {0, MAIN_CORE_PROC, 26, 3, 1}, {0, MAIN_CORE_PROC, 26, 2, 1}}, }; StreamsCalculationTestCase _2sockets_mock_latency_43 = { 1, @@ -2687,7 +2703,11 @@ 
StreamsCalculationTestCase _2sockets_mock_latency_43 = { {52, 26, 0, 26, 1, 0}, {52, 26, 0, 26, 2, 1}, {52, 26, 0, 26, 3, 1}}, - {{1, ALL_PROC, 52, 0, 0}, {0, MAIN_CORE_PROC, 26, 0, 0}, {0, HYPER_THREADING_PROC, 26, 0, 0}}, + {{1, ALL_PROC, 104, -1, 0}, + {0, MAIN_CORE_PROC, 26, 0, 0}, + {0, MAIN_CORE_PROC, 26, 1, 0}, + {0, HYPER_THREADING_PROC, 26, 0, 0}, + {0, HYPER_THREADING_PROC, 26, 1, 0}}, }; StreamsCalculationTestCase _2sockets_mock_latency_44 = { 1, @@ -2703,7 +2723,11 @@ StreamsCalculationTestCase _2sockets_mock_latency_44 = { {52, 26, 0, 26, 0, 0}, {52, 26, 0, 26, 1, 0}, {52, 26, 0, 26, 2, 1}}, - {{1, ALL_PROC, 52, 3, 1}, {0, MAIN_CORE_PROC, 26, 3, 1}, {0, HYPER_THREADING_PROC, 26, 3, 1}}, + {{1, ALL_PROC, 104, -1, 1}, + {0, MAIN_CORE_PROC, 26, 3, 1}, + {0, MAIN_CORE_PROC, 26, 2, 1}, + {0, HYPER_THREADING_PROC, 26, 3, 1}, + {0, HYPER_THREADING_PROC, 26, 2, 1}}, }; StreamsCalculationTestCase _2sockets_mock_latency_45 = { 1, @@ -2715,7 +2739,7 @@ StreamsCalculationTestCase _2sockets_mock_latency_45 = { "LATENCY", {}, {{208, 208, 0, 0, -1, -1}, {52, 52, 0, 0, 0, 0}, {52, 52, 0, 0, 1, 0}, {52, 52, 0, 0, 2, 1}, {52, 52, 0, 0, 3, 1}}, - {{1, MAIN_CORE_PROC, 52, 0, 0}}, + {{1, ALL_PROC, 104, -1, 0}, {0, MAIN_CORE_PROC, 52, 0, 0}, {0, MAIN_CORE_PROC, 52, 1, 0}}, }; StreamsCalculationTestCase _2sockets_mock_latency_46 = { 1, @@ -2727,7 +2751,7 @@ StreamsCalculationTestCase _2sockets_mock_latency_46 = { "LATENCY", {}, {{208, 208, 0, 0, -1, -1}, {52, 52, 0, 0, 2, 1}, {52, 52, 0, 0, 3, 1}, {52, 52, 0, 0, 0, 0}, {52, 52, 0, 0, 1, 0}}, - {{1, MAIN_CORE_PROC, 52, 2, 1}}, + {{1, ALL_PROC, 104, -1, 1}, {0, MAIN_CORE_PROC, 52, 2, 1}, {0, MAIN_CORE_PROC, 52, 3, 1}}, }; StreamsCalculationTestCase _2sockets_mock_latency_47 = { 1, @@ -2743,7 +2767,11 @@ StreamsCalculationTestCase _2sockets_mock_latency_47 = { {104, 52, 0, 52, 1, 0}, {104, 52, 0, 52, 2, 1}, {104, 52, 0, 52, 3, 1}}, - {{1, ALL_PROC, 104, 0, 0}, {0, MAIN_CORE_PROC, 52, 0, 0}, {0, HYPER_THREADING_PROC, 52, 0, 0}}, 
+ {{1, ALL_PROC, 208, -1, 0}, + {0, MAIN_CORE_PROC, 52, 0, 0}, + {0, MAIN_CORE_PROC, 52, 1, 0}, + {0, HYPER_THREADING_PROC, 52, 0, 0}, + {0, HYPER_THREADING_PROC, 52, 1, 0}}, }; StreamsCalculationTestCase _2sockets_mock_latency_48 = { 1, @@ -2759,7 +2787,11 @@ StreamsCalculationTestCase _2sockets_mock_latency_48 = { {104, 52, 0, 52, 0, 0}, {104, 52, 0, 52, 1, 0}, {104, 52, 0, 52, 2, 1}}, - {{1, ALL_PROC, 104, 3, 1}, {0, MAIN_CORE_PROC, 52, 3, 1}, {0, HYPER_THREADING_PROC, 52, 3, 1}}, + {{1, ALL_PROC, 208, -1, 1}, + {0, MAIN_CORE_PROC, 52, 3, 1}, + {0, MAIN_CORE_PROC, 52, 2, 1}, + {0, HYPER_THREADING_PROC, 52, 3, 1}, + {0, HYPER_THREADING_PROC, 52, 2, 1}}, }; StreamsCalculationTestCase _2sockets_mock_latency_49 = { 1, From 9b60600dda82bf5e6a852f3844ef7b5cc4d57ce8 Mon Sep 17 00:00:00 2001 From: Chen Xu Date: Thu, 5 Dec 2024 23:36:24 +0800 Subject: [PATCH 04/43] [Test] Fix test cases of Reduce node with empty input (#27849) ### Details: - *This is a fix about some of the test cases of https://github.com/openvinotoolkit/openvino/pull/27603. The test cases are modified so that all of them will cover the new added functionality in Reduce layer, i.e., input tensor is empty while output tensor is not empty.* The PR is cherry-picked from the last commit in https://github.com/openvinotoolkit/openvino/pull/27813. 
--- .../single_layer_tests/reduce_ops.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/reduce_ops.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/reduce_ops.cpp index 9b7ae687e9c81d..bffb0787333185 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/reduce_ops.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/single_layer_tests/reduce_ops.cpp @@ -32,7 +32,7 @@ const std::vector> input_shapes = { const std::vector> input_shapes_0_dim = { std::vector{2, 0, 4, 1}, std::vector{8, 0, 4, 0}, - std::vector{0, 0, 0, 0}, + std::vector{2, 3, 4, 0}, }; const std::vector> input_shapes_one_axis = { @@ -60,6 +60,11 @@ const std::vector> axes = { {1, -1} }; +const std::vector> axes_0_dim = { + {1, 3}, + {0, 1, 3} +}; + std::vector op_types = { ov::test::utils::OpType::SCALAR, ov::test::utils::OpType::VECTOR, @@ -174,7 +179,7 @@ const auto params_reduction_types = testing::Combine( ); const auto params_empty_input = testing::Combine( - testing::ValuesIn(axes), + testing::ValuesIn(axes_0_dim), testing::Values(op_types[1]), testing::ValuesIn(keep_dims), testing::ValuesIn(reduction_types), From 4d5d546562474f476a469724035918534e364bf9 Mon Sep 17 00:00:00 2001 From: Daniil Lyakhov Date: Thu, 5 Dec 2024 08:40:04 -0800 Subject: [PATCH 05/43] Revert "[Frontends][TorchFX] torch.ops.aten._unsafe_index support (#27909) This reverts commit 147d0af7bea9161cb5be9d9c3d23d70be06b54f5. 
#27617 --- src/frontends/pytorch/src/op_table.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/frontends/pytorch/src/op_table.cpp b/src/frontends/pytorch/src/op_table.cpp index ed375fd742d7ed..a73c13814d7663 100644 --- a/src/frontends/pytorch/src/op_table.cpp +++ b/src/frontends/pytorch/src/op_table.cpp @@ -867,7 +867,6 @@ const std::unordered_map get_supported_ops_fx() { {"aten.hardtanh.default", op::translate_hardtanh}, {"aten.hardtanh_.default", op::inplace_op}, {"aten.index.Tensor", op::translate_index_fx}, - {"aten._unsafe_index.Tensor", op::translate_index_fx}, {"aten.index_select.default", op::translate_index_select}, {"aten.isfinite.default", op::translate_1to1_match_1_inputs}, {"aten.isinf.default", op::translate_1to1_match_1_inputs}, From be4d2914d971694af7e18ca73c41117fb53c0aa5 Mon Sep 17 00:00:00 2001 From: Alina Kladieva Date: Thu, 5 Dec 2024 17:58:59 +0100 Subject: [PATCH 06/43] Add 2024.6 to conflicting versions (#27931) https://github.com/openvinotoolkit/openvino/pull/27929/files#r1871180640 Signed-off-by: Alina Kladieva --- cmake/packaging/debian.cmake | 1 + cmake/packaging/rpm.cmake | 1 + 2 files changed, 2 insertions(+) diff --git a/cmake/packaging/debian.cmake b/cmake/packaging/debian.cmake index 59b312963c180d..c82dca0364b463 100644 --- a/cmake/packaging/debian.cmake +++ b/cmake/packaging/debian.cmake @@ -99,6 +99,7 @@ macro(ov_cpack_settings) 2024.3.0 2024.4.0 2024.5.0 + 2024.6.0 ) ov_check_conflicts_versions(conflicting_versions) diff --git a/cmake/packaging/rpm.cmake b/cmake/packaging/rpm.cmake index a4a63c35858bf9..6e9d535d41cfff 100644 --- a/cmake/packaging/rpm.cmake +++ b/cmake/packaging/rpm.cmake @@ -87,6 +87,7 @@ macro(ov_cpack_settings) 2024.3.0 2024.4.0 2024.5.0 + 2024.6.0 ) ov_check_conflicts_versions(conflicting_versions) From a1f4571f42d16a34f060e89227f9af5e7613702f Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Thu, 5 Dec 2024 23:04:18 +0400 Subject: [PATCH 07/43] Fixed RTTI for exceptions (#27937) ### Details: - We 
need to export virtual functions to fix RTTI --- src/core/include/openvino/core/except.hpp | 2 ++ src/core/src/except.cpp | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/src/core/include/openvino/core/except.hpp b/src/core/include/openvino/core/except.hpp index fdb3746d323350..a923cd98c7e576 100644 --- a/src/core/include/openvino/core/except.hpp +++ b/src/core/include/openvino/core/except.hpp @@ -62,6 +62,7 @@ class OPENVINO_API AssertFailure : public Exception { const char* check_string, const std::string& context_info, const std::string& explanation); + virtual ~AssertFailure(); protected: explicit AssertFailure(const std::string& what_arg) : ov::Exception(what_arg) {} @@ -71,6 +72,7 @@ class OPENVINO_API AssertFailure : public Exception { class OPENVINO_API NotImplemented : public AssertFailure { public: [[noreturn]] static void create(const char* file, int line, const std::string& explanation); + virtual ~NotImplemented(); static const std::string default_msg; diff --git a/src/core/src/except.cpp b/src/core/src/except.cpp index 6ce0568e04e387..7cddc5b3ec4a52 100644 --- a/src/core/src/except.cpp +++ b/src/core/src/except.cpp @@ -45,8 +45,12 @@ void ov::AssertFailure::create(const char* file, throw ov::AssertFailure(make_what(file, line, check_string, context_info, explanation)); } +ov::AssertFailure::~AssertFailure() = default; + void ov::NotImplemented::create(const char* file, int line, const std::string& explanation) { throw ov::NotImplemented(make_what(file, line, nullptr, default_msg, explanation)); } +ov::NotImplemented::~NotImplemented() = default; + const std::string ov::NotImplemented::default_msg{"Not Implemented"}; From ea72f3047cb2fca24a89bed7c6069d4abb960a99 Mon Sep 17 00:00:00 2001 From: Jade Cho Date: Fri, 6 Dec 2024 10:31:49 +0900 Subject: [PATCH 08/43] [GPU] Fix perf regression due to PR #27573 (#27898) ### Details: - *Remove unecessary memory copy.* - *Tests already exists.* ### Tickets: - *158009* --- 
.../intel_gpu/src/plugin/sync_infer_request.cpp | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp index ae12ed087bc02d..f87f9af5275722 100644 --- a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp +++ b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp @@ -465,21 +465,6 @@ void SyncInferRequest::wait() { iremote_tensor_ptr->copy_from(plugin_tensor.ptr); } } - } else if (!is_dynamic && is_remote_tensor_impl && output_memory) { - auto& stream = m_graph->get_network()->get_stream(); - auto user_mem = remote_tensor_impl_ptr->get_original_memory(); - if (user_mem->get_allocation_type() == cldnn::allocation_type::cl_mem - && output_memory->get_allocation_type() != cldnn::allocation_type::cl_mem) { - auto plugin_tensor = m_plugin_outputs.at(port_idx); - if (is_convert_required(plugin_tensor.ptr->get_element_type(), iremote_tensor_ptr->get_element_type())) { - auto& stream = m_graph->get_network()->get_stream(); - convert_and_copy(plugin_tensor.ptr.get(), iremote_tensor_ptr.get(), stream); - } else { - iremote_tensor_ptr->copy_from(plugin_tensor.ptr); - } - } else { - copy_events.push_back(output_memory->copy_to(stream, *user_mem, false)); - } } else if (is_remote_tensor_impl && is_dynamic) { auto& stream = m_graph->get_network()->get_stream(); auto user_mem = remote_tensor_impl_ptr->get_original_memory(); From d62effb86b50781efa24af18a8af77bef9bd11db Mon Sep 17 00:00:00 2001 From: Luo Cheng Date: Fri, 6 Dec 2024 13:24:22 +0800 Subject: [PATCH 09/43] [CPU] Optimize small batch case for PagedAttention (#27847) ### Details: - *Generate more work items to avoid thread imbalance* - *...* ### Tickets: - *[156347](https://jira.devtools.intel.com/browse/CVS-156347)* - *[158477](https://jira.devtools.intel.com/browse/CVS-158477)* --- .../nodes/kernels/scaled_attn/executor_pa.cpp | 105 ++++++++++++++---- 1 file changed, 86 insertions(+), 19 
deletions(-) diff --git a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/executor_pa.cpp b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/executor_pa.cpp index bef34881ca41bc..90167ac86a8e1a 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/executor_pa.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/scaled_attn/executor_pa.cpp @@ -939,14 +939,14 @@ struct MHAHelper { // wv_scratch_b: [rnd_up(kv_len, block_size), Hk, scratch_b_size] void exec_kernel_multiple(const PlainTensor& query, const PlainTensor& present_value, const PlainTensor& output_emb, const PlainTensor& qk_scratch_b, const PlainTensor& wv_scratch_b, const int32_t* block_table, size_t ithr, size_t q_blk, - size_t hk, size_t q_len, size_t cur_kv_len, const PlainTensor& alibi_slopes, float* score_output) { + size_t hq_beg, size_t hq_end, size_t hk, size_t q_len, size_t cur_kv_len, const PlainTensor& alibi_slopes, float* score_output) { auto q_start = q_blk * _block_size; auto q_end = std::min(q_start + _block_size, q_len); auto q_cnt = q_end - q_start; constexpr bool q_is_xf16 = one_of(precision_of::value, ov::element::bf16, ov::element::f16); constexpr bool q_cache_is_same = precision_of::value == precision_of::value; auto cur_kv_len_blocks = div_up(cur_kv_len, _block_size); - for (size_t h = hk * _h_each_group_len; h < (hk + 1) * _h_each_group_len; h++) { + for (size_t h = hq_beg; h < hq_end; h++) { auto* q_ptr = query.ptr(h, q_start, 0); float* c_ptr = _weight.ptr(ithr, h, 0, 0); // for each query block, loop through all key block @@ -1065,13 +1065,14 @@ struct MHAHelper { // weight: [nthr, H, 32, rnd_up(kv_len, block_size)] // output: [nthr, 32, H, S] void exec_kernel_one_bh(const PlainTensor& query, const PlainTensor& present_key, const PlainTensor& present_value, const PlainTensor& output_emb, - const int32_t* block_table, size_t ithr, size_t hk, size_t q_len, size_t cur_kv_len, const PlainTensor& alibi_slopes, float* score_output) { + const int32_t* block_table, size_t 
ithr, size_t hq_beg, size_t hq_end, size_t hk, + size_t q_len, size_t cur_kv_len, const PlainTensor& alibi_slopes, float* score_output) { if (one_of(_fastpath_valid_prec, ov::element::bf16, ov::element::f16)) { _gemv->tile_config(); for (size_t pk = 0, i = 0; pk < cur_kv_len; pk += _block_size, i++) { auto block_number = block_table[i]; for (size_t pq = 0; pq < q_len; pq++) { - for (size_t h = hk * _h_each_group_len; h < (hk + 1) * _h_each_group_len; h++) { + for (size_t h = hq_beg; h < hq_end; h++) { (*_gemv)(query.ptr(h, pq), present_key.ptr(block_number, hk), _weight.ptr(ithr, h, pq) + pk); } @@ -1082,7 +1083,7 @@ struct MHAHelper { for (size_t pk = 0, i = 0; pk < cur_kv_len; pk += _block_size, i++) { auto block_number = block_table[i]; for (size_t pq = 0; pq < q_len; pq++) { - for (size_t h = hk * _h_each_group_len; h < (hk + 1) * _h_each_group_len; h++) { + for (size_t h = hq_beg; h < hq_end; h++) { dot_product_block(query.ptr(h, pq), present_key.ptr(block_number, hk), _weight.ptr(ithr, h, pq) + pk, _S, std::min(_block_size, cur_kv_len - pk)); } @@ -1091,7 +1092,7 @@ struct MHAHelper { } for (size_t pq = 0; pq < q_len; pq++) { - for (size_t h = hk * _h_each_group_len; h < (hk + 1) * _h_each_group_len; h++) { + for (size_t h = hq_beg; h < hq_end; h++) { // apply attention mask & sofmax float* alibi_lookup = nullptr; float alibi_slope = 0.f; @@ -1122,7 +1123,7 @@ struct MHAHelper { auto block_number = block_table[i]; auto* v = present_value.ptr(block_number, hk); for (size_t pq = 0; pq < q_len; pq++) { - for (size_t h = hk * _h_each_group_len; h < (hk + 1) * _h_each_group_len; h++) { + for (size_t h = hq_beg; h < hq_end; h++) { attn_acc_value_block(_output.ptr(ithr, pq, h), _weight.ptr(ithr, h, pq) + pv, v, @@ -1133,7 +1134,7 @@ struct MHAHelper { } // convert to dst for (size_t pq = 0; pq < q_len; pq++) - for (size_t h = hk * _h_each_group_len; h < (hk + 1) * _h_each_group_len; h++) + for (size_t h = hq_beg; h < hq_end; h++) cvt_copy(output_emb.ptr(pq, h * 
_SV), _output.ptr(ithr, pq, h), _SV); } @@ -1162,8 +1163,38 @@ struct MHAHelper { // aligned to cache line (64bytes=16*sizeof(float)) to avoid false sharing _weight_bhl.resize({B, _H, q_len, rnd_up(max_context_len, std::max(_block_size, size_t{16}))}); - parallel_for3d_dynamic(B, kv_len_in_blocks, _Hk, [&](size_t b, size_t pk_in_blocks, size_t hk) { + // for small batches dynamic scheduler has notable overhead + bool prefer_static_loop; + // if less than 2 work items per thread, loop H + bool loop_hk = B * kv_len_in_blocks * _Hk <= 2 * _nthr ? false : true; + if (B <= 32) { + prefer_static_loop = true; + // small batch and all batch size is same(like SDPA case) + auto kv_len = past_lens.ptr()[0]; + for (size_t b = 1; b < B; b++) { + if (past_lens.ptr()[b] != kv_len) + prefer_static_loop = false; + } + } else { + // for bigger batch skip the test to save the cost + prefer_static_loop = false; + } + auto get_h_params = [] (bool loop_hk, size_t hx, size_t h_each_group_len, size_t& hq_beg, size_t& hq_end, size_t& hk) { + if (loop_hk) { + hk = hx; + hq_beg = hk * h_each_group_len; + hq_end = (hk + 1) * h_each_group_len; + } else { + hq_beg = hx; + hq_end = hx + 1; + hk = hx / h_each_group_len; + } + }; + auto loop_qk = [&](size_t b, size_t pk_in_blocks, size_t hx) { auto context_len = static_cast(past_lens.ptr()[b]) + 1; + size_t hk, hq_beg, hq_end; + get_h_params(loop_hk, hx, _h_each_group_len, hq_beg, hq_end, hk); + // kv_len must be valid auto pk = pk_in_blocks * _block_size; if (pk < context_len) { @@ -1171,7 +1202,7 @@ struct MHAHelper { if (one_of(_fastpath_valid_prec, ov::element::bf16, ov::element::f16)) { _gemv->tile_config(); for (size_t pq = 0; pq < q_len; pq++) { - for (size_t h = hk * _h_each_group_len; h < (hk + 1) * _h_each_group_len; h++) { + for (size_t h = hq_beg; h < hq_end; h++) { (*_gemv)(query.ptr(b, h, pq), present_key.ptr(block_number, hk), _weight_bhl.ptr(b, h, pq) + pk); } @@ -1179,16 +1210,16 @@ struct MHAHelper { _gemv->tile_release(); } else 
{ for (size_t pq = 0; pq < q_len; pq++) { - for (size_t h = hk * _h_each_group_len; h < (hk + 1) * _h_each_group_len; h++) { + for (size_t h = hq_beg; h < hq_end; h++) { dot_product_block(query.ptr(b, h, pq), present_key.ptr(block_number, hk), _weight_bhl.ptr(b, h, pq) + pk, _S, std::min(_block_size, context_len - pk)); } } } } - }); + }; - parallel_for3d_dynamic(B, _H, q_len, [&](size_t b, size_t h, size_t pq) { + auto loop_softmax = [&](size_t b, size_t h, size_t pq) { auto cur_kv_len = static_cast(past_lens.ptr()[b]) + 1; auto ncausal = cur_kv_len; // apply attention mask & sofmax @@ -1210,7 +1241,16 @@ struct MHAHelper { ov::element::f32, ov::element::f32, alibi_slope); - }); + }; + + size_t h_dims = loop_hk ? _Hk : _H; + if (prefer_static_loop) { + parallel_for3d(B, kv_len_in_blocks, h_dims, loop_qk); + parallel_for3d(B, _H, q_len, loop_softmax); + } else { + parallel_for3d_dynamic(B, kv_len_in_blocks, h_dims, loop_qk); + parallel_for3d_dynamic(B, _H, q_len, loop_softmax); + } if (output_score) { parallel_for2d_dynamic(B, q_len, [&](size_t b, size_t pq) { @@ -1229,16 +1269,19 @@ struct MHAHelper { memset(_output_bhl.ptr(ithr, 0, 0, 0, 0), 0, _output_bhl.stride(0) * sizeof(float)); }); - parallel_for3d_dynamic(B, kv_len_in_blocks, _Hk, [&](size_t b, size_t pv_in_blocks, size_t hk) { + auto loop_wk = [&](size_t b, size_t pv_in_blocks, size_t hx) { auto ithr = parallel_get_thread_num(); auto context_len = static_cast(past_lens.ptr()[b]) + 1; auto pv = pv_in_blocks * _block_size; + size_t hk, hq_beg, hq_end; + get_h_params(loop_hk, hx, _h_each_group_len, hq_beg, hq_end, hk); + // kv_len must be valid if (pv < context_len) { auto block_number = block_indices.ptr()[block_indices_begins.ptr()[b] + pv_in_blocks]; auto* v = present_value.ptr(block_number, hk); for (size_t pq = 0; pq < q_len; pq++) { - for (size_t h = hk * _h_each_group_len; h < (hk + 1) * _h_each_group_len; h++) { + for (size_t h = hq_beg; h < hq_end; h++) { attn_acc_value_block(_output_bhl.ptr(ithr, 
b, pq, h), _weight_bhl.ptr(b, h, pq) + pv, v, @@ -1247,7 +1290,13 @@ struct MHAHelper { } } } - }); + }; + + if (prefer_static_loop) { + parallel_for3d(B, kv_len_in_blocks, loop_hk ? _Hk : _H, loop_wk); + } else { + parallel_for3d_dynamic(B, kv_len_in_blocks, loop_hk ? _Hk : _H, loop_wk); + } parallel_for3d(B, _H, q_len, [&](size_t b, size_t h, size_t pq) { auto* temp = _output_bhl.ptr(0, b, pq, h); @@ -1416,7 +1465,23 @@ struct MHA { } }); - parallel_for2d_dynamic(attn_work_count, Hk, [&](size_t w, size_t hk) { + // loop along HK dimension: if mixed first/second token and elements count is enough, loop HK to reuse KV in the CPU cache + // else if elements count is small, prefer to loop H to get more work to avoid thread imbalance + bool loop_hk = _workitems.get_reorder_max_batch_size() == past_lens.m_dims[0] || // if only first token, loop H + attn_work_count * Hk <= 2 * _helper._nthr ? false : true; // or less than 2 work items per thread, loop H + + parallel_for2d_dynamic(attn_work_count, loop_hk ? 
Hk : _helper._H, [&](size_t w, size_t hx) { + size_t hk, hq_beg, hq_end; + if (loop_hk) { + hk = hx; + hq_beg = hk * _helper._h_each_group_len; + hq_end = (hk + 1) * _helper._h_each_group_len; + } else { + hq_beg = hx; + hq_end = hx + 1; + hk = hx / _helper._h_each_group_len; + } + const auto& item = _workitems.get_attn_work_item(w); const auto batch_in_seq = item.batch_in_seq; const auto batch_in_token = subsequence_begins.ptr()[batch_in_seq]; @@ -1434,7 +1499,7 @@ struct MHA { _helper.exec_kernel_one_bh(q.slice(0, batch_in_token, batch_in_token), k_cache, v_cache, output_emb.slice(0, batch_in_token, batch_in_token), block_indices.ptr() + block_indices_begins.ptr()[batch_in_seq], - ithr, hk, 1ul, cur_kv_len, alibi_slopes, + ithr, hq_beg, hq_end, hk, 1ul, cur_kv_len, alibi_slopes, score_output); } else { const auto batch_in_reorder = item.batch_in_reorder; @@ -1461,6 +1526,8 @@ struct MHA { block_indices.ptr() + block_indices_begins.ptr()[batch_in_seq], ithr, q_blk, + hq_beg, + hq_end, hk, q_len, cur_kv_len, From 0dd7434e299bbb2e85a936ab42e9a8bc40729f75 Mon Sep 17 00:00:00 2001 From: Egor Duplenskii Date: Fri, 6 Dec 2024 06:43:59 +0100 Subject: [PATCH 10/43] [CPU][Refactoring] Introduce VariableExecutor (#27883) Depending on the parameters a `FullyConnected` node can use one or multiple executors. With the current approach, even when just a single executor is used, every `prepareParams()` (executor::update()) call goes through executor selection routine. The idea is to avoid such `prepareParams()` overhead for a single executor scenarios, which are probably the most common ones. Thus, split the pipeline into two branches: - only single simple executor is used and updated - a `VariableExecutor` is used and updated. 
`VariableExecutor` contains two or more simple executors --- .../executors/dnnl/dnnl_fullyconnected.hpp | 4 +- .../src/nodes/executors/executor_factory.hpp | 201 ++++-------------- .../fullyconnected_implementations.cpp | 1 + .../src/nodes/executors/graph_emitter.hpp | 46 +++- .../src/nodes/executors/variable_executor.hpp | 140 ++++++++++++ .../intel_cpu/src/nodes/fullyconnected.cpp | 18 +- .../intel_cpu/src/nodes/fullyconnected.h | 4 +- 7 files changed, 232 insertions(+), 182 deletions(-) create mode 100644 src/plugins/intel_cpu/src/nodes/executors/variable_executor.hpp diff --git a/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected.hpp b/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected.hpp index 3266bf8965c37b..1d078feaa6549b 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected.hpp +++ b/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected.hpp @@ -8,12 +8,12 @@ #include #include "cpu_memory.h" -#include "nodes/executors/dnnl/dnnl_fullyconnected_primitive.hpp" -#include "nodes/executors/dnnl/dnnl_convolution_primitive.hpp" #include "nodes/executors/dnnl/dnnl_aliases.hpp" +#include "nodes/executors/dnnl/dnnl_utils.hpp" #include "nodes/executors/executor.hpp" #include "memory_desc/cpu_memory_desc_utils.h" #include "nodes/executors/memory_arguments.hpp" +#include "post_ops.hpp" namespace ov { namespace intel_cpu { diff --git a/src/plugins/intel_cpu/src/nodes/executors/executor_factory.hpp b/src/plugins/intel_cpu/src/nodes/executors/executor_factory.hpp index 419ab4abf52cd7..f12795d5d1eb16 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/executor_factory.hpp +++ b/src/plugins/intel_cpu/src/nodes/executors/executor_factory.hpp @@ -1,4 +1,4 @@ -// Copyright (C) 2018-2022 Intel Corporation +// Copyright (C) 2018-2024 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -6,50 +6,22 @@ #include #include -#include #include "executor.hpp" -#include "nodes/executors/implementations.hpp" 
#include "nodes/executors/executor_config.hpp" #include "nodes/executors/executor_implementation.hpp" #include "nodes/executors/graph_emitter.hpp" +#include "nodes/executors/implementations.hpp" #include "nodes/executors/memory_arguments.hpp" #include "nodes/executors/printers.hpp" -#include "openvino/core/except.hpp" +#include "nodes/executors/variable_executor.hpp" #include "post_ops.hpp" namespace ov { namespace intel_cpu { using namespace executor; -template -static ExecutorPtr fallback(const executor::Config& config, - const executor::Config& fallbackConfig, - const MemoryArgs& memory, - const ExecutorContext::CPtr context, - const std::string& name) { - DEBUG_LOG("Falling back to graph executor for ", - name, - ". Original config: ", - config, - " new config:", - fallbackConfig); - - GraphEmitter graphEmitter(config.descs, config.attrs, config.postOps, memory, context, name); - - const auto& graphExecutor = - graphEmitter.createGraph(fallbackConfig.descs, fallbackConfig.attrs, fallbackConfig.postOps, context) - .ensureAttrsMatch() - .ensureSrcDescsMatch() - .ensureDstDescsMatch() - .ensurePostOpsMatch() - .emit(); - (void)graphExecutor; - - OPENVINO_THROW("Fallback logic is not implemented yet"); // return graphExecutor; -} - -template +template class ExecutorFactory { public: using ExecutorImplementationRef = std::reference_wrapper>; @@ -62,9 +34,7 @@ class ExecutorFactory { : m_attrs(attrs), m_postOps(postOps), m_context(context), - m_suitableImplementations(filter(m_attrs, m_postOps, descriptors, implementationPriority)), - m_implementationRequiresFallback(m_suitableImplementations.size(), true), - m_executors(m_suitableImplementations.size()) {} + m_suitableImplementations(filter(m_attrs, m_postOps, descriptors, implementationPriority)) {} /** * @brief Retrieves the proper memory descriptors based on the provided memory descriptors. 
@@ -95,104 +65,42 @@ class ExecutorFactory { } /** - * @brief Preconfigures an executor based on the provided memory arguments. - * - * Preconfigures an executor by selecting an appropriate implementation based on the provided - * memory arguments and by creating an executor using the implementation. - * - * @param memory The memory parameters used for selecting the appropriate executor implementation. - * - * @note The main use case is to offload executor data preparation (i.e. weights packing) - * From the make() call - * @todo Currently supports creating a single executor. - * For some nodes it can be worth to preconfigure all the executors. - */ - void preconfigure(const MemoryArgs& memory) { - executor::Config config{memoryDescsFromMemory(memory), m_attrs, m_postOps}; - - cacheFallbackStatus(config); - - const size_t implId = select(memory, 0); - const auto& impl = m_suitableImplementations[implId].get(); - DEBUG_LOG("Preconfiguring executor: ", impl.name()); - - if (m_implementationRequiresFallback[implId]) { - if (auto fallbackConfig = impl.requiresFallback(config)) { - fallback(config, *fallbackConfig, memory, m_context, impl.name()); - } - } - - (void)create(implId, memory, m_context); - } - - /** - * @brief Creates an Executor instance based on provided memory arguments. + * @brief Creates an Executor instance based on the provided memory arguments. * - * Creates an Executor instance using the provided MemoryArgs, selecting an appropriate implementation - * based on the characteristics of the memory. It handles fallback scenarios if necessary and updates the executor - * with the given memory information. + * Depending on the number of available implementations, returns: + * - VariableExecutor, if the number of implementations is two or more + * - Simple Executor, if there is only one available implementation * * @param memory memory arguments. * * @return A shared pointer to the created Executor. 
- * - * The function follows the steps below: - * - Selects an implementation based on the provided memory using the select() function. - * - Retrieves the selected implementation and checks if fallback is required. - * - If fallback is required, it creates a fallback configuration and returns a fallback executor. - * - Otherwise creates the executor using the selected implementation. - * - Updates the executor with the given memory information. - * */ - ExecutorPtr make(MemoryArgs& memory) { - auto createExec = [this](MemoryArgs& memory, size_t implId) -> ExecutorPtr { - const auto& impl = m_suitableImplementations[implId].get(); - if (m_implementationRequiresFallback[implId]) { - executor::Config config{memoryDescsFromMemory(memory), m_attrs, m_postOps}; - if (auto fallbackConfig = impl.requiresFallback(config)) { - return fallback(config, *fallbackConfig, memory, m_context, impl.name()); - } - } - const auto executor = create(implId, memory, m_context); - if (!executor->update(memory)) { - return nullptr; + ExecutorPtr make(const MemoryArgs& memory) { + // only single executor is available + if (m_suitableImplementations.size() == 1) { + auto config = GraphEmitter::createConfig(memory, m_attrs, m_postOps); + + const auto& theOnlyImplementation = m_suitableImplementations.front().get(); + + if (const auto fallbackConfig = theOnlyImplementation.requiresFallback(config)) { + return GraphEmitter::fallback(config, + *fallbackConfig, + memory, + m_context, + theOnlyImplementation.name()); } - return executor; - }; - - auto implId = select(memory, 0); - auto executor = createExec(memory, implId); - while (!executor) { - implId = select(memory, ++implId); - executor = createExec(memory, implId); - } - return executor; - } -private: - static MemoryDescArgs memoryDescsFromMemory(const MemoryArgs& memory) { - MemoryDescArgs memoryDescs; - memoryDescs.reserve(memory.size()); - - for (const auto& mem : memory) { - memoryDescs[mem.first] = mem.second->getDescPtr(); + return 
theOnlyImplementation.create(m_attrs, m_postOps, memory, m_context); } - return memoryDescs; - } - - /** - * @brief Caches the fallback status for each suitable implementation. - */ - void cacheFallbackStatus(const executor::Config& config) { - std::transform(m_suitableImplementations.begin(), - m_suitableImplementations.end(), - m_implementationRequiresFallback.begin(), - [&config](const ExecutorImplementationRef& impl) { - return impl.get().requiresFallback(config); - }); + return std::make_shared>(memory, + m_attrs, + m_postOps, + m_context, + m_suitableImplementations); } +private: /** * @brief Filters and retrieves suitable implementations based on the provided executor configuration. * @@ -205,11 +113,10 @@ class ExecutorFactory { * @note If an implementation is shape agnostic, no further implementations with lower * priority are considered. */ - static std::vector filter( - const Attrs& attrs, - const PostOps& postOps, - const MemoryDescArgs& descs, - const std::string& implementationPriority = {}) { + static std::vector filter(const Attrs& attrs, + const PostOps& postOps, + const MemoryDescArgs& descs, + const std::string& implementationPriority = {}) { const auto& implementations = getImplementations(); std::vector suitableImplementations; const executor::Config config{descs, attrs, postOps}; @@ -244,51 +151,17 @@ class ExecutorFactory { return suitableImplementations; } - size_t select(const MemoryArgs& memory, const size_t startIdx) const { - OPENVINO_ASSERT(startIdx < m_suitableImplementations.size(), - "Failed to find an implementation since start indx: ", startIdx, - " is out of range of the suitable implementations array: ", m_suitableImplementations.size()); - auto startIt = m_suitableImplementations.begin(); - std::advance(startIt, startIdx); - const auto selectedImplementation = - std::find_if(startIt, - m_suitableImplementations.end(), - [&memory](const ExecutorImplementationRef& implementation) { - return implementation.get().shapeAgnostic() || 
implementation.get().acceptsShapes(memory); - }); - OPENVINO_ASSERT(selectedImplementation != m_suitableImplementations.end(), "Failed to select an implemetation"); - - return std::distance(m_suitableImplementations.begin(), selectedImplementation); - } - - ExecutorPtr create(const size_t implId, - const MemoryArgs& memory, - const ExecutorContext::CPtr context) { - assert(implId < m_executors.size() && implId < m_suitableImplementations.size()); - - if (!m_executors[implId]) { - const auto& impl = m_suitableImplementations[implId].get(); - m_executors[implId] = impl.create(m_attrs, m_postOps, memory, context); - } - - return m_executors[implId]; - } - const Attrs& m_attrs; const PostOps& m_postOps; const ExecutorContext::CPtr m_context; std::vector m_suitableImplementations; - // stores fallback status to avoid performing the check for every make() call - std::vector m_implementationRequiresFallback; - // executors cache - std::vector m_executors; }; -template -using ExecutorFactoryPtr = std::shared_ptr>; +template +using ExecutorFactoryPtr = std::shared_ptr>; -template -using ExecutorFactoryCPtr = std::shared_ptr>; +template +using ExecutorFactoryCPtr = std::shared_ptr>; } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/executors/fullyconnected_implementations.cpp b/src/plugins/intel_cpu/src/nodes/executors/fullyconnected_implementations.cpp index 5834c3dda4b262..4cf6992985ecd3 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/fullyconnected_implementations.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/fullyconnected_implementations.cpp @@ -11,6 +11,7 @@ #include "memory_desc/cpu_memory_desc.h" #include "nodes/executors/convolution_config.hpp" #include "nodes/executors/dnnl/dnnl_convolution_primitive.hpp" +#include "nodes/executors/dnnl/dnnl_fullyconnected_primitive.hpp" #include "nodes/executors/dnnl/dnnl_fullyconnected.hpp" #include "nodes/executors/dnnl/dnnl_matmul_primitive.hpp" #include 
"nodes/executors/dnnl/dnnl_shape_agnostic_data.hpp" diff --git a/src/plugins/intel_cpu/src/nodes/executors/graph_emitter.hpp b/src/plugins/intel_cpu/src/nodes/executors/graph_emitter.hpp index 6aad18c793c8cf..784ed8bc778840 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/graph_emitter.hpp +++ b/src/plugins/intel_cpu/src/nodes/executors/graph_emitter.hpp @@ -5,12 +5,11 @@ #pragma once #include -#include #include "graph.h" -#include "memory_desc/cpu_memory_desc.h" #include "node.h" #include "nodes/executors/executor.hpp" +#include "nodes/executors/executor_config.hpp" #include "post_ops.hpp" namespace ov { @@ -72,6 +71,49 @@ class GraphEmitter { return graph; } + static MemoryDescArgs memoryDescsFromMemory(const MemoryArgs& memory) { + MemoryDescArgs memoryDescs; + memoryDescs.reserve(memory.size()); + + for (const auto& mem : memory) { + memoryDescs[mem.first] = mem.second->getDescPtr(); + } + + return memoryDescs; + } + + static executor::Config createConfig(const MemoryArgs& memory, + const Attrs& attrs, + const PostOps& postOps) { + return executor::Config{memoryDescsFromMemory(memory), attrs, postOps}; + } + + static ExecutorPtr fallback(const executor::Config& config, + const executor::Config& fallbackConfig, + const MemoryArgs& memory, + const ExecutorContext::CPtr context, + const std::string& name) { + DEBUG_LOG("Falling back to graph executor for ", + name, + ". 
Original config: ", + config, + " new config:", + fallbackConfig); + + GraphEmitter graphEmitter(config.descs, config.attrs, config.postOps, memory, context, name); + + const auto& graphExecutor = + graphEmitter.createGraph(fallbackConfig.descs, fallbackConfig.attrs, fallbackConfig.postOps, context) + .ensureAttrsMatch() + .ensureSrcDescsMatch() + .ensureDstDescsMatch() + .ensurePostOpsMatch() + .emit(); + (void)graphExecutor; + + OPENVINO_THROW("Fallback logic is not implemented yet"); // return graphExecutor; + } + private: const MemoryDescArgs& descs; const Attrs& attrs; diff --git a/src/plugins/intel_cpu/src/nodes/executors/variable_executor.hpp b/src/plugins/intel_cpu/src/nodes/executors/variable_executor.hpp new file mode 100644 index 00000000000000..8dfb7a4c63fde4 --- /dev/null +++ b/src/plugins/intel_cpu/src/nodes/executors/variable_executor.hpp @@ -0,0 +1,140 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "executor.hpp" +#include "executor_config.hpp" +#include "executor_implementation.hpp" +#include "nodes/executors/graph_emitter.hpp" + +namespace ov { +namespace intel_cpu { + +/** + * A stateful (variable) executor + * Contains two or more executors. 
+ * Switches between the executors based on provided Memory (more precisely based on in / out shapes) + */ +template +class VariableExecutor : public Executor { +public: + using ExecutorImplementationRef = std::reference_wrapper>; + + VariableExecutor(const MemoryArgs& memory, + const Attrs& attrs, + const PostOps& postOps, + const ExecutorContext::CPtr context, + std::vector suitableImplementations) + : m_attrs(attrs), + m_postOps(postOps), + m_context(context), + m_suitableImplementations(std::move(suitableImplementations)), + m_implementationRequiresFallback( + cacheFallbackStatus(m_suitableImplementations, + GraphEmitter::createConfig(memory, m_attrs, m_postOps))), + m_executors(m_suitableImplementations.size()) { + const size_t implId = select(memory, 0); + m_executors[implId] = create(implId, memory); + m_implId = implId; + } + + bool update(const MemoryArgs& memory) override { + for (auto implId = select(memory, 0); implId < m_suitableImplementations.size(); + implId = select(memory, implId)) { + if (!m_executors[implId]) { + m_executors[implId] = create(implId, memory); + } + + if (m_executors[implId]->update(memory)) { + m_implId = implId; + return true; + } + } + + return false; + } + + void execute(const MemoryArgs& memory) override { + m_executors[m_implId]->execute(memory); + } + + impl_desc_type implType() const override { + return m_executors[m_implId]->implType(); + } + + void moveMemToNumaNode(int numaID) override { + m_executors[m_implId]->moveMemToNumaNode(numaID); + } + +private: + /** + * @brief Returns a fallback status for each suitable implementation. 
+ */ + static std::vector cacheFallbackStatus(const std::vector& suitableImplementations, + const executor::Config& config) { + std::vector implementationRequiresFallback(suitableImplementations.size()); + std::transform(suitableImplementations.begin(), + suitableImplementations.end(), + implementationRequiresFallback.begin(), + [&config](const ExecutorImplementationRef& impl) { + return impl.get().requiresFallback(config); + }); + + return implementationRequiresFallback; + } + + size_t select(const MemoryArgs& memory, const size_t startIdx) const { + OPENVINO_ASSERT(startIdx < m_suitableImplementations.size(), + "Failed to find an implementation since start indx: ", + startIdx, + " is out of range of the suitable implementations array: ", + m_suitableImplementations.size()); + + auto startIt = m_suitableImplementations.begin() + startIdx; + + const auto selectedImplementation = + std::find_if(startIt, + m_suitableImplementations.end(), + [&memory](const ExecutorImplementationRef& implementation) { + return implementation.get().shapeAgnostic() || implementation.get().acceptsShapes(memory); + }); + + OPENVINO_ASSERT(selectedImplementation != m_suitableImplementations.end(), "Failed to select an implemetation"); + + return std::distance(m_suitableImplementations.begin(), selectedImplementation); + } + + ExecutorPtr create(const size_t implId, const MemoryArgs& memory) { + assert(implId < m_executors.size() && implId < m_suitableImplementations.size()); + + auto createWithFallback = [this](const size_t implId, const MemoryArgs& memory) { + const auto& impl = m_suitableImplementations[implId].get(); + + if (m_implementationRequiresFallback[implId]) { + auto config = GraphEmitter::createConfig(memory, m_attrs, m_postOps); + if (auto fallbackConfig = impl.requiresFallback(config)) { + return GraphEmitter::fallback(config, *fallbackConfig, memory, m_context, impl.name()); + } + } + + return impl.create(m_attrs, m_postOps, memory, m_context); + }; + + return 
createWithFallback(implId, memory); + } + + const Attrs& m_attrs; + const PostOps& m_postOps; + const ExecutorContext::CPtr m_context; + std::vector m_suitableImplementations; + // stores fallback status to avoid performing the check for every make() call + std::vector m_implementationRequiresFallback; + // executors cache + std::vector m_executors; + size_t m_implId; +}; + +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp b/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp index 307125ef0069e0..31ae4f26cc08a1 100644 --- a/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp +++ b/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp @@ -66,7 +66,7 @@ void FullyConnected::initTensorParallelConfig(const GraphContext::CPtr context) // init tp_cfg.w_rank and tp_cfg.w_size tp_cfg.w_rank = context->getCPUStreamExecutor()->get_rank()[0]; tp_cfg.w_size = ov::threading::message_manager()->get_num_sub_streams(); - tp_cfg.enable_tensor_parallel = tp_cfg.w_size > 1 ? 
true : false; + tp_cfg.enable_tensor_parallel = tp_cfg.w_size > 1; tp_cfg.sub_memory = context->getSubMemory(); } } @@ -119,16 +119,12 @@ void FullyConnected::needPrepareParamsForTensorParallel() { } } -ExecutorPtr FullyConnected::createExecutor() { - const auto& executor = factory->make(memory); - getSelectedPrimitiveDescriptor()->setImplementationType(executor->implType()); - - return executor; -} - void FullyConnected::prepareParams() { needPrepareParamsForTensorParallel(); - executor = createExecutor(); + + executor->update(memory); + // @todo avoid updating implementation type in scope of every prepareParams call + getSelectedPrimitiveDescriptor()->setImplementationType(executor->implType()); } void FullyConnected::initTensorParallelSync() { @@ -431,7 +427,7 @@ void FullyConnected::initSupportedPrimitiveDescriptors() { needUpdateZeroPointForTensorParallel(); auto executionContext = std::make_shared(context, getImplPriority(), privateWeightCache); - factory = std::make_shared>(attrs, postOps, executionContext, descs); + factory = std::make_shared>(attrs, postOps, executionContext, descs); const auto nodeDescriptors = factory->getProperMemoryDescriptors(descs); NodeConfig nodeConfig; @@ -496,7 +492,7 @@ void FullyConnected::createPrimitive() { needSplitMemoryForTensorParallel(); // @todo should we preconfigure only for dynamic shapes? 
// Since for static shapes primitive is created in scope of compile_model() anyway - factory->preconfigure(memory); + executor = factory->make(memory); Node::createPrimitive(); } diff --git a/src/plugins/intel_cpu/src/nodes/fullyconnected.h b/src/plugins/intel_cpu/src/nodes/fullyconnected.h index be29342b851988..8c17228e365af4 100644 --- a/src/plugins/intel_cpu/src/nodes/fullyconnected.h +++ b/src/plugins/intel_cpu/src/nodes/fullyconnected.h @@ -16,7 +16,6 @@ #include "nodes/executors/memory_arguments.hpp" #include "nodes/executors/fullyconnected_config.hpp" #include "post_ops.hpp" -#include "openvino/runtime/threading/cpu_message.hpp" namespace ov { namespace intel_cpu { @@ -85,7 +84,6 @@ class FullyConnected : public Node { static const size_t WEIGHTS_ID = 1; static const size_t BIAS_ID = 2; - ExecutorPtr createExecutor(); void fuseDecompressionConstant(const MemoryCPtr& memory, MemoryCPtr& decompressionValuesPtr); void initTensorParallelConfig(const GraphContext::CPtr context); @@ -103,7 +101,7 @@ class FullyConnected : public Node { FCAttrs attrs; PostOps postOps; MemoryArgs memory; - ExecutorFactoryPtr factory; + ExecutorFactoryPtr factory; ExecutorPtr executor = nullptr; std::string errorPrefix; From fb1810b8ce36f7d8e7be26a0d5e71444f8c8f047 Mon Sep 17 00:00:00 2001 From: Alina Kladieva Date: Fri, 6 Dec 2024 06:52:36 +0100 Subject: [PATCH 11/43] [tests/llm] Reorder imports to avoid onnx-related DLL load fail (#27942) ### Details: There is an issue with ONNX>=1.17 which causes DLL load failures on Windows. Previously it caused WWB import to fail (CVS-158774), it was fixed in https://github.com/openvinotoolkit/openvino.genai/pull/1301. Now this llm tests failure comes from the next import, optimum.intel.openvino, and it doesn't reproduce locally if optimum.intel.openvino is imported before WWB. 
Signed-off-by: Alina Kladieva --- tests/llm/accuracy_conformance.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/llm/accuracy_conformance.py b/tests/llm/accuracy_conformance.py index 41015d7664ecc2..7f75a8e912bbd6 100644 --- a/tests/llm/accuracy_conformance.py +++ b/tests/llm/accuracy_conformance.py @@ -5,9 +5,9 @@ import tempfile import pytest -import whowhatbench as wwb from optimum.intel.openvino import (OVModelForCausalLM, OVWeightQuantizationConfig) +import whowhatbench as wwb from transformers import AutoModelForCausalLM, AutoTokenizer, set_seed logging.basicConfig(level=logging.INFO) From 536bd69ed66a57869aa6d3bbe06692217997e67e Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Fri, 6 Dec 2024 11:50:30 +0400 Subject: [PATCH 12/43] [GPU] Parse runtime_options from model RT info and apply to config (#27900) ### Details: - Added conversion logic from RT Info attributes to plugin property for limited set of properties. Signed-off-by: Vladimir Paramuzov --- .../intel_gpu/runtime/execution_config.hpp | 14 +++ src/plugins/intel_gpu/src/plugin/plugin.cpp | 4 + .../src/runtime/execution_config.cpp | 6 ++ .../tests/functional/behavior/properties.cpp | 99 +++++++++++++++++++ 4 files changed, 123 insertions(+) create mode 100644 src/plugins/intel_gpu/tests/functional/behavior/properties.cpp diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp index 0af98bf1e952d0..3e854e4c9c5ada 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp @@ -138,6 +138,10 @@ class ExecutionConfig { void apply_user_properties(const cldnn::device_info& info); + // Note that RT info property value has lower priority than values set by user via core.set_property or passed to compile_model call + // So this method should be called after setting all user properties, 
but before apply_user_properties() call. + void apply_rt_info(const ov::RTMap& rt_info); + std::string to_string() const; protected: @@ -147,6 +151,16 @@ class ExecutionConfig { void apply_priority_hints(const cldnn::device_info& info); void apply_debug_options(const cldnn::device_info& info); + template + void apply_rt_info_property(const ov::Property& property, const ov::RTMap& rt_info) { + if (!is_set_by_user(property)) { + auto rt_info_val = rt_info.find(property.name()); + if (rt_info_val != rt_info.end()) { + set_user_property(property(rt_info_val->second.template as())); + } + } + } + private: ov::AnyMap internal_properties; ov::AnyMap user_properties; diff --git a/src/plugins/intel_gpu/src/plugin/plugin.cpp b/src/plugins/intel_gpu/src/plugin/plugin.cpp index 7775a153a99e8f..c8839472a6d962 100644 --- a/src/plugins/intel_gpu/src/plugin/plugin.cpp +++ b/src/plugins/intel_gpu/src/plugin/plugin.cpp @@ -189,6 +189,8 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< ExecutionConfig config = m_configs_map.at(device_id); config.set_user_property(orig_config); + if (model->has_rt_info("runtime_options")) + config.apply_rt_info(model->get_rt_info("runtime_options")); config.apply_user_properties(context->get_engine().get_device_info()); set_cache_info(model, config); @@ -278,6 +280,8 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr& ExecutionConfig config = m_configs_map.at(device_id); config.set_user_property(orig_config); + if (model->has_rt_info("runtime_options")) + config.apply_rt_info(model->get_rt_info("runtime_options")); config.apply_user_properties(ctx->get_engine().get_device_info()); ProgramBuilder prog(ctx->get_engine(), config); diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp index 4eaccf5540bd2a..30a9477e1600dd 100644 --- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp @@ -257,6 
+257,12 @@ void ExecutionConfig::apply_user_properties(const cldnn::device_info& info) { user_properties.clear(); } +void ExecutionConfig::apply_rt_info(const ov::RTMap& rt_info) { + apply_rt_info_property(ov::hint::kv_cache_precision, rt_info); + apply_rt_info_property(ov::hint::dynamic_quantization_group_size, rt_info); + apply_rt_info_property(ov::hint::activations_scale_factor, rt_info); +} + std::string ExecutionConfig::to_string() const { std::stringstream s; s << "internal properties:\n"; diff --git a/src/plugins/intel_gpu/tests/functional/behavior/properties.cpp b/src/plugins/intel_gpu/tests/functional/behavior/properties.cpp new file mode 100644 index 00000000000000..93a00262db35c2 --- /dev/null +++ b/src/plugins/intel_gpu/tests/functional/behavior/properties.cpp @@ -0,0 +1,99 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/runtime/properties.hpp" +#include "base/ov_behavior_test_utils.hpp" +#include "openvino/runtime/core.hpp" +#include "common_test_utils/subgraph_builders/conv_pool_relu.hpp" + +namespace { + +class TestPropertiesGPU : public ::testing::Test { +public: + std::shared_ptr model; + + void SetUp() override { + SKIP_IF_CURRENT_TEST_IS_DISABLED(); + model = ov::test::utils::make_conv_pool_relu(); + } +}; + +TEST_F(TestPropertiesGPU, NoRTInfo) { + ov::Core core; + ov::Any type; + ov::Any size; + ov::Any scale; + ov::CompiledModel compiled_model; + + OV_ASSERT_NO_THROW(compiled_model = core.compile_model(model, ov::test::utils::DEVICE_GPU)); + OV_ASSERT_NO_THROW(type = compiled_model.get_property(ov::hint::kv_cache_precision)); + OV_ASSERT_NO_THROW(size = compiled_model.get_property(ov::hint::dynamic_quantization_group_size)); + OV_ASSERT_NO_THROW(scale = compiled_model.get_property(ov::hint::activations_scale_factor)); +} + +TEST_F(TestPropertiesGPU, RTInfoPropertiesWithDefault) { + ov::Core core; + ov::Any type; + ov::Any size; + ov::Any scale; + ov::CompiledModel compiled_model; + 
model->set_rt_info("f16", "runtime_options", ov::hint::kv_cache_precision.name()); + model->set_rt_info("0", "runtime_options", ov::hint::dynamic_quantization_group_size.name()); + model->set_rt_info("8.0", "runtime_options", ov::hint::activations_scale_factor.name()); + + OV_ASSERT_NO_THROW(compiled_model = core.compile_model(model, ov::test::utils::DEVICE_GPU)); + OV_ASSERT_NO_THROW(type = compiled_model.get_property(ov::hint::kv_cache_precision)); + OV_ASSERT_NO_THROW(size = compiled_model.get_property(ov::hint::dynamic_quantization_group_size)); + OV_ASSERT_NO_THROW(scale = compiled_model.get_property(ov::hint::activations_scale_factor)); + ASSERT_EQ(type.as(), ov::element::f16); + ASSERT_EQ(size.as(), 0); + ASSERT_EQ(scale.as(), 8.0f); +} + +TEST_F(TestPropertiesGPU, RTInfoPropertiesWithUserValuesFromCore) { + ov::Core core; + ov::Any type; + ov::Any size; + ov::Any scale; + ov::CompiledModel compiled_model; + model->set_rt_info("f16", "runtime_options", ov::hint::kv_cache_precision.name()); + model->set_rt_info("0", "runtime_options", ov::hint::dynamic_quantization_group_size.name()); + model->set_rt_info("8.0", "runtime_options", ov::hint::activations_scale_factor.name()); + core.set_property(ov::hint::kv_cache_precision(ov::element::u8)); + core.set_property(ov::hint::dynamic_quantization_group_size(16)); + core.set_property(ov::hint::activations_scale_factor(4.0f)); + + OV_ASSERT_NO_THROW(compiled_model = core.compile_model(model, ov::test::utils::DEVICE_GPU)); + OV_ASSERT_NO_THROW(type = compiled_model.get_property(ov::hint::kv_cache_precision)); + OV_ASSERT_NO_THROW(size = compiled_model.get_property(ov::hint::dynamic_quantization_group_size)); + OV_ASSERT_NO_THROW(scale = compiled_model.get_property(ov::hint::activations_scale_factor)); + ASSERT_EQ(type.as(), ov::element::u8); + ASSERT_EQ(size.as(), 16); + ASSERT_EQ(scale.as(), 4.0f); +} + +TEST_F(TestPropertiesGPU, RTInfoPropertiesWithUserValuesFromCompileModel) { + ov::Core core; + ov::Any type; + 
ov::Any size; + ov::Any scale; + ov::CompiledModel compiled_model; + model->set_rt_info("f16", "runtime_options", ov::hint::kv_cache_precision.name()); + model->set_rt_info("0", "runtime_options", ov::hint::dynamic_quantization_group_size.name()); + model->set_rt_info("8.0", "runtime_options", ov::hint::activations_scale_factor.name()); + ov::AnyMap config; + config[ov::hint::kv_cache_precision.name()] = "u8"; + config[ov::hint::dynamic_quantization_group_size.name()] = "16"; + config[ov::hint::activations_scale_factor.name()] = "4.0"; + + OV_ASSERT_NO_THROW(compiled_model = core.compile_model(model, ov::test::utils::DEVICE_GPU, config)); + OV_ASSERT_NO_THROW(type = compiled_model.get_property(ov::hint::kv_cache_precision)); + OV_ASSERT_NO_THROW(size = compiled_model.get_property(ov::hint::dynamic_quantization_group_size)); + OV_ASSERT_NO_THROW(scale = compiled_model.get_property(ov::hint::activations_scale_factor)); + ASSERT_EQ(type.as(), ov::element::u8); + ASSERT_EQ(size.as(), 16); + ASSERT_EQ(scale.as(), 4.0f); +} + +} // namespace From eed4a60be67dbb22825a4fad20245ae806e11634 Mon Sep 17 00:00:00 2001 From: Karol Blaszczak Date: Fri, 6 Dec 2024 10:15:28 +0100 Subject: [PATCH 13/43] [DOCS] test drive doc (#27933) A document for test drive and some minor tweaks in other areas --- .../documentation/openvino-ecosystem.rst | 9 ++ .../openvino-test-drive.rst | 109 ++++++++++++++++++ .../llm_inference_guide/genai-guide.rst | 2 +- .../benchmarks_files/llm_models_7-155H.csv | 1 + .../benchmarks_files/llm_models_7-258V.csv | 1 + .../benchmarks_files/llm_models_9-288V.csv | 3 +- .../_static/download/supported_models.csv | 1 - 7 files changed, 123 insertions(+), 3 deletions(-) create mode 100644 docs/articles_en/documentation/openvino-ecosystem/openvino-test-drive.rst diff --git a/docs/articles_en/documentation/openvino-ecosystem.rst b/docs/articles_en/documentation/openvino-ecosystem.rst index 6735192e95f674..fe4f203428a865 100644 --- 
a/docs/articles_en/documentation/openvino-ecosystem.rst +++ b/docs/articles_en/documentation/openvino-ecosystem.rst @@ -12,6 +12,7 @@ OpenVINO™ Ecosystem Overview :hidden: openvino-ecosystem/openvino-training-extensions + openvino-ecosystem/openvino-test-drive openvino-ecosystem/datumaro openvino-ecosystem/openvino-security-add-on @@ -102,6 +103,14 @@ development process, empowering teams to produce custom AI models at scale. |hr| +| **Intel® Test Drive** +| :bdg-link-dark:`Github ` + +OpenVINO™ Test Drive is a cross-platform graphic user interface application that enables running +generative AI and vision models directly on your computer or edge device using OpenVINO™ Runtime. +|hr| + + | **Tokenizers** | :bdg-link-dark:`Github ` :bdg-link-success:`User Guide ` diff --git a/docs/articles_en/documentation/openvino-ecosystem/openvino-test-drive.rst b/docs/articles_en/documentation/openvino-ecosystem/openvino-test-drive.rst new file mode 100644 index 00000000000000..527a01bf38a6cf --- /dev/null +++ b/docs/articles_en/documentation/openvino-ecosystem/openvino-test-drive.rst @@ -0,0 +1,109 @@ +=============================================================================================== +OpenVINO™ Test Drive +=============================================================================================== + + +.. meta:: + :description: See how to test your models with OpenVINO, using a simple graphic interface of + Test Drive. + + + +OpenVINO™ Test Drive is a cross-platform graphic user interface application for running and +testing AI models, both generative and vision based. +It can run directly on your computer or on edge devices using +`OpenVINO™ Runtime `__. + +OpenVINO™ Test Drive is developed under the `openvino_testdrive repository `__. 
+ +Use OpenVINO™ Test Drive to: + +* **Chat with LLMs** and evaluate model performance on your computer or edge device; +* **Experiment with different text prompts** to generate images, using Stable + Diffusion and Stable Diffusion XL models (coming soon); +* **Transcribe speech from video**, using Whisper models, including generation + of timestamps (coming soon); +* **Run inference of models** trained by Intel® Geti™ and **visualize the results**. + + + +Installation (Windows) +############################################################################################### + +1. Download the latest archive from the + `release repository `__. + To verify the integrity of the downloaded package, use the SHA-256 file attached. + +2. Extract the zip file and run the *MSIX* installation package. Click the `Install` button to + proceed. + +3. Launch OpenVINO™ Test Drive by clicking the application name in the Windows app list. + + +Quick start +############################################################################################### + +When starting the application, you can import an LLM model from Hugging Face Hub +or upload an Intel® Geti™ model from a local drive. + +Inference of models from Hugging Face ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +1. Find a model on `Hugging Face `__ and import it. + +2. Chat with LLMs via the `Playground` tab. + +3. Use the `Performance metrics` tab to get model performance metrics on your + computer or an edge device. + + + +Inference of models trained with Intel® Geti™ ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +1. Download the deployment code for a model in the OpenVINO IR format trained + by Intel® Geti™ (refer to the `Intel® Geti™ documentation `__ + for more details). + +2. Import the deployment code into OpenVINO™ Test Drive, using the *Import model* and then + *Local disk* buttons. + +3. 
Use the *Live inference* tab to run and visualize results of inference of individual images. + +4. For batch inference, use the *Batch inference* tab and provide paths to the folder + with input images, as well as one for batch inference results. You can do so by filling out + the *Source folder* and *Destination folder* fields. Click *Start* to start batch inference. + + +Build the Application +############################################################################################### + +1. Make sure you `Install flutter SDK `__ + and all its platform-specific dependencies. +2. Build the bindings and place them in the **./bindings** folder. + + OpenVINO™ Test Drive uses bindings to `OpenVINO™ GenAI `__ + and `OpenVINO™ Model API `__, + which are located in the **./openvino_bindings** folder. Refer to the + `GitHub page `__ + for more details. + +3. Start the application, using the following command: + + .. code-block:: console + + flutter run + +Additional Resources +############################################################################################### + +- `OpenVINO™ `__ - a software toolkit + for optimizing and deploying deep learning models. +- `GenAI Repository `__ and + `OpenVINO Tokenizers `__ + - resources and tools for developing and optimizing Generative AI applications. +- `Intel® Geti™ `__ - software for building computer + vision models. +- `OpenVINO™ Model API `__ + - a set of wrapper classes for particular tasks and model architectures. + It simplifies routine procedures, preprocessing and postprocessing of data. 
diff --git a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst index 37b6091eb9b898..42c1c3fb47aa42 100644 --- a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst +++ b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst @@ -18,7 +18,7 @@ make sure to :doc:`install OpenVINO with GenAI <../../get-started/install-openvi .. image:: ../../assets/images/genai_main_diagram.svg :align: center - :alt: OpenVINO workflow diagram for convenience + :alt: OpenVINO GenAI workflow diagram | Here is sample code for several Generative AI use case scenarios. Note that these are very basic diff --git a/docs/sphinx_setup/_static/benchmarks_files/llm_models_7-155H.csv b/docs/sphinx_setup/_static/benchmarks_files/llm_models_7-155H.csv index fa5ae359fa45c0..9481b5619244e2 100644 --- a/docs/sphinx_setup/_static/benchmarks_files/llm_models_7-155H.csv +++ b/docs/sphinx_setup/_static/benchmarks_files/llm_models_7-155H.csv @@ -1,3 +1,4 @@ +Topology,Precision,Input Size,max rss memory,1st latency (ms),2nd latency (ms),2nd tok/sec opt-125m-gptq,INT4-MIXED,32,1116,25.8,8.1,123.5 opt-125m-gptq,INT4-MIXED,1024,1187.1,75.2,8.2,122.0 qwen2-0.5b,INT4-MIXED,32,1587.4,45.1,15.4,64.9 diff --git a/docs/sphinx_setup/_static/benchmarks_files/llm_models_7-258V.csv b/docs/sphinx_setup/_static/benchmarks_files/llm_models_7-258V.csv index 9aa769e4dd61b9..625ff1d6fe5ed5 100644 --- a/docs/sphinx_setup/_static/benchmarks_files/llm_models_7-258V.csv +++ b/docs/sphinx_setup/_static/benchmarks_files/llm_models_7-258V.csv @@ -1,3 +1,4 @@ +Topology,Precision,Input Size,max rss memory,1st latency (ms),2nd latency (ms),2nd tok/sec opt-125m-gptq,INT4-MIXED,32,1150.2,35.1,8.2,122.0 opt-125m-gptq,INT4-MIXED,1024,1228,67,8.2,122.0 qwen2-0.5b,INT4-MIXED,1024,1596.2,83.6,14.4,69.4 diff --git a/docs/sphinx_setup/_static/benchmarks_files/llm_models_9-288V.csv 
b/docs/sphinx_setup/_static/benchmarks_files/llm_models_9-288V.csv index dfc98271bcd21b..c1932e678505ff 100644 --- a/docs/sphinx_setup/_static/benchmarks_files/llm_models_9-288V.csv +++ b/docs/sphinx_setup/_static/benchmarks_files/llm_models_9-288V.csv @@ -1,4 +1,5 @@ -opt-125m-gptq,INT4-MIXED,32,833.1,15.6,3.9,256.4 +Topology,Precision,Input Size,max rss memory,1st latency (ms),2nd latency (ms),2nd tok/sec +opt-125m-gptq,INT4-MIXED,32,833.1,15.6,3.9,256.4 opt-125m-gptq,INT4-MIXED,1024,955.9,553.8,4.8,208.3 bloomz-560m,INT4-MIXED,32,1457.5,48.5,11.1,90.1 qwen2-0.5b,INT4-MIXED,32,1167.8,95.7,11.5,87.0 diff --git a/docs/sphinx_setup/_static/download/supported_models.csv b/docs/sphinx_setup/_static/download/supported_models.csv index 87ea37b0f207c3..39053fa6d3e0a7 100644 --- a/docs/sphinx_setup/_static/download/supported_models.csv +++ b/docs/sphinx_setup/_static/download/supported_models.csv @@ -715,7 +715,6 @@ tiny-random-BeitForImageClassification,Image Classification,pytorch,intel-optimu tiny-random-bert,Natural Language Processing,pytorch,intel-optimum default,+,, tiny-random-BlenderbotModel,Large Language Model,pytorch,INT4,+,, tiny-random-BloomModel,Large Language Model,pytorch,INT4,+,, -tiny-random-chatglm2,Large Language Model,pytorch,INT4,+,, tiny-random-codegen2,Large Language Model,pytorch,INT4,+,, tiny-random-CodeGenForCausalLM,Large Language Model,pytorch,INT4,+,, tiny-random-CohereForCausalLM,Large Language Model,pytorch,INT4,+,, From 0f1e5092b518402248e372c2401651a0bd150f7f Mon Sep 17 00:00:00 2001 From: Andrzej Kopytko Date: Fri, 6 Dec 2024 11:41:12 +0100 Subject: [PATCH 14/43] [DOCS] Remove OVMS Button (#27951) ### Details: - *item1* - *...* ### Tickets: - *ticket-id* --- docs/articles_en/about-openvino/performance-benchmarks.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/articles_en/about-openvino/performance-benchmarks.rst b/docs/articles_en/about-openvino/performance-benchmarks.rst index 5d9abfe891584f..a398432925a983 
100644 --- a/docs/articles_en/about-openvino/performance-benchmarks.rst +++ b/docs/articles_en/about-openvino/performance-benchmarks.rst @@ -56,7 +56,8 @@ implemented in your solutions. Click the buttons below to see the chosen benchma :material-regular:`table_view;1.4em` LLM performance for AI PC - .. grid-item:: +.. uncomment under + .. .. grid-item:: .. button-link:: # :class: ovms-toolkit-benchmark-llm-result From c3b014c49afa04a838e1778184cf97a1c834e465 Mon Sep 17 00:00:00 2001 From: Tomasz Jankowski Date: Fri, 6 Dec 2024 11:55:16 +0100 Subject: [PATCH 15/43] [Templ test] GroupNormalization: Enable whole Tensor comparison (#27932) ### Details: - Removed legacy comparison method. - Set relative threshold for fp16. ### Tickets: - CVS-137168 Signed-off-by: Tomasz Jankowski --- .../tests/functional/op_reference/group_normalization.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/plugins/template/tests/functional/op_reference/group_normalization.cpp b/src/plugins/template/tests/functional/op_reference/group_normalization.cpp index 322d509aa838ec..b3bd898db4eeec 100644 --- a/src/plugins/template/tests/functional/op_reference/group_normalization.cpp +++ b/src/plugins/template/tests/functional/op_reference/group_normalization.cpp @@ -42,11 +42,14 @@ class ReferenceGroupNormalization : public testing::TestWithParam& obj) { From bf62609711227605d381bedfcd993e6c60475975 Mon Sep 17 00:00:00 2001 From: Taylor Yeonbok Lee Date: Fri, 6 Dec 2024 23:51:58 +0900 Subject: [PATCH 16/43] [GPU] MLP : 2fcs + swiglu fusion (#27831) ### Details: - 2 FCs + swiglu in MLP pattern are fused - Only applied to cldnn && #EUs > 128 && glu type with swiglu ### Tickets: - 152163 --- .../intel_gpu/runtime/debug_configuration.hpp | 1 + .../include/intel_gpu/runtime/layout.hpp | 5 + .../intel_gpu/src/graph/fully_connected.cpp | 26 +++- .../prepare_primitive_fusing.cpp | 62 +++++++++- .../src/graph/impls/ocl/fully_connected.cpp | 14 ++- 
.../impls/ocl/kernel_selector_helper.cpp | 10 +- .../src/graph/include/pass_manager.h | 1 + .../intel_gpu/src/graph/include/swiglu_inst.h | 9 ++ .../intel_gpu/src/graph/primitive_inst.cpp | 11 ++ .../intel_gpu/src/graph/program_node.cpp | 22 ++++ .../fully_connected_gpu_bf_tiled.cl | 117 ++++++++++++++++-- .../fully_connected_gpu_bf_tiled_common.cl | 49 +++++++- .../fully_connected_kernel_bf_tiled.cpp | 65 +++++++--- .../fully_connected_kernel_bf_tiled.h | 3 +- .../kernels/swiglu/swiglu_kernel_base.h | 11 ++ .../intel_gpu/src/plugin/ops/swiglu.cpp | 4 +- .../transformations/fc_horizontal_fusion.cpp | 19 ++- .../transformations/fc_horizontal_fusion.hpp | 2 +- .../src/plugin/transformations_pipeline.cpp | 10 +- .../src/runtime/debug_configuration.cpp | 5 +- .../fusions/fully_connected_fusion_test.cpp | 59 ++++++++- .../tests/unit/fusions/fusion_test_common.hpp | 12 ++ 22 files changed, 469 insertions(+), 48 deletions(-) diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp index a020c5d1cd5ef6..a7a8ae1f229a72 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp @@ -147,6 +147,7 @@ class debug_configuration { int use_kv_cache_compression; // Enable KV-cache compression int dynamic_quantize_group_size; // Enable Dynamic quantization for fully connected primitive by specified group size int disable_horizontal_fc_fusion; // Disable fc horizontal fusion + int disable_fc_swiglu_fusion; // Disable swiglu fusion to fc std::set dump_iteration; // Dump n-th execution of network. 
std::vector load_layers_raw_dump; // List of layers to load dumped raw binary and filenames static const debug_configuration *get_instance(); diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/layout.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/layout.hpp index ab5cb53454b768..cc753d10aea9cd 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/layout.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/layout.hpp @@ -50,6 +50,11 @@ struct data_type_traits { return et.is_quantized() && et.bitwidth() == 8; } + static bool is_i4_u4(data_types data_type) { + auto et = ov::element::Type(data_type); + return et.bitwidth() == 4; + } + static ov::element::Type max_type(ov::element::Type t1, ov::element::Type t2) { if (t1.bitwidth() < t2.bitwidth()) return t2; diff --git a/src/plugins/intel_gpu/src/graph/fully_connected.cpp b/src/plugins/intel_gpu/src/graph/fully_connected.cpp index bc1e3e2e82b3ca..308d9a9f2fd66b 100644 --- a/src/plugins/intel_gpu/src/graph/fully_connected.cpp +++ b/src/plugins/intel_gpu/src/graph/fully_connected.cpp @@ -7,8 +7,10 @@ #include #include #include "utils.hpp" +#include "swiglu_inst.h" #include "matmul_shape_inference.hpp" +#include "glu_shape_inference.hpp" namespace cldnn { GPU_DEFINE_PRIMITIVE_TYPE_ID(fully_connected) @@ -171,14 +173,32 @@ std::vector fully_connected_inst::calc_output_layouts(fully_connected_no output_type = impl_param.get_output_element_type(); } - ov::op::v0::MatMul op; - op.set_transpose_b(true); + ov::op::v0::MatMul matmul_op; + matmul_op.set_transpose_b(true); std::vector input_shapes = { input_layout.get(), weights_layout.get() }; - std::vector output_shapes = ov::op::v0::shape_infer(&op, input_shapes); + std::vector output_shapes = ov::op::v0::shape_infer(&matmul_op, input_shapes); + bool has_swiglu = false; + auto& fused_prims = node.get_fused_primitives(); + for (auto f : fused_prims) { + if (f.is_type()) { + has_swiglu = true; + OPENVINO_ASSERT(fused_prims.size() == 1, "Other 
operation is fused in addition to swiglu!"); + } + } + if (has_swiglu) { + ov::op::internal::GLU swiglu_op; + OPENVINO_ASSERT(fused_prims.size() == 1); + OPENVINO_ASSERT(fused_prims[0].typed_desc()->glu_type == ov::op::internal::GLU::GluType::Swish); + swiglu_op.set_axis(fused_prims[0].typed_desc()->axis); + swiglu_op.set_split_lengths(fused_prims[0].typed_desc()->split_lengths); + swiglu_op.set_glu_type(fused_prims[0].typed_desc()->glu_type); + std::vector input_shapes = { output_shapes[0] }; + output_shapes = shape_infer(&swiglu_op, input_shapes); + } bool is_static = input_layout.is_static() && weights_layout.is_static(); bool allow_new_shape_infer = impl_param.get_program().is_new_shape_infer(); diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp index 60d1e8aa7e10b7..29b7cf58a19b54 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp @@ -1,7 +1,7 @@ // Copyright (C) 2018-2024 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // - +#include "intel_gpu/runtime/debug_configuration.hpp" #include "program_helpers.h" #include "pass_manager.h" @@ -37,6 +37,7 @@ #include "strided_slice_inst.h" #include "cum_sum_inst.h" #include "embedding_bag_inst.h" +#include "swiglu_inst.h" #include "extract_image_patches_inst.h" #include "reduce_inst.h" #include "group_normalization_inst.h" @@ -56,6 +57,7 @@ using namespace cldnn; void prepare_primitive_fusing::run(program& p) { fuse_reorders(p); remove_redundant_reshape(p); + fuse_swiglu(p); fuse_bias(p); fuse_simple_primitives(p); fuse_constant_transposes(p); @@ -161,6 +163,46 @@ void prepare_primitive_fusing::fuse_reorders(program &p) { } } +void prepare_primitive_fusing::fuse_swiglu(program &p) { + GPU_DEBUG_GET_INSTANCE(debug_config); + bool disable_fc_swiglu_fusion = false; + 
GPU_DEBUG_IF(debug_config->disable_fc_swiglu_fusion == 1) + disable_fc_swiglu_fusion = true; + // Apply only for high performant GPU + if (disable_fc_swiglu_fusion || p.get_engine().get_device_info().execution_units_count < 128) + return; + // TODO: to support other glu types && other weight data types + auto itr = p.get_processing_order().begin(); + std::map>> fusing_history; + while (itr != p.get_processing_order().end()) { + auto node_itr = itr++; + auto& node = (*node_itr); + if (node->is_type()) { + if (!node->get_dependency(0).is_type()) + continue; + auto swiglu_prim = node->get_kernel_impl_params()->typed_desc(); + auto& fc_node = node->get_dependency(0); + if (node->get_dependencies().size() > 1) + continue; + if (!node->get_dependency(0).get_fused_primitives().empty()) + continue; + auto in_dt = fc_node.get_input_layout(0).data_type; + if (in_dt != data_types::f16) + continue; + auto wt_dt = fc_node.get_input_layout(1).data_type; + if (!data_type_traits::is_i4_u4(wt_dt)) + continue; + if (swiglu_prim->glu_type != ov::op::internal::GLU::GluType::Swish || + !(swiglu_prim->axis == -1 || swiglu_prim->axis == static_cast(node->get_output_layout(0).get_partial_shape().size()) - 1)) + continue; + GPU_DEBUG_TRACE_DETAIL << node->id() << " : fuse swiglu to " << fc_node.id() << std::endl; + GPU_DEBUG_TRACE_DETAIL << " - split axis : " << swiglu_prim->axis << std::endl; + GPU_DEBUG_TRACE_DETAIL << " - split length : " << swiglu_prim->split_lengths << std::endl; + p.fuse_nodes(fc_node, *node, &fusing_history); + } + } +} + void prepare_primitive_fusing::fuse_bias(program &p) { auto itr = p.get_processing_order().begin(); while (itr != p.get_processing_order().end()) { @@ -188,6 +230,17 @@ void prepare_primitive_fusing::fuse_bias(program &p) { if (!is_bias_add) continue; + for (auto& dep : eltw_node.get_dependencies()) { + auto& fused_prims = dep.first->get_fused_primitives(); + if (std::any_of(fused_prims.begin(), fused_prims.end(), [](const fused_primitive_desc& 
f_desc) { + return f_desc.is_type(); + })) { + GPU_DEBUG_TRACE_DETAIL << "Skip fusing " << eltw_node.id() << " to " << dep.first->id() << " because " + << dep.first->id() << " has fused swiglu." << std::endl; + continue; + } + } + auto is_3d_fully_connected = [](program_node& node) { if (!node.is_type()) return false; @@ -491,6 +544,13 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) { }; auto fc_supports_fusings = [&](fully_connected_node& node) -> bool { + auto& fused_prims = node.get_fused_primitives(); + if (std::any_of(fused_prims.begin(), fused_prims.end(), [](const fused_primitive_desc& f_desc) { + return f_desc.is_type(); + })) { + GPU_DEBUG_TRACE_DETAIL << node.id() << " has fused swiglu. Skip fusing more primitives" << std::endl; + return false; + } if (lo.has_all_enabled_onednn_impls_optimization_attribute() && lo.get_preferred_impl_type(node, format::any /*dummy*/) == impl_types::onednn) { return true; diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp index 04f691c2bd2ca9..110444c2c6255c 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp @@ -132,15 +132,16 @@ struct fully_connected_impl : typed_primitive_impl_ocl { return layouts; }; - auto get_fc_output_layout = [primitive](const std::vector& input_layouts, const layout& output_layout) { + auto get_fc_output_layout = [primitive](const std::vector& input_layouts, const layout& output_layout, bool swiglu_fused) { auto updated_out_layout = output_layout; auto input0_pshape = input_layouts[0].get_partial_shape(); auto input1_pshape = input_layouts[1].get_partial_shape(); ov::PartialShape updated_out_pshape {input0_pshape[0], input1_pshape[0]}; + const auto output_feature_size = swiglu_fused ? 
input1_pshape[0] / 2 : input1_pshape[0]; if (primitive->input_size == 3) { - updated_out_pshape = { input0_pshape[0], input0_pshape[1], input1_pshape[0] }; + updated_out_pshape = { input0_pshape[0], input0_pshape[1], output_feature_size}; } updated_out_layout.set_partial_shape(updated_out_pshape); @@ -149,6 +150,13 @@ struct fully_connected_impl : typed_primitive_impl_ocl { bool allow_new_shape_infer = impl_param.get_program().is_new_shape_infer(); auto updated_impl_param = impl_param; + bool swiglu_fused = false; + if (updated_impl_param.fused_desc.size() > 0) { + for (const auto& f : updated_impl_param.fused_desc) { + if (f.is_type()) + swiglu_fused = true; + } + } const auto input_layouts = get_fc_input_layouts(impl_param.input_layouts, allow_new_shape_infer); for (size_t i = 0; i < input_layouts.size(); ++i) { @@ -156,7 +164,7 @@ struct fully_connected_impl : typed_primitive_impl_ocl { } updated_impl_param.weights_layout = input_layouts[1]; - updated_impl_param.output_layouts[0] = get_fc_output_layout(input_layouts, impl_param.get_output_layout()); + updated_impl_param.output_layouts[0] = get_fc_output_layout(input_layouts, impl_param.get_output_layout(), swiglu_fused); return updated_impl_param; } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp index 0a999a5a124d3b..42d83a0265d290 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp @@ -32,11 +32,13 @@ #include "intel_gpu/primitives/embedding_bag.hpp" #include "intel_gpu/primitives/extract_image_patches.hpp" +#include "swiglu_inst.h" #include "activation_inst.h" #include "eltwise_inst.h" #include "quantize_inst.h" #include "reorder_inst.h" +#include "kernel_selector/kernels/swiglu/swiglu_kernel_base.h" #include "kernel_selector/kernels/activation/activation_kernel_base.h" #include 
"kernel_selector/kernels/depth_to_space/depth_to_space_kernel_base.h" #include "kernel_selector/kernels/eltwise/eltwise_kernel_base.h" @@ -1009,7 +1011,13 @@ kernel_selector::activation_function get_kernel_selector_activation_param(activa } std::shared_ptr convert_fuse_params(std::shared_ptr p) { - if (p->type() == activation::type_id()) { + if (p->type() == swiglu::type_id()) { + auto casted = std::dynamic_pointer_cast(p); + auto axis = casted->_desc->axis; + auto split_length = casted->_desc->split_lengths; + auto split_to_glu_idx = casted->_desc->split_to_glu_idx; + return std::make_shared(axis, split_length, split_to_glu_idx); + } else if (p->type() == activation::type_id()) { auto casted = std::dynamic_pointer_cast(p); auto desc = casted->_desc; kernel_selector::base_activation_params p; diff --git a/src/plugins/intel_gpu/src/graph/include/pass_manager.h b/src/plugins/intel_gpu/src/graph/include/pass_manager.h index 61c34c0eff548f..490076a37f788e 100644 --- a/src/plugins/intel_gpu/src/graph/include/pass_manager.h +++ b/src/plugins/intel_gpu/src/graph/include/pass_manager.h @@ -140,6 +140,7 @@ class prepare_primitive_fusing : public base_pass { private: void run(program& p) override; void fuse_bias(program &p); + void fuse_swiglu(program &p); void fuse_reorders(program& p); void fuse_simple_primitives(program &p); void fuse_constant_transposes(program &p); diff --git a/src/plugins/intel_gpu/src/graph/include/swiglu_inst.h b/src/plugins/intel_gpu/src/graph/include/swiglu_inst.h index 6a5ce08dc54bd2..755e9ab33c2db6 100644 --- a/src/plugins/intel_gpu/src/graph/include/swiglu_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/swiglu_inst.h @@ -10,6 +10,11 @@ namespace cldnn { +class SwigluFuseParams : public NodeFuseParams { +public: + SwigluFuseParams(std::shared_ptr desc) : NodeFuseParams(swiglu::type_id()), _desc(std::move(desc)) {} + std::shared_ptr _desc; +}; template <> struct typed_program_node : public typed_program_node_base { using parent = 
typed_program_node_base; @@ -19,6 +24,10 @@ struct typed_program_node : public typed_program_node_base { program_node& input(size_t index = 0) const { return get_dependency(index); } std::vector get_shape_infer_dependencies() const override { return {}; } + + std::shared_ptr get_fuse_params() const override { + return std::make_shared(typed_desc()); + } }; using swiglu_node = typed_program_node; diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index 5680eedcb8f87c..0737362405ff9c 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -38,6 +38,7 @@ #include "gather_inst.h" #include "broadcast_inst.h" #include "dynamic_quantize_inst.h" +#include "swiglu_inst.h" #include "experimental_detectron_roi_feature_extractor_inst.hpp" #include "impls/registry/implementation_manager.hpp" #include "impls/registry/registry.hpp" @@ -2606,6 +2607,16 @@ bool primitive_inst::is_valid_fusion() const { } else { if (fd.is_type() || fd.is_type()) continue; + if (fd.is_type()) { + OPENVINO_ASSERT(_node->is_type() && _node->get_preferred_impl_type() == impl_types::ocl); + if (!_node->get_selected_impl()) + return false; + // TODO : support ref kernel too + if (_node->get_selected_impl()->get_kernel_name().find("fully_connected_gpu_bf_tiled") != std::string::npos) + return true; + else + return false; + } OPENVINO_THROW("[GPU] Unsupported fused operation in dynamic shape: type=", fd.desc->type_string(), ", id=", fd.desc->id); } diff --git a/src/plugins/intel_gpu/src/graph/program_node.cpp b/src/plugins/intel_gpu/src/graph/program_node.cpp index 201fa3a155caa9..5161887b79e57a 100644 --- a/src/plugins/intel_gpu/src/graph/program_node.cpp +++ b/src/plugins/intel_gpu/src/graph/program_node.cpp @@ -10,6 +10,7 @@ #include "activation_inst.h" #include "reorder_inst.h" #include "quantize_inst.h" +#include "swiglu_inst.h" #include 
"intel_gpu/runtime/debug_configuration.hpp" #ifdef ENABLE_ONEDNN_FOR_GPU #include "convolution_inst.h" @@ -770,6 +771,15 @@ void program_node::save(cldnn::BinaryOutputBuffer& ob) const { ob << casted->_out_hi; ob << casted->_out_scale; ob << casted->_out_shift; + } else if (f_desc.f_param->type() == swiglu::type_id()) { + auto casted = std::dynamic_pointer_cast(f_desc.f_param); + if (get_program().has_node(casted->_desc->id)) { + ob << true; + ob << casted->_desc->id; + } else { + ob << false; + ob << casted->_desc; + } } ob << f_desc.deps.size(); @@ -975,6 +985,18 @@ void program_node::load(cldnn::BinaryInputBuffer& ib) { need_pre_shift, need_clamp, need_min_clamp, need_max_clamp, per_tensor_input_range, per_tensor_input_scale, per_tensor_input_shift, per_tensor_output_range, per_tensor_output_scale, per_tensor_output_shift, in_lo, in_hi, in_scale, in_shift, out_lo, out_hi, out_scale, out_shift); + } else if (f_param_type == swiglu::type_id()) { + ib >> exist_prim; + std::shared_ptr param_desc; + if (exist_prim) { + primitive_id desc_id; + ib >> desc_id; + param_desc = std::dynamic_pointer_cast(get_program().get_node_ptr(desc_id)->desc); + } else { + ib >> param_desc; + } + f_desc.f_param = std::make_shared(param_desc); + } else { f_desc.f_param = std::make_shared(f_param_type); } diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_bf_tiled.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_bf_tiled.cl index 201b59c160cf27..01c8e8853e350d 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_bf_tiled.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_bf_tiled.cl @@ -95,6 +95,12 @@ KERNEL(quantize_input)( # error "fully_connected_gpu_bf_tiled.cl - TILE_K must be one of {1, 2, 4}" # endif #endif + +#ifdef SWIGLU_LENGTH +# if OUTER_OFM != 2 +# error "fully_connected_gpu_bf_tiled.cl - outer_ofm should be 2 when swiglu is fused" +# endif +#endif 
#if TILE_K == 4 && COMPRESSED_WEIGHTS_INT4 && FILTER_LAYOUT_OS_IS_YX_OSV32_ISV2 // Data stored in memory : f0k0k1|f16k0k1|f0k2k3|f16k2k3 // => unpack as f0k0k1|f0k2k3|f16k0k1|f16k2k3 so that the weight access order is preserved @@ -210,14 +216,27 @@ inline void FUNC(fc_bf_tiled_kernel_default)( // full dispatch pipeline. uint feature_mini_block = gid % DISPATCH_FSV; uint batch_mini_block = gid / DISPATCH_FSV % DISPATCH_BSV; + #ifdef SWIGLU_LENGTH + uint feature_mega_block = gid / (DISPATCH_FSV * DISPATCH_BSV) % (CEIL_DIV(TILE_OUT_F_NUM, TILE_OFM * SIMD) / DISPATCH_FSV); + uint batch_mega_block = gid / (DISPATCH_FSV * DISPATCH_BSV * CEIL_DIV(TILE_OUT_F_NUM, TILE_OFM * SIMD) / DISPATCH_FSV); + #else uint feature_mega_block = gid / (DISPATCH_FSV * DISPATCH_BSV) % (CEIL_DIV(TILE_OUT_F_NUM, OUTER_OFM * TILE_OFM * SIMD) / DISPATCH_FSV); uint batch_mega_block = gid / (DISPATCH_FSV * DISPATCH_BSV * CEIL_DIV(TILE_OUT_F_NUM, OUTER_OFM * TILE_OFM * SIMD) / DISPATCH_FSV); + #endif #if USE_SLM + #ifdef SWIGLU_LENGTH + uint out_f = gid * (TILE_OFM * SIMD); + #else uint out_f = gid * (OUTER_OFM * TILE_OFM * SIMD); + #endif uint out_b = LWS_BATCHES * TILE_B * (uint)get_group_id(2) + local_id * TILE_B; #else + #ifdef SWIGLU_LENGTH + uint out_f = (feature_mega_block * DISPATCH_FSV + feature_mini_block) * (TILE_OFM * SIMD); + #else uint out_f = (feature_mega_block * DISPATCH_FSV + feature_mini_block) * (OUTER_OFM * TILE_OFM * SIMD); + #endif uint out_b = ((batch_mega_block * DISPATCH_BSV + batch_mini_block) * TILE_B); #endif @@ -299,9 +318,20 @@ inline void FUNC(fc_bf_tiled_kernel_default)( ACCUMULATOR_TYPE* d_zps = (ACCUMULATOR_TYPE*)(&d_zp); #endif + ACTIVATION_VEC_TYPE activated[TILE_B] = { }; #if OUTER_OFM > 1 uint input_offset_init = input_offset; - unroll_for (uint oi = 0; oi < OUTER_OFM; ++oi) { + uint weights_offset_init = weights_offset; + uint out_f_init = out_f; + __attribute__((opencl_unroll_hint(1))) + for (uint oi = 0; oi < OUTER_OFM; ++oi) { + input_offset = 
input_offset_init; + #ifdef SWIGLU_LENGTH + weights_offset = weights_offset_init + oi * (FILTER_IFM_NUM / (TILE_K_OFM / TILE_K_OFM_PACKED) ) * SWIGLU_LENGTH; + out_f += SWIGLU_LENGTH * oi; + #else + out_f += TILE_OFM * SIMD * oi; + #endif #endif #if REALIGN_FP16_OFFSET @@ -669,14 +699,38 @@ inline void FUNC(fc_bf_tiled_kernel_default)( #endif // MAIN_LOOP_ELEMENTS_COUNT % (TILE_IFM * SIMD) != 0 // ===================================================================================================================================== // Post-processing: bias, activation, fused-ops - ACTIVATION_VEC_TYPE activated[TILE_B] = { }; - for (uint bi = 0; bi < TILE_B; ++bi) { + unroll_for (uint bi = 0; bi < TILE_B; ++bi) { + #ifdef SWIGLU_LENGTH + #if SWIGLU_SPLIT_TO_GLU_IDX == 0 + if (oi == 0) { + // swish + activated[bi] = TO_ACTIVATION_VEC_TYPE(acc[bi]); + activated[bi] /= (ACCUMULATOR_VAL_ONE + native_exp(-(ACCUMULATOR_VAL_ONE * activated[bi]))); + } else { + activated[bi] *= TO_ACTIVATION_VEC_TYPE(acc[bi]); + } + #else + if (oi == 0) { + // swish + activated[bi] = TO_ACTIVATION_VEC_TYPE(acc[bi]); + } else { + acc[bi] /= (ACCUMULATOR_VAL_ONE + native_exp(-(ACCUMULATOR_VAL_ONE * acc[bi]))); + activated[bi] *= TO_ACTIVATION_VEC_TYPE(acc[bi]); + } + #endif + #else activated[bi] = TO_ACTIVATION_VEC_TYPE(acc[bi]); + #endif #if OUTER_OFM > 1 acc[bi] = 0; #endif } +#if OUTER_OFM > 1 && defined(SWIGLU_LENGTH) + } + out_f = out_f_init; +#endif + #if BIAS_TERM #if TILE_OUT_F_NUM % (OUTER_OFM * TILE_OFM * SIMD) == 0 BIAS_VEC_TYPE bias = BIAS_BLOCK_READ(biases, out_f); @@ -746,9 +800,7 @@ inline void FUNC(fc_bf_tiled_kernel_default)( output_offset += TILE_OUT_B_PITCH - TILE_OFM * SIMD; } } -#if OUTER_OFM > 1 - out_f += TILE_OFM * SIMD; - input_offset = input_offset_init; +#if OUTER_OFM > 1 && !defined(SWIGLU_LENGTH) } #endif // ===================================================================================================================================== @@ -816,8 +868,14 @@ inline 
void FUNC(fc_bf_tiled_kernel_dyn_quan)( // full dispatch pipeline. uint feature_mini_block = gid % DISPATCH_FSV; uint batch_mini_block = gid / DISPATCH_FSV % DISPATCH_BSV; + #ifdef SWIGLU_LENGTH uint feature_mega_block = gid / (DISPATCH_FSV * DISPATCH_BSV) % (CEIL_DIV(TILE_OUT_F_NUM, TILE_OFM * SIMD) / DISPATCH_FSV); uint batch_mega_block = gid / (DISPATCH_FSV * DISPATCH_BSV * CEIL_DIV(TILE_OUT_F_NUM, TILE_OFM * SIMD) / DISPATCH_FSV); + #else + uint feature_mega_block = gid / (DISPATCH_FSV * DISPATCH_BSV) % (CEIL_DIV(TILE_OUT_F_NUM, OUTER_OFM * TILE_OFM * SIMD) / DISPATCH_FSV); + uint batch_mega_block = gid / (DISPATCH_FSV * DISPATCH_BSV * CEIL_DIV(TILE_OUT_F_NUM, OUTER_OFM * TILE_OFM * SIMD) / DISPATCH_FSV); + #endif + FILTER_VEC_TYPE wei = 0; @@ -895,6 +953,22 @@ inline void FUNC(fc_bf_tiled_kernel_dyn_quan)( ACCUMULATOR_TYPE* d_zps = (ACCUMULATOR_TYPE*)(&d_zp); #endif + ACTIVATION_VEC_TYPE activated[TILE_B] = { }; +#if OUTER_OFM > 1 + uint input_offset_init = input_offset; + uint weights_offset_init = weights_offset; + uint out_f_init = out_f; + __attribute__((opencl_unroll_hint(1))) + for (uint oi = 0; oi < OUTER_OFM; ++oi) { + input_offset = input_offset_init; + #ifdef SWIGLU_LENGTH + weights_offset = weights_offset_init + oi * (FILTER_IFM_NUM / (TILE_K_OFM / TILE_K_OFM_PACKED) ) * SWIGLU_LENGTH; + out_f += SWIGLU_LENGTH * oi; + #else + out_f += TILE_OFM * SIMD * oi; + #endif +#endif + // ===================================================================================================================================== // Main computation loop const uint iterations = MAIN_LOOP_ELEMENTS_COUNT / TILE_IFM_ELEMENTS_SIZE; // TILE_IFM_ELEMENTS_SIZE : (TILE_IFM * SIMD) @@ -1164,11 +1238,37 @@ inline void FUNC(fc_bf_tiled_kernel_dyn_quan)( // ===================================================================================================================================== // Post-processing: bias, activation, fused-ops - ACTIVATION_VEC_TYPE activated[TILE_B] = { }; 
for (uint bi = 0; bi < TILE_B; ++bi) { + #ifdef SWIGLU_LENGTH + #if SWIGLU_SPLIT_TO_GLU_IDX == 0 + if (oi == 0) { + activated[bi] = TO_ACTIVATION_VEC_TYPE(acc[bi]); + activated[bi] /= (ACCUMULATOR_VAL_ONE + native_exp(-(ACCUMULATOR_VAL_ONE * activated[bi]))); + } else { + activated[bi] *= TO_ACTIVATION_VEC_TYPE(acc[bi]); + } + #else + if (oi == 0) { + // swish + activated[bi] = TO_ACTIVATION_VEC_TYPE(acc[bi]); + } else { + acc[bi] /= (ACCUMULATOR_VAL_ONE + native_exp(-(ACCUMULATOR_VAL_ONE * acc[bi]))); + activated[bi] *= TO_ACTIVATION_VEC_TYPE(acc[bi]); + } + #endif + #else activated[bi] = TO_ACTIVATION_VEC_TYPE(acc[bi]); + #endif +#if OUTER_OFM > 1 + acc[bi] = 0; +#endif } +#if OUTER_OFM > 1 && defined(SWIGLU_LENGTH) + } + out_f = out_f_init; +#endif + #if BIAS_TERM #if TILE_OUT_F_NUM % (TILE_OFM * SIMD) == 0 BIAS_VEC_TYPE bias = BIAS_BLOCK_READ(biases, out_f); @@ -1240,6 +1340,9 @@ inline void FUNC(fc_bf_tiled_kernel_dyn_quan)( output_offset += TILE_OUT_B_PITCH - TILE_OFM * SIMD; } } +#if OUTER_OFM > 1 && !defined(SWIGLU_LENGTH) + } +#endif // ===================================================================================================================================== } #endif diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/include/fully_connected_gpu_bf_tiled_common.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/include/fully_connected_gpu_bf_tiled_common.cl index ddffa87b202816..ca5c1ea3646d02 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/include/fully_connected_gpu_bf_tiled_common.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/include/fully_connected_gpu_bf_tiled_common.cl @@ -25,7 +25,6 @@ inline void (FUNC_NAME)( ) { uint gid = (uint)get_group_id(0); uint sglid = (uint)get_sub_group_local_id(); - // Dispatch as bs_fs_bsv_fsv, where bsv = DISPATCH_BSV and fsv = DISPATCH_FSV. 
// This allows more fine grained control over dispatch order than using work-groups and // avoids requirement of threads being available for whole work-group. @@ -33,10 +32,19 @@ inline void (FUNC_NAME)( // full dispatch pipeline. uint feature_mini_block = gid % DISPATCH_FSV; uint batch_mini_block = gid / DISPATCH_FSV % DISPATCH_BSV; + #ifdef SWIGLU_LENGTH + uint feature_mega_block = gid / (DISPATCH_FSV * DISPATCH_BSV) % (CEIL_DIV(TILE_OUT_F_NUM, TILE_OFM * SIMD) / DISPATCH_FSV); + uint batch_mega_block = gid / (DISPATCH_FSV * DISPATCH_BSV * CEIL_DIV(TILE_OUT_F_NUM, TILE_OFM * SIMD) / DISPATCH_FSV); + #else uint feature_mega_block = gid / (DISPATCH_FSV * DISPATCH_BSV) % (CEIL_DIV(TILE_OUT_F_NUM, OUTER_OFM * TILE_OFM * SIMD) / DISPATCH_FSV); uint batch_mega_block = gid / (DISPATCH_FSV * DISPATCH_BSV * CEIL_DIV(TILE_OUT_F_NUM, OUTER_OFM * TILE_OFM * SIMD) / DISPATCH_FSV); + #endif + #ifdef SWIGLU_LENGTH + uint out_f = (feature_mega_block * DISPATCH_FSV + feature_mini_block) * (TILE_OFM * SIMD); + #else uint out_f = (feature_mega_block * DISPATCH_FSV + feature_mini_block) * (OUTER_OFM * TILE_OFM * SIMD); + #endif uint out_b = ((batch_mega_block * DISPATCH_BSV + batch_mini_block) * FORCED_TILE_B); ACCUMULATOR_VEC_TYPE acc[FORCED_TILE_B] = { }; @@ -90,9 +98,19 @@ inline void (FUNC_NAME)( ACCUMULATOR_TYPE* d_zps = (ACCUMULATOR_TYPE*)(&d_zp); #endif + ACTIVATION_VEC_TYPE activated[FORCED_TILE_B] = { }; #if OUTER_OFM > 1 uint input_offset_init = input_offset; + uint weights_offset_init = weights_offset; + uint out_f_init = out_f; unroll_for (uint oi = 0; oi < OUTER_OFM; ++oi) { + input_offset = input_offset_init; + #ifdef SWIGLU_LENGTH + weights_offset = weights_offset_init + oi * (FILTER_IFM_NUM / (TILE_K_OFM / TILE_K_OFM_PACKED) ) * SWIGLU_LENGTH; + out_f += SWIGLU_LENGTH * oi; + #else + out_f += TILE_OFM * SIMD * oi; + #endif #endif #if REALIGN_FP16_OFFSET @@ -297,14 +315,37 @@ inline void (FUNC_NAME)( #endif // MAIN_LOOP_ELEMENTS_COUNT % (TILE_IFM * SIMD) != 0 // 
===================================================================================================================================== // Post-processing: bias, activation, fused-ops - ACTIVATION_VEC_TYPE activated[FORCED_TILE_B] = { }; for (uint bi = 0; bi < FORCED_TILE_B; ++bi) { + #ifdef SWIGLU_LENGTH + #if SWIGLU_SPLIT_TO_GLU_IDX == 0 + if (oi == 0) { + activated[bi] = TO_ACTIVATION_VEC_TYPE(acc[bi]); + activated[bi] /= (ACCUMULATOR_VAL_ONE + native_exp(-(ACCUMULATOR_VAL_ONE * activated[bi]))); + } else { + activated[bi] *= TO_ACTIVATION_VEC_TYPE(acc[bi]); + } + #else + if (oi == 0) { + // swish + activated[bi] = TO_ACTIVATION_VEC_TYPE(acc[bi]); + } else { + acc[bi] /= (ACCUMULATOR_VAL_ONE + native_exp(-(ACCUMULATOR_VAL_ONE * acc[bi]))); + activated[bi] *= TO_ACTIVATION_VEC_TYPE(acc[bi]); + } + #endif + #else activated[bi] = TO_ACTIVATION_VEC_TYPE(acc[bi]); + #endif #if OUTER_OFM > 1 acc[bi] = 0; #endif } +#if OUTER_OFM > 1 && defined(SWIGLU_LENGTH) + } + out_f = out_f_init; +#endif + #if BIAS_TERM #if TILE_OUT_F_NUM % (OUTER_OFM * TILE_OFM * SIMD) == 0 BIAS_VEC_TYPE bias = BIAS_BLOCK_READ(biases, out_f); @@ -396,9 +437,7 @@ inline void (FUNC_NAME)( output_offset += TILE_OUT_B_PITCH - TILE_OFM * SIMD; } } -#if OUTER_OFM > 1 - out_f += TILE_OFM * SIMD; - input_offset = input_offset_init; +#if OUTER_OFM > 1 && !defined(SWIGLU_LENGTH) } #endif // ===================================================================================================================================== diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp index 02304512637783..46e8f7f1104f0d 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp @@ -4,6 +4,7 @@ 
#include "fully_connected_kernel_bf_tiled.h" #include "kernel_selector_utils.h" +#include "swiglu/swiglu_kernel_base.h" #include #include #include "common_types.h" @@ -163,7 +164,21 @@ static bool is_weight_small_kn(const fully_connected_params& params, size_t outp return output_f / 2 /*most frequently used tile_ofm*/ <= min_num_threads; } +static bool is_swiglu_fused(const fully_connected_params& params) { + bool swiglu_fused = false; + if (!params.fused_ops.empty()) { + for (auto p : params.fused_ops) { + if (p.GetType() == kernel_selector::KernelType::SWIGLU) + swiglu_fused = true; + } + } + if (swiglu_fused) + OPENVINO_ASSERT(params.fused_ops.size() == 1); + return swiglu_fused; +} static bool is_suitable_outer_ofm(const fully_connected_params& params, size_t output_f) { + if (is_swiglu_fused(params)) + return true; size_t min_num_threads = params.engineInfo.computeUnitsCount * simd; return (params.weights.OFM().v > params.weights.IFM().v * 6 && output_f / 8 /* tile_ofm=4 and outer_ofm=2 */ > min_num_threads * 1.5); @@ -406,6 +421,8 @@ FullyConnected_bf_tiled::GetAutoTuneParams(const fully_connected_params& params, while (max_tile_ofm * 2 * simd <= output_f && max_tile_ofm < 4) max_tile_ofm *= 2; + bool swiglu_fused = is_swiglu_fused(params); + if (params.weights.GetDType() == WeightsType::UINT4 || params.weights.GetDType() == WeightsType::INT4 || (is_weight_dyn_quantizable(params) && should_dynamic_quantize(params))) { // Only 4bit weight type is fully optimized to use SLM. In default kernel, SLM is not applied to 8bit weight. 
@@ -426,30 +443,39 @@ FullyConnected_bf_tiled::GetAutoTuneParams(const fully_connected_params& params, if (params.weights.GetLayout() == WeightsLayout::os_iyx_osv16) { return selector.Default(tune_params(1, 1, 4, 4, 1, 1, 1, EXE_MODE_DEFAULT)); } else if (params.weights.GetLayout() == WeightsLayout::os_is_yx_osv64_isv2) { - selector.Case(tune_params(1, 4, 4, 2, 2, 1, 1, EXE_MODE_DEFAULT)) - .Case(tune_params(1, 4, 4, 2, 1, 1, 1, EXE_MODE_DEFAULT)); + // Here : b1 static + if (swiglu_fused) { + return selector.Default(tune_params(1, 4, 4, 2, 2, 1, 1, EXE_MODE_DEFAULT)); + } else { + selector.Case(tune_params(1, 4, 4, 2, 2, 1, 1, EXE_MODE_DEFAULT)) + .Case(tune_params(1, 4, 4, 2, 1, 1, 1, EXE_MODE_DEFAULT)); + } } else { - return selector.Default(tune_params(1, 2, 4, 2, 1, 1, 1, EXE_MODE_DEFAULT)); + if (swiglu_fused) { + return selector.Default(tune_params(1, 2, 4, 2, 2, 1, 1, EXE_MODE_DEFAULT)); + } else { + return selector.Default(tune_params(1, 2, 4, 2, 1, 1, 1, EXE_MODE_DEFAULT)); + } } } } else { // Try to use SLM kernels if possible + unsigned int forced_outer_ofm = swiglu_fused ? 
2 : 1; if (preferred_kernel_type != KernelType::DEFAULT) { if (params.is_shape_agnostic && !should_dynamic_quantize(params)) { - selector.Case(tune_params(16, 2, 2, 4, 1, 1, 1, EXE_MODE_DEFAULT, KernelType::SLM)) - .Case(tune_params(16, 2, 1, 4, 1, 1, 1, EXE_MODE_DEFAULT, KernelType::SLM)); + selector.Case(tune_params(16, 2, 2, 4, forced_outer_ofm, 1, 1, EXE_MODE_DEFAULT, KernelType::SLM)) + .Case(tune_params(16, 2, 1, 4, forced_outer_ofm, 1, 1, EXE_MODE_DEFAULT, KernelType::SLM)); } - - selector.Case(tune_params(8, 2, 2, 4, 1, 1, 1, EXE_MODE_DEFAULT, KernelType::SLM)) - .Case(tune_params(8, 2, 1, 4, 1, 1, 1, EXE_MODE_DEFAULT, KernelType::SLM)); + selector.Case(tune_params(8, 2, 2, 4, forced_outer_ofm, 1, 1, EXE_MODE_DEFAULT, KernelType::SLM)) + .Case(tune_params(8, 2, 1, 4, forced_outer_ofm, 1, 1, EXE_MODE_DEFAULT, KernelType::SLM)); } if (params.weights.GetLayout() == WeightsLayout::os_iyx_osv16) - return selector.Default(tune_params(8, 1, 1, 4, 1, 1, 1, EXE_MODE_DEFAULT)); + return selector.Default(tune_params(8, 1, 1, 4, forced_outer_ofm, 1, 1, EXE_MODE_DEFAULT)); else if (params.weights.GetLayout() == WeightsLayout::os_is_yx_osv64_isv2) - return selector.Default(tune_params(8, 4, 1, 2, 1, 1, 1, EXE_MODE_DEFAULT)); + return selector.Default(tune_params(8, 4, 1, 2, forced_outer_ofm, 1, 1, EXE_MODE_DEFAULT)); else - return selector.Default(tune_params(8, 2, 1, 4, 1, 1, 1, EXE_MODE_DEFAULT)); + return selector.Default(tune_params(8, 2, 1, 4, forced_outer_ofm, 1, 1, EXE_MODE_DEFAULT)); } } else if (params.compressed && params.engineInfo.supports_immad) { return selector.Default(tune_params(1, 1, 1, 4, 1, 1, 1, EXE_MODE_DEFAULT)); @@ -526,8 +552,12 @@ FullyConnected_bf_tiled::SetDefault(const fully_connected_params& params, int au kernel_type = kernel_number == 0 ? 
KernelType::DEFAULT : KernelType::SLM; auto tparams = GetAutoTuneParams(params, kernel_type, autoTuneIndex); + std::pair threads; + if (is_swiglu_fused(params)) + threads = get_output_aligned_bf_size(params, true, tparams.tile_b, tparams.tile_ofm * simd); + else + threads = get_output_aligned_bf_size(params, true, tparams.tile_b, tparams.tile_ofm * tparams.outer_ofm * simd); - auto threads = get_output_aligned_bf_size(params, true, tparams.tile_b, tparams.tile_ofm * tparams.outer_ofm * simd); auto batch_threads = threads.first; auto feature_threads = threads.second; @@ -575,6 +605,13 @@ JitConstants FullyConnected_bf_tiled::GetJitConstants(const fully_connected_para size_t tile_k_ofm_packed = tile_k_ofm; size_t quantize_grp_size = get_dynamic_quantize_group_size(params); + if (is_swiglu_fused(params)) { + auto split_length = params.fused_ops[0].GetOpParams()->split_length; + auto split_to_glu_idx = params.fused_ops[0].GetOpParams()->split_to_glu_idx; + jit.AddConstant(MakeJitConstant("SWIGLU_LENGTH", split_length)); + jit.AddConstant(MakeJitConstant("SWIGLU_SPLIT_TO_GLU_IDX", split_to_glu_idx)); + } + bool add_decompress_scale_post_op = false; WeightsType weights_dt = params.weights.GetDType(); if (weights_dt == WeightsType::UINT4 || weights_dt == WeightsType::INT4) { @@ -723,7 +760,7 @@ JitConstants FullyConnected_bf_tiled::GetJitConstants(const fully_connected_para jit.AddConstant(MakeJitConstant("BATCH_SIZE", "(OUTPUT_BATCH_NUM)")); } - if (!params.fused_ops.empty()) { + if (!params.fused_ops.empty() && !is_swiglu_fused(params)) { std::vector idx_order_scalar = { "(out_b + bi)", "(out_f + sglid)", "0", "0" }; std::vector idx_order_vec = { "(out_b + bi)", "(out_f + sglid + fi * SIMD)", "0", "0" }; if (params.outputs[0].GetLayout() == DataLayout::bfyx) { @@ -828,7 +865,7 @@ KernelsData FullyConnected_bf_tiled::GetTunedKernelsDataByIndex(const Params &pa auto output_f = get_output_aligned_bf_size(fc_params, false).second; WeightsLayout weights_layout = 
WeightsLayout::os_iyx_osv16; - if (fc_params.compressed && fc_params.inputs[0].GetDType() == Datatype::F16 + if (!is_swiglu_fused(fc_params) && fc_params.compressed && fc_params.inputs[0].GetDType() == Datatype::F16 && (fc_params.weights.GetLayout() == WeightsLayout::oiyx || fc_params.weights.GetLayout() == WeightsLayout::os_is_yx_osv64_isv2) && (fc_params.weights.GetDType() == WeightsType::INT4 || fc_params.weights.GetDType() == WeightsType::UINT4) && is_weight_horizontal(fc_params, output_f)) { diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.h b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.h index cbbf52adf344ce..1093c7377bf76f 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.h +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.h @@ -76,7 +76,8 @@ class FullyConnected_bf_tiled : public FullyConnectedKernelBase { std::vector GetSupportedFusedOps() const override { return { FusedOpType::ACTIVATION, FusedOpType::ELTWISE, - FusedOpType::QUANTIZE }; + FusedOpType::QUANTIZE, + FusedOpType::SWIGLU }; } JitConstants GetJitConstants(const fully_connected_params& params, const DispatchData& dispatchData) const override; bool Validate(const Params& params) const override; diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/swiglu/swiglu_kernel_base.h b/src/plugins/intel_gpu/src/kernel_selector/kernels/swiglu/swiglu_kernel_base.h index 2f5c046690f78d..bb5625ba087a2d 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/swiglu/swiglu_kernel_base.h +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/swiglu/swiglu_kernel_base.h @@ -21,6 +21,17 @@ struct swiglu_params : public base_params { int32_t split_to_glu_idx; }; +struct swiglu_fuse_params : fuse_params { + explicit swiglu_fuse_params(int32_t axis, size_t split_lengths, 
size_t split_to_glu_idx) + : fuse_params(KernelType::SWIGLU), + axis(axis), + split_length(split_lengths), + split_to_glu_idx(split_to_glu_idx) {} + int32_t axis; + size_t split_length; + size_t split_to_glu_idx; +}; + class SwiGLUKernelBase : public KernelBaseOpenCL { public: using KernelBaseOpenCL::KernelBaseOpenCL; diff --git a/src/plugins/intel_gpu/src/plugin/ops/swiglu.cpp b/src/plugins/intel_gpu/src/plugin/ops/swiglu.cpp index 5df2cafd41a41f..23b44dcc1a4677 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/swiglu.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/swiglu.cpp @@ -21,7 +21,7 @@ static void CreateGLUOp(ProgramBuilder& p, const std::shared_ptr& op) { if (p.use_new_shape_infer()) { auto prim = cldnn::swiglu(primitive_name, inputs[0], - op->get_axis(), + (op->get_axis() < 0 ? op->get_input_partial_shape(0).size() + op->get_axis() : op->get_axis()), op->get_split_lengths(), op->get_glu_type(), op->get_split_to_glu_idx(), @@ -31,7 +31,7 @@ static void CreateGLUOp(ProgramBuilder& p, const std::shared_ptr& op) { } else { auto prim = cldnn::swiglu(primitive_name, inputs[0], - op->get_axis(), + (op->get_axis() < 0 ? 
op->get_input_partial_shape(0).size() + op->get_axis() : op->get_axis()), op->get_split_lengths(), op->get_glu_type(), op->get_split_to_glu_idx(), diff --git a/src/plugins/intel_gpu/src/plugin/transformations/fc_horizontal_fusion.cpp b/src/plugins/intel_gpu/src/plugin/transformations/fc_horizontal_fusion.cpp index fcb339531c1883..327de1424c34c9 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations/fc_horizontal_fusion.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations/fc_horizontal_fusion.cpp @@ -18,16 +18,25 @@ namespace ov { namespace intel_gpu { -FullyConnectedHorizontalFusion::FullyConnectedHorizontalFusion() { +FullyConnectedHorizontalFusion::FullyConnectedHorizontalFusion(bool fuse_mlp_swiglu) { using namespace ov::pass::pattern; - auto is_target_pattern = [](const Output& output) { + GPU_DEBUG_GET_INSTANCE(debug_config); + // Three FCs connected to the same input + size_t min_num_fcs_to_fuse = 3; + // Note: + // For cldnn, two fcs in mlp will be fused at horizontal fc fusion, and then swiglu will be fused at prepare_primitive_fusion + // i.e., eltwise((fc + swish), fc) => fused_fc + swiglu => fused_fc_swiglu + // Onednn gemms are to be handled in a different way (TBD) + if (fuse_mlp_swiglu) + min_num_fcs_to_fuse = 2; + auto is_target_pattern = [min_num_fcs_to_fuse](const Output& output) { + const int max_num_fcs_to_fuse = 3; // Currently this pass targets only compressed FCs (QKV) on dynamic generative models // inputs: input, weight, bias, scale, [zp] // Bias/scale/zp are constant or none // if it is not constant, the only allowed cases are Constant => convert // All FCs have same # of valid inputs (e.g., if one of the fc has zp, all fcs have zp) - auto is_constant = [](const std::shared_ptr node) { if (std::dynamic_pointer_cast(node)) return true; @@ -40,9 +49,7 @@ FullyConnectedHorizontalFusion::FullyConnectedHorizontalFusion() { auto is_placeholder = [](const std::shared_ptr node) { return std::dynamic_pointer_cast(node); }; - // Three
FCs connected to the same input - const int min_num_fcs_to_fuse = 3; - const int max_num_fcs_to_fuse = 3; + const auto& fc = std::dynamic_pointer_cast(output.get_node_shared_ptr()); const auto& input = fc->get_input_node_shared_ptr(0); if (!fc->get_input_partial_shape(0).is_dynamic()) diff --git a/src/plugins/intel_gpu/src/plugin/transformations/fc_horizontal_fusion.hpp b/src/plugins/intel_gpu/src/plugin/transformations/fc_horizontal_fusion.hpp index b6a852354bad8d..67abaa3df54357 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations/fc_horizontal_fusion.hpp +++ b/src/plugins/intel_gpu/src/plugin/transformations/fc_horizontal_fusion.hpp @@ -12,7 +12,7 @@ namespace intel_gpu { class FullyConnectedHorizontalFusion: public ov::pass::MatcherPass { public: OPENVINO_RTTI("FullyConnectedHorizontalFusion", "0"); - FullyConnectedHorizontalFusion(); + FullyConnectedHorizontalFusion(bool fuse_mlp_swiglu = false); }; } // namespace intel_gpu diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp index fcb88560944854..e47ccbb09a9c43 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp @@ -913,12 +913,18 @@ void TransformationsPipeline::apply(std::shared_ptr func) { manager.register_pass(); bool disable_horizontal_fc_fusion = false; + bool disable_fc_swiglu_fusion = false; GPU_DEBUG_GET_INSTANCE(debug_config); GPU_DEBUG_IF(debug_config->disable_horizontal_fc_fusion == 1) disable_horizontal_fc_fusion = true; - + GPU_DEBUG_IF(debug_config->disable_fc_swiglu_fusion == 1) + disable_fc_swiglu_fusion = true; + // mlp fusion is only supported for cldnn on high-performance GPUs + bool fuse_mlp_swiglu = !device_info.supports_immad && + device_info.execution_units_count >= 128 && + !disable_fc_swiglu_fusion; if (!disable_horizontal_fc_fusion) - manager.register_pass(); + manager.register_pass(fuse_mlp_swiglu);
// ZP should not be folded for FC. But still, ZP should be folded for Gather. // Therefore, run MarkDequantizationSubgraph again to fold ZP constant. diff --git a/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp b/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp index 4a68355e1bc8ba..65ca31f16c720c 100644 --- a/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp +++ b/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp @@ -191,6 +191,7 @@ static void print_help_messages() { message_list.emplace_back("OV_GPU_DynamicQuantizeGroupSize", "Specify a group size of dynamic quantization to enable " "dynamic quantization for Fully-connected primitive."); message_list.emplace_back("OV_GPU_DisableHorizontalFCFusion", "Disable horizontal fc fusion"); + message_list.emplace_back("OV_GPU_DisableFCSwigluFusion", "Disable fc + swiglu fusion"); message_list.emplace_back("OV_GPU_DumpIteration", "Dump n-th execution of network, separated by space."); message_list.emplace_back("OV_GPU_MemPreallocationOptions", "Controls buffer pre-allocation feature. 
Expects 4 values separated by space in " "the following order: number of iterations for pre-allocation(int), max size of single iteration in bytes(int), " @@ -259,7 +260,8 @@ debug_configuration::debug_configuration() , use_usm_host(0) , use_kv_cache_compression(-1) , dynamic_quantize_group_size(DYNAMIC_QUANTIZE_GROUP_SIZE_NOT_SET) - , disable_horizontal_fc_fusion(0) { + , disable_horizontal_fc_fusion(0) + , disable_fc_swiglu_fusion(0) { #ifdef GPU_DEBUG_CONFIG get_gpu_debug_env_var("Help", help); get_common_debug_env_var("Verbose", verbose); @@ -314,6 +316,7 @@ debug_configuration::debug_configuration() get_gpu_debug_env_var("KVCacheCompression", use_kv_cache_compression); get_gpu_debug_env_var("DynamicQuantizeGroupSize", dynamic_quantize_group_size); get_gpu_debug_env_var("DisableHorizontalFCFusion", disable_horizontal_fc_fusion); + get_gpu_debug_env_var("DisableFCSwigluFusion", disable_fc_swiglu_fusion); std::string dump_iteration_str; get_gpu_debug_env_var("DumpIteration", dump_iteration_str); std::string mem_preallocation_params_str; diff --git a/src/plugins/intel_gpu/tests/unit/fusions/fully_connected_fusion_test.cpp b/src/plugins/intel_gpu/tests/unit/fusions/fully_connected_fusion_test.cpp index 5e9b5134fb3802..09e164742f3fd9 100644 --- a/src/plugins/intel_gpu/tests/unit/fusions/fully_connected_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/fusions/fully_connected_fusion_test.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -73,7 +74,7 @@ class FullyConnectedFusingTest : public ::BaseFusingTest{ + fully_connected_test_params{ CASE_FC_FP16_INT4_SWIGLU_1, 2, 3 }, + fully_connected_test_params{ CASE_FC_FP16_INT4_SWIGLU_2, 2, 3 }, + fully_connected_test_params{ CASE_FC_FP16_INT4_SWIGLU_3, 2, 3 }, +})); + class fc_imad_int8_eltwise_add_ocl_dynamic : public FullyConnectedFusingTest { public: void run_test() { diff --git a/src/plugins/intel_gpu/tests/unit/fusions/fusion_test_common.hpp 
b/src/plugins/intel_gpu/tests/unit/fusions/fusion_test_common.hpp index eb0f63c651e50d..c469925083b775 100644 --- a/src/plugins/intel_gpu/tests/unit/fusions/fusion_test_common.hpp +++ b/src/plugins/intel_gpu/tests/unit/fusions/fusion_test_common.hpp @@ -147,6 +147,12 @@ class BaseFusingTest : public ::testing::TestWithParam { } else if (l.data_type == data_types::i8) { VF rnd_vec(s.count(), static_cast(fill_value)); set_values(prim, rnd_vec); + } else if (l.data_type == data_types::u4) { + VF rnd_vec(s.count()/2, static_cast(fill_value)); + set_values(prim, rnd_vec); + } else if (l.data_type == data_types::i4) { + VF rnd_vec(s.count()/2, static_cast(fill_value)); + set_values(prim, rnd_vec); } else { throw std::runtime_error("get_mem: Unsupported precision"); } @@ -186,6 +192,12 @@ class BaseFusingTest : public ::testing::TestWithParam { } else if (l.data_type == data_types::u8) { VF rnd_vec = rg.generate_random_1d(s.count(), min, max); set_values(prim, rnd_vec); + } else if (l.data_type == data_types::i4) { + VF rnd_vec = rg.generate_random_1d(s.count()/2, min, max); + set_values(prim, rnd_vec); + } else if (l.data_type == data_types::u4) { + VF rnd_vec = rg.generate_random_1d(s.count()/2, min, max); + set_values(prim, rnd_vec); } return prim; From 94f647dc5f0e3a349aadabf4ae377aa6a2b063b4 Mon Sep 17 00:00:00 2001 From: Pawel Raasz Date: Fri, 6 Dec 2024 16:50:59 +0100 Subject: [PATCH 17/43] [core] Extend Core API to accept std::filesystem::path when build with cpp17 (#27950) ### Details: - The `ov::Core` accepts `std::filesystem::path` in functions where a string is used as a path.
### Tickets: - CVS-157908 --------- Signed-off-by: Pawel Raasz --- .../shape_inference/include/ov_optional.hpp | 4 ++ src/core/tests/pattern.cpp | 6 +- .../include/openvino/runtime/core.hpp | 50 ++++++++++++++ .../tests/functional/ov_core_test.cpp | 69 +++++++++++++++++-- .../tests/functional/ov_extension_test.cpp | 6 ++ 5 files changed, 126 insertions(+), 9 deletions(-) diff --git a/src/core/shape_inference/include/ov_optional.hpp b/src/core/shape_inference/include/ov_optional.hpp index f7f8b474f9a5a6..15973ae0c8a5f8 100644 --- a/src/core/shape_inference/include/ov_optional.hpp +++ b/src/core/shape_inference/include/ov_optional.hpp @@ -7,6 +7,9 @@ #include namespace ov { +#ifdef OPENVINO_CPP_17_VER +using optional = std::optional; +#else /** * @brief Store optional object of type T (basic version of std::optional). @@ -132,4 +135,5 @@ class optional { bool m_has_value = false; Storage m_opt{}; }; +#endif } // namespace ov diff --git a/src/core/tests/pattern.cpp b/src/core/tests/pattern.cpp index 050c36b65baad1..982e59b55f0f97 100644 --- a/src/core/tests/pattern.cpp +++ b/src/core/tests/pattern.cpp @@ -558,8 +558,8 @@ TEST(pattern, multiple_optionals_in_row) { // Pattern: auto in = wrap_type(); - auto pattern_convert = optional(in); - auto pattern_relu = optional(pattern_convert); + auto pattern_convert = pattern::optional(in); + auto pattern_relu = pattern::optional(pattern_convert); auto pattern_sigmoid = wrap_type({pattern_relu}); // Test: @@ -1255,4 +1255,4 @@ TEST(pattern, pattern_optional_root) { // Should perfectly match ASSERT_TRUE(tm.match(pattern_relu, model_relu)); -} \ No newline at end of file +} diff --git a/src/inference/include/openvino/runtime/core.hpp b/src/inference/include/openvino/runtime/core.hpp index f0ba27c1cf5daa..c13432d664e736 100644 --- a/src/inference/include/openvino/runtime/core.hpp +++ b/src/inference/include/openvino/runtime/core.hpp @@ -25,6 +25,10 @@ #include "openvino/runtime/remote_context.hpp" #include 
"openvino/runtime/tensor.hpp" +#ifdef OPENVINO_CPP_VER_17 +# include +#endif + namespace ov { /** @@ -95,9 +99,18 @@ class OPENVINO_RUNTIME_API Core { * * TF (*.pb) * * TFLite (*.tflite) * @return A model. + * @{ */ std::shared_ptr read_model(const std::string& model_path, const std::string& bin_path = {}) const; +#ifdef OPENVINO_CPP_VER_17 + template >* = nullptr> + std::shared_ptr read_model(const Path& model_path, const Path& bin_path = {}) const { + return read_model(model_path.string(), bin_path.string()); + } +#endif + /// @} + /** * @brief Reads models from IR / ONNX / PDPD / TF / TFLite formats. * @param model String with a model in IR / ONNX / PDPD / TF / TFLite format. @@ -197,6 +210,13 @@ class OPENVINO_RUNTIME_API Core { */ CompiledModel compile_model(const std::string& model_path, const AnyMap& properties = {}); +#ifdef OPENVINO_CPP_VER_17 + template >* = nullptr> + auto compile_model(const Path& model_path, const AnyMap& properties = {}) const { + return compile_model(model_path.string(), properties); + } +#endif + #ifdef OPENVINO_ENABLE_UNICODE_PATH_SUPPORT CompiledModel compile_model(const std::wstring& model_path, const AnyMap& properties = {}); #endif @@ -223,6 +243,13 @@ class OPENVINO_RUNTIME_API Core { return compile_model(model_path, AnyMap{std::forward(properties)...}); } +#ifdef OPENVINO_CPP_VER_17 + template >* = nullptr> + auto compile_model(const Path& model_path, Properties&&... 
properties) { + return compile_model(model_path.string(), std::forward(properties)...); + } +#endif + #ifdef OPENVINO_ENABLE_UNICODE_PATH_SUPPORT template util::EnableIfAllStringAny compile_model(const std::wstring& model_path, @@ -250,6 +277,13 @@ class OPENVINO_RUNTIME_API Core { const std::string& device_name, const AnyMap& properties = {}); +#ifdef OPENVINO_CPP_VER_17 + template >* = nullptr> + auto compile_model(const Path& model_path, const std::string& device_name, const AnyMap& properties = {}) { + return compile_model(model_path.string(), device_name, properties); + } +#endif + #ifdef OPENVINO_ENABLE_UNICODE_PATH_SUPPORT CompiledModel compile_model(const std::wstring& model_path, const std::string& device_name, @@ -279,6 +313,13 @@ class OPENVINO_RUNTIME_API Core { return compile_model(model_path, device_name, AnyMap{std::forward(properties)...}); } +#ifdef OPENVINO_CPP_VER_17 + template >* = nullptr> + auto compile_model(const Path& model_path, const std::string& device_name, Properties&&... properties) { + return compile_model(model_path.string(), device_name, std::forward(properties)...); + } +#endif + #ifdef OPENVINO_ENABLE_UNICODE_PATH_SUPPORT template util::EnableIfAllStringAny compile_model(const std::wstring& model_path, @@ -359,9 +400,18 @@ class OPENVINO_RUNTIME_API Core { /** * @brief Registers an extension to a Core object. * @param library_path Path to the library with ov::Extension. + * @{ */ void add_extension(const std::string& library_path); +#ifdef OPENVINO_CPP_VER_17 + template >* = nullptr> + void add_extension(const Path& model_path) { + add_extension(model_path.string()); + } +#endif + /// @} + #ifdef OPENVINO_ENABLE_UNICODE_PATH_SUPPORT /** * @brief Registers an extension to a Core object. 
diff --git a/src/inference/tests/functional/ov_core_test.cpp b/src/inference/tests/functional/ov_core_test.cpp index 26eb38e3fd13e5..60f91b85b3338a 100644 --- a/src/inference/tests/functional/ov_core_test.cpp +++ b/src/inference/tests/functional/ov_core_test.cpp @@ -8,9 +8,26 @@ #include "common_test_utils/common_utils.hpp" #include "common_test_utils/file_utils.hpp" +#include "functional_test_utils/test_model/test_model.hpp" #include "openvino/runtime/core.hpp" #include "openvino/util/file_util.hpp" +class CoreBaseTest : public testing::Test { +protected: + void generate_test_model_files(const std::string& name) { + auto prefix = ov::test::utils::generateTestFilePrefix(); + model_file_name = prefix + name + ".xml"; + weight_file_name = prefix + name + ".bin"; + ov::test::utils::generate_test_model(model_file_name, weight_file_name); + } + + void TearDown() override { + ov::test::utils::removeIRFiles(model_file_name, weight_file_name); + } + + std::string model_file_name, weight_file_name; +}; + #ifndef OPENVINO_STATIC_LIBRARY static void create_plugin_xml(const std::string& file_name, const std::string& plugin_name = "1") { @@ -33,7 +50,7 @@ static void remove_plugin_xml(const std::string& file_name) { ov::test::utils::removeFile(file_name); } -TEST(CoreBaseTest, LoadPluginXML) { +TEST_F(CoreBaseTest, LoadPluginXML) { std::string xml_file_name = "test_plugin.xml"; std::string xml_file_path = ov::test::utils::getOpenvinoLibDirectory() + ov::util::FileTraits::file_separator + xml_file_name; @@ -42,7 +59,7 @@ TEST(CoreBaseTest, LoadPluginXML) { remove_plugin_xml(xml_file_path); } -TEST(CoreBaseTest, LoadPluginDifferentXMLExtension) { +TEST_F(CoreBaseTest, LoadPluginDifferentXMLExtension) { std::string xml_file_name = "test_plugin.test"; std::string xml_file_path = ov::test::utils::getOpenvinoLibDirectory() + ov::util::FileTraits::file_separator + xml_file_name; @@ -51,7 +68,7 @@ TEST(CoreBaseTest, LoadPluginDifferentXMLExtension) { remove_plugin_xml(xml_file_path); } 
-TEST(CoreBaseTest, LoadAbsoluteOVPathPluginXML) { +TEST_F(CoreBaseTest, LoadAbsoluteOVPathPluginXML) { std::string xml_file_name = "test_plugin.xml"; std::string xml_file_path = ov::test::utils::getOpenvinoLibDirectory() + ov::util::FileTraits::file_separator + xml_file_name; @@ -60,7 +77,7 @@ TEST(CoreBaseTest, LoadAbsoluteOVPathPluginXML) { remove_plugin_xml(xml_file_path); } -TEST(CoreBaseTest, LoadAbsoluteCWPathPluginXML) { +TEST_F(CoreBaseTest, LoadAbsoluteCWPathPluginXML) { std::string xml_file_name = "test_plugin.xml"; std::string xml_file_path = ov::test::utils::getCurrentWorkingDir() + ov::util::FileTraits::file_separator + xml_file_name; @@ -69,7 +86,7 @@ TEST(CoreBaseTest, LoadAbsoluteCWPathPluginXML) { remove_plugin_xml(xml_file_path); } -TEST(CoreBaseTest, LoadRelativeCWPathPluginXML) { +TEST_F(CoreBaseTest, LoadRelativeCWPathPluginXML) { std::string xml_file_name = "test_plugin.xml"; std::string xml_file_path = ov::test::utils::getCurrentWorkingDir() + ov::util::FileTraits::file_separator + xml_file_name; @@ -78,7 +95,7 @@ TEST(CoreBaseTest, LoadRelativeCWPathPluginXML) { remove_plugin_xml(xml_file_path); } -TEST(CoreBaseTest, LoadOVFolderOverCWPathPluginXML) { +TEST_F(CoreBaseTest, LoadOVFolderOverCWPathPluginXML) { std::string xml_file_name = "test_plugin.xml"; std::string cwd_file_path = ov::test::utils::getCurrentWorkingDir() + ov::util::FileTraits::file_separator + xml_file_name; @@ -96,3 +113,43 @@ TEST(CoreBaseTest, LoadOVFolderOverCWPathPluginXML) { } #endif + +#if defined(OPENVINO_CPP_VER_17) && defined(ENABLE_OV_IR_FRONTEND) +namespace ov::test { +TEST_F(CoreBaseTest, read_model_with_std_fs_path) { + generate_test_model_files("test-model"); + + const auto model_path = std::filesystem::path(model_file_name); + const auto weight_path = std::filesystem::path(weight_file_name); + + ov::Core core; + { + const auto model = core.read_model(model_path); + EXPECT_NE(model, nullptr); + } + { + const auto model = core.read_model(model_path, 
weight_path); + EXPECT_NE(model, nullptr); + } +} + +TEST_F(CoreBaseTest, compile_model_with_std_fs_path) { + generate_test_model_files("model2"); + + const auto model_path = std::filesystem::path(model_file_name); + const auto weight_path = std::filesystem::path(weight_file_name); + + ov::Core core; + { + const auto model = core.compile_model(model_path); + EXPECT_TRUE(model); + } + { + const auto devices = core.get_available_devices(); + + const auto model = core.compile_model(model_path, devices.at(0), ov::AnyMap{}); + EXPECT_TRUE(model); + } +} +} // namespace ov::test +#endif diff --git a/src/inference/tests/functional/ov_extension_test.cpp b/src/inference/tests/functional/ov_extension_test.cpp index 6f93a8acdaf2fa..b840c430d092e9 100644 --- a/src/inference/tests/functional/ov_extension_test.cpp +++ b/src/inference/tests/functional/ov_extension_test.cpp @@ -82,6 +82,12 @@ class CustomReLU : public ov::op::Op { }; #if defined(ENABLE_OV_IR_FRONTEND) +# ifdef OPENVINO_CPP_VER_17 +TEST_F(OVExtensionTests, ReshapeIRWithNewExtensionsPathLib) { + core.add_extension(std::filesystem::path(getOVExtensionPath())); + test(); +} +# endif TEST_F(OVExtensionTests, ReshapeIRWithNewExtensionsLib) { core.add_extension(getOVExtensionPath()); From e8fa9f7b84d1d19e4581f56ef4dd8e88934b878e Mon Sep 17 00:00:00 2001 From: Roman Kazantsev Date: Fri, 6 Dec 2024 20:29:49 +0400 Subject: [PATCH 18/43] [TF FE] Run HSVToRGB tests on all platforms (#27945) **Details:** Run HSVToRGB tests on all platforms **Ticket:** TBD --------- Signed-off-by: Kazantsev, Roman --- .../tensorflow_tests/test_tf_HSVToRGB.py | 53 +++++++------------ 1 file changed, 19 insertions(+), 34 deletions(-) diff --git a/tests/layer_tests/tensorflow_tests/test_tf_HSVToRGB.py b/tests/layer_tests/tensorflow_tests/test_tf_HSVToRGB.py index 9f3ab9845fb24f..17df8c52430ec5 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_HSVToRGB.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_HSVToRGB.py @@ -1,32 +1,28 @@ # 
Copyright (C) 2018-2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -import platform - import numpy as np import pytest import tensorflow as tf from common.tf_layer_test_class import CommonTFLayerTest +rng = np.random.default_rng(23345) + + class TestHSVToRGB(CommonTFLayerTest): def _prepare_input(self, inputs_info): assert 'images:0' in inputs_info - if self.special_case == "Black Image": - images_shape = inputs_info['images:0'] - inputs_data = {} - inputs_data['images:0'] = np.zeros(images_shape).astype(self.input_type) - elif self.special_case == "Grayscale Image": - images_shape = inputs_info['images:0'] - inputs_data = {} + images_shape = inputs_info['images:0'] + inputs_data = {} + if self.special_case == 'Black Image': + inputs_data['images:0'] = np.zeros(images_shape).astype(self.input_type) + elif self.special_case == 'Grayscale Image': inputs_data['images:0'] = np.broadcast_to([0, 0, 0.5], images_shape).astype(self.input_type) else: - images_shape = inputs_info['images:0'] - inputs_data = {} - inputs_data['images:0'] = np.random.rand(*images_shape).astype(self.input_type) - + inputs_data['images:0'] = rng.uniform(0.0, 1.0, images_shape).astype(self.input_type) return inputs_data - def create_hsv_to_rgb_net(self, input_shape, input_type, special_case=False): + def create_hsv_to_rgb_net(self, input_shape, input_type, special_case): self.special_case = special_case self.input_type = input_type tf.compat.v1.reset_default_graph() @@ -39,27 +35,16 @@ def create_hsv_to_rgb_net(self, input_shape, input_type, special_case=False): return tf_net, None - # Each input is a tensor of with values in [0,1]. - # The last dimension must be size 3. 
- test_data_basic = [ - dict(input_shape=[7, 7, 3], input_type=np.float32, special_case="Black Image"), - dict(input_shape=[7, 7, 3], input_type=np.float32, special_case="Grayscale Image"), - dict(input_shape=[5, 5, 3], input_type=np.float32), - dict(input_shape=[5, 23, 27, 3], input_type=np.float64), - dict(input_shape=[3, 4, 13, 15, 3], input_type=np.float64), - ] - - @pytest.mark.parametrize("params", test_data_basic) + @pytest.mark.parametrize('input_shape', [[3], [5, 3], [4, 5, 3], [5, 21, 21, 3]]) + @pytest.mark.parametrize('input_type', [np.float16, np.float32, np.float64]) + @pytest.mark.parametrize('special_case', [None, 'Black Image', 'Grayscale Image']) @pytest.mark.precommit @pytest.mark.nightly - @pytest.mark.xfail(condition=platform.system() in ('Darwin', 'Linux') and platform.machine() in ['arm', 'armv7l', - 'aarch64', - 'arm64', 'ARM64'], - reason='Ticket - 126314, 132699') - def test_hsv_to_rgb_basic(self, params, ie_device, precision, ir_version, temp_dir, - use_legacy_frontend): + def test_hsv_to_rgb_basic(self, input_shape, input_type, special_case, + ie_device, precision, ir_version, temp_dir, + use_legacy_frontend): if ie_device == 'GPU': - pytest.skip("Accuracy mismatch on GPU") - self._test(*self.create_hsv_to_rgb_net(**params), + pytest.skip('158898: accuracy issue on GPU') + self._test(*self.create_hsv_to_rgb_net(input_shape, input_type, special_case), ie_device, precision, ir_version, temp_dir=temp_dir, - use_legacy_frontend=use_legacy_frontend) + use_legacy_frontend=use_legacy_frontend, custom_eps=3 * 1e-3) From b840082ac11b1608f349d9554b020498c328164f Mon Sep 17 00:00:00 2001 From: Mingyu Kim Date: Mon, 9 Dec 2024 14:09:30 +0900 Subject: [PATCH 19/43] [GPU] Integrate dynamic quantization for onednn (#26940) ### Details: - Integrated grouped dynamic quantization from onednn - Integrated asymmetric per-token dynamic quantization from onednn - Those are not enabled by default, yet ### Tickets: - 148732, 157869, 157589 --- 
.../op/fully_connected_compressed.hpp | 1 + .../intel_gpu/primitives/dynamic_quantize.hpp | 13 +- .../intel_gpu/primitives/fully_connected.hpp | 18 +++ .../intel_gpu/runtime/debug_configuration.hpp | 1 + .../prepare_primitive_fusing.cpp | 2 + .../src/graph/impls/ocl/dynamic_quantize.cpp | 8 +- .../impls/onednn/fully_connected_onednn.cpp | 47 +++++-- .../impls/onednn/fully_connected_onednn.hpp | 2 +- .../cl_kernels/dynamic_quantize_gpu_opt.cl | 133 ++++++++++++++++-- .../cl_kernels/dynamic_quantize_gpu_ref.cl | 50 ++++--- .../dynamic_quantize_kernel_opt.cpp | 56 +++++--- .../dynamic_quantize_kernel_ref.cpp | 18 ++- .../fully_connected_kernel_bf_tiled.cpp | 20 +-- .../src/plugin/ops/dynamic_quantize.cpp | 3 +- .../src/plugin/ops/fully_connected.cpp | 4 +- .../intel_gpu/src/plugin/program_builder.cpp | 4 + .../dynamic_quantize_fully_connected.cpp | 30 ++-- .../op/fully_connected_compressed.cpp | 5 +- .../src/plugin/transformations_pipeline.cpp | 22 ++- .../src/runtime/debug_configuration.cpp | 3 + .../src/runtime/execution_config.cpp | 7 +- .../dynamic/matmul_weights_decompression.cpp | 33 +++-- .../test_cases/dynamic_quantize_gpu_test.cpp | 61 +++++--- .../test_cases/fully_connected_gpu_test.cpp | 24 ++-- .../unit/test_cases/hash_key_gpu_test.cpp | 8 +- 25 files changed, 420 insertions(+), 153 deletions(-) diff --git a/src/plugins/intel_gpu/include/intel_gpu/op/fully_connected_compressed.hpp b/src/plugins/intel_gpu/include/intel_gpu/op/fully_connected_compressed.hpp index 1112a3785317a3..e58c6ab4cb17f1 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/op/fully_connected_compressed.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/op/fully_connected_compressed.hpp @@ -22,6 +22,7 @@ class FullyConnectedCompressed : public FullyConnected { const ov::Output &w_decompression_scale, const ov::Output &w_decompression_zero_point, const ov::Output &a_decompression_scale, + const ov::Output &a_decompression_zero_point, const ov::element::Type output_type = 
ov::element::undefined); diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/dynamic_quantize.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/dynamic_quantize.hpp index 79af223e32cdaa..8dd1ebf2809782 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/dynamic_quantize.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/dynamic_quantize.hpp @@ -26,9 +26,11 @@ struct dynamic_quantize : public primitive_base { /// @param output_size Output data size of the primitive dynamic_quantize(const primitive_id& id, const input_info& input, - const Attributes& attrs) + const Attributes& attrs, + const size_t input_size = 3) : primitive_base(id, {input}) - , attrs(attrs) { + , attrs(attrs) + , input_size(input_size) { num_outputs = 2; if (attrs.quantization_type == ov::op::internal::DynamicQuantize::QuantizationType::Asymmetric && attrs.output_storage_type == ov::op::internal::DynamicQuantize::OutputStorageType::Planar) @@ -36,6 +38,7 @@ struct dynamic_quantize : public primitive_base { } Attributes attrs; + size_t input_size; size_t hash() const override { size_t seed = primitive::hash(); @@ -46,6 +49,7 @@ struct dynamic_quantize : public primitive_base { seed = hash_combine(seed, attrs.scale_dt.hash()); seed = hash_combine(seed, attrs.zp_dt.hash()); seed = hash_combine(seed, attrs.output_storage_type); + seed = hash_combine(seed, input_size); return seed; } @@ -62,7 +66,8 @@ struct dynamic_quantize : public primitive_base { attrs.quantization_dt == rhs_casted.attrs.quantization_dt && attrs.scale_dt == rhs_casted.attrs.scale_dt && attrs.zp_dt == rhs_casted.attrs.zp_dt && - attrs.quantization_type == rhs_casted.attrs.quantization_type;; + attrs.quantization_type == rhs_casted.attrs.quantization_type && + input_size == rhs_casted.input_size; } void save(BinaryOutputBuffer& ob) const override { @@ -75,6 +80,7 @@ struct dynamic_quantize : public primitive_base { ob << make_data(&attrs.output_storage_type, sizeof(attrs.output_storage_type)); 
ob << attrs.scales_zp_output_order; ob << attrs.group_sizes; + ob << input_size; } void load(BinaryInputBuffer& ib) override { @@ -87,6 +93,7 @@ struct dynamic_quantize : public primitive_base { ib >> make_data(&attrs.output_storage_type, sizeof(attrs.output_storage_type)); ib >> attrs.scales_zp_output_order; ib >> attrs.group_sizes; + ib >> input_size; } }; } // namespace cldnn diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/fully_connected.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/fully_connected.hpp index e39078cb1011cc..0819a39534696d 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/fully_connected.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/fully_connected.hpp @@ -96,6 +96,7 @@ struct fully_connected : public primitive_base { decompression_scale(decompression_scale), decompression_zero_point(decompression_zero_point), dynamic_quantized_activation(false), + dynamic_quantized_activation_zp(false), input_size(input_size), weights_rank(weights_rank) { OPENVINO_ASSERT(!decompression_scale.empty(), "[GPU] Compressed fully connected requires at least decompression scale input"); @@ -109,6 +110,7 @@ struct fully_connected : public primitive_base { /// @param compression_scale Primitive id containing scale factors for weights decompression. /// @param compression_zero_point Primitive id containing zero points for weights decompression. /// @param activation_scale Primitive id containing scale factor for activation. + /// @param activation_zero_point Primitive id containing zero point for activation. 
fully_connected(const primitive_id& id, const input_info& input, const primitive_id& weights, @@ -116,6 +118,7 @@ struct fully_connected : public primitive_base { const primitive_id& decompression_scale, const primitive_id& decompression_zero_point, const input_info& activation_scale, + const input_info& activation_zero_point, const data_types data_type, const size_t input_size = 2, const size_t weights_rank = 2) @@ -126,11 +129,15 @@ struct fully_connected : public primitive_base { decompression_scale(decompression_scale), decompression_zero_point(decompression_zero_point), dynamic_quantized_activation(false), + dynamic_quantized_activation_zp(false), activation_scale(activation_scale), + activation_zero_point(activation_zero_point), input_size(input_size), weights_rank(weights_rank) { if (activation_scale.is_valid()) dynamic_quantized_activation = true; + if (activation_zero_point.is_valid()) + dynamic_quantized_activation_zp = true; OPENVINO_ASSERT(!decompression_scale.empty(), "[GPU] Compressed fully connected requires at least decompression scale input"); } @@ -144,7 +151,9 @@ struct fully_connected : public primitive_base { primitive_id decompression_scale = ""; primitive_id decompression_zero_point = ""; bool dynamic_quantized_activation = false; + bool dynamic_quantized_activation_zp = false; input_info activation_scale = {"", 0}; + input_info activation_zero_point = {"", 0}; optional_value decompression_zero_point_scalar = optional_value(); /// @brief Primitive dimension size. 
@@ -161,6 +170,7 @@ struct fully_connected : public primitive_base { seed = hash_combine(seed, !decompression_scale.empty()); seed = hash_combine(seed, !decompression_zero_point.empty()); seed = hash_combine(seed, activation_scale.is_valid()); + seed = hash_combine(seed, activation_zero_point.is_valid()); seed = hash_combine(seed, decompression_zero_point_scalar.has_value()); seed = hash_combine(seed, decompression_zero_point_scalar.value_or(0.0f)); return seed; @@ -179,6 +189,7 @@ struct fully_connected : public primitive_base { decompression_scale.empty() == rhs_casted.decompression_scale.empty() && decompression_zero_point.empty() == rhs_casted.decompression_zero_point.empty() && activation_scale.is_valid() == rhs_casted.activation_scale.is_valid() && + activation_zero_point.is_valid() == rhs_casted.activation_zero_point.is_valid() && decompression_zero_point_scalar.value_or(0.0f) == rhs_casted.decompression_zero_point_scalar.value_or(0.0f); } @@ -190,9 +201,11 @@ struct fully_connected : public primitive_base { ob << decompression_scale; ob << decompression_zero_point; ob << activation_scale; + ob << activation_zero_point; ob << input_size; ob << weights_rank; ob << dynamic_quantized_activation; + ob << dynamic_quantized_activation_zp; if (decompression_zero_point_scalar.has_value()) { ob << true; @@ -211,9 +224,11 @@ struct fully_connected : public primitive_base { ib >> decompression_scale; ib >> decompression_zero_point; ib >> activation_scale; + ib >> activation_zero_point; ib >> input_size; ib >> weights_rank; ib >> dynamic_quantized_activation; + ib >> dynamic_quantized_activation_zp; bool has_value; ib >> has_value; @@ -243,6 +258,9 @@ struct fully_connected : public primitive_base { if (activation_scale.is_valid()) ret.push_back(activation_scale); + if (activation_zero_point.is_valid()) + ret.push_back(activation_zero_point); + return ret; } }; diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp 
b/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp index a7a8ae1f229a72..52d828353fa155 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/debug_configuration.hpp @@ -146,6 +146,7 @@ class debug_configuration { std::vector dynamic_quantize_layers_without_onednn; // Specify Fully-connected layers which enable Dynamic quantization int use_kv_cache_compression; // Enable KV-cache compression int dynamic_quantize_group_size; // Enable Dynamic quantization for fully connected primitive by specified group size + int dynamic_quantize_asym; // Use asymmetric dynamic quantization int disable_horizontal_fc_fusion; // Disable fc horizontal fusion int disable_fc_swiglu_fusion; // Disable swiglu fusion to fc std::set dump_iteration; // Dump n-th execution of network. diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp index 29b7cf58a19b54..93f0905b3a1ef7 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp @@ -463,7 +463,9 @@ void prepare_primitive_fusing::fuse_bias(program &p) { if (desc->decompression_zero_point_scalar.has_value()) fc_with_bias_prim->decompression_zero_point_scalar = desc->decompression_zero_point_scalar.value(); fc_with_bias_prim->activation_scale = desc->activation_scale; + fc_with_bias_prim->activation_zero_point = desc->activation_zero_point; fc_with_bias_prim->dynamic_quantized_activation = desc->dynamic_quantized_activation; + fc_with_bias_prim->dynamic_quantized_activation_zp = desc->dynamic_quantized_activation_zp; } auto& new_fc_node = p.get_or_create(fc_with_bias_prim); fuse_bias_f(fc, new_fc_node, bias_node, eltw_node); diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/dynamic_quantize.cpp 
b/src/plugins/intel_gpu/src/graph/impls/ocl/dynamic_quantize.cpp index b9fe00ac525720..ca628a48ac76e0 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/dynamic_quantize.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/dynamic_quantize.cpp @@ -35,6 +35,7 @@ struct dynamic_quantize_impl : typed_primitive_impl_ocl { static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) { auto params = get_default_params(impl_param, is_shape_agnostic); + const auto& primitive = impl_param.typed_desc(); params.outputs.push_back(convert_data_tensor(impl_param.get_output_layout(1))); // In Some model, the feature size could be dynamic in input0. @@ -48,6 +49,10 @@ struct dynamic_quantize_impl : typed_primitive_impl_ocl { if (impl_param.output_layouts.size() > 2) params.outputs.push_back(convert_data_tensor(impl_param.get_output_layout(2))); + // Keep 2d data as bf layout + if (primitive->input_size == 2) + params.outputs[0] = params.outputs[0].FlattenFeatureAndSpatials(); + const auto& desc = impl_param.typed_desc(); params.group_sizes = desc->attrs.group_sizes; params.scales_output_order = desc->attrs.scales_zp_output_order; @@ -68,7 +73,8 @@ namespace detail { attach_dynamic_quantize_impl::attach_dynamic_quantize_impl() { auto types = { data_types::f16, - data_types::i8 + data_types::i8, + data_types::u8 }; auto formats = { diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.cpp index 6b93b279129812..6cca9848af3472 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.cpp @@ -83,10 +83,16 @@ struct fully_connected_onednn : typed_primitive_onednn_impl { if (prim->activation_scale.is_valid()) { auto activation_scale_idx = idx++; auto act_scale_mem = instance.dep_memory_ptr(activation_scale_idx); - // TODO: handle 
group_size here - dnnl::memory::desc desc = onednn::layout_to_memory_desc(act_scale_mem->get_layout(), dnnl::memory::format_tag::a, true); + dnnl::memory::desc desc = onednn::layout_to_memory_desc(act_scale_mem->get_layout(), dnnl::memory::format_tag::ab, true); args.insert({DNNL_ARG_ATTR_SCALES | DNNL_ARG_SRC_0, act_scale_mem->get_onednn_memory(desc)}); } + + if (prim->activation_zero_point.is_valid()) { + auto activation_zp_idx = idx++; + auto act_zp_mem = instance.dep_memory_ptr(activation_zp_idx); + dnnl::memory::desc desc = onednn::layout_to_memory_desc(act_zp_mem->get_layout(), dnnl::memory::format_tag::ab, true); + args.insert({DNNL_ARG_ATTR_ZERO_POINTS | DNNL_ARG_SRC_0, act_zp_mem->get_onednn_memory(desc)}); + } } return args; @@ -245,6 +251,7 @@ struct fully_connected_onednn : typed_primitive_onednn_impl { ob << has_bias; ob << is_compressed; ob << prim->dynamic_quantized_activation; + ob << prim->dynamic_quantized_activation_zp; bool has_decompression_scale = !prim->decompression_scale.empty(); if (has_decompression_scale) { @@ -271,10 +278,12 @@ struct fully_connected_onednn : typed_primitive_onednn_impl { bool has_bias = false; bool is_compressed = false; bool dynamic_quantized_activation; + bool dynamic_quantized_activation_zp; ib >> input_size; ib >> has_bias; ib >> is_compressed; ib >> dynamic_quantized_activation; + ib >> dynamic_quantized_activation_zp; const kernel_impl_params* impl_params = reinterpret_cast(ib.getKernelImplParams()); auto prim = impl_params->typed_desc(); @@ -293,11 +302,12 @@ struct fully_connected_onednn : typed_primitive_onednn_impl { bool has_decompression_zp = !prim->decompression_zero_point.empty() || prim->decompression_zero_point_scalar.has_value(); auto& arg = impl_params->get_program().get_node(impl_params->desc->id).as(); - int idx = !arg.bias_term() ? 3 : 4; + int idx = !arg.bias_term() ? 
2 : 3; if (has_decompression_zp) { ib >> make_data(&_dzp_data_type, sizeof(dnnl::memory::data_type)); - auto dzp_layout = arg.get_dependency(idx++).get_output_layout(); + auto decompression_zp_idx = ++idx; + auto dzp_layout = arg.get_dependency(decompression_zp_idx).get_output_layout(); if (dzp_layout.count() == 1) { _attrs->set_zero_points(DNNL_ARG_WEIGHTS, COMMON, dnnl::memory::dims{}, _dzp_data_type); @@ -312,12 +322,17 @@ struct fully_connected_onednn : typed_primitive_onednn_impl { } if (dynamic_quantized_activation) { - // TODO: it supports per-token activation scale only + auto src_scale_idx = ++idx; auto partial_shape = impl_params->get_input_layout(0).get_partial_shape(); auto innermost_len = partial_shape[partial_shape.size() - 1].get_length(); - - auto act_scale_data_type = convert_data_type(impl_params->get_input_layout(idx).data_type); - _attrs->set_scales(DNNL_ARG_SRC, GROUPED, dnnl::memory::dims{1, innermost_len}, act_scale_data_type); + auto& src_scale_shape = impl_params->input_layouts[src_scale_idx].get_partial_shape(); + int src_scale_ngroups = src_scale_shape[src_scale_shape.size() - 1].get_length(); + int src_group_size = innermost_len / src_scale_ngroups; + + auto act_scale_data_type = convert_data_type(impl_params->get_input_layout(src_scale_idx).data_type); + _attrs->set_scales(DNNL_ARG_SRC, GROUPED, dnnl::memory::dims{1, src_group_size}, act_scale_data_type); + if (dynamic_quantized_activation_zp) + _attrs->set_zero_points(DNNL_ARG_SRC, GROUPED, dnnl::memory::dims{1, src_group_size}, dnnl::memory::data_type::u8); } if (is_compressed) { @@ -387,15 +402,21 @@ struct fully_connected_onednn : typed_primitive_onednn_impl { } if (prim->dynamic_quantized_activation) { - // Note: it supports per-token activation scale only - ++idx; - auto partial_shape = impl_params.input_layouts[0].get_partial_shape(); + auto src_scale_idx = ++idx; + auto& partial_shape = impl_params.input_layouts[0].get_partial_shape(); auto innermost_len = 
partial_shape[partial_shape.size() - 1].get_length(); + auto& src_scale_shape = impl_params.input_layouts[src_scale_idx].get_partial_shape(); + int src_scale_ngroups = src_scale_shape[src_scale_shape.size() - 1].get_length(); + int src_group_size = innermost_len / src_scale_ngroups; - auto act_scale_data_type = convert_data_type(impl_params.input_layouts[idx].data_type); - attr->set_scales(DNNL_ARG_SRC, GROUPED, dnnl::memory::dims{1, innermost_len}, act_scale_data_type); + auto act_scale_data_type = convert_data_type(impl_params.input_layouts[src_scale_idx].data_type); + attr->set_scales(DNNL_ARG_SRC, GROUPED, dnnl::memory::dims{1, src_group_size}, act_scale_data_type); + + if (prim->activation_zero_point.is_valid()) + attr->set_zero_points(DNNL_ARG_SRC, GROUPED, dnnl::memory::dims{1, src_group_size}, dnnl::memory::data_type::u8); } + auto prim_desc = get_matmul_primitive_descriptor(impl_params, impl_params.prog->get_engine(), prim->input_size, !prim->bias.empty(), *attr); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.hpp index 17498831a542d1..62129866927ea4 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.hpp @@ -48,7 +48,7 @@ struct FullyConnectedImplementationManager : public ImplementationManager { one_of(wei_dt, {data_types::i8, data_types::u8}) && one_of(out_dt, {data_types::f16, data_types::f32, data_types::i32, data_types::i8, data_types::u8}); bool compressed_case = fc_prim->compressed_weights && - one_of(in0_dt, {data_types::f16, data_types::f32, data_types::i8}) && + one_of(in0_dt, {data_types::f16, data_types::f32, data_types::i8, data_types::u8}) && one_of(wei_dt, {data_types::u8, data_types::i8, data_types::u4, data_types::i4}) && one_of(out_dt, {data_types::f16, data_types::f32, data_types::u8, data_types::i8}); if (!f16f16_case && 
!f32f32_case && !u8s8_case && !compressed_case) diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/dynamic_quantize_gpu_opt.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/dynamic_quantize_gpu_opt.cl index 6db1790844e501..22c620d712770c 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/dynamic_quantize_gpu_opt.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/dynamic_quantize_gpu_opt.cl @@ -4,77 +4,180 @@ #include "include/batch_headers/fetch_data.cl" -#if OUTPUT_DIMS != 4 +#if OUTPUT_DIMS != 4 && OUTPUT_DIMS != 2 #error "dynamic_quantize_gpu_opt.cl: Unsupported output dimension" #endif #define VLOAD_N CAT(vload, VEC_SIZE) #define VSTORE_N CAT(vstore, VEC_SIZE) +#define CONVERT_UCHAR_N CAT(convert_uchar, VEC_SIZE) #define CONVERT_CHAR_N CAT(convert_char, VEC_SIZE) #define AS_TYPE_N_(type, n, x) as_##type##n(x) #define AS_TYPE_N(type, n, x) AS_TYPE_N_(type, n, x) #define AS_INPUT_TYPE_N(x) AS_TYPE_N(INPUT0_TYPE, VEC_SIZE, x) +#if QUANTIZE_GROUP_SIZE <= 128 + +#if ASYMMETRIC_QUANTIZATION +#error "UNIMPLMENTED: asymmetric quantization when group size is small" +#endif + +KERNEL(dynamic_quantize_gpu_opt)( + OPTIONAL_SHAPE_INFO_ARG + const __global INPUT0_TYPE* input, + __global OUTPUT_TYPE* output, + __global OUTPUT1_TYPE* output_scale + ) { + +#if OUTPUT_DIMS == 2 + const uint b = get_global_id(0); + const uint f_grp = get_global_id(1); + const uint input_offset = INPUT0_GET_INDEX(b, f_grp * QUANTIZE_GROUP_SIZE, 0, 0); + const uint output_offset = OUTPUT_GET_INDEX(b, f_grp * QUANTIZE_GROUP_SIZE, 0, 0); +#else + const uint bf = get_global_id(0); + const uint b = bf / INPUT0_FEATURE_NUM; + const uint f = bf % INPUT0_FEATURE_NUM; + const uint y_grp = get_global_id(1); + const uint input_offset = INPUT0_GET_INDEX(b, f, y_grp * QUANTIZE_GROUP_SIZE, 0); + const uint output_offset = OUTPUT_GET_INDEX(b, f, y_grp * QUANTIZE_GROUP_SIZE, 0); + +#endif + const uint quantize_block = QUANTIZE_GROUP_SIZE / 4; + half4 
input_0[quantize_block]; + char4 quantized_value[quantize_block]; + half max[quantize_block]; + + unroll_for (uint i = 0 ; i < quantize_block; ++i) { + input_0[i] = vload4(0, &input[input_offset + i * 4]); + max[i] = fmax(fmax(fabs(input_0[i][0]), fabs(input_0[i][1])), fmax(fabs(input_0[i][2]), fabs(input_0[i][3]))); + } + + half max_value = fmax(0.001h, max[0]); + for (uint i = 1; i < quantize_block; i++) { + max_value = fmax(max_value, max[i]); + } + + half quan_scale = 128.0h / max_value; + + unroll_for (uint i = 0 ; i < quantize_block; ++i) { + quantized_value[i] = convert_char4(input_0[i] * (half4)quan_scale); + vstore4(quantized_value[i], 0, &output[output_offset + i * 4]); + } + +#if OUTPUT_DIMS == 2 + output_scale[OUTPUT1_GET_INDEX(b, f_grp, 0, 0)] = 1.0h / quan_scale; +#else + output_scale[OUTPUT1_GET_INDEX(b, f, y_grp, 0)] = 1.0h / quan_scale; +#endif +} + +#else // !(QUANTIZE_GROUP_SIZE <= 128) + REQD_SUB_GROUP_SIZE(SIMD) KERNEL(dynamic_quantize_gpu_opt)( OPTIONAL_SHAPE_INFO_ARG const __global INPUT0_TYPE* input, __global OUTPUT_TYPE* output, - __global OUTPUT1_TYPE* output_scale) + __global OUTPUT1_TYPE* output_scale +#if ASYMMETRIC_QUANTIZATION + , __global OUTPUT2_TYPE* output_zp +#endif + ) { const uint bf = (uint)get_global_id(2); const uint sglid = get_sub_group_local_id(); const uint local_id = (uint)get_local_id(1); const uint block_size = SIMD * VEC_SIZE; +#if OUTPUT_DIMS == 2 + const uint b_offset = bf * INPUT0_BATCH_PITCH; +#else const uint b_offset = bf * INPUT0_FEATURE_PITCH; - +#endif const uint offset = b_offset + VEC_SIZE * sglid; const uint iteration = ALIGNED_BLOCK_NUM / BLOCK_NUM; - __local half local_mem[BLOCK_NUM]; + __local half local_mem_max[BLOCK_NUM]; + __local half local_mem_min[BLOCK_NUM]; MAKE_VECTOR_TYPE(INPUT0_TYPE, VEC_SIZE) val[iteration]; MAKE_VECTOR_TYPE(INPUT0_TYPE, VEC_SIZE) abs_val; - half max = 0.0h; half grp_max = 0.001h; - half max_value; + half grp_min = 0.001h; + half max_value = 0.0h; + half min_value = 0.0h; 
unroll_for(int i = 0; i < iteration; ++i) { if ((local_id * iteration + i) >= TOTAL_BLOCK_NUM) continue; val[i] = AS_INPUT_TYPE_N(VLOAD_N(0, input + offset + ((local_id * iteration + i) * block_size))); - abs_val = fabs(val[i]); - +#if ASYMMETRIC_QUANTIZATION unroll_for (int j = 0; j < VEC_SIZE; j++) { - max = fmax(max, abs_val[j]); + max_value = fmax(max_value, val[i][j]); + min_value = fmin(min_value, val[i][j]); } + grp_max = fmax(grp_max, max_value); + grp_min = fmin(grp_min, min_value); +#else + abs_val = fabs(val[i]); + + unroll_for (int j = 0; j < VEC_SIZE; j++) + max_value = fmax(max_value, abs_val[j]); - grp_max = fmax(grp_max, max); + grp_max = fmax(grp_max, max_value); +#endif } max_value = sub_group_reduce_max(grp_max); - if (sglid == 0) - local_mem[local_id] = max_value; +#if ASYMMETRIC_QUANTIZATION + min_value = sub_group_reduce_min(grp_min); +#endif + + if (sglid == 0) { + local_mem_max[local_id] = max_value; +#if ASYMMETRIC_QUANTIZATION + local_mem_min[local_id] = min_value; +#endif + } barrier(CLK_LOCAL_MEM_FENCE); for (int j = 0; j < BLOCK_NUM; j++) { - max_value = fmax(max_value, local_mem[j]); + max_value = fmax(max_value, local_mem_max[j]); +#if ASYMMETRIC_QUANTIZATION + min_value = fmin(min_value, local_mem_min[j]); +#endif } - half scale = 127.0h / max_value; +#if ASYMMETRIC_QUANTIZATION + OUTPUT1_TYPE scale = (OUTPUT1_TYPE)((CHAR_MAX - CHAR_MIN) / (max_value - min_value)); + OUTPUT2_TYPE zp = (OUTPUT2_TYPE)(-min_value * scale); +#else + OUTPUT1_TYPE scale = 127.0h / max_value; +#endif + unroll_for(int i = 0; i < iteration; ++i) { if ((local_id * iteration + i) >= TOTAL_BLOCK_NUM) continue; val[i] *= scale; +#if ASYMMETRIC_QUANTIZATION + val[i] += zp; + VSTORE_N(CAT(CONVERT_UCHAR_N, _rte)(val[i]), 0, output + offset + ((local_id * iteration + i) * block_size)); +#else VSTORE_N(CAT(CONVERT_CHAR_N, _rte)(val[i]), 0, output + offset + ((local_id * iteration + i) * block_size)); +#endif } - if (sglid == 0 && local_id == 0) + if (sglid == 0 && 
local_id == 0) { output_scale[bf] = 1.0h / scale; +#if ASYMMETRIC_QUANTIZATION + output_zp[bf] = convert_uchar_rte(zp); +#endif + } } +#endif // QUANTIZE_GROUP_SIZE <= 128 diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/dynamic_quantize_gpu_ref.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/dynamic_quantize_gpu_ref.cl index 62482b8b9b5047..4acf87eb37ceb0 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/dynamic_quantize_gpu_ref.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/dynamic_quantize_gpu_ref.cl @@ -4,6 +4,16 @@ #include "include/batch_headers/fetch_data.cl" +#define UINT64_MAX 0xFFFFFFFFFFFFFFFF + +#if ASYMMETRIC_QUANTIZATION && UNSIGNED_OUTPUT + #define TO_OUTPUT_TYPE_RTE(val) convert_uchar_rte(val) + #define TO_OUTPUT_VEC_TYPE_RTE(val) convert_uchar8_rte(val) +#else + #define TO_OUTPUT_TYPE_RTE(val) convert_char_rte(val) + #define TO_OUTPUT_VEC_TYPE_RTE(val) convert_char8_rte(val) +#endif + #if OUTPUT_DIMS != 4 #error "dynamic_quantize_gpu_ref.cl: Unsupported output dimension" #endif @@ -33,19 +43,21 @@ KERNEL(dynamic_quantize_gpu_ref)( const uint bf = (uint)get_global_id(0); const uint b = bf / INPUT0_FEATURE_NUM; const uint f = bf % INPUT0_FEATURE_NUM; - const uint y = (uint)get_global_id(1); + const uint out_y = (uint)get_global_id(1); + const uint y = out_y * GROUP_SIZE_DIM2; // quantization may be grouped for y axis const uint x = (uint)get_global_id(2); #ifdef SCALES_OUTPUT_ORDER - const uint scale_idx = FUNC_CALL(get_scales_offset)(OPTIONAL_SHAPE_INFO_TENSOR b, f, y, x); + const uint scale_idx = FUNC_CALL(get_scales_offset)(OPTIONAL_SHAPE_INFO_TENSOR b, f, out_y, x); #else - const uint scale_idx = OUTPUT1_GET_INDEX_SAFE(b, f, y, x); + const uint scale_idx = OUTPUT1_GET_INDEX_SAFE(b, f, out_y, x); #endif half max_val = INPUT0_VAL_MIN; half min_val = INPUT0_VAL_MAX; for (int b_off = 0; b_off < (GROUP_SIZE_DIM0 == 1 ? 
1 : INPUT0_BATCH_NUM); b_off++) { for (int f_off = 0; f_off < (GROUP_SIZE_DIM1 == 1 ? 1 : INPUT0_FEATURE_NUM); f_off++) { - for (int y_off = 0; y_off < (GROUP_SIZE_DIM2 == 1 ? 1 : INPUT0_SIZE_Y); y_off++) { + for (int y_off = 0; y_off < (GROUP_SIZE_DIM2 == UINT64_MAX ? INPUT0_SIZE_Y : GROUP_SIZE_DIM2); y_off++) { + // It is assumed that grouped quantization happens only for 3d input case where we don't have x axis #if GROUP_SIZE_DIM3 == 1 const uint offset = INPUT0_GET_INDEX(b + b_off, f + f_off, y + y_off, x); half val = input[offset]; @@ -88,53 +100,49 @@ KERNEL(dynamic_quantize_gpu_ref)( #if ASYMMETRIC_QUANTIZATION OUTPUT1_TYPE scale = (OUTPUT1_TYPE)((CHAR_MAX - CHAR_MIN) / (max_val - min_val)); +# if UNSIGNED_OUTPUT + OUTPUT1_TYPE zp = (OUTPUT1_TYPE)(-min_val * scale); +# else // !UNSIGNED_OUTPUT OUTPUT1_TYPE zp = (OUTPUT1_TYPE)(-min_val * scale) - CHAR_MAX; -#else +# endif +#else // !ASYMMETRIC_QUANTIZATION max_val = work_group_reduce_max(max_val); OUTPUT1_TYPE scale = 127.0h / max_val; #endif for (int b_off = 0; b_off < (GROUP_SIZE_DIM0 == 1 ? 1 : INPUT0_BATCH_NUM); b_off++) { for (int f_off = 0; f_off < (GROUP_SIZE_DIM1 == 1 ? 1 : INPUT0_FEATURE_NUM); f_off++) { - for (int y_off = 0; y_off < (GROUP_SIZE_DIM2 == 1 ? 1 : INPUT0_SIZE_Y); y_off++) { + for (int y_off = 0; y_off < (GROUP_SIZE_DIM2 == UINT64_MAX ? 
INPUT0_SIZE_Y : GROUP_SIZE_DIM2); y_off++) { #if GROUP_SIZE_DIM3 == 1 const uint in_offset = INPUT0_GET_INDEX(b + b_off, f + f_off, y + y_off, x); const uint out_offset = OUTPUT_GET_INDEX(b + b_off, f + f_off, y + y_off, x); half val = input[in_offset]; -#if ASYMMETRIC_QUANTIZATION val *= scale; +#if ASYMMETRIC_QUANTIZATION val += zp; - output[out_offset] = convert_char_rte(val); -#else - val *= scale; - output[out_offset] = convert_char_rte(val); #endif + output[out_offset] = TO_OUTPUT_TYPE_RTE(val); #else const uint in_offset = INPUT0_GET_INDEX(b + b_off, f + f_off, y + y_off, 0); const uint out_offset = OUTPUT_GET_INDEX(b + b_off, f + f_off, y + y_off, 0); int x; for (x = 0; x < INPUT0_SIZE_X / 8; x++) { half8 val = as_half8(vload8(0, (ushort*)input + in_offset + x * 8)); -#if ASYMMETRIC_QUANTIZATION val *= scale; +#if ASYMMETRIC_QUANTIZATION val += zp; -#else - val *= scale; #endif - vstore8(convert_char8_rte(val), 0, output + out_offset + x * 8); + vstore8(TO_OUTPUT_VEC_TYPE_RTE(val), 0, output + out_offset + x * 8); } x *= 8; for (; x < INPUT0_SIZE_X; x++) { half val = input[in_offset + x]; -#if ASYMMETRIC_QUANTIZATION val *= scale; +#if ASYMMETRIC_QUANTIZATION val += zp; - output[out_offset + x] = convert_char_rte(val); -#else - val *= scale; - output[out_offset + x] = convert_char_rte(val); #endif + output[out_offset + x] = TO_OUTPUT_TYPE_RTE(val); } #endif } @@ -145,6 +153,6 @@ KERNEL(dynamic_quantize_gpu_ref)( #if ASYMMETRIC_QUANTIZATION && GROUP_SCALES_WITH_ZP output_scale[scale_idx + 1] = zp; #elif ASYMMETRIC_QUANTIZATION - output_zp[scale_idx] = zp; + output_zp[scale_idx] = convert_uchar_rte(zp); #endif } diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/dynamic_quantize/dynamic_quantize_kernel_opt.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/dynamic_quantize/dynamic_quantize_kernel_opt.cpp index 52a648679499f2..b4f667475f26f1 100644 --- 
a/src/plugins/intel_gpu/src/kernel_selector/kernels/dynamic_quantize/dynamic_quantize_kernel_opt.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/dynamic_quantize/dynamic_quantize_kernel_opt.cpp @@ -30,9 +30,11 @@ static std::pair get_input_bf_size(const dynamic_quantize_params static size_t get_match_vector_size(const dynamic_quantize_params& params) { auto block_sizes = { 8, 4, 2 }; + auto bf = get_input_bf_size(params); + auto f = bf.second; for (auto block_size : block_sizes) { - if (((params.inputs[0].X().v * params.inputs[0].Y().v) / simd) % block_size == 0) { + if ((f / simd) % block_size == 0) { return block_size; } } @@ -43,10 +45,13 @@ static size_t get_match_vector_size(const dynamic_quantize_params& params) { ParamsKey DynamicQuantizeKernelOpt::GetSupportedKey() const { ParamsKey k; k.EnableInputDataType(Datatype::F16); + k.EnableOutputDataType(Datatype::UINT8); k.EnableOutputDataType(Datatype::INT8); k.EnableDifferentTypes(); - k.EnableAllInputLayout(); - k.EnableAllOutputLayout(); + k.EnableInputLayout(DataLayout::bf); + k.EnableInputLayout(DataLayout::bfyx); + k.EnableOutputLayout(DataLayout::bf); + k.EnableOutputLayout(DataLayout::bfyx); k.EnableTensorOffset(); k.EnableTensorPitches(); k.EnableBatching(); @@ -68,6 +73,8 @@ JitConstants DynamicQuantizeKernelOpt::GetJitConstants(const dynamic_quantize_pa jit.AddConstant(MakeJitConstant("TOTAL_BLOCK_NUM", total_block_num)); jit.AddConstant(MakeJitConstant("ALIGNED_BLOCK_NUM", aligned_block_num)); jit.AddConstant(MakeJitConstant("BLOCK_NUM", block_num)); + jit.AddConstant(MakeJitConstant("QUANTIZE_GROUP_SIZE", params.group_sizes.back())); + jit.AddConstant(MakeJitConstant("ASYMMETRIC_QUANTIZATION", params.use_asymmetric_quantization)); jit.Merge(GetTensorFriendlyWorkGroupsJit(params.outputs[0])); return jit; @@ -76,15 +83,20 @@ JitConstants DynamicQuantizeKernelOpt::GetJitConstants(const dynamic_quantize_pa CommonDispatchData DynamicQuantizeKernelOpt::SetDefault(const dynamic_quantize_params& 
params) const { CommonDispatchData dispatchData; - auto vec_size = get_match_vector_size(params); - auto bf_size = get_input_bf_size(params); - size_t total_block_num = bf_size.second / (simd * vec_size); - size_t batch = get_input_bf_size(params).first; - size_t block_num = (total_block_num > 32) ? 32 : total_block_num; - - dispatchData.gws = {simd, block_num, batch}; - dispatchData.lws = {simd, block_num, 1}; - + if (params.group_sizes.back() <= 128) { + auto bf_size = get_input_bf_size(params); + dispatchData.gws = {bf_size.first, bf_size.second / params.group_sizes.back(), 1}; + dispatchData.lws = {1, 1, 1}; + } else { + auto vec_size = get_match_vector_size(params); + auto bf_size = get_input_bf_size(params); + size_t total_block_num = bf_size.second / (simd * vec_size); + size_t batch = get_input_bf_size(params).first; + size_t block_num = (total_block_num > 32) ? 32 : total_block_num; + + dispatchData.gws = {simd, block_num, batch}; + dispatchData.lws = {simd, block_num, 1}; + } return dispatchData; } @@ -147,8 +159,9 @@ bool DynamicQuantizeKernelOpt::Validate(const Params& params) const { const auto& dq_params = static_cast(params); - // Todo : Add proper exception here - if (((dq_params.inputs[0].X().v * dq_params.inputs[0].Y().v) % (simd * 2)) != 0) + + auto bf = get_input_bf_size(dq_params); + if (((bf.second) % (simd * 2)) != 0) return false; if (dq_params.inputs[0].GetPaddedVal() != 0 || dq_params.outputs[0].GetPaddedVal() != 0) @@ -157,8 +170,10 @@ bool DynamicQuantizeKernelOpt::Validate(const Params& params) const { if (dq_params.append_axis != -1) return false; - if (dq_params.group_sizes.back() != UINT64_MAX) - return false; + for (size_t i = 0; i < dq_params.group_sizes.size() - 1; i++) { + if (dq_params.group_sizes[i] != 1) + return false; + } // Allow only default scales order const auto& scales_output_order = dq_params.scales_output_order; @@ -168,7 +183,16 @@ bool DynamicQuantizeKernelOpt::Validate(const Params& params) const { return false; } 
+ if (dq_params.use_asymmetric_quantization) { + if (dq_params.combine_scales_and_zp) + return false; + if (dq_params.outputs[0].GetDType() != Datatype::UINT8) + return false; + } + return true; } + + } // namespace kernel_selector diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/dynamic_quantize/dynamic_quantize_kernel_ref.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/dynamic_quantize/dynamic_quantize_kernel_ref.cpp index bd3d0f87cdc931..f432fa6ac5756d 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/dynamic_quantize/dynamic_quantize_kernel_ref.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/dynamic_quantize/dynamic_quantize_kernel_ref.cpp @@ -11,6 +11,7 @@ ParamsKey DynamicQuantizeKernelRef::GetSupportedKey() const { ParamsKey k; k.EnableInputDataType(Datatype::F16); k.EnableOutputDataType(Datatype::INT8); + k.EnableOutputDataType(Datatype::UINT8); k.EnableInputLayout(DataLayout::bfyx); k.EnableOutputLayout(DataLayout::bfyx); k.EnableTensorOffset(); @@ -53,6 +54,7 @@ JitConstants DynamicQuantizeKernelRef::GetJitConstants(const dynamic_quantize_pa jit.AddConstant(MakeJitConstant("ASYMMETRIC_QUANTIZATION", params.use_asymmetric_quantization)); jit.AddConstant(MakeJitConstant("GROUP_SCALES_WITH_ZP", params.combine_scales_and_zp)); + jit.AddConstant(MakeJitConstant("UNSIGNED_OUTPUT", params.outputs[0].GetDType() == Datatype::UINT8 ? 1 : 0)); auto group_sizes = params.group_sizes; group_sizes.resize(std::min((size_t)4, group_sizes.size()), 1); @@ -71,12 +73,26 @@ CommonDispatchData DynamicQuantizeKernelRef::SetDefault(const dynamic_quantize_p OPENVINO_ASSERT(params.outputs[0].GetLayout() == DataLayout::bfyx, "It supports only 4d tensor"); auto group_sizes = params.group_sizes; - group_sizes.resize(std::min((size_t)4, group_sizes.size()), 1); + group_sizes.resize(std::max((size_t)4, group_sizes.size()), 1); auto batch_size = group_sizes[0] == 1 ? params.outputs[0].Batch().v : 1; auto feature_size = group_sizes[1] == 1 ? 
params.outputs[0].Feature().v : 1; auto y_size = group_sizes[2] == 1 ? params.outputs[0].Y().v : 1; auto x_size = group_sizes[3] == 1 ? params.outputs[0].X().v : 1; + OPENVINO_ASSERT( + (group_sizes[0] == 1 || group_sizes[0] == params.outputs[0].Batch().v || group_sizes[0] == UINT64_MAX) && + (group_sizes[1] == 1 || group_sizes[1] == params.outputs[0].Feature().v || group_sizes[1] == UINT64_MAX) && + (group_sizes[2] == 1 || group_sizes[2] == params.outputs[0].Y().v || group_sizes[2] == UINT64_MAX + || (params.outputs[0].Y().v % group_sizes[2] == 0 && params.outputs[0].X().v == 1)) && // Grouped quantization is only supported for 3d case + (group_sizes[3] == 1 || group_sizes[3] == params.outputs[0].X().v || group_sizes[3] == UINT64_MAX), + "[GPU] Unsupported dynamic quantization configuration: (", + group_sizes[0], ",", group_sizes[1], ",", group_sizes[2], ",", group_sizes[3], ") - (", + params.outputs[0].Batch().v, ",", params.outputs[0].Feature().v, ",", params.outputs[0].Y().v, ",", params.outputs[0].X().v, ")"); + + // Grouped quantization is supported only over y axis + if (params.group_sizes[2] > 1 && params.group_sizes[2] != UINT64_MAX) + y_size = params.outputs[0].Y().v / params.group_sizes[2]; + dispatchData.gws = {batch_size * feature_size, y_size, x_size}; dispatchData.lws = {1, 1, 1}; diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp index 46e8f7f1104f0d..68da7aea7b1fe6 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp @@ -124,16 +124,16 @@ static bool should_dynamic_quantize(const fully_connected_params& params, bool p if ((scale_group_size % simd == 0) && (input_f % dynamic_quantization_group_size == 0) && 
(params.is_shape_agnostic || (params.inputs[0].Batch().v > 1 && input_b > min_slm_size)) && params.inputs[0].GetDType() == Datatype::F16 && is_weight_dyn_quantizable(params)) { - if (print_log) { - GPU_DEBUG_TRACE_DETAIL << " Dynamic quantizing for FC : scale_group_size: " << scale_group_size << - ", Dyn-quan group size: " << dynamic_quantization_group_size << - ", Type(I:" << kernel_selector::toString(params.inputs[0].GetDType()) << - ", O:" << kernel_selector::toString(params.outputs[0].GetDType()) << - ", W:" << kernel_selector::toString(params.weights.GetDType()) << - "), Format(W:" << kernel_selector::toString(params.weights.GetLayout()) << - ") B: " << params.inputs[0].Batch().v << ", F: " << params.inputs[0].Feature().v << - ", Y: " << params.inputs[0].Y().v << std ::endl; - } + if (print_log) { + GPU_DEBUG_TRACE_DETAIL << " Dynamic quantizing for FC : scale_group_size: " << scale_group_size << + ", Dyn-quan group size: " << dynamic_quantization_group_size << + ", Type(I:" << kernel_selector::toString(params.inputs[0].GetDType()) << + ", O:" << kernel_selector::toString(params.outputs[0].GetDType()) << + ", W:" << kernel_selector::toString(params.weights.GetDType()) << + "), Format(W:" << kernel_selector::toString(params.weights.GetLayout()) << + ") B: " << params.inputs[0].Batch().v << ", F: " << params.inputs[0].Feature().v << + ", Y: " << params.inputs[0].Y().v << std ::endl; + } return true; } diff --git a/src/plugins/intel_gpu/src/plugin/ops/dynamic_quantize.cpp b/src/plugins/intel_gpu/src/plugin/ops/dynamic_quantize.cpp index 85f28cbd711678..4c11bdb21971e9 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/dynamic_quantize.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/dynamic_quantize.cpp @@ -18,7 +18,8 @@ static void CreateDynamicQuantizeOp(ProgramBuilder& p, const std::shared_ptrget_attrs()); + op->get_attrs(), + op->get_input_partial_shape(0).size()); prim.num_outputs = op->get_output_size(); diff --git 
a/src/plugins/intel_gpu/src/plugin/ops/fully_connected.cpp b/src/plugins/intel_gpu/src/plugin/ops/fully_connected.cpp index 7b0aa921ef3ad5..5f4fe19c5c4c08 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/fully_connected.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/fully_connected.cpp @@ -26,7 +26,7 @@ namespace ov { namespace intel_gpu { static void CreateFullyConnectedCompressedOp(ProgramBuilder& p, const std::shared_ptr& op) { - validate_inputs_count(op, {4, 5, 6}); + validate_inputs_count(op, {4, 5, 6, 7}); auto inputs = p.GetInputInfo(op); std::string primitive_name = layer_type_name_ID(op); auto supports_immad = p.get_engine().get_device_info().supports_immad; @@ -39,6 +39,7 @@ static void CreateFullyConnectedCompressedOp(ProgramBuilder& p, const std::share const size_t W_ZP_IDX = input_idx; std::string zp_name = op->get_input_size() > input_idx ? inputs[input_idx++].pid : ""; auto activation_scale_input = op->get_input_size() > input_idx ? inputs[input_idx++] : cldnn::input_info(); + auto activation_zero_point_input = op->get_input_size() > input_idx ? inputs[input_idx++] : cldnn::input_info(); float zp_value = 0.0f; bool has_scalar_zp = false; @@ -58,6 +59,7 @@ static void CreateFullyConnectedCompressedOp(ProgramBuilder& p, const std::share scale_name, has_scalar_zp && !supports_immad ? 
"" : zp_name, activation_scale_input, + activation_zero_point_input, cldnn::element_type_to_data_type(op->get_output_element_type(0)), op->get_input_partial_shape(0).size(), op->get_input_partial_shape(1).size()); diff --git a/src/plugins/intel_gpu/src/plugin/program_builder.cpp b/src/plugins/intel_gpu/src/plugin/program_builder.cpp index b623c86fabe02c..368e25abe2ddac 100644 --- a/src/plugins/intel_gpu/src/plugin/program_builder.cpp +++ b/src/plugins/intel_gpu/src/plugin/program_builder.cpp @@ -10,6 +10,7 @@ #include "openvino/op/lstm_sequence.hpp" #include "openvino/op/loop.hpp" #include "openvino/op/search_sorted.hpp" +#include "ov_ops/dynamic_quantize.hpp" #include "intel_gpu/plugin/common_utils.hpp" #include "intel_gpu/plugin/program_builder.hpp" @@ -357,6 +358,9 @@ bool ProgramBuilder::requires_new_shape_infer(const std::shared_ptr& o if (ov::is_type(op)) return true; + if (ov::is_type(op)) + return true; + if (ov::is_type(op)) { const auto body_function = std::static_pointer_cast(op)->get_function(); if (body_function->is_dynamic()) diff --git a/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.cpp b/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.cpp index c36212713ae717..61dc40e2713800 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations/dynamic_quantize_fully_connected.cpp @@ -21,24 +21,11 @@ DynamicQuantizeFullyConnected::DynamicQuantizeFullyConnected(uint64_t group_size : ov::pass::MatcherPass() { GPU_DEBUG_GET_INSTANCE(debug_config); using namespace ov::pass::pattern; - - // per-token quantization is supported - if (group_size != UINT64_MAX) { - GPU_DEBUG_TRACE << "Dynamic quantization is disabled " << group_size << std::endl; - return; - } - auto is_dynamic = [](const ov::Output& output) -> bool { - bool is_dynamic = output.get_node_shared_ptr()->get_output_partial_shape(0).is_dynamic(); 
- size_t num_inputs = output.get_node_shared_ptr()->get_input_size(); - for (size_t idx = 0; idx < num_inputs; idx++) { - is_dynamic |= output.get_node_shared_ptr()->get_input_partial_shape(idx).is_dynamic(); - } - return is_dynamic; - }; + using QuantizationType = ov::op::internal::DynamicQuantize::QuantizationType; auto data = any_input(); - auto fully_connected_compressed3 = wrap_type({data, any_input(), any_input(), any_input()}, is_dynamic); - auto fully_connected_compressed4 = wrap_type({data, any_input(), any_input(), any_input(), any_input()}, is_dynamic); + auto fully_connected_compressed3 = wrap_type({data, any_input(), any_input(), any_input()}); + auto fully_connected_compressed4 = wrap_type({data, any_input(), any_input(), any_input(), any_input()}); auto fully_connected_compressed = std::make_shared(OutputVector{fully_connected_compressed3, fully_connected_compressed4}); @@ -65,12 +52,20 @@ DynamicQuantizeFullyConnected::DynamicQuantizeFullyConnected(uint64_t group_size ov::op::internal::DynamicQuantize::Attributes config; config.quantization_dt = element::i8; - config.quantization_type = ov::op::internal::DynamicQuantize::QuantizationType::Symmetric; + config.quantization_type = QuantizationType::Symmetric; config.scale_dt = element::f16; config.group_sizes = shape_group_size; + if (debug_config->dynamic_quantize_asym) { + config.quantization_type = QuantizationType::Asymmetric; + config.quantization_dt = element::u8; + config.zp_dt = element::u8; // it supports u8 only now + } + auto dyn_quan = std::make_shared(m_data, config); auto optional_w_zp = m_fc->get_input_size() > 4 ? m_fc->get_input_node_shared_ptr(4) : std::make_shared(); + auto optional_a_zp = config.quantization_type == QuantizationType::Symmetric ? 
+ std::make_shared() : dyn_quan->output(2); auto output_type = m_fc->get_output_type(); if (output_type == ov::element::undefined) @@ -82,6 +77,7 @@ DynamicQuantizeFullyConnected::DynamicQuantizeFullyConnected(uint64_t group_size m_fc->get_input_node_shared_ptr(3), optional_w_zp, dyn_quan->output(1), + optional_a_zp, output_type); ov::replace_node(m_fc, new_fc); diff --git a/src/plugins/intel_gpu/src/plugin/transformations/op/fully_connected_compressed.cpp b/src/plugins/intel_gpu/src/plugin/transformations/op/fully_connected_compressed.cpp index 2e3819d7e850ee..dd5c555b1e6bc8 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations/op/fully_connected_compressed.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations/op/fully_connected_compressed.cpp @@ -14,11 +14,13 @@ FullyConnectedCompressed::FullyConnectedCompressed(const ov::Output& A, const ov::Output& w_decompression_scale, const ov::Output& w_decompression_zero_point, const ov::Output& a_decompression_scale, + const ov::Output& a_decompression_zero_point, const ov::element::Type output_type) : FullyConnected(A, B, bias, output_type) { set_argument(3, w_decompression_scale); set_argument(4, w_decompression_zero_point); set_argument(5, a_decompression_scale); + set_argument(6, a_decompression_zero_point); validate_and_infer_types(); } @@ -60,12 +62,13 @@ std::shared_ptr FullyConnectedCompressed::clone_with_new_inputs(const new_args.at(3), new_args.at(4), m_output_type); - else if (new_args.size() == 6) + else if (new_args.size() == 7) return std::make_shared(new_args.at(0), new_args.at(1), new_args.at(2), new_args.at(3), new_args.at(4), + new_args.at(5), new_args.at(6), m_output_type); else diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp index e47ccbb09a9c43..50eecf51b945b7 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp +++ 
b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp @@ -975,18 +975,34 @@ void TransformationsPipeline::apply(std::shared_ptr func) { // This Validate is needed for proper data type propagation after applying IncreasePositionIdsPrecision pass manager.register_pass(); - auto dynamic_quantization_group_size = config.get_property(ov::hint::dynamic_quantization_group_size); if (device_info.supports_immad) { + auto dynamic_quantization_group_size = config.get_property(ov::hint::dynamic_quantization_group_size); pass_config->set_callback([=](const_node_ptr& root) -> bool { if (root->get_input_node_shared_ptr(0)->get_element_type() == ov::element::Type_t::f32) { - GPU_DEBUG_TRACE << root->get_friendly_name() << " Dynamic quantization is turned off because input type is not supported" << std::endl; + GPU_DEBUG_TRACE << root->get_friendly_name() << " dyn_quan is turned off: input type is not supported" << std::endl; return true; } auto weight_shape = root->get_input_partial_shape(1); const size_t innermost_size = weight_shape[weight_shape.size() - 1].get_length(); if (innermost_size < 32) { - GPU_DEBUG_TRACE << "Dynamic quantization: shape is too small " << innermost_size << " / " << dynamic_quantization_group_size << std::endl; + GPU_DEBUG_TRACE << root->get_friendly_name() << " dyn_quan is turned off: shape is too small - " << innermost_size << std::endl; + return true; + } + + // AZP does not support 8bit weight + if (debug_config->dynamic_quantize_asym + && (root->get_input_element_type(1) == ov::element::i8 || root->get_input_element_type(1) == ov::element::u8)) { + GPU_DEBUG_TRACE << root->get_friendly_name() << " dyn_quan is turned off: asym quantization does not support 8bit weight" << std::endl; + return true; + } + + bool has_wzp = root->get_input_size() > 4; + if ((root->get_input_element_type(1) == ov::element::i8 || root->get_input_element_type(1) == ov::element::u8) + && has_wzp + && dynamic_quantization_group_size != UINT64_MAX) { + 
GPU_DEBUG_TRACE << root->get_friendly_name() << " dyn_quan is turned off:" + " asym 8bit weight does not support grouped quantization" << std::endl; return true; } return false; diff --git a/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp b/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp index 65ca31f16c720c..380480dccc68bf 100644 --- a/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp +++ b/src/plugins/intel_gpu/src/runtime/debug_configuration.cpp @@ -190,6 +190,7 @@ static void print_help_messages() { "separated by space. Support case-insensitive and regular expression. For example .*fully_connected.*"); message_list.emplace_back("OV_GPU_DynamicQuantizeGroupSize", "Specify a group size of dynamic quantization to enable " "dynamic quantization for Fully-connected primitive."); + message_list.emplace_back("OV_GPU_DynamicQuantizeAsym", "Enable asymmetric dynamic quantization when set as 1."); message_list.emplace_back("OV_GPU_DisableHorizontalFCFusion", "Disable horizontal fc fusion"); message_list.emplace_back("OV_GPU_DisableFCSwigluFusion", "Disable fc + swiglu fusion"); message_list.emplace_back("OV_GPU_DumpIteration", "Dump n-th execution of network, separated by space."); @@ -260,6 +261,7 @@ debug_configuration::debug_configuration() , use_usm_host(0) , use_kv_cache_compression(-1) , dynamic_quantize_group_size(DYNAMIC_QUANTIZE_GROUP_SIZE_NOT_SET) + , dynamic_quantize_asym(0) , disable_horizontal_fc_fusion(0) , disable_fc_swiglu_fusion(0) { #ifdef GPU_DEBUG_CONFIG @@ -315,6 +317,7 @@ debug_configuration::debug_configuration() get_gpu_debug_env_var("UseUsmHost", use_usm_host); get_gpu_debug_env_var("KVCacheCompression", use_kv_cache_compression); get_gpu_debug_env_var("DynamicQuantizeGroupSize", dynamic_quantize_group_size); + get_gpu_debug_env_var("DynamicQuantizeAsym", dynamic_quantize_asym); get_gpu_debug_env_var("DisableHorizontalFCFusion", disable_horizontal_fc_fusion); get_gpu_debug_env_var("DisableFCSwigluFusion", 
disable_fc_swiglu_fusion); std::string dump_iteration_str; diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp index 30a9477e1600dd..804ad81f2d3735 100644 --- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp @@ -57,7 +57,7 @@ void ExecutionConfig::set_default() { std::make_tuple(ov::internal::query_model_ratio, 1.0f), std::make_tuple(ov::cache_mode, ov::CacheMode::OPTIMIZE_SPEED), std::make_tuple(ov::cache_encryption_callbacks, EncryptionCallbacks{}), - std::make_tuple(ov::hint::dynamic_quantization_group_size, 32), + std::make_tuple(ov::hint::dynamic_quantization_group_size, 0), std::make_tuple(ov::hint::kv_cache_precision, ov::element::undefined), std::make_tuple(ov::intel_gpu::hint::enable_kernels_reuse, false), std::make_tuple(ov::weights_path, ""), @@ -254,6 +254,11 @@ void ExecutionConfig::apply_user_properties(const cldnn::device_info& info) { set_property(ov::hint::kv_cache_precision(ov::element::i8)); } + // Enable dynamic quantization by default for non-systolic platforms + if (!is_set_by_user(ov::hint::dynamic_quantization_group_size) && !info.supports_immad) { + set_property(ov::hint::dynamic_quantization_group_size(32)); + } + user_properties.clear(); } diff --git a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/matmul_weights_decompression.cpp b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/matmul_weights_decompression.cpp index 27c57aa072878d..b430884decb71a 100644 --- a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/matmul_weights_decompression.cpp +++ b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/matmul_weights_decompression.cpp @@ -58,7 +58,8 @@ using MatmulWeightsDecompressionParams = std::tuple; class MatmulWeightsDecompression : public testing::WithParamInterface, @@ -74,6 +75,7 @@ class MatmulWeightsDecompression : public 
testing::WithParamInterface(dyn_input_ps.size(), 1); - group_sizes.back() = UINT64_MAX; + group_sizes.back() = group_size; - auto input_data = rg.generate_random_1d(ov::shape_size(data_shape), -16.0f, 16.0f); + auto input_data = rg.generate_random_1d(ov::shape_size(data_shape), -16.0f, 20.0f); set_values(input_mem, input_data); auto in_layout_f32 = input_shape.is_dynamic() ? layout{ dyn_input_ps, data_types::f32, format::bfyx } @@ -53,17 +58,15 @@ class dynamic_quantization_gpu_tests: public ::testing::Test { dynamic_quantize::Attributes dq_config; dq_config.quantization_type = quantization_type; - dq_config.quantization_dt = data_types::i8; + dq_config.quantization_dt = quant_dt; dq_config.scale_dt = data_types::f16; - dq_config.zp_dt = data_types::undefined; + dq_config.zp_dt = zp_dt; dq_config.group_sizes = group_sizes; - dq_config.scales_zp_output_order = { 0, 1, 2, 3 }; - dq_config.output_storage_type = ov::op::internal::DynamicQuantize::OutputStorageType::Planar; + dq_config.scales_zp_output_order = { 0, 1, 2}; - if (quantization_type == QuantizationType::Asymmetric) { - dq_config.zp_dt = data_types::f16; - dq_config.output_storage_type = ov::op::internal::DynamicQuantize::OutputStorageType::InterleavedScalesZP; - } + if (data_shape.size() == 4) + dq_config.scales_zp_output_order.emplace_back(3); + dq_config.output_storage_type = storage_type; auto reorder_1 = reorder("reorder_1", input_info("input"), layout{ input_ps, data_types::f16, format::bfyx }); auto dyn_quan_prim = dynamic_quantize("dyn_quan_prim", input_info("reorder_1"), dq_config); @@ -156,6 +159,19 @@ TEST_F(dynamic_quantization_gpu_tests, simple_quantizing_single_batch) { this->test_dynamic_quantization(false, {-1, 1, 1, 4096}, {1, 1, 1, 4096}); } +TEST_F(dynamic_quantization_gpu_tests, simple_quantizing_asym_act) { + this->test_dynamic_quantization(false, {-1, 1, 1, 4096}, {1, 1, 1, 4096}, QuantizationType::Asymmetric, UINT64_MAX, + data_types::u8, data_types::u8, OutputStorageType::Planar); +} 
+ +TEST_F(dynamic_quantization_gpu_tests, simple_quantizing_small_size_grouped) { + this->test_dynamic_quantization(false, {1, 1, 4096}, {64, 1, 4096}, QuantizationType::Symmetric, 32); +} + +TEST_F(dynamic_quantization_gpu_tests, simple_quantizing_single_batch_grouped) { + this->test_dynamic_quantization(false, {-1, 1, 4096}, {1, 1, 4096}, QuantizationType::Symmetric, 32); +} + TEST_F(dynamic_quantization_gpu_tests, simple_quantizing_ref_only) { this->test_dynamic_quantization(false, {-1, 1, 1, 33}, {16, 1, 1, 33}); } @@ -177,33 +193,36 @@ TEST_F(dynamic_quantization_gpu_tests, simple_quantizing_unaligned_dynamic) { } TEST_F(dynamic_quantization_gpu_tests, simple_quantizing_kv_cache) { - this->test_dynamic_quantization(false, {-1, 8, -1, 96}, {1, 8, 1, 96}, QuantizationType::Symmetric, "dynamic_quantize_gpu_kv_cache"); + this->test_dynamic_quantization(false, {-1, 8, -1, 96}, {1, 8, 1, 96}, QuantizationType::Symmetric, UINT64_MAX, + data_types::i8, data_types::undefined, OutputStorageType::Planar, "dynamic_quantize_gpu_kv_cache"); } TEST_F(dynamic_quantization_gpu_tests, simple_quantizing_kv_cache_batched) { - this->test_dynamic_quantization(false, {-1, 4, -1, 64}, {1, 4, 35, 64}, QuantizationType::Symmetric, "dynamic_quantize_gpu_kv_cache"); + this->test_dynamic_quantization(false, {-1, 4, -1, 64}, {1, 4, 35, 64}, QuantizationType::Symmetric, UINT64_MAX, + data_types::i8, data_types::undefined, OutputStorageType::Planar, "dynamic_quantize_gpu_kv_cache"); } TEST_F(dynamic_quantization_gpu_tests, simple_quantizing_kv_cache_reordered) { - this->test_dynamic_quantization(false, {-1, -1, 8, 96}, {1, 1, 8, 96}, QuantizationType::Symmetric, "dynamic_quantize_gpu_kv_cache"); + this->test_dynamic_quantization(false, {-1, -1, 8, 96}, {1, 1, 8, 96}, QuantizationType::Symmetric, UINT64_MAX, + data_types::i8, data_types::undefined, OutputStorageType::Planar, "dynamic_quantize_gpu_kv_cache"); } TEST_F(dynamic_quantization_gpu_tests, 
simple_quantizing_kv_cache_batched_reordered) { - this->test_dynamic_quantization(false, {-1, -1, 4, 64}, {1, 35, 4, 64}, QuantizationType::Symmetric, "dynamic_quantize_gpu_kv_cache"); + this->test_dynamic_quantization(false, {-1, -1, 4, 64}, {1, 35, 4, 64}, QuantizationType::Symmetric, UINT64_MAX, + data_types::i8, data_types::undefined, OutputStorageType::Planar, "dynamic_quantize_gpu_kv_cache"); } TEST_F(dynamic_quantization_gpu_tests, simple_quantizing_kv_cache_asym) { - this->test_dynamic_quantization(false, {-1, 8, -1, 96}, {1, 8, 1, 96}, QuantizationType::Asymmetric, "dynamic_quantize_gpu_kv_cache"); + this->test_dynamic_quantization(false, {-1, 8, -1, 96}, {1, 8, 1, 96}, QuantizationType::Asymmetric, UINT64_MAX, + data_types::i8, data_types::f16, OutputStorageType::InterleavedScalesZP, "dynamic_quantize_gpu_kv_cache"); } TEST_F(dynamic_quantization_gpu_tests, simple_quantizing_kv_cache_batched_asym) { - this->test_dynamic_quantization(false, {-1, 4, -1, 64}, {1, 4, 35, 64}, QuantizationType::Asymmetric, "dynamic_quantize_gpu_kv_cache"); + this->test_dynamic_quantization(false, {-1, 4, -1, 64}, {1, 4, 35, 64}, QuantizationType::Asymmetric, UINT64_MAX, + data_types::i8, data_types::f16, OutputStorageType::InterleavedScalesZP, "dynamic_quantize_gpu_kv_cache"); } TEST_F(dynamic_quantization_gpu_tests, simple_quantizing_kv_cache_reordered_asym) { - this->test_dynamic_quantization(false, {-1, -1, 8, 96}, {1, 1, 8, 96}, QuantizationType::Asymmetric, "dynamic_quantize_gpu_kv_cache"); -} - -TEST_F(dynamic_quantization_gpu_tests, simple_quantizing_kv_cache_batched_reordered_asym) { - this->test_dynamic_quantization(false, {-1, -1, 4, 64}, {1, 35, 4, 64}, QuantizationType::Asymmetric, "dynamic_quantize_gpu_kv_cache"); + this->test_dynamic_quantization(false, {-1, -1, 8, 96}, {1, 1, 8, 96}, QuantizationType::Asymmetric, UINT64_MAX, + data_types::i8, data_types::f16, OutputStorageType::InterleavedScalesZP, "dynamic_quantize_gpu_kv_cache"); } diff --git 
a/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp index 6bf44a31add0f4..f59dc5c42cffc1 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp @@ -1555,7 +1555,7 @@ class fully_connected_gpu_tests: public ::testing::Test { auto config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); config.set_property(ov::intel_gpu::optimize_data(true)); - config.set_property(ov::hint::dynamic_quantization_group_size(32)); + config.set_user_property(ov::hint::dynamic_quantization_group_size(32)); network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); @@ -1643,7 +1643,7 @@ class fully_connected_gpu_tests: public ::testing::Test { config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); ov::intel_gpu::ImplementationDesc fc_impl_desc = { format::bfyx, "fully_connected_gpu_bfyx_ref", impl_types::ocl }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"fc_prim", fc_impl_desc} })); - config.set_property(ov::hint::dynamic_quantization_group_size(0)); + config.set_user_property(ov::hint::dynamic_quantization_group_size(0)); network network(engine, topology, config); network.set_input_data("input", input_mem); @@ -1669,7 +1669,7 @@ class fully_connected_gpu_tests: public ::testing::Test { auto config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); config.set_property(ov::intel_gpu::optimize_data(true)); - config.set_property(ov::hint::dynamic_quantization_group_size(0)); + config.set_user_property(ov::hint::dynamic_quantization_group_size(0)); network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); @@ -1753,7 +1753,7 @@ class 
fully_connected_gpu_tests: public ::testing::Test { config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); ov::intel_gpu::ImplementationDesc fc_impl_desc = { format::bfyx, "fully_connected_gpu_bfyx_ref", impl_types::ocl }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"fc_prim", fc_impl_desc} })); - config.set_property(ov::hint::dynamic_quantization_group_size(0)); + config.set_user_property(ov::hint::dynamic_quantization_group_size(0)); network network(engine, topology, config); network.set_input_data("input", input_mem); @@ -1780,9 +1780,9 @@ class fully_connected_gpu_tests: public ::testing::Test { config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); config.set_property(ov::intel_gpu::optimize_data(true)); if (is_dyn_quan) { - config.set_property(ov::hint::dynamic_quantization_group_size(32)); + config.set_user_property(ov::hint::dynamic_quantization_group_size(32)); } else { - config.set_property(ov::hint::dynamic_quantization_group_size(0)); + config.set_user_property(ov::hint::dynamic_quantization_group_size(0)); } network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); @@ -1923,7 +1923,7 @@ class fully_connected_gpu_tests: public ::testing::Test { config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); ov::intel_gpu::ImplementationDesc fc_impl = { in_layout.format, "", impl_types::ocl }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "fc_prim1", fc_impl }, { "fc_prim2", fc_impl } })); - config.set_property(ov::hint::dynamic_quantization_group_size(0)); + config.set_user_property(ov::hint::dynamic_quantization_group_size(0)); network network(engine, topology, config); network.set_input_data("input", input_mem); @@ -1952,7 +1952,7 @@ class fully_connected_gpu_tests: public ::testing::Test { auto config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); 
config.set_property(ov::intel_gpu::optimize_data(true)); - config.set_property(ov::hint::dynamic_quantization_group_size(0)); + config.set_user_property(ov::hint::dynamic_quantization_group_size(0)); network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test); @@ -2905,7 +2905,7 @@ class fully_connected_gpu_tests: public ::testing::Test { config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); ov::intel_gpu::ImplementationDesc fc_impl_desc = { format::bfyx, "fully_connected_gpu_bfyx_ref", impl_types::ocl }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"fc_prim", fc_impl_desc} })); - config.set_property(ov::hint::dynamic_quantization_group_size(0)); + config.set_user_property(ov::hint::dynamic_quantization_group_size(0)); network network(engine, topo, config); network.set_input_data("input", input_mem); @@ -2931,7 +2931,7 @@ class fully_connected_gpu_tests: public ::testing::Test { auto config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); config.set_property(ov::intel_gpu::optimize_data(true)); - config.set_property(ov::hint::dynamic_quantization_group_size(quantize_group_size)); + config.set_user_property(ov::hint::dynamic_quantization_group_size(quantize_group_size)); network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), false); @@ -3031,7 +3031,7 @@ class fully_connected_gpu_tests: public ::testing::Test { config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); ov::intel_gpu::ImplementationDesc fc_impl_desc = { format::bfyx, "fully_connected_gpu_bf_tiled", impl_types::ocl }; config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"fc_prim", fc_impl_desc} })); - config.set_property(ov::hint::dynamic_quantization_group_size(0)); + config.set_user_property(ov::hint::dynamic_quantization_group_size(0)); network network(engine, topo, config); 
network.set_input_data("input", input_mem); @@ -3057,7 +3057,7 @@ class fully_connected_gpu_tests: public ::testing::Test { auto config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); config.set_property(ov::intel_gpu::optimize_data(true)); - config.set_property(ov::hint::dynamic_quantization_group_size(quantize_group_size)); + config.set_user_property(ov::hint::dynamic_quantization_group_size(quantize_group_size)); network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), false); diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/hash_key_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/hash_key_gpu_test.cpp index fb30222998008b..3384fb1ed514f6 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/hash_key_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/hash_key_gpu_test.cpp @@ -71,11 +71,11 @@ class check_hash_value: public ::testing::Test { const auto primitive_hash = primitve->hash(); const auto params_hash = primitve->type->get_fake_aligned_params(*prim_inst->get_impl_params()).hash(); if (!engine.get_device_info().supports_immad) { - ASSERT_EQ(primitive_hash, 8017451717095756666UL); - ASSERT_EQ(params_hash, 8889154389021912103UL); + ASSERT_EQ(primitive_hash, 9510988594087947885UL); + ASSERT_EQ(params_hash, 7833603199176871790UL); } else { - ASSERT_EQ(primitive_hash, 8017451717095756666UL); - ASSERT_EQ(params_hash, 10847775446937354749UL); + ASSERT_EQ(primitive_hash, 9510988594087947885UL); + ASSERT_EQ(params_hash, 16259702189938020305UL); } } From a3f4edb3d8f12769c7ae7d39206730502fae711f Mon Sep 17 00:00:00 2001 From: Taylor Yeonbok Lee Date: Mon, 9 Dec 2024 14:47:37 +0900 Subject: [PATCH 20/43] [GPU] Fix crash on swiglu fused case (due to outer_ofm == 1) (#27972) ### Details: - fixed crash happens in minicpm-1b-sft int4 model ### Tickets: - *ticket-id* --- .../fully_connected_kernel_bf_tiled.cpp | 14 ++++++++++---- 1 file changed, 10 insertions(+), 
4 deletions(-) diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp index 68da7aea7b1fe6..d0f881adcd88b1 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp @@ -435,10 +435,14 @@ FullyConnected_bf_tiled::GetAutoTuneParams(const fully_connected_params& params, return selector.Default(tune_params(1, 1, 4, 4, 1, 1, 1, EXE_MODE_DEFAULT)); } } else if (is_weight_small_kn(params, output_f)) { - if (params.weights.GetLayout() == WeightsLayout::os_is_yx_osv32_isv2) - return selector.Default(tune_params(1, 1, 4, 2, 1, 1, 1, EXE_MODE_DEFAULT)); - else + if (params.weights.GetLayout() == WeightsLayout::os_is_yx_osv32_isv2) { + if (swiglu_fused) + return selector.Default(tune_params(1, 1, 4, 2, 2, 1, 1, EXE_MODE_DEFAULT)); + else + return selector.Default(tune_params(1, 1, 4, 2, 1, 1, 1, EXE_MODE_DEFAULT)); + } else { return selector.Default(tune_params(1, 2, 4, 2, 1, 1, 1, EXE_MODE_DEFAULT)); + } } else { if (params.weights.GetLayout() == WeightsLayout::os_iyx_osv16) { return selector.Default(tune_params(1, 1, 4, 4, 1, 1, 1, EXE_MODE_DEFAULT)); @@ -865,7 +869,9 @@ KernelsData FullyConnected_bf_tiled::GetTunedKernelsDataByIndex(const Params &pa auto output_f = get_output_aligned_bf_size(fc_params, false).second; WeightsLayout weights_layout = WeightsLayout::os_iyx_osv16; - if (!is_swiglu_fused(fc_params) && fc_params.compressed && fc_params.inputs[0].GetDType() == Datatype::F16 + if (is_swiglu_fused(fc_params)) { + weights_layout = WeightsLayout::os_is_yx_osv32_isv2; + } else if (fc_params.compressed && fc_params.inputs[0].GetDType() == Datatype::F16 && (fc_params.weights.GetLayout() == WeightsLayout::oiyx || fc_params.weights.GetLayout() == 
WeightsLayout::os_is_yx_osv64_isv2) && (fc_params.weights.GetDType() == WeightsType::INT4 || fc_params.weights.GetDType() == WeightsType::UINT4) && is_weight_horizontal(fc_params, output_f)) { From 27138a8af6b9cd8e79b394ab5b56b4c61fd7deba Mon Sep 17 00:00:00 2001 From: Sebastian Golebiewski Date: Mon, 9 Dec 2024 07:40:37 +0100 Subject: [PATCH 21/43] [DOCS] saveModelSync method in Node.js addon (#27960) Porting: #27958 Signed-off-by: sgolebiewski-intel --- docs/sphinx_setup/api/nodejs_api/addon.rst | 37 ++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/docs/sphinx_setup/api/nodejs_api/addon.rst b/docs/sphinx_setup/api/nodejs_api/addon.rst index f6ee4ab7b15836..7c42824bcd88a3 100644 --- a/docs/sphinx_setup/api/nodejs_api/addon.rst +++ b/docs/sphinx_setup/api/nodejs_api/addon.rst @@ -49,6 +49,7 @@ The **openvino-node** package exports ``addon`` which contains the following pro resizeAlgorithm: typeof resizeAlgorithm; PrePostProcessor: PrePostProcessorConstructor; }; + saveModelSync(model: Model, path: string, compressToFp16?: boolean): void; element: typeof element; } @@ -142,3 +143,39 @@ Properties - **Defined in:** `addon.ts:674 `__ + +.. rubric:: saveModelSync + +* + + .. code-block:: ts + + saveModelSync(model: Model, path: string, compressToFp16?: boolean): void; + + + This method saves a model to IR (xml and bin files), applying all + necessary transformations that are usually added during model conversion. + Particularly, weights are compressed to FP16 by default, and debug information + in model nodes is cleaned up. + + * **Parameters:** + + - model: :doc:`Model ` + + A model which will be converted to IR and saved. + + - path: string + + A path for saving the model. + + - ``Optional`` + + - compressToFp16: boolean + + Compression of weights to FP16 floating point precision. The default value is ``true``.
+ + * **Returns:** void + + * **Defined in:** + `addon.ts:692 `__ + From 15a9b617fcfd591a14daf632cdeecbe99255bd64 Mon Sep 17 00:00:00 2001 From: Roman Kazantsev Date: Mon, 9 Dec 2024 12:33:16 +0400 Subject: [PATCH 22/43] [TF FE] Run If tests on all platforms (#27966) **Details:** Run If tests on all platforms **Ticket:** TBD --------- Signed-off-by: Kazantsev, Roman --- .../tensorflow_tests/test_tf_If.py | 44 ++++++++----------- 1 file changed, 18 insertions(+), 26 deletions(-) diff --git a/tests/layer_tests/tensorflow_tests/test_tf_If.py b/tests/layer_tests/tensorflow_tests/test_tf_If.py index 67686ef53a5750..21dee5aa28616d 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_If.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_If.py @@ -1,13 +1,13 @@ # Copyright (C) 2018-2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -import platform - import numpy as np import pytest import tensorflow as tf from common.tf_layer_test_class import CommonTFLayerTest +rng = np.random.default_rng(32345) + class TestIfFloat(CommonTFLayerTest): def _prepare_input(self, inputs_info): @@ -18,9 +18,9 @@ def _prepare_input(self, inputs_info): x_shape = inputs_info['x:0'] y_shape = inputs_info['y:0'] inputs_data = {} - inputs_data['cond:0'] = np.random.randint(0, 2, cond_shape).astype(bool) - inputs_data['x:0'] = np.random.randint(1, 10, x_shape).astype(np.float32) - inputs_data['y:0'] = np.random.randint(-50, 50, y_shape).astype(np.float32) + inputs_data['cond:0'] = rng.integers(0, 2, cond_shape).astype(bool) + inputs_data['x:0'] = rng.integers(1, 10, x_shape).astype(np.float32) + inputs_data['y:0'] = rng.integers(-50, 50, y_shape).astype(np.float32) return inputs_data def create_if_net(self, x_shape, y_shape, lower_control_flow): @@ -69,12 +69,10 @@ def else_branch(): @pytest.mark.parametrize("params", test_data_basic) @pytest.mark.precommit @pytest.mark.nightly - @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', - 
reason='Ticket - 122716') def test_if_basic(self, params, ie_device, precision, ir_version, temp_dir, use_legacy_frontend): if ie_device == 'GPU': - pytest.xfail('104855') + pytest.xfail('104855: If operation is not supported by GPU') self._test(*self.create_if_net(**params), ie_device, precision, ir_version, temp_dir=temp_dir, use_legacy_frontend=use_legacy_frontend) @@ -89,9 +87,9 @@ def _prepare_input(self, inputs_info): ind_shape = inputs_info['ind:0'] y_shape = inputs_info['y:0'] inputs_data = {} - inputs_data['cond:0'] = np.random.randint(0, 2, cond_shape).astype(bool) - inputs_data['ind:0'] = np.random.randint(1, 10, ind_shape).astype(np.int32) - inputs_data['y:0'] = np.random.randint(-50, 50, y_shape).astype(np.float32) + inputs_data['cond:0'] = rng.integers(0, 2, cond_shape).astype(bool) + inputs_data['ind:0'] = rng.integers(1, 10, ind_shape).astype(np.int32) + inputs_data['y:0'] = rng.integers(-50, 50, y_shape).astype(np.float32) return inputs_data def create_if_net(self, ind_shape, y_shape, lower_control_flow): @@ -141,12 +139,10 @@ def else_branch(): @pytest.mark.parametrize("params", test_data_basic) @pytest.mark.precommit @pytest.mark.nightly - @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', - reason='Ticket - 122716') def test_if_basic(self, params, ie_device, precision, ir_version, temp_dir, use_legacy_frontend): if ie_device == 'GPU': - pytest.xfail('104855') + pytest.xfail('104855: If operation is not supported by GPU') self._test(*self.create_if_net(**params), ie_device, precision, ir_version, temp_dir=temp_dir, use_legacy_frontend=use_legacy_frontend) @@ -161,9 +157,9 @@ def _prepare_input(self, inputs_info): y_shape = inputs_info['y:0'] z_shape = inputs_info['z:0'] inputs_data = {} - inputs_data['x:0'] = np.random.randint(0, 6, x_shape).astype(np.int32) - inputs_data['y:0'] = np.random.randint(1, 10, y_shape).astype(np.float32) - inputs_data['z:0'] = np.random.randint(-50, 50, 
z_shape).astype(np.float32) + inputs_data['x:0'] = rng.integers(0, 6, x_shape).astype(np.int32) + inputs_data['y:0'] = rng.integers(1, 10, y_shape).astype(np.float32) + inputs_data['z:0'] = rng.integers(-50, 50, z_shape).astype(np.float32) return inputs_data def create_if_net(self, y_shape, z_shape, lower_control_flow): @@ -221,12 +217,10 @@ def else_branch(): @pytest.mark.parametrize("params", test_data_basic) @pytest.mark.precommit @pytest.mark.nightly - @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', - reason='Ticket - 122716') def test_if_basic(self, params, ie_device, precision, ir_version, temp_dir, use_legacy_frontend): if ie_device == 'GPU': - pytest.xfail('104855') + pytest.xfail('104855: If operation is not supported by GPU') self._test(*self.create_if_net(**params), ie_device, precision, ir_version, temp_dir=temp_dir, use_legacy_frontend=use_legacy_frontend) @@ -241,9 +235,9 @@ def _prepare_input(self, inputs_info): x_shape = inputs_info['x:0'] y_shape = inputs_info['y:0'] inputs_data = {} - inputs_data['cond:0'] = np.random.randint(0, 2, cond_shape).astype(bool) - inputs_data['x:0'] = np.random.randint(1, 10, x_shape).astype(np.float32) - inputs_data['y:0'] = np.random.randint(-50, 50, y_shape).astype(np.float32) + inputs_data['cond:0'] = rng.integers(0, 2, cond_shape).astype(bool) + inputs_data['x:0'] = rng.integers(1, 10, x_shape).astype(np.float32) + inputs_data['y:0'] = rng.integers(-50, 50, y_shape).astype(np.float32) return inputs_data def create_sequential_ifs_net(self, x_shape, y_shape, lower_control_flow): @@ -313,12 +307,10 @@ def else_branch(): @pytest.mark.parametrize("params", test_data_basic) @pytest.mark.precommit @pytest.mark.nightly - @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', - reason='Ticket - 122716') def test_if_basic(self, params, ie_device, precision, ir_version, temp_dir, use_legacy_frontend): if ie_device == 'GPU': - 
pytest.xfail('104855') + pytest.xfail('104855: If operation is not supported by GPU') self._test(*self.create_sequential_ifs_net(**params), ie_device, precision, ir_version, temp_dir=temp_dir, use_legacy_frontend=use_legacy_frontend) From 408a5e065200b1fcb41200f9361094fa1c7df5d7 Mon Sep 17 00:00:00 2001 From: Mingyu Kim Date: Mon, 9 Dec 2024 17:49:45 +0900 Subject: [PATCH 23/43] [GPU] update onednn to latest 3.7-pc (#27811) --- src/plugins/intel_gpu/thirdparty/onednn_gpu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/intel_gpu/thirdparty/onednn_gpu b/src/plugins/intel_gpu/thirdparty/onednn_gpu index 0f269193c74663..36e090a367a431 160000 --- a/src/plugins/intel_gpu/thirdparty/onednn_gpu +++ b/src/plugins/intel_gpu/thirdparty/onednn_gpu @@ -1 +1 @@ -Subproject commit 0f269193c7466313888d3338209d0d06a22cc6fa +Subproject commit 36e090a367a4312a1caa2db9e95fb94d17d7573b From de949b4a2b59faf1bf701528dd37b7ecd076d4e0 Mon Sep 17 00:00:00 2001 From: Yuan Hu Date: Mon, 9 Dec 2024 17:08:40 +0800 Subject: [PATCH 24/43] [CPU] enable brdgmm kernel in CPU plugin (#27589) ### Details: - *replace impl string brdgmm with brgconv* - *add test case* - *remove skip CVS-56143 config, CVS-56143 is already closed* - *remove skip CVS-53578 config, CVS-53578 is already closed* - *use new ticket CVS-157596 to track leftover test case* ### Tickets: - *CVS-156792* --------- Signed-off-by: HU Yuan2 --- src/plugins/intel_cpu/src/nodes/conv.cpp | 13 +- .../intel_cpu/src/onednn/iml_type_mapper.cpp | 3 + .../intel_cpu/src/onednn/iml_type_mapper.h | 3 + .../single_layer_tests/group_convolution.cpp | 126 +++++++++++++++++- .../skip_tests_config.cpp | 10 +- 5 files changed, 140 insertions(+), 15 deletions(-) diff --git a/src/plugins/intel_cpu/src/nodes/conv.cpp b/src/plugins/intel_cpu/src/nodes/conv.cpp index 7cf7698e989343..53d53d093cfabf 100644 --- a/src/plugins/intel_cpu/src/nodes/conv.cpp +++ b/src/plugins/intel_cpu/src/nodes/conv.cpp @@ -343,6 +343,7 @@ const 
std::vector& Convolution::getDefaultImplPriority() { impl_desc_type::winograd_acl, impl_desc_type::gemm_acl, impl_desc_type::acl, + impl_desc_type::brgconv_avx512_dw, impl_desc_type::brgconv_avx512_amx_1x1, impl_desc_type::brgconv_avx512_amx, impl_desc_type::jit_avx512_amx_dw, @@ -353,6 +354,7 @@ const std::vector& Convolution::getDefaultImplPriority() { impl_desc_type::jit_avx512_dw, impl_desc_type::jit_avx512_1x1, impl_desc_type::jit_avx512, + impl_desc_type::brgconv_avx2_dw, impl_desc_type::brgconv_avx2_1x1, impl_desc_type::brgconv_avx2, impl_desc_type::jit_uni_dw, @@ -815,7 +817,11 @@ void Convolution::initSupportedPrimitiveDescriptors() { #endif for (size_t dIdx = 0; dIdx < descs.size(); dIdx++) { auto& desc = descs[dIdx]; - auto first_desc = dnnl::primitive_desc(DnnlExtensionUtils::clone_primitive_desc(desc.get())); + auto primitive_desc = desc.get(true); //true mean allow empty + if (primitive_desc == nullptr) { + continue; + } + auto first_desc = dnnl::primitive_desc(DnnlExtensionUtils::clone_primitive_desc(primitive_desc)); auto add_supported_desc = [&](dnnl::primitive_desc& desc) { addSupportedPrimitiveDescriptor(desc); @@ -823,7 +829,7 @@ void Convolution::initSupportedPrimitiveDescriptors() { }; const bool first_match = customImplPriorities.empty(); - DEBUG_LOG("#", getName(), + DEBUG_LOG("#", getName(), ",descIndex:", dIdx + 1, "/", descs.size(), ", itpd.impl_info_str(): ", desc.impl_info_str(), ", parsed imp_type: ", impl_type_to_string(parse_impl_name(desc.impl_info_str())), ", first_match: ", first_match ? 
"true" : "false"); @@ -944,8 +950,7 @@ void Convolution::createDescriptor(const std::vector& inputDesc, const auto desc = createDescriptorInternal(getEngine(), inDnnlDesc, weightDnnlDesc, biasDnnlDesc, outDnnlDesc, withBiases, stride, dilation, paddingL, paddingR, alg, attr); - if (desc) - descs.emplace_back(desc); + descs.emplace_back(desc); } } } diff --git a/src/plugins/intel_cpu/src/onednn/iml_type_mapper.cpp b/src/plugins/intel_cpu/src/onednn/iml_type_mapper.cpp index d7a1e5979ddad9..5c57a94f69f67d 100644 --- a/src/plugins/intel_cpu/src/onednn/iml_type_mapper.cpp +++ b/src/plugins/intel_cpu/src/onednn/iml_type_mapper.cpp @@ -17,6 +17,7 @@ impl_desc_type parse_impl_name(std::string impl_desc_name) { if (pos != std::string::npos) impl_desc_name.replace(pos, std::string(#_wrd).length(), #_sub); } // Replace the ONEDNN pd name with OV definition. REPLACE_WORD(brg_conv, brgconv); + REPLACE_WORD(brdgmm, brgconv); REPLACE_WORD(avx10_1_512, avx512); REPLACE_WORD(brg_matmul, brgemm); @@ -119,6 +120,8 @@ const char* impl_type_to_string(impl_desc_type type) { CASE(brgconv_sse42_1x1); CASE(brgconv_uni_1x1); CASE(brgconv_avx512_amx_1x1); + CASE(brgconv_avx512_dw); + CASE(brgconv_avx2_dw); CASE(brgemm_avx512); CASE(brgemm_avx2); CASE(brgemm_avx); diff --git a/src/plugins/intel_cpu/src/onednn/iml_type_mapper.h b/src/plugins/intel_cpu/src/onednn/iml_type_mapper.h index 3fd79716c7cd72..45a71bdb88dd33 100644 --- a/src/plugins/intel_cpu/src/onednn/iml_type_mapper.h +++ b/src/plugins/intel_cpu/src/onednn/iml_type_mapper.h @@ -98,6 +98,9 @@ enum impl_desc_type : int64_t { brgconv_uni_1x1 = brgconv | uni | _1x1, brgconv_avx512_amx_1x1 = brgconv | avx512 | amx | _1x1, + brgconv_avx2_dw = brgconv_avx2 | _dw, + brgconv_avx512_dw = brgconv_avx512 | _dw, + brgemm_avx512 = brgemm | avx512, brgemm_avx2 = brgemm | avx2, brgemm_avx = brgemm | avx, diff --git a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/group_convolution.cpp 
b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/group_convolution.cpp index 47d7d3072b7337..f3f5b1f2e07975 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/group_convolution.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/group_convolution.cpp @@ -5,6 +5,7 @@ #include "shared_test_classes/single_op/group_convolution.hpp" #include "common_test_utils/node_builders/group_convolution.hpp" +#include "openvino/runtime/system_conf.hpp" #include "shared_test_classes/base/ov_subgraph.hpp" #include "utils/convolution_params.hpp" #include "utils/cpu_test_utils.hpp" @@ -176,14 +177,15 @@ class GroupConvolutionLayerCPUTest : public testing::WithParamInterface()) { - selectedType += "_bf16"; - rel_threshold = 1e-2f; - } else { - selectedType = makeSelectedTypeStr(selectedType, netType); + const auto& it = configuration.find(ov::hint::inference_precision.name()); + if (it != configuration.end()) { + if (ov::element::bf16 == it->second.as()) { + rel_threshold = 1e-2f; + } else if (ov::element::f16 == it->second.as()) { + rel_threshold = 0.00125f; + } } + selectedType = makeSelectedTypeStr(selectedType, deduce_expected_precision(netType, configuration)); // according to range propagation feature, resolution of generated inputs data for parameters moved from 32 to 32768 // 'real' part of input data was changed and some fails became visible for cases with Elu and FakeQuantize, so let's setup abs_threshold @@ -289,6 +291,7 @@ std::vector filterCPUInfoForDeviceSupportBF16(std::vector fusingParamsSetBF16{emptyFusingSpec, // sum fusingSum}; +const std::vector fusingParamsSet_Brdgmm{emptyFusingSpec, + // eltwise + fusingRelu, + fusingPRelu1D, + // depthwise + fusingReluScaleShift, + // fake quantize + fusingFakeQuantizePerTensorRelu, + fusingFakeQuantizePerChannelRelu + // sum + // comment out sum due to MFDNN-12841 + //fusingSumEluFQ, + //fusingSum + }; + +const std::vector 
fusingParamsSetBF16_Brdgmm{emptyFusingSpec, + // eltwise + fusingRelu, + // depthwise + fusingReluScaleShift + // sum + // comment out sum due to MFDNN-12841 + //fusingSum + }; + +const std::vector fusingParamsSetFP16_Brdgmm = fusingParamsSetBF16_Brdgmm; + /* ============= GroupConvolution params (planar layout) ============= */ const std::vector numOutChannels_Gemm = {6}; const std::vector numGroups_Gemm = {2, 3}; @@ -1299,6 +1329,38 @@ INSTANTIATE_TEST_SUITE_P(smoke_GroupConv_2D_DW_FP32, ::testing::Values(empty_plugin_config)), GroupConvolutionLayerCPUTest::getTestCaseName); +const std::vector> dilations2d_Brdgmm = {{1, 1}}; +const auto groupConvParams_ExplicitPadding_DW_2D_Brdgmm = ::testing::Combine(::testing::ValuesIn(kernels2d), + ::testing::ValuesIn(strides2d), + ::testing::ValuesIn(padBegins2d), + ::testing::ValuesIn(padEnds2d), + ::testing::ValuesIn(dilations2d_Brdgmm), + ::testing::ValuesIn(numOutChannels_DW), + ::testing::ValuesIn(numGroups_DW), + ::testing::Values(ov::op::PadType::EXPLICIT)); +const auto BrdgmmCPUSpec = []()-> std::vector { + std::string isaStr; + if (ov::with_cpu_x86_avx512f()) { + isaStr = "avx512"; + } else { + isaStr = "avx2"; + } + return {CPUSpecificParams{{}, {}, {}, "brgconv_" + isaStr + "_dw"}}; +}; + +INSTANTIATE_TEST_SUITE_P(smoke_GroupConv_2D_DW_FP32_Brdgmm, + GroupConvolutionLayerCPUTest, + ::testing::Combine(::testing::Combine(groupConvParams_ExplicitPadding_DW_2D_Brdgmm, + ::testing::Values(ElementType::f32), + ::testing::Values(ElementType::undefined), + ::testing::Values(ElementType::undefined), + ::testing::ValuesIn(inputShapes2dDW), + ::testing::Values(ov::test::utils::DEVICE_CPU)), + ::testing::ValuesIn(filterCPUInfoForDevice(BrdgmmCPUSpec())), + ::testing::ValuesIn(fusingParamsSet_Brdgmm), + ::testing::Values(empty_plugin_config)), + GroupConvolutionLayerCPUTest::getTestCaseName); + INSTANTIATE_TEST_SUITE_P(smoke_GroupConv_2D_DW_BF16, GroupConvolutionLayerCPUTest, 
::testing::Combine(::testing::Combine(groupConvParams_ExplicitPadding_DW_2D, @@ -1313,6 +1375,32 @@ INSTANTIATE_TEST_SUITE_P(smoke_GroupConv_2D_DW_BF16, ::testing::Values(cpu_bf16_plugin_config)), GroupConvolutionLayerCPUTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_GroupConv_2D_DW_BF16_Brdgmm, + GroupConvolutionLayerCPUTest, + ::testing::Combine(::testing::Combine(groupConvParams_ExplicitPadding_DW_2D_Brdgmm, + ::testing::Values(ElementType::f32), + ::testing::Values(ElementType::undefined), + ::testing::Values(ElementType::undefined), + ::testing::ValuesIn(inputShapes2dDW), + ::testing::Values(ov::test::utils::DEVICE_CPU)), + ::testing::ValuesIn(filterCPUInfoForDeviceSupportBF16(BrdgmmCPUSpec())), + ::testing::ValuesIn(fusingParamsSetBF16_Brdgmm), + ::testing::Values(cpu_bf16_plugin_config)), + GroupConvolutionLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_GroupConv_2D_DW_FP16_Brdgmm, + GroupConvolutionLayerCPUTest, + ::testing::Combine(::testing::Combine(groupConvParams_ExplicitPadding_DW_2D_Brdgmm, + ::testing::Values(ElementType::f32), + ::testing::Values(ElementType::undefined), + ::testing::Values(ElementType::undefined), + ::testing::ValuesIn(inputShapes2dDW), + ::testing::Values(ov::test::utils::DEVICE_CPU)), + ::testing::ValuesIn(filterCPUInfoForDevice(BrdgmmCPUSpec())), + ::testing::ValuesIn(fusingParamsSetFP16_Brdgmm), + ::testing::Values(cpu_f16_plugin_config)), + GroupConvolutionLayerCPUTest::getTestCaseName); + /* ============= GroupConvolution (DW 3D) ============= */ const auto groupConvParams_ExplicitPadding_DW_3D = ::testing::Combine(::testing::ValuesIn(kernels3d), ::testing::ValuesIn(strides3d), @@ -1349,6 +1437,30 @@ INSTANTIATE_TEST_SUITE_P(smoke_GroupConv_3D_DW_FP32, ::testing::ValuesIn(fusingParamsSet), ::testing::Values(empty_plugin_config)), GroupConvolutionLayerCPUTest::getTestCaseName); + +const std::vector> dilations3d_Brdgmm = {{1, 1, 1}}; +const auto groupConvParams_ExplicitPadding_DW_3D_Brdgmm = 
::testing::Combine(::testing::ValuesIn(kernels3d), + ::testing::ValuesIn(strides3d), + ::testing::ValuesIn(padBegins3d), + ::testing::ValuesIn(padEnds3d), + ::testing::ValuesIn(dilations3d_Brdgmm), + ::testing::ValuesIn(numOutChannels_DW), + ::testing::ValuesIn(numGroups_DW), + ::testing::Values(ov::op::PadType::EXPLICIT)); + +INSTANTIATE_TEST_SUITE_P(smoke_GroupConv_3D_DW_FP32_Brdgmm, + GroupConvolutionLayerCPUTest, + ::testing::Combine(::testing::Combine(groupConvParams_ExplicitPadding_DW_3D_Brdgmm, + ::testing::Values(ElementType::f32), + ::testing::Values(ElementType::undefined), + ::testing::Values(ElementType::undefined), + ::testing::ValuesIn(inputShapes3dDW), + ::testing::Values(ov::test::utils::DEVICE_CPU)), + ::testing::ValuesIn(filterCPUInfoForDevice(BrdgmmCPUSpec())), + ::testing::ValuesIn(fusingParamsSet_Brdgmm), + ::testing::Values(empty_plugin_config)), + GroupConvolutionLayerCPUTest::getTestCaseName); + /* ========= */ /* ============= SINGLE TEST CASES ============= */ diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp index b675a7c2da7d42..089a03b4d6bba7 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp @@ -40,10 +40,12 @@ std::vector disabledTestPatterns() { R"(.*BinaryConvolutionLayerTest.*)", // TODO: 53618. BF16 gemm ncsp convolution crash R"(.*_GroupConv.*_inFmts=nc.*_primitive=jit_gemm.*ENFORCE_BF16=YES.*)", - // TODO: 53578. fork DW bf16 convolution does not support 3d cases yet - R"(.*_DW_GroupConv.*_inFmts=(ndhwc|nCdhw16c).*ENFORCE_BF16=YES.*)", - // TODO: 56143. 
Enable nspc convolutions for bf16 precision - R"(.*ConvolutionLayerCPUTest.*_inFmts=(ndhwc|nhwc).*INFERENCE_PRECISION_HINT=bf16.*)", + // TODO: 157596 convolution bf16 leftover test case + R"(smoke_JIT_AVX512_DW_GroupConv/GroupConvolutionLayerCPUTest.*ndhwc.*jit_avx512_dw.*INFERENCE_PRECISION_HINT=bf16.*)", + R"(smoke_Conv_1D_1x1_BF16/ConvolutionLayerCPUTest\.CompareWithRefs/IS=\[\]_TS=\(\((1|2)\.6(4|7)\.7\)_\)_K\(1\)_S\(1\)_PB\(0\)_PE\(0\)_D=\(1\)_O=63_AP=explicit_netPRC=f32_inPRC=undefined_outPRC=undefined_trgDev=CPU_inFmts=nhwc_outFmts=nhwc_primitive=jit_avx512_1x1_.*PluginConf_INFERENCE_PRECISION_HINT=bf16)", + R"(smoke_Conv_1D_1x1_BF16/ConvolutionLayerCPUTest\.CompareWithRefs/IS=\[1\.\.200\.64\.\?\]_TS=\(\(2\.64\.7\)_\(1\.64\.5\)_\)_K\(1\)_S\(1\)_PB\(0\)_PE\(0\)_D=\(1\)_O=63_AP=explicit_netPRC=f32_inPRC=undefined_outPRC=undefined_trgDev=CPU_inFmts=nhwc_outFmts=nhwc_primitive=jit_avx512_1x1_.*PluginConf_INFERENCE_PRECISION_HINT=bf16)", + R"(smoke_Conv_1D_1x1_BF16/ConvolutionLayerCPUTest\.CompareWithRefs/IS=\[\?\.6(4|7)\.1\.\.200\]_TS=\(\(2\.6(4|7)\.7\)_\(1\.6(4|7)\.9\)_\)_K\(1\)_S\(1\)_PB\(0\)_PE\(0\)_D=\(1\)_O=63_AP=explicit_netPRC=f32_inPRC=undefined_outPRC=undefined_trgDev=CPU_inFmts=nhwc_outFmts=nhwc_primitive=jit_avx512_1x1_.*PluginConf_INFERENCE_PRECISION_HINT=bf16)", + R"(smoke_GroupConv_brgemm_2D_BF16/GroupConvolutionLayerCPUTest\.CompareWithRefs/IS=\[\]_TS=\(\(1\.64\.7\.7\)_\)_K\(3\.3\)_S\(2\.2\)_PB\((0|1)\.(0|1)\)_PE\(0\.0\)_D=\(2\.2\)_O=64_G=2_AP=explicit_netPRC=f32_inPRC=undefined_outPRC=undefined_trgDev=CPU_inFmts=nhwc_outFmts=nhwc_primitive=brgconv_avx512_amx_.*PluginConf_INFERENCE_PRECISION_HINT=bf16)", // TODO: 56827. 
Sporadic test failures R"(.*smoke_Conv.+_FP32.ConvolutionLayerCPUTest\.CompareWithRefs.*TS=\(\(.\.67.+\).*inFmts=n.+c.*_primitive=jit_avx2.*)", // incorrect jit_uni_planar_convolution with dilation = {1, 2, 1} and output channel 1 From de776f279c87e542c640acc8140aaf87f278c991 Mon Sep 17 00:00:00 2001 From: Andrei Kashchikhin Date: Mon, 9 Dec 2024 09:27:11 +0000 Subject: [PATCH 25/43] [CI] [GHA] Introduce additional Python (3.9-3.12) API tests on macOS (#27666) ### Details: - Based on #27304, should be reviewed after it. ### Tickets: - *152690* --- .github/workflows/job_python_api_tests.yml | 142 ++++++++++++++++++++ .github/workflows/job_python_unit_tests.yml | 54 ++------ .github/workflows/job_samples_tests.yml | 14 +- .github/workflows/linux_arm64.yml | 10 ++ .github/workflows/mac.yml | 60 ++++++++- .github/workflows/mac_arm64.yml | 57 +++++++- .github/workflows/ubuntu_22.yml | 10 ++ .github/workflows/ubuntu_24.yml | 10 ++ 8 files changed, 304 insertions(+), 53 deletions(-) create mode 100644 .github/workflows/job_python_api_tests.yml diff --git a/.github/workflows/job_python_api_tests.yml b/.github/workflows/job_python_api_tests.yml new file mode 100644 index 00000000000000..541a14e2b1b6df --- /dev/null +++ b/.github/workflows/job_python_api_tests.yml @@ -0,0 +1,142 @@ +name: Python API tests + +on: + workflow_call: + inputs: + runner: + description: 'Machine on which the tests would run' + type: string + required: true + container: + description: 'JSON to be converted to the value of the "container" configuration for the job' + type: string + required: false + default: '{"image": null}' + python-version: + description: 'Python version to setup. 
E.g., "3.11"' + type: string + required: true + +permissions: read-all + +env: + PIP_CACHE_PATH: /mount/caches/pip/linux + +jobs: + Python_Unit_Tests: + name: Python API tests + timeout-minutes: 30 + runs-on: ${{ inputs.runner }} + container: ${{ fromJSON(inputs.container) }} + defaults: + run: + shell: bash + env: + DEBIAN_FRONTEND: noninteractive # to prevent apt-get from waiting user input + OPENVINO_REPO: ${{ github.workspace }}/openvino + INSTALL_DIR: ${{ github.workspace }}/install + INSTALL_TEST_DIR: ${{ github.workspace }}/install/openvino_tests + INSTALL_WHEELS_DIR: ${{ github.workspace }}/install/openvino_wheels + steps: + - name: Download OpenVINO artifacts (tarballs and wheels) + uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8 + with: + pattern: openvino_@(wheels|tests) + path: ${{ env.INSTALL_DIR }} + + # Needed as ${{ github.workspace }} is not working correctly when using Docker + - name: Setup Variables + run: | + echo "OPENVINO_REPO=$GITHUB_WORKSPACE/openvino" >> "$GITHUB_ENV" + echo "INSTALL_DIR=$GITHUB_WORKSPACE/install" >> "$GITHUB_ENV" + echo "INSTALL_TEST_DIR=$GITHUB_WORKSPACE/install/openvino_tests" >> "$GITHUB_ENV" + echo "INSTALL_WHEELS_DIR=$GITHUB_WORKSPACE/install/openvino_wheels" >> "$GITHUB_ENV" + + - name: Install OpenVINO dependencies (mac) + if: runner.os == 'macOS' + run: brew install pigz + + - name: Extract OpenVINO packages + run: pigz -dc openvino_tests.tar.gz | tar -xf - -C ${INSTALL_TEST_DIR} + working-directory: ${{ env.INSTALL_TEST_DIR }} + + - name: Fetch setup_python and install wheels actions + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + with: + sparse-checkout: | + .github/actions/setup_python/action.yml + .github/actions/install_ov_wheels/action.yml + sparse-checkout-cone-mode: false + path: 'action_root' + + - name: Setup Python ${{ inputs.python-version }} + uses: ./action_root/.github/actions/setup_python + with: + version: ${{ inputs.python-version }} 
+ pip-cache-path: ${{ runner.os == 'Linux' && env.PIP_CACHE_PATH || '' }} + should-setup-pip-paths: ${{ runner.os == 'Linux' }} + self-hosted-runner: ${{ runner.os == 'Linux' }} + + # + # Tests + # + - name: Install OpenVINO Python wheels + uses: ./action_root/.github/actions/install_ov_wheels + with: + wheels-dir-path: ${{ env.INSTALL_WHEELS_DIR }} + wheels-to-install: 'openvino' + + - name: Install Python API tests dependencies + run: python3 -m pip install -r ${INSTALL_TEST_DIR}/tests/bindings/python/requirements_test.txt + + # + # Tests + # + + - name: Python API Tests + run: | + # for 'template' extension + export LD_LIBRARY_PATH=${INSTALL_TEST_DIR}/tests/:$LD_LIBRARY_PATH + python3 -m pytest -sv ${INSTALL_TEST_DIR}/tests/pyopenvino \ + --junitxml=${INSTALL_TEST_DIR}/TEST-Pyngraph.xml \ + --ignore=${INSTALL_TEST_DIR}/tests/pyopenvino/tests/test_utils/test_utils.py + + - name: Python API Tests -- numpy>=2.0.0 + run: | + python3 -m pip uninstall -y numpy + python3 -m pip install "numpy~=2.0.0" + python3 -m pip install -r ${INSTALL_TEST_DIR}/tests/bindings/python/requirements_test.txt + # for 'template' extension + export LD_LIBRARY_PATH=${INSTALL_TEST_DIR}/tests/:$LD_LIBRARY_PATH + python3 -m pytest -sv ${INSTALL_TEST_DIR}/tests/pyopenvino \ + --junitxml=${INSTALL_TEST_DIR}/TEST-Pyngraph_new_numpy.xml \ + --ignore=${INSTALL_TEST_DIR}/tests/pyopenvino/tests/test_utils/test_utils.py + + - name: Clone API snippets + if: runner.os != 'macOS' + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + with: + sparse-checkout: docs/articles_en/assets/snippets + path: ${{ env.OPENVINO_REPO }} + submodules: 'false' + + - name: Docs Python snippets + if: runner.os != 'macOS' + run: | + # torch, onnx + python3 -m pip install -r ${INSTALL_TEST_DIR}/tests/python/preprocess/torchvision/requirements.txt -r ${INSTALL_TEST_DIR}/tests/requirements_onnx + # to find 'snippets' module in docs + export PYTHONPATH=${OPENVINO_REPO}/docs/articles_en/assets + # for 
'template' extension + export LD_LIBRARY_PATH=${INSTALL_TEST_DIR}/tests/:$LD_LIBRARY_PATH + python3 ${OPENVINO_REPO}/docs/articles_en/assets/snippets/main.py + + - name: Upload Test Results + uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 + if: ${{ !cancelled() }} + with: + name: test-results-python-api-${{ inputs.python-version }} + path: | + ${{ env.INSTALL_TEST_DIR }}/TEST*.html + ${{ env.INSTALL_TEST_DIR }}/TEST*.xml + if-no-files-found: 'warn' diff --git a/.github/workflows/job_python_unit_tests.yml b/.github/workflows/job_python_unit_tests.yml index 8075f3299fe063..47506c83bf0945 100644 --- a/.github/workflows/job_python_unit_tests.yml +++ b/.github/workflows/job_python_unit_tests.yml @@ -65,21 +65,22 @@ jobs: echo "INSTALL_DIR=$GITHUB_WORKSPACE/install" >> "$GITHUB_ENV" echo "INSTALL_TEST_DIR=$GITHUB_WORKSPACE/install/tests" >> "$GITHUB_ENV" echo "LAYER_TESTS_INSTALL_DIR=$GITHUB_WORKSPACE/install/tests/layer_tests" >> "$GITHUB_ENV" + echo "INSTALL_WHEELS_DIR=$GITHUB_WORKSPACE/install/wheels" >> "$GITHUB_ENV" - name: Install OpenVINO dependencies (mac) if: runner.os == 'macOS' run: brew install pigz - name: Extract OpenVINO packages - run: | - pigz -dc openvino_tests.tar.gz | tar -xf - -C ${INSTALL_DIR} + run: pigz -dc openvino_tests.tar.gz | tar -xf - -C ${INSTALL_DIR} working-directory: ${{ env.INSTALL_DIR }} - - name: Fetch setup_python action + - name: Fetch setup_python and install wheels actions uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: | .github/actions/setup_python/action.yml + .github/actions/install_ov_wheels/action.yml sparse-checkout-cone-mode: false path: 'action_root' @@ -92,11 +93,10 @@ jobs: self-hosted-runner: ${{ runner.os == 'Linux' }} - name: Install OpenVINO Python wheels - run: | - # Install the core OV wheel - python3 -m pip install ./openvino-*.whl - - working-directory: ${{ env.INSTALL_WHEELS_DIR }} + uses: 
./action_root/.github/actions/install_ov_wheels + with: + wheels-dir-path: ${{ env.INSTALL_WHEELS_DIR }} + wheels-to-install: 'openvino' - name: Install Python API tests dependencies run: | @@ -121,15 +121,6 @@ jobs: # Tests # - - name: Python API Tests - if: ${{ fromJSON(inputs.affected-components).Python_API.test }} - run: | - # for 'template' extension - export LD_LIBRARY_PATH=${INSTALL_TEST_DIR}:$LD_LIBRARY_PATH - python3 -m pytest -sv ${INSTALL_TEST_DIR}/pyopenvino \ - --junitxml=${INSTALL_TEST_DIR}/TEST-Pyngraph.xml \ - --ignore=${INSTALL_TEST_DIR}/pyopenvino/tests/test_utils/test_utils.py - - name: Python ONNX operators tests if: (fromJSON(inputs.affected-components).Python_API.test || fromJSON(inputs.affected-components).ONNX_FE.test) && @@ -185,35 +176,6 @@ jobs: TEST_DEVICE: CPU TEST_PRECISION: FP16 - - name: Clone API snippets - if: runner.os != 'macOS' - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - with: - sparse-checkout: docs/articles_en/assets/snippets - path: ${{ env.OPENVINO_REPO }} - submodules: 'false' - - - name: Docs Python snippets - if: runner.os != 'macOS' - run: | - # to find 'snippets' module in docs - export PYTHONPATH=${OPENVINO_REPO}/docs/articles_en/assets - # for 'template' extension - export LD_LIBRARY_PATH=${INSTALL_TEST_DIR}:$LD_LIBRARY_PATH - python3 ${OPENVINO_REPO}/docs/articles_en/assets/snippets/main.py - - - name: Python API Tests -- numpy>=2.0.0 - if: ${{ fromJSON(inputs.affected-components).Python_API.test }} - run: | - python3 -m pip uninstall -y numpy - python3 -m pip install "numpy>=2.0.0,<2.2.0" - python3 -m pip install -r ${INSTALL_TEST_DIR}/bindings/python/requirements_test.txt - # for 'template' extension - export LD_LIBRARY_PATH=${INSTALL_TEST_DIR}:$LD_LIBRARY_PATH - python3 -m pytest -sv ${INSTALL_TEST_DIR}/pyopenvino \ - --junitxml=${INSTALL_TEST_DIR}/TEST-Pyngraph.xml \ - --ignore=${INSTALL_TEST_DIR}/pyopenvino/tests/test_utils/test_utils.py - - name: Upload Test Results uses: 
actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 if: ${{ !cancelled() }} diff --git a/.github/workflows/job_samples_tests.yml b/.github/workflows/job_samples_tests.yml index e144aa0cfb95aa..6f95d316abfc3f 100644 --- a/.github/workflows/job_samples_tests.yml +++ b/.github/workflows/job_samples_tests.yml @@ -54,6 +54,7 @@ jobs: echo "INSTALL_DIR=$GITHUB_WORKSPACE/install" >> "$GITHUB_ENV" echo "INSTALL_TEST_DIR=$GITHUB_WORKSPACE/install/tests" >> "$GITHUB_ENV" echo "BUILD_DIR=$GITHUB_WORKSPACE/build" >> "$GITHUB_ENV" + echo "INSTALL_WHEELS_DIR=$GITHUB_WORKSPACE/install/wheels" >> "$GITHUB_ENV" - name: Install OpenVINO dependencies (mac) if: runner.os == 'macOS' @@ -65,13 +66,12 @@ jobs: pigz -dc openvino_tests.tar.gz | tar -xf - -C ${INSTALL_DIR} working-directory: ${{ env.INSTALL_DIR }} - - name: Fetch setup_python action - # Python is already installed on Ubuntu within Dockerfile - if: runner.os != 'Linux' + - name: Fetch setup_python and install wheels actions uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: sparse-checkout: | .github/actions/setup_python/action.yml + .github/actions/install_ov_wheels/action.yml sparse-checkout-cone-mode: false path: 'openvino' @@ -113,6 +113,12 @@ jobs: # Tests # + - name: Install OpenVINO Python wheels + uses: ./openvino/.github/actions/install_ov_wheels + with: + wheels-dir-path: ${{ env.INSTALL_WHEELS_DIR }} + wheels-to-install: 'openvino' + - name: Samples tests if: fromJSON(inputs.affected-components).samples.test run: | @@ -122,7 +128,7 @@ jobs: export SHARE=$INSTALL_TEST_DIR/smoke_tests/samples_smoke_tests_data # Install Python benchmark_app by installing openvino-*.whl - python3 -m pip install --ignore-installed PyYAML -r $INSTALL_TEST_DIR/smoke_tests/requirements.txt $INSTALL_WHEELS_DIR/openvino-*.whl + python3 -m pip install --ignore-installed PyYAML -r $INSTALL_TEST_DIR/smoke_tests/requirements.txt export LD_LIBRARY_PATH=${IE_APP_PATH}:$LD_LIBRARY_PATH source 
${INSTALL_DIR}/setupvars.sh diff --git a/.github/workflows/linux_arm64.yml b/.github/workflows/linux_arm64.yml index 66ce9461f05fe8..e1aaa886d631c7 100644 --- a/.github/workflows/linux_arm64.yml +++ b/.github/workflows/linux_arm64.yml @@ -169,6 +169,16 @@ jobs: affected-components: ${{ needs.smart_ci.outputs.affected_components }} python-version: '3.11' + Python_API_Tests: + name: Python API tests + needs: [ Docker, Build, Smart_CI ] + uses: ./.github/workflows/job_python_api_tests.yml + with: + runner: 'aks-linux-16-cores-arm' + container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_20_04_arm64 }}", "volumes": ["/mount:/mount"]}' + python-version: '3.11' + if: fromJSON(needs.smart_ci.outputs.affected_components).Python_API.test + TensorFlow_Layer_Tests: name: TensorFlow Layer Tests needs: [ Build, Docker, Smart_CI, Openvino_tokenizers ] diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml index c587c5ad7323b3..26289e969c4e00 100644 --- a/.github/workflows/mac.yml +++ b/.github/workflows/mac.yml @@ -151,6 +151,7 @@ jobs: -DENABLE_CPPLINT=OFF \ -DENABLE_NCC_STYLE=OFF \ -DENABLE_TESTS=ON \ + -DENABLE_WHEEL=OFF \ -DCMAKE_COMPILE_WARNING_AS_ERROR=OFF \ -DENABLE_STRICT_DEPENDENCIES=OFF \ -DCMAKE_CXX_COMPILER_LAUNCHER=${{ env.CMAKE_CXX_COMPILER_LAUNCHER }} \ @@ -168,7 +169,6 @@ jobs: run: | cmake -DCMAKE_INSTALL_PREFIX=${{ env.INSTALL_DIR }} -P ${{ env.BUILD_DIR }}/cmake_install.cmake cmake -DCMAKE_INSTALL_PREFIX=${{ env.INSTALL_TEST_DIR }} -DCOMPONENT=tests -P ${{ env.BUILD_DIR }}/cmake_install.cmake - cmake -DCMAKE_INSTALL_PREFIX=${{ env.INSTALL_WHEELS_DIR }} -DCOMPONENT=python_wheels -P ${{ env.BUILD_DIR }}/cmake_install.cmake - name: Pack Artifacts run: | @@ -179,6 +179,48 @@ jobs: tar -cvf - * | pigz > ${{ env.BUILD_DIR }}/openvino_tests.tar.gz popd + # Setup additional Python versions for wheels building + - name: Setup Python 3.9 + uses: ./openvino/.github/actions/setup_python + with: + version: "3.9" + 
should-setup-pip-paths: 'false' + self-hosted-runner: 'false' + + - name: Setup Python 3.10 + uses: ./openvino/.github/actions/setup_python + with: + version: "3.10" + should-setup-pip-paths: 'false' + self-hosted-runner: 'false' + + - name: Setup Python 3.12 + uses: ./openvino/.github/actions/setup_python + with: + version: "3.12" + should-setup-pip-paths: 'false' + self-hosted-runner: 'false' + + - name: Build additional Python wheels + run: | + for py_version in "3.9" "3.10" "3.11" "3.12" + do + python_exec_path=$(python$py_version -c "import sys; print(sys.executable)") + $python_exec_path -m pip install -r ${{ env.OPENVINO_REPO }}/src/bindings/python/wheel/requirements-dev.txt + + cmake -DPython3_EXECUTABLE=$python_exec_path -DENABLE_WHEEL=ON -DOpenVINODeveloperPackage_DIR=${{ env.BUILD_DIR }} -S ${{ env.OPENVINO_REPO }}/src/bindings/python -B ${{ github.workspace }}/py$py_version + cmake --build ${{ github.workspace }}/py$py_version --parallel + cmake --install ${{ github.workspace }}/py$py_version --config ${{ env.CMAKE_BUILD_TYPE }} --prefix ${{ env.INSTALL_WHEELS_DIR }} --component python_wheels + done + + # Setup Python 3.11 as the default one + - name: Setup Python ${{ env.PYTHON_VERSION }} + uses: ./openvino/.github/actions/setup_python + with: + version: ${{ env.PYTHON_VERSION }} + should-setup-pip-paths: 'false' + self-hosted-runner: 'false' + - name: Cmake & Build - OpenVINO Contrib run: | cmake \ @@ -199,6 +241,7 @@ jobs: cmake --build ${{ env.BUILD_DIR }} --parallel $(nproc) cmake -DCMAKE_INSTALL_PREFIX=${{ env.INSTALL_DIR_JS }} -P ${{ env.BUILD_DIR }}/cmake_install.cmake + # # Upload build artifacts # @@ -210,7 +253,7 @@ jobs: name: openvino_package path: ${{ env.BUILD_DIR }}/openvino_package.tar.gz if-no-files-found: 'error' - + - name: Upload openvino wheels uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 with: @@ -270,6 +313,19 @@ jobs: affected-components: ${{ needs.smart_ci.outputs.affected_components }} os: 
'mac_13' + Python_API_Tests: + name: Python API tests + needs: [ Build, Smart_CI ] + uses: ./.github/workflows/job_python_api_tests.yml + strategy: + fail-fast: false + matrix: + python-version: [ '3.9', '3.10', '3.11', '3.12' ] + with: + runner: 'macos-13' + python-version: ${{ matrix.python-version }} + if: fromJSON(needs.smart_ci.outputs.affected_components).Python_API.test + Python_Unit_Tests: name: Python unit tests needs: [ Build, Smart_CI ] diff --git a/.github/workflows/mac_arm64.yml b/.github/workflows/mac_arm64.yml index 0708a844fe6b8b..d3fb10082adfd4 100644 --- a/.github/workflows/mac_arm64.yml +++ b/.github/workflows/mac_arm64.yml @@ -151,6 +151,7 @@ jobs: -DENABLE_CPPLINT=OFF \ -DENABLE_NCC_STYLE=OFF \ -DENABLE_TESTS=ON \ + -DENABLE_WHEEL=OFF \ -DCMAKE_COMPILE_WARNING_AS_ERROR=OFF \ -DENABLE_STRICT_DEPENDENCIES=OFF \ -DCMAKE_CXX_COMPILER_LAUNCHER=${{ env.CMAKE_CXX_COMPILER_LAUNCHER }} \ @@ -168,7 +169,6 @@ jobs: run: | cmake -DCMAKE_INSTALL_PREFIX=${{ env.INSTALL_DIR }} -P ${{ env.BUILD_DIR }}/cmake_install.cmake cmake -DCMAKE_INSTALL_PREFIX=${{ env.INSTALL_TEST_DIR }} -DCOMPONENT=tests -P ${{ env.BUILD_DIR }}/cmake_install.cmake - cmake -DCMAKE_INSTALL_PREFIX=${{ env.INSTALL_WHEELS_DIR }} -DCOMPONENT=python_wheels -P ${{ env.BUILD_DIR }}/cmake_install.cmake - name: Pack Artifacts run: | @@ -180,6 +180,48 @@ jobs: tar -cvf - * | pigz > ${{ env.BUILD_DIR }}/openvino_tests.tar.gz popd + # Setup additional Python versions for wheels building + - name: Setup Python 3.9 + uses: ./openvino/.github/actions/setup_python + with: + version: "3.9" + should-setup-pip-paths: 'false' + self-hosted-runner: 'false' + + - name: Setup Python 3.10 + uses: ./openvino/.github/actions/setup_python + with: + version: "3.10" + should-setup-pip-paths: 'false' + self-hosted-runner: 'false' + + - name: Setup Python 3.12 + uses: ./openvino/.github/actions/setup_python + with: + version: "3.12" + should-setup-pip-paths: 'false' + self-hosted-runner: 'false' + + - name: Build 
additional Python wheels + run: | + for py_version in "3.9" "3.10" "3.11" "3.12" + do + python_exec_path=$(python$py_version -c "import sys; print(sys.executable)") + $python_exec_path -m pip install -r ${{ env.OPENVINO_REPO }}/src/bindings/python/wheel/requirements-dev.txt + + cmake -DPython3_EXECUTABLE=$python_exec_path -DENABLE_WHEEL=ON -DOpenVINODeveloperPackage_DIR=${{ env.BUILD_DIR }} -S ${{ env.OPENVINO_REPO }}/src/bindings/python -B ${{ github.workspace }}/py$py_version + cmake --build ${{ github.workspace }}/py$py_version --parallel + cmake --install ${{ github.workspace }}/py$py_version --config ${{ env.CMAKE_BUILD_TYPE }} --prefix ${{ env.INSTALL_WHEELS_DIR }} --component python_wheels + done + + # Setup Python 3.11 as the default one + - name: Setup Python ${{ env.PYTHON_VERSION }} + uses: ./openvino/.github/actions/setup_python + with: + version: ${{ env.PYTHON_VERSION }} + should-setup-pip-paths: 'false' + self-hosted-runner: 'false' + - name: Cmake & Build - OpenVINO Contrib run: | cmake \ @@ -279,6 +321,19 @@ jobs: affected-components: ${{ needs.smart_ci.outputs.affected_components }} python-version: '3.11' + Python_API_Tests: + name: Python API tests + needs: [ Build, Smart_CI ] + uses: ./.github/workflows/job_python_api_tests.yml + strategy: + fail-fast: false + matrix: + python-version: [ '3.9', '3.10', '3.11', '3.12' ] + with: + runner: 'macos-13-xlarge' + python-version: ${{ matrix.python-version }} + if: fromJSON(needs.smart_ci.outputs.affected_components).Python_API.test + TensorFlow_Layer_Tests: name: TensorFlow Layer Tests needs: [ Build, Smart_CI, Openvino_tokenizers ] diff --git a/.github/workflows/ubuntu_22.yml b/.github/workflows/ubuntu_22.yml index f4caec8b2458a0..4fc93d73213f78 100644 --- a/.github/workflows/ubuntu_22.yml +++ b/.github/workflows/ubuntu_22.yml @@ -300,6 +300,16 @@ jobs: affected-components: ${{ needs.smart_ci.outputs.affected_components }} python-version: '3.11' + Python_API_Tests: + name: Python API tests + needs: [ 
Docker, Build, Smart_CI ] + uses: ./.github/workflows/job_python_api_tests.yml + with: + runner: 'aks-linux-4-cores-16gb' + container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_22_04_x64 }}", "volumes": ["/mount:/mount"]}' + python-version: '3.11' + if: fromJSON(needs.smart_ci.outputs.affected_components).Python_API.test + TensorFlow_Layer_Tests: name: TensorFlow Layer Tests needs: [ Docker, Build, Smart_CI, Openvino_tokenizers ] diff --git a/.github/workflows/ubuntu_24.yml b/.github/workflows/ubuntu_24.yml index d874e06a189232..1ad3951ecd3347 100644 --- a/.github/workflows/ubuntu_24.yml +++ b/.github/workflows/ubuntu_24.yml @@ -134,6 +134,16 @@ jobs: affected-components: ${{ needs.smart_ci.outputs.affected_components }} python-version: '3.12' + Python_API_Tests: + name: Python API tests + needs: [ Docker, Build, Smart_CI ] + uses: ./.github/workflows/job_python_api_tests.yml + with: + runner: 'aks-linux-4-cores-16gb' + container: '{"image": "${{ fromJSON(needs.docker.outputs.images).ov_test.ubuntu_24_04_x64 }}", "volumes": ["/mount:/mount"]}' + python-version: '3.12' + if: fromJSON(needs.smart_ci.outputs.affected_components).Python_API.test + Pytorch_Layer_Tests: name: Pytorch Layer Tests needs: [ Docker, Build, Smart_CI ] From 67f253764c4d0a9b7ab5a8f9706d063e488d7b5b Mon Sep 17 00:00:00 2001 From: Alina Kladieva Date: Mon, 9 Dec 2024 19:27:32 +0100 Subject: [PATCH 26/43] [GHA][ov-provider] Exclude custom release packages from matching (#27979) To filter out automatically picking unwanted custom release builds like https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.5/windows_vc_mt Test run: https://github.com/openvinotoolkit/openvino_tokenizers/actions/runs/12237578864/job/34133648815?pr=338 (now the regular "windows" package is picked) Signed-off-by: Alina Kladieva --- .github/actions/openvino_provider/get_s3_package.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git 
a/.github/actions/openvino_provider/get_s3_package.py b/.github/actions/openvino_provider/get_s3_package.py index df253a422421ec..02ea99cb2f3403 100644 --- a/.github/actions/openvino_provider/get_s3_package.py +++ b/.github/actions/openvino_provider/get_s3_package.py @@ -54,6 +54,10 @@ def main(product, version_pattern, platform, arch, folder): matching_files = filter_files_by_criteria(all_files, product, version_pattern, platform, arch, folder) if matching_files: logger.info(f"Matching packages: {sorted(matching_files)}") + if len(matching_files) > 1: + custom_release_build_pattern = fr".*/{version_pattern}/(linux_|windows_|macos_).*/.*" + # Exclude custom release builds, if any, from matches + matching_files = [file for file in matching_files if not re.search(custom_release_build_pattern, file)] package_url = f"https://storage.openvinotoolkit.org{sorted(matching_files)[-1]}" logger.info(f"Returning package URL: {package_url}") action_utils.set_github_output("package_url", package_url) From f0da7075169b97f6523d8f465cbb6ab76f995324 Mon Sep 17 00:00:00 2001 From: Alina Kladieva Date: Tue, 10 Dec 2024 08:42:26 +0100 Subject: [PATCH 27/43] [tests/requirements_pytorch] Temporarily fix optimum-intel version on last stable commit (#27985) There are failures with newer commits, e.g. 
https://github.com/openvinotoolkit/openvino/actions/runs/12240792041/job/34146426674 --------- Signed-off-by: Alina Kladieva --- .github/components.yml | 1 + tests/requirements_pytorch | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/components.yml b/.github/components.yml index 8de51a2ced3343..74247e1f051cd5 100644 --- a/.github/components.yml +++ b/.github/components.yml @@ -149,6 +149,7 @@ PyTorch_FE: build: - CPU - Python_API + - TOKENIZERS # PyTorch_FE tests depend on tokenizers build JAX_FE: revalidate: diff --git a/tests/requirements_pytorch b/tests/requirements_pytorch index be304155e2afc0..f42deb81839883 100644 --- a/tests/requirements_pytorch +++ b/tests/requirements_pytorch @@ -44,7 +44,7 @@ super-image==0.1.7 huggingface-hub==0.25.2 # use latest released version once it's available -git+https://github.com/huggingface/optimum-intel.git@main; python_version < "3.12" +git+https://github.com/huggingface/optimum-intel.git@5c735487d4bd3dd8d7dccb242d8d5988e7dd4069; python_version < "3.12" # set 'export HF_HUB_ENABLE_HF_TRANSFER=1' to benefits from hf_transfer hf_transfer==0.1.8 From 9e6dfed16a29ddfcddba78f2d1b895d647cd2ec9 Mon Sep 17 00:00:00 2001 From: Pavel Durandin Date: Tue, 10 Dec 2024 11:10:12 +0400 Subject: [PATCH 28/43] [GPU] Fix tests errors, phase 7 (#27953) ### Details: - Fixes in unit tests failures --- .../tests/unit/test_cases/convolution_gpu_test.cpp | 7 ++++--- .../intel_gpu/tests/unit/test_cases/dft_gpu_test.cpp | 2 +- .../intel_gpu/tests/unit/test_cases/pooling_gpu_test.cpp | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp index 5d01d448dcfc64..f0243f055c3670 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp @@ -10784,11 +10784,12 @@ TEST_P(conv_dyn_test, 
convolution_gpu_fsv16_1x1_no_bias) { auto is_weight_1x1 = (p.wei_shape[p.wei_shape.size() - 1] == 1 && p.wei_shape[p.wei_shape.size() - 2] == 1); auto is_valid_output = p.wei_shape[0] % 16 == 0; - auto is_valid_strid = p.stride[0] == 1 && p.stride[1] == 1; - auto is_valid_padding = p.pad_begin[0] == 0 && p.pad_begin[1] == 0 && p.pad_end[0] == 0 && p.pad_end[1] == 0; + auto is_valid_strid = std::all_of(p.stride.begin(), p.stride.end(), [](size_t i) { return i == 1; }); + auto is_valid_padding = std::all_of(p.pad_begin.begin(), p.pad_begin.end(), [](int i) { return i == 0; }) + && std::all_of(p.pad_end.begin(), p.pad_end.end(), [](int i) { return i == 0; }); if (!is_weight_1x1 || !is_valid_output || !is_valid_strid || !is_valid_padding) { - std::cout << "[ SKIPPED ] The test is skipped (is_weight_1x1:" << is_weight_1x1 << ", is_valid_output" << is_valid_output + std::cout << "[ SKIPPED ] The test is skipped (is_weight_1x1: " << is_weight_1x1 << ", is_valid_output: " << is_valid_output << ", is_valid_strid: " << is_valid_strid << ", is_valid_padding: " << is_valid_padding << std::endl; ASSERT_EQ(1, 1); return; diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/dft_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/dft_gpu_test.cpp index 3099c8dad5d9d3..5d78cdec028724 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/dft_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/dft_gpu_test.cpp @@ -1963,7 +1963,7 @@ const std::vector IRDFT_params_4d = { {{2, 10, 6, 2}, {2, 10, 10}, {1, 2}, {}, expected_rdft2d_results, rinput_data}, {{2, 10, 6, 2}, {2, 10, 10}, {1, 2}, {10, 10}, expected_rdft2d_results, rinput_data}, {{2, 5, 7, 2}, {2, 5, 12}, {1, 2}, {5, 12}, expected_rdft2d_results_2, expected_irdft2d_results_2}, - {{2, 10, 6, 2}, {2, 10, 10}, {0, 1, 2}, {10, 10}, expected_rdft3d_results, rinput_data}, + {{2, 10, 6, 2}, {2, 10, 10}, {0, 1, 2}, {10, 10, 10}, expected_rdft3d_results, rinput_data}, {{2, 10, 6, 2}, {4, 5, 12}, {0, 1, 2}, {4, 5, 
12}, expected_rdft3d_results, expected_irdft3d_results_2}, }; const std::vector IRDFT_params_5d = extendByOneDimension(IRDFT_params_4d); diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/pooling_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/pooling_gpu_test.cpp index 461474335e903a..324f90faf0b70e 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/pooling_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/pooling_gpu_test.cpp @@ -1244,7 +1244,7 @@ static void generic_average_wo_padding_test(format fmt, tensor output, tensor in tpl.add(reorder("reorder", input_info("in"), input_mem->get_layout().with_padding((padding) off.sizes()))); pool_in = "reorder"; } - tpl.add(pooling("pool", input_info(pool_in), pooling_mode::average_no_padding, window, stride, offset)); + tpl.add(pooling("pool", input_info(pool_in), pooling_mode::average_no_padding, window, stride, offset, offset)); auto cfg = get_test_default_config(get_test_engine()); cfg.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{{"pool", {format::any, "", impl_types::ocl}}})); From 6a4ba4695191b14c215e4613b5327707c0e33008 Mon Sep 17 00:00:00 2001 From: Egor Duplenskii Date: Tue, 10 Dec 2024 08:11:22 +0100 Subject: [PATCH 29/43] [CPU] Introduce FullyConnected, FCQuantized, FCCompressed, Placeholder (#26239) ### Details: 1. Introduce the following operations to the internal opset * `FullyConnected` (`MatMul` with transposed constant second input) * `FullyConnectedCompressed` (`FullyConnected` with weights compression) * `FullyConnectedQuantizedLegacy` (`FullyConnected` with quantized activations and weights and dequantize scale and zero point pulled through the Op by LPT) * `FullyConnectedQuantized` (`FullyConnected` with quantization scales and zero points on activation, weights and outputs). Planned to be used in scope of dynamic quantization. Can be used for a static quantization as well in the future. 
* Unused inputs are represented as a `Constant` input with `Shape{0}` 2. The following transformations were added / updated: * `ConvertFullyConnectedToFullyConnectedCompressed` (replaces proprietary ~`FuseFCAndWeightsDecompression`~) * `ConvertFCToFCQuantizedLegacy` (replaces proprietary ~`FuseConvMatmulFCDeconvAndDQScales`~) * `FullyConnectedBiasFusion` (added into CPU folder for now, needs to be checked and reviewed by the GPU team before adaptation to the internal opset). Replaces proprietary ~`FuseConvolutionMatMulDeconvAndBias`~ * `ConvertMatMulToFC` updated to use `ov::op::internal::FullyConnected`, planned to be moved to internal opset after review from GPU team ### Todo - [x] Clean up debug code - [x] Clean up extra cmake targets - [x] Perf regression check ### Tickets: - 149923 --- .../include/ov_ops/fully_connected.hpp | 46 +++ .../ov_ops/fully_connected_compressed.hpp | 41 +++ .../ov_ops/fully_connected_quantized.hpp | 39 +++ .../fully_connected_quantized_legacy.hpp | 41 +++ .../convert_fc_to_compressed.hpp | 29 ++ .../convert_fc_to_quantized_legacy.hpp | 22 ++ .../src/ov_ops/fully_connected.cpp | 62 ++++ .../src/ov_ops/fully_connected_compressed.cpp | 63 ++++ .../src/ov_ops/fully_connected_quantized.cpp | 59 ++++ .../fully_connected_quantized_legacy.cpp | 71 +++++ .../convert_fc_to_compressed.cpp | 181 +++++++++++ .../convert_fc_to_quantized_legacy.cpp | 77 +++++ src/frontends/ir/src/ir_deserializer.cpp | 5 +- src/plugins/intel_cpu/src/cpu_types.cpp | 7 + src/plugins/intel_cpu/src/cpu_types.h | 6 + .../intel_cpu/src/dnnl_postops_composer.cpp | 105 +++++-- .../intel_cpu/src/dnnl_postops_composer.h | 3 +- src/plugins/intel_cpu/src/edge.cpp | 8 + src/plugins/intel_cpu/src/extension.cpp | 10 +- src/plugins/intel_cpu/src/graph_optimizer.cpp | 262 +--------------- src/plugins/intel_cpu/src/graph_optimizer.h | 1 - .../src/memory_desc/empty_memory_desc.h | 4 +- src/plugins/intel_cpu/src/node.cpp | 3 +- .../executors/acl/acl_fullyconnected.cpp | 24 +-
.../dnnl/dnnl_convolution_primitive.cpp | 3 +- .../dnnl/dnnl_fullyconnected_primitive.cpp | 60 ++-- .../dnnl/dnnl_fullyconnected_primitive.hpp | 7 - .../executors/dnnl/dnnl_matmul_primitive.cpp | 10 +- .../src/nodes/executors/executor_config.hpp | 1 - .../src/nodes/executors/executor_factory.hpp | 1 - .../nodes/executors/fullyconnected_config.hpp | 7 +- .../fullyconnected_implementations.cpp | 3 +- .../src/nodes/executors/matmul_config.hpp | 1 - .../src/nodes/executors/memory_arguments.hpp | 8 +- .../src/nodes/executors/mlas/mlas_gemm.cpp | 43 +-- .../intel_cpu/src/nodes/fullyconnected.cpp | 237 +++++++++------ .../intel_cpu/src/nodes/fullyconnected.h | 34 ++- src/plugins/intel_cpu/src/nodes/input.cpp | 50 ++-- src/plugins/intel_cpu/src/nodes/input.h | 2 +- src/plugins/intel_cpu/src/nodes/reference.cpp | 2 +- .../shape_inference/custom/fullyconnected.cpp | 4 +- .../cpu_opset/common/op/fully_connected.cpp | 79 ----- .../cpu_opset/common/op/fully_connected.hpp | 39 --- .../common/pass/convert_matmul_to_fc.cpp | 32 +- .../common/pass/convert_matmul_to_fc.hpp | 2 +- .../common/pass/convert_to_power_static.cpp | 22 +- .../cpu_opset/common/pass/fc_bias_fusion.cpp | 79 +++++ .../cpu_opset/common/pass/fc_bias_fusion.hpp | 19 ++ .../pass/move_fc_reshape_to_weights.cpp | 5 +- .../cpu_opset/common/pass/split_fc.cpp | 207 ------------- .../cpu_opset/common/pass/split_fc.hpp | 81 ----- .../convert_to_cpu_specific_opset.hpp | 43 ++- .../transformation_pipeline.cpp | 3 +- .../intel_cpu/src/transformations/utils.cpp | 4 +- src/plugins/intel_cpu/src/utils/cpu_utils.hpp | 31 ++ .../src/utils/debug_capabilities.cpp | 5 +- .../intel_cpu/src/utils/debug_capabilities.h | 7 + .../instances/arm/matmul.cpp | 3 - .../src/x64/matmul_weights_decompression.cpp | 2 +- .../custom_shape_infer/fullconnect.cpp | 70 ++++- .../transformations/convert_matmul_test.cpp | 231 +++++++++------ .../move_fc_reshape_to_weights.cpp | 9 +- .../unit/transformations/split_fc_test.cpp | 280 ------------------ 
.../common_test_utils/src/ov_test_utils.cpp | 1 + 64 files changed, 1562 insertions(+), 1334 deletions(-) create mode 100644 src/common/transformations/include/ov_ops/fully_connected.hpp create mode 100644 src/common/transformations/include/ov_ops/fully_connected_compressed.hpp create mode 100644 src/common/transformations/include/ov_ops/fully_connected_quantized.hpp create mode 100644 src/common/transformations/include/ov_ops/fully_connected_quantized_legacy.hpp create mode 100644 src/common/transformations/include/transformations/op_conversions/convert_fc_to_compressed.hpp create mode 100644 src/common/transformations/include/transformations/op_conversions/convert_fc_to_quantized_legacy.hpp create mode 100644 src/common/transformations/src/ov_ops/fully_connected.cpp create mode 100644 src/common/transformations/src/ov_ops/fully_connected_compressed.cpp create mode 100644 src/common/transformations/src/ov_ops/fully_connected_quantized.cpp create mode 100644 src/common/transformations/src/ov_ops/fully_connected_quantized_legacy.cpp create mode 100644 src/common/transformations/src/transformations/op_conversions/convert_fc_to_compressed.cpp create mode 100644 src/common/transformations/src/transformations/op_conversions/convert_fc_to_quantized_legacy.cpp delete mode 100644 src/plugins/intel_cpu/src/transformations/cpu_opset/common/op/fully_connected.cpp delete mode 100644 src/plugins/intel_cpu/src/transformations/cpu_opset/common/op/fully_connected.hpp create mode 100644 src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/fc_bias_fusion.cpp create mode 100644 src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/fc_bias_fusion.hpp delete mode 100644 src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/split_fc.cpp delete mode 100644 src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/split_fc.hpp delete mode 100644 src/plugins/intel_cpu/tests/unit/transformations/split_fc_test.cpp diff --git 
a/src/common/transformations/include/ov_ops/fully_connected.hpp b/src/common/transformations/include/ov_ops/fully_connected.hpp new file mode 100644 index 00000000000000..6f33b5963ffaf8 --- /dev/null +++ b/src/common/transformations/include/ov_ops/fully_connected.hpp @@ -0,0 +1,46 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/core/node.hpp" +#include "openvino/op/op.hpp" +#include "transformations_visibility.hpp" + +namespace ov { +namespace op { +namespace internal { + +class TRANSFORMATIONS_API FullyConnected : public ov::op::Op { +public: + OPENVINO_OP("FullyConnected", "ie_internal_opset"); + + FullyConnected() = default; + + FullyConnected(const ov::Output& A, + const ov::Output& B, + const ov::Output& bias, + const ov::element::Type output_type = ov::element::undefined); + + FullyConnected(const ov::Output& A, + const ov::Output& B, + const ov::element::Type output_type = ov::element::undefined); + + bool visit_attributes(ov::AttributeVisitor& visitor) override; + + ov::element::Type get_output_type() const { + return m_output_type; + } + + std::shared_ptr clone_with_new_inputs(const ov::OutputVector& new_args) const override; + + void validate_and_infer_types() override; + +protected: + ov::element::Type m_output_type; +}; + +} // namespace internal +} // namespace op +} // namespace ov diff --git a/src/common/transformations/include/ov_ops/fully_connected_compressed.hpp b/src/common/transformations/include/ov_ops/fully_connected_compressed.hpp new file mode 100644 index 00000000000000..d363a339406070 --- /dev/null +++ b/src/common/transformations/include/ov_ops/fully_connected_compressed.hpp @@ -0,0 +1,41 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/core/node.hpp" +#include "openvino/op/op.hpp" +#include "ov_ops/fully_connected.hpp" + +namespace ov { +namespace op { +namespace internal { + 
+class TRANSFORMATIONS_API FullyConnectedCompressed : public FullyConnected { +public: + OPENVINO_OP("FullyConnectedCompressed", "ie_internal_opset", FullyConnected); + + FullyConnectedCompressed() = default; + + FullyConnectedCompressed(const ov::Output& X, + const ov::Output& W, + const ov::Output& bias, + const ov::Output& weight_scales, + const ov::Output& weight_zero_points, + const ov::element::Type output_type = ov::element::undefined); + + FullyConnectedCompressed(const ov::Output& X, + const ov::Output& W, + const ov::Output& bias, + const ov::Output& weight_scales, + const ov::element::Type output_type = ov::element::undefined); + + std::shared_ptr clone_with_new_inputs(const ov::OutputVector& new_args) const override; + + void validate_and_infer_types() override; +}; + +} // namespace internal +} // namespace op +} // namespace ov diff --git a/src/common/transformations/include/ov_ops/fully_connected_quantized.hpp b/src/common/transformations/include/ov_ops/fully_connected_quantized.hpp new file mode 100644 index 00000000000000..6eceed0abdef78 --- /dev/null +++ b/src/common/transformations/include/ov_ops/fully_connected_quantized.hpp @@ -0,0 +1,39 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/core/node.hpp" +#include "openvino/op/op.hpp" +#include "ov_ops/fully_connected.hpp" + +namespace ov { +namespace op { +namespace internal { + +class TRANSFORMATIONS_API FullyConnectedQuantized : public FullyConnected { +public: + OPENVINO_OP("FullyConnectedQuantized", "ie_internal_opset", FullyConnected); + + FullyConnectedQuantized() = default; + + FullyConnectedQuantized(const ov::Output& X, + const ov::Output& W, + const ov::Output& bias, + const ov::Output& weight_scales, + const ov::Output& weight_zero_points, + const ov::Output& input_scales, + const ov::Output& input_zero_points, + const ov::Output& output_scales, + const ov::Output& output_zero_points, + const 
ov::element::Type output_type = ov::element::undefined); + + void validate_and_infer_types() override; + + std::shared_ptr clone_with_new_inputs(const ov::OutputVector& new_args) const override; +}; + +} // namespace internal +} // namespace op +} // namespace ov diff --git a/src/common/transformations/include/ov_ops/fully_connected_quantized_legacy.hpp b/src/common/transformations/include/ov_ops/fully_connected_quantized_legacy.hpp new file mode 100644 index 00000000000000..2c68ec4dc365f9 --- /dev/null +++ b/src/common/transformations/include/ov_ops/fully_connected_quantized_legacy.hpp @@ -0,0 +1,41 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/core/node.hpp" +#include "openvino/op/op.hpp" +#include "ov_ops/fully_connected.hpp" + +namespace ov { +namespace op { +namespace internal { + +class TRANSFORMATIONS_API FullyConnectedQuantizedLegacy : public FullyConnected { +public: + OPENVINO_OP("FullyConnectedQuantizedLegacy", "ie_internal_opset", FullyConnected); + + FullyConnectedQuantizedLegacy() = default; + + FullyConnectedQuantizedLegacy(const ov::Output& X, + const ov::Output& W, + const ov::Output& bias, + const ov::Output& deq_scales, + const ov::Output& deq_zero_points, + const ov::element::Type output_type = ov::element::undefined); + + FullyConnectedQuantizedLegacy(const ov::Output& X, + const ov::Output& W, + const ov::Output& bias, + const ov::Output& deq_scales, + const ov::element::Type output_type = ov::element::undefined); + + std::shared_ptr clone_with_new_inputs(const ov::OutputVector& new_args) const override; + + void validate_and_infer_types() override; +}; + +} // namespace internal +} // namespace op +} // namespace ov diff --git a/src/common/transformations/include/transformations/op_conversions/convert_fc_to_compressed.hpp b/src/common/transformations/include/transformations/op_conversions/convert_fc_to_compressed.hpp new file mode 100644 index 
00000000000000..1b6fcfb2bb3684 --- /dev/null +++ b/src/common/transformations/include/transformations/op_conversions/convert_fc_to_compressed.hpp @@ -0,0 +1,29 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/pass/matcher_pass.hpp" +#include "ov_ops/fully_connected.hpp" +#include "transformations_visibility.hpp" + +namespace ov { +namespace pass { + +class TRANSFORMATIONS_API ConvertFullyConnectedToFullyConnectedCompressed; + +} // namespace pass +} // namespace ov + +class ov::pass::ConvertFullyConnectedToFullyConnectedCompressed : public ov::pass::MatcherPass { +public: + using SupportsPredicate = + std::function&, size_t, size_t, size_t)>; + + OPENVINO_RTTI("ConvertFullyConnectedToFullyConnectedCompressed", "0"); + ConvertFullyConnectedToFullyConnectedCompressed(const std::vector& supported_activation_types, + const std::vector& supported_weights_types, + SupportsPredicate supports_config = nullptr, + bool convert_u4zp_to_u8 = false); +}; diff --git a/src/common/transformations/include/transformations/op_conversions/convert_fc_to_quantized_legacy.hpp b/src/common/transformations/include/transformations/op_conversions/convert_fc_to_quantized_legacy.hpp new file mode 100644 index 00000000000000..88990f92cb573c --- /dev/null +++ b/src/common/transformations/include/transformations/op_conversions/convert_fc_to_quantized_legacy.hpp @@ -0,0 +1,22 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/pass/matcher_pass.hpp" +#include "transformations_visibility.hpp" + +namespace ov { +namespace pass { + +class TRANSFORMATIONS_API ConvertFCToFCQuantizedLegacy; + +} // namespace pass +} // namespace ov + +class ov::pass::ConvertFCToFCQuantizedLegacy : public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("ConvertFullyConnectedToFullyConnectedQuantized", "0"); + ConvertFCToFCQuantizedLegacy(); +}; diff --git 
a/src/common/transformations/src/ov_ops/fully_connected.cpp b/src/common/transformations/src/ov_ops/fully_connected.cpp new file mode 100644 index 00000000000000..3fa609362b999c --- /dev/null +++ b/src/common/transformations/src/ov_ops/fully_connected.cpp @@ -0,0 +1,62 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "ov_ops/fully_connected.hpp" + +#include + +#include "matmul_shape_inference.hpp" + +namespace ov { +namespace op { +namespace internal { + +FullyConnected::FullyConnected(const ov::Output& A, + const ov::Output& B, + const ov::Output& bias, + const ov::element::Type output_type) + : Op({A, B, bias}), + m_output_type(output_type) { + validate_and_infer_types(); +} + +FullyConnected::FullyConnected(const ov::Output& A, + const ov::Output& B, + const ov::element::Type output_type) + : FullyConnected(A, B, std::make_shared(element::undefined, Shape{0}), output_type) {} + +bool FullyConnected::visit_attributes(ov::AttributeVisitor& visitor) { + visitor.on_attribute("output_type", m_output_type); + return true; +} + +std::shared_ptr FullyConnected::clone_with_new_inputs(const ov::OutputVector& new_args) const { + check_new_args_count(this, new_args); + + return std::make_shared(new_args.at(0), new_args.at(1), new_args.at(2), m_output_type); +} + +void FullyConnected::validate_and_infer_types() { + const auto input_size = get_input_size(); + NODE_VALIDATION_CHECK(this, + input_size >= 3, + "Number of inputs is incorrect. Current value is: ", + input_size, + ", expected at least 3."); + + ov::op::v0::MatMul op; + op.set_transpose_a(false); + op.set_transpose_b(true); + + auto out_shapes = + ov::op::v0::shape_infer(&op, + std::vector{get_input_partial_shape(0), get_input_partial_shape(1)}); + + auto output_type = m_output_type == ov::element::undefined ? 
get_input_element_type(0) : m_output_type; + set_output_type(0, output_type, out_shapes[0]); +} + +} // namespace internal +} // namespace op +} // namespace ov diff --git a/src/common/transformations/src/ov_ops/fully_connected_compressed.cpp b/src/common/transformations/src/ov_ops/fully_connected_compressed.cpp new file mode 100644 index 00000000000000..e0bb13042ea6ff --- /dev/null +++ b/src/common/transformations/src/ov_ops/fully_connected_compressed.cpp @@ -0,0 +1,63 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "ov_ops/fully_connected_compressed.hpp" + +#include + +#include "openvino/core/type/element_type.hpp" +#include "openvino/op/constant.hpp" +#include "ov_ops/fully_connected.hpp" + +namespace ov { +namespace op { +namespace internal { + +FullyConnectedCompressed::FullyConnectedCompressed(const ov::Output& X, + const ov::Output& W, + const ov::Output& bias, + const ov::Output& weight_scales, + const ov::Output& weight_zero_points, + const ov::element::Type output_type) + : FullyConnected(X, W, bias, output_type) { + set_argument(3, weight_scales); + set_argument(4, weight_zero_points); + validate_and_infer_types(); +} + +FullyConnectedCompressed::FullyConnectedCompressed(const ov::Output& X, + const ov::Output& W, + const ov::Output& bias, + const ov::Output& weight_scales, + const ov::element::Type output_type) + : FullyConnectedCompressed(X, + W, + bias, + weight_scales, + std::make_shared(element::undefined, Shape{0}), + output_type) {} + +std::shared_ptr FullyConnectedCompressed::clone_with_new_inputs(const ov::OutputVector& new_args) const { + check_new_args_count(this, new_args); + + return std::make_shared(new_args.at(0), + new_args.at(1), + new_args.at(2), + new_args.at(3), + new_args.at(4), + m_output_type); +} + +// @todo finalize validate_and_infer_types +void FullyConnectedCompressed::validate_and_infer_types() { + const auto input_size = get_input_size(); + + 
NODE_VALIDATION_CHECK(this, input_size == 5, "Number of inputs is incorrect. Current value is: ", input_size); + + FullyConnected::validate_and_infer_types(); +} + +} // namespace internal +} // namespace op +} // namespace ov diff --git a/src/common/transformations/src/ov_ops/fully_connected_quantized.cpp b/src/common/transformations/src/ov_ops/fully_connected_quantized.cpp new file mode 100644 index 00000000000000..3f06e14834f7d1 --- /dev/null +++ b/src/common/transformations/src/ov_ops/fully_connected_quantized.cpp @@ -0,0 +1,59 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "ov_ops/fully_connected_quantized.hpp" + +#include "openvino/core/type/element_type.hpp" +#include "ov_ops/fully_connected.hpp" + +namespace ov { +namespace op { +namespace internal { + +FullyConnectedQuantized::FullyConnectedQuantized(const ov::Output& X, + const ov::Output& W, + const ov::Output& bias, + const ov::Output& weight_scales, + const ov::Output& weight_zero_points, + const ov::Output& input_scales, + const ov::Output& input_zero_points, + const ov::Output& output_scales, + const ov::Output& output_zero_points, + const ov::element::Type output_type) + : FullyConnected(X, W, bias, output_type) { + set_argument(3, weight_scales); + set_argument(4, weight_zero_points); + set_argument(5, input_scales); + set_argument(6, input_zero_points); + set_argument(7, output_scales); + set_argument(8, output_zero_points); + validate_and_infer_types(); +} + +std::shared_ptr FullyConnectedQuantized::clone_with_new_inputs(const ov::OutputVector& new_args) const { + check_new_args_count(this, new_args); + + return std::make_shared(new_args.at(0), + new_args.at(1), + new_args.at(2), + new_args.at(3), + new_args.at(4), + new_args.at(5), + new_args.at(6), + new_args.at(7), + new_args.at(8), + m_output_type); +} + +// @todo finalize validate_and_infer_types +void FullyConnectedQuantized::validate_and_infer_types() { + const auto input_size = 
get_input_size(); + NODE_VALIDATION_CHECK(this, input_size == 9, "Number of inputs is incorrect. Current value is: ", input_size); + + FullyConnected::validate_and_infer_types(); +} + +} // namespace internal +} // namespace op +} // namespace ov diff --git a/src/common/transformations/src/ov_ops/fully_connected_quantized_legacy.cpp b/src/common/transformations/src/ov_ops/fully_connected_quantized_legacy.cpp new file mode 100644 index 00000000000000..42df0980086199 --- /dev/null +++ b/src/common/transformations/src/ov_ops/fully_connected_quantized_legacy.cpp @@ -0,0 +1,71 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "ov_ops/fully_connected_quantized_legacy.hpp" + +#include + +#include "matmul_shape_inference.hpp" +#include "openvino/core/type/element_type.hpp" + +namespace ov { +namespace op { +namespace internal { + +FullyConnectedQuantizedLegacy::FullyConnectedQuantizedLegacy(const ov::Output& X, + const ov::Output& W, + const ov::Output& bias, + const ov::Output& deq_scales, + const ov::Output& deq_zero_points, + const ov::element::Type output_type) + : FullyConnected(X, W, bias, output_type) { + set_argument(3, deq_scales); + set_argument(4, deq_zero_points); + validate_and_infer_types(); +} + +FullyConnectedQuantizedLegacy::FullyConnectedQuantizedLegacy(const ov::Output& X, + const ov::Output& W, + const ov::Output& bias, + const ov::Output& deq_scales, + const ov::element::Type output_type) + : FullyConnectedQuantizedLegacy(X, + W, + bias, + deq_scales, + std::make_shared(element::undefined, Shape{0}), + output_type) {} + +std::shared_ptr FullyConnectedQuantizedLegacy::clone_with_new_inputs(const ov::OutputVector& new_args) const { + check_new_args_count(this, new_args); + + return std::make_shared(new_args.at(0), + new_args.at(1), + new_args.at(2), + new_args.at(3), + new_args.at(4), + m_output_type); +} + +// @todo finalize validate_and_infer_types +void 
FullyConnectedQuantizedLegacy::validate_and_infer_types() { + const auto input_size = get_input_size(); + + NODE_VALIDATION_CHECK(this, input_size == 5, "Number of inputs is incorrect. Current value is: ", input_size); + + ov::op::v0::MatMul op; + op.set_transpose_a(false); + op.set_transpose_b(true); + + auto out_shapes = + ov::op::v0::shape_infer(&op, + std::vector{get_input_partial_shape(0), get_input_partial_shape(1)}); + + auto output_type = m_output_type == ov::element::undefined ? get_input_element_type(0) : m_output_type; + set_output_type(0, output_type, out_shapes[0]); +} + +} // namespace internal +} // namespace op +} // namespace ov diff --git a/src/common/transformations/src/transformations/op_conversions/convert_fc_to_compressed.cpp b/src/common/transformations/src/transformations/op_conversions/convert_fc_to_compressed.cpp new file mode 100644 index 00000000000000..87c3b669d98c6d --- /dev/null +++ b/src/common/transformations/src/transformations/op_conversions/convert_fc_to_compressed.cpp @@ -0,0 +1,181 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/op_conversions/convert_fc_to_compressed.hpp" + +#include +#include + +#include "openvino/core/rt_info.hpp" +#include "openvino/core/type/element_type.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/convert.hpp" +#include "openvino/op/multiply.hpp" +#include "openvino/op/reshape.hpp" +#include "openvino/op/subtract.hpp" +#include "openvino/op/transpose.hpp" +#include "openvino/pass/pattern/op/or.hpp" +#include "openvino/pass/pattern/op/pattern.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" +#include "ov_ops/fully_connected.hpp" +#include "ov_ops/fully_connected_compressed.hpp" +#include "transformations/utils/utils.hpp" + +ov::pass::ConvertFullyConnectedToFullyConnectedCompressed::ConvertFullyConnectedToFullyConnectedCompressed( + const std::vector& supported_activation_types, + const std::vector& 
supported_weights_types, + SupportsPredicate supports_config, + bool convert_u4zp_to_u8) { + using namespace ov::pass::pattern; + + auto reshape_3d_to_2d = [](const ov::Output& output) { + auto in_ps = output.get_node()->get_input_partial_shape(0); + auto out_ps = output.get_node()->get_output_partial_shape(0); + return in_ps.rank().is_static() && out_ps.rank().is_static() && in_ps.size() == 3 && out_ps.size() == 2; + }; + + auto activation_m = any_input(ov::pass::pattern::type_matches_any(supported_activation_types)); + auto weights_m = wrap_type(ov::pass::pattern::type_matches_any(supported_weights_types)); + auto convert_m = wrap_type({weights_m}); + + auto sub_const_m = wrap_type(); + auto sub_convert_const_m = wrap_type({sub_const_m}); + auto sub_with_convert_m = wrap_type({convert_m, sub_convert_const_m}); + auto sub_no_convert_m = wrap_type({convert_m, sub_const_m}); + auto subtract_m = std::make_shared(OutputVector{sub_with_convert_m, sub_no_convert_m}); + + auto mul_const_m = wrap_type(); + auto mul_convert_const_m = wrap_type({mul_const_m}); + auto mul_scale_m = std::make_shared(OutputVector{mul_const_m, mul_convert_const_m}); + + auto mul_with_sub_m = wrap_type({subtract_m, mul_scale_m}); + auto mul_no_sub_m = wrap_type({convert_m, mul_scale_m}); + auto mul_m = std::make_shared(OutputVector{mul_with_sub_m, mul_no_sub_m}); + + auto reshape_const_m = wrap_type(); + auto reshape_m = wrap_type({mul_m, reshape_const_m}, reshape_3d_to_2d); + + auto transpose_input = std::make_shared(OutputVector{reshape_m, mul_m}); + auto transpose_const_m = wrap_type(); + auto transpose_m = wrap_type({transpose_input, transpose_const_m}); + + auto bias_m = any_input(); + auto weights_input_m = std::make_shared(ov::OutputVector{reshape_m, transpose_m, mul_m}); + auto fully_connected_m = wrap_type({activation_m, weights_input_m, bias_m}); + + ov::matcher_pass_callback callback = [OV_CAPTURE_CPY_AND_THIS](ov::pass::pattern::Matcher& m) { + const auto& pattern_map = 
m.get_pattern_value_map(); + OPENVINO_ASSERT(pattern_map.count(fully_connected_m)); + OPENVINO_ASSERT(pattern_map.count(mul_const_m)); + OPENVINO_ASSERT(pattern_map.count(weights_m)); + OPENVINO_ASSERT(pattern_map.count(bias_m)); + OPENVINO_ASSERT(pattern_map.count(convert_m)); + auto fc = std::dynamic_pointer_cast( + pattern_map.at(fully_connected_m).get_node_shared_ptr()); + if (!fc || transformation_callback(fc)) { + return false; + } + + bool has_transpose = pattern_map.count(transpose_m); + auto scale_shape = pattern_map.at(mul_const_m).get_shape(); + bool grouped = std::count_if(scale_shape.begin(), scale_shape.end(), [](size_t d) { + return d > 1; + }) > 1; + + auto weights_shape = fc->get_input_shape(1); + const size_t IC = *(weights_shape.rbegin()); + const size_t OC = *(weights_shape.rbegin() + 1); + + const size_t G = grouped ? (has_transpose ? *(scale_shape.rbegin() + 2) : *(scale_shape.rbegin() + 1)) : 1; + + if (supports_config && !supports_config(fc, IC, OC, G)) + return false; + + auto reshape_const_to_2d = [has_transpose, grouped](std::shared_ptr node) { + auto constant = std::dynamic_pointer_cast(node); + OPENVINO_ASSERT(constant != nullptr); + ov::Shape current_shape = constant->get_shape(); + if (current_shape.size() <= 2) + return constant; + + OPENVINO_ASSERT(current_shape.size() == 3); + + auto new_shape = (has_transpose || !grouped) + ? 
ov::Shape{current_shape[0] * current_shape[1], current_shape[2]} + : ov::Shape{current_shape[0], current_shape[1] * current_shape[2]}; + + return std::make_shared(*constant, new_shape); + }; + + auto convert_u4const_to_u8 = [convert_u4zp_to_u8](std::shared_ptr node) -> std::shared_ptr { + auto constant = std::dynamic_pointer_cast(node); + if (constant->get_element_type() != ov::element::u4 || !convert_u4zp_to_u8) + return std::dynamic_pointer_cast(constant); + return std::make_shared(node, ov::element::u8); + }; + + const ov::Output& fc_input_a = fc->input_value(0); + const auto& scale = reshape_const_to_2d(pattern_map.at(mul_const_m).get_node_shared_ptr()); + std::shared_ptr optional_zero_point = nullptr; + + const bool with_zero_point = + pattern_map.count(sub_no_convert_m) > 0 || pattern_map.count(sub_with_convert_m) > 0; + if (with_zero_point) { + // WA: Convert ZP to u8 for OneDNN case to avoid u4 reorder + optional_zero_point = + convert_u4const_to_u8(reshape_const_to_2d(pattern_map.at(sub_const_m).get_node_shared_ptr())); + } + + std::shared_ptr fc_input_b = reshape_const_to_2d(pattern_map.at(weights_m).get_node_shared_ptr()); + std::shared_ptr fc_input_scale = scale; + std::shared_ptr fc_input_zp = optional_zero_point; + std::shared_ptr fc_input_bias = pattern_map.at(bias_m).get_node_shared_ptr(); + std::vector> result_nodes = {}; + if (has_transpose) { + const auto& transpose = pattern_map.at(transpose_m).get_node_shared_ptr(); + std::shared_ptr transpose_const = pattern_map.at(transpose_const_m).get_node_shared_ptr(); + if (ov::shape_size(transpose_const->get_shape()) != fc_input_b->get_output_partial_shape(0).size()) { + std::vector new_order(fc_input_b->get_output_partial_shape(0).size()); + std::iota(new_order.begin(), new_order.end(), 0); + std::swap(new_order[new_order.size() - 1], new_order[new_order.size() - 2]); + transpose_const = + std::make_shared(ov::element::i32, ov::Shape{new_order.size()}, new_order); + } + + fc_input_b = 
transpose->clone_with_new_inputs({fc_input_b->output(0), transpose_const}); + ov::disable_constant_folding(fc_input_b); + result_nodes.push_back(fc_input_b); + fc_input_scale = transpose->clone_with_new_inputs({scale->output(0), transpose_const}); + ov::disable_constant_folding(fc_input_scale); + result_nodes.push_back(fc_input_scale); + if (with_zero_point && ov::shape_size(optional_zero_point->output(0).get_shape()) > 1) { + fc_input_zp = transpose->clone_with_new_inputs({optional_zero_point->output(0), transpose_const}); + ov::disable_constant_folding(fc_input_zp); + result_nodes.push_back(fc_input_zp); + } + } + + fc_input_zp = + with_zero_point ? fc_input_zp : std::make_shared(element::undefined, Shape{0}); + ov::disable_constant_folding(fc_input_zp); + result_nodes.push_back(fc_input_zp); + + auto new_fc = std::make_shared(fc_input_a, + fc_input_b, + fc_input_bias, + fc_input_scale, + fc_input_zp, + fc->get_output_type()); + + result_nodes.push_back(new_fc); + new_fc->set_friendly_name(fc->get_friendly_name()); + ov::copy_runtime_info(m.get_matched_nodes(), result_nodes); + ov::replace_node(fc, new_fc); + return true; + }; + + auto m = std::make_shared(fully_connected_m, + "ConvertFullyConnectedToFullyConnectedCompressed"); + this->register_matcher(m, callback); +} diff --git a/src/common/transformations/src/transformations/op_conversions/convert_fc_to_quantized_legacy.cpp b/src/common/transformations/src/transformations/op_conversions/convert_fc_to_quantized_legacy.cpp new file mode 100644 index 00000000000000..908e36a51a7eb9 --- /dev/null +++ b/src/common/transformations/src/transformations/op_conversions/convert_fc_to_quantized_legacy.cpp @@ -0,0 +1,77 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/op_conversions/convert_fc_to_quantized_legacy.hpp" + +#include + +#include "openvino/core/rt_info.hpp" +#include "openvino/core/type/element_type.hpp" +#include "openvino/op/constant.hpp" 
+#include "openvino/op/multiply.hpp" +#include "openvino/pass/pattern/op/label.hpp" +#include "openvino/pass/pattern/op/pattern.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" +#include "ov_ops/fully_connected.hpp" +#include "ov_ops/fully_connected_quantized_legacy.hpp" +#include "transformations/utils/utils.hpp" + +ov::pass::ConvertFCToFCQuantizedLegacy::ConvertFCToFCQuantizedLegacy() { + using namespace ov::pass::pattern; + + std::vector activation_types{ov::element::u8, ov::element::i8}; + std::vector weights_types{ov::element::i8}; + + auto activations_m = pattern::any_input(ov::pass::pattern::type_matches_any(activation_types)); + auto weights_m = wrap_type(ov::pass::pattern::type_matches_any(weights_types)); + auto bias_m = pattern::any_input(); + + auto fully_connected_m = wrap_type({activations_m, weights_m, bias_m}); + auto dequantization_scales_m = wrap_type(); + auto multiply_m = wrap_type({fully_connected_m, dequantization_scales_m}); + + ov::matcher_pass_callback callback = [=](ov::pass::pattern::Matcher& m) { + const auto& pattern_map = m.get_pattern_value_map(); + + auto fc_output = pattern_map.at(fully_connected_m); + auto activations = pattern_map.at(activations_m); + auto weights = pattern_map.at(weights_m); + auto bias = pattern_map.at(bias_m); + auto multiply = pattern_map.at(multiply_m); + auto dequantization_scales = pattern_map.at(dequantization_scales_m); + const auto& fc_output_shape = fc_output.get_partial_shape(); + const auto& multiply_output_shape = multiply.get_partial_shape(); + + if (*fc_output_shape.rbegin() != *multiply_output_shape.rbegin()) { + return false; + } + + auto fc_node = std::dynamic_pointer_cast( + pattern_map.at(fully_connected_m).get_node_shared_ptr()); + + ov::NodeVector new_ops; + auto zp = std::make_shared(element::undefined, Shape{0}); + new_ops.push_back(zp); + + auto fc_quantized = + std::make_shared(activations, + weights, + bias, + dequantization_scales, + zp, + fc_node->get_output_type()); + 
new_ops.push_back(fc_quantized); + + const auto& multiply_node = multiply.get_node_shared_ptr(); + fc_quantized->set_friendly_name(multiply_node->get_friendly_name()); + + ov::copy_runtime_info({multiply_node, fc_node}, new_ops); + ov::replace_node(multiply_node, fc_quantized); + + return true; + }; + + auto m = std::make_shared(multiply_m, "ConvertFullyConnectedToFullyConnectedQuantized"); + this->register_matcher(m, callback); +} diff --git a/src/frontends/ir/src/ir_deserializer.cpp b/src/frontends/ir/src/ir_deserializer.cpp index 7c8b6e9d4b97ab..2d1dfba956ea72 100644 --- a/src/frontends/ir/src/ir_deserializer.cpp +++ b/src/frontends/ir/src/ir_deserializer.cpp @@ -10,6 +10,7 @@ #include "openvino/core/except.hpp" #include "openvino/core/meta_data.hpp" #include "openvino/core/rt_info/weightless_caching_attributes.hpp" +#include "openvino/core/type.hpp" #include "openvino/core/type/element_type.hpp" #include "openvino/op/constant.hpp" #include "openvino/op/loop.hpp" @@ -831,7 +832,9 @@ std::shared_ptr ov::XmlDeserializer::create_node(const std::vector(inputs[i].get_node_shared_ptr()) && + ov::element::Type_t::undefined == inputs[i].get_element_type()) OPENVINO_THROW(params.type, " layer ", params.name, diff --git a/src/plugins/intel_cpu/src/cpu_types.cpp b/src/plugins/intel_cpu/src/cpu_types.cpp index 3b6440e56c3272..30884bbe649962 100644 --- a/src/plugins/intel_cpu/src/cpu_types.cpp +++ b/src/plugins/intel_cpu/src/cpu_types.cpp @@ -41,6 +41,9 @@ static const TypeToNameMap& get_type_to_name_tbl() { {"GroupConvolution", Type::Convolution}, {"MatMul", Type::MatMul}, {"FullyConnected", Type::FullyConnected}, + {"FullyConnectedCompressed", Type::FullyConnected}, + {"FullyConnectedQuantizedLegacy", Type::FullyConnected}, + {"FullyConnectedQuantized", Type::FullyConnected}, {"MaxPool", Type::Pooling}, {"AvgPool", Type::Pooling}, {"AdaptiveMaxPool", Type::AdaptivePooling}, @@ -469,6 +472,10 @@ std::string algToString(const Algorithm alg) { CASE(FQCommon); 
CASE(FQQuantization); CASE(FQBinarization); + CASE(FullyConnectedCommon); + CASE(FullyConnectedCompressed); + CASE(FullyConnectedQuantized); + CASE(FullyConnectedQuantizedLegacy); CASE(ROIPoolingMax); CASE(ROIPoolingBilinear); CASE(ROIAlignMax); diff --git a/src/plugins/intel_cpu/src/cpu_types.h b/src/plugins/intel_cpu/src/cpu_types.h index 9461526184b0bf..71088c22af8336 100644 --- a/src/plugins/intel_cpu/src/cpu_types.h +++ b/src/plugins/intel_cpu/src/cpu_types.h @@ -213,6 +213,12 @@ enum class Algorithm { EltwiseBitwiseLeftShift, EltwiseBitwiseRightShift, + // FullyConnected algorithms + FullyConnectedCommon, + FullyConnectedCompressed, + FullyConnectedQuantized, + FullyConnectedQuantizedLegacy, + // FakeQuantize algorithms FQCommon, FQQuantization, diff --git a/src/plugins/intel_cpu/src/dnnl_postops_composer.cpp b/src/plugins/intel_cpu/src/dnnl_postops_composer.cpp index 2f82fbe553ae19..70d28f1f4ac739 100644 --- a/src/plugins/intel_cpu/src/dnnl_postops_composer.cpp +++ b/src/plugins/intel_cpu/src/dnnl_postops_composer.cpp @@ -11,21 +11,69 @@ #include #include +#include "cpu_types.h" #include "memory_desc/dnnl_blocked_memory_desc.h" +#include "nodes/executors/memory_arguments.hpp" #include "openvino/core/type/element_type.hpp" +#include "utils/cpu_utils.hpp" #include "utils/debug_capabilities.h" namespace ov { namespace intel_cpu { +static std::vector getDeQuantizedScales(const MemoryArgs& memory) { + if (!memory.count(ARG_DST_DEQ_SCALE)) + return {}; + + auto scalesMemory = memory.at(ARG_DST_DEQ_SCALE); + + auto scalesData = static_cast(scalesMemory->getData()); + + if (!scalesData) + return {}; + + auto dstShape = memory.at(ARG_DST)->getShape(); + auto dqScalesShape = scalesMemory->getShape(); + + auto scalesDims = getNormalizedDimsBySize(dqScalesShape.getDims(), dstShape.getDims().size()); + + auto scaleSize = std::accumulate(scalesDims.begin(), scalesDims.end(), std::size_t(1), std::multiplies()); + + std::vector DQScales(scaleSize, 1.0); + + 
OPENVINO_ASSERT(scaleSize == 1 || DQScales.size() == 1 || DQScales.size() == scaleSize, + "set invalid scales size , DQScales vector size: ", + DQScales.size(), + ", scale data size: ", + scaleSize); + + // @todo do we really need to broadcast dq scales and then resize them back? + if (scaleSize > DQScales.size()) + DQScales.resize(scaleSize, DQScales[0]); + if (1 == scaleSize) { + std::transform(DQScales.begin(), DQScales.end(), DQScales.begin(), [=](float val) { + return (scalesData[0] * val); + }); + } else { + for (size_t i = 0; i < DQScales.size(); i++) { + DQScales[i] *= scalesData[i]; + } + } + if (std::all_of(DQScales.begin(), DQScales.end(), [&](float val) { + return (val == DQScales[0]); + })) + DQScales.resize(1); + + return DQScales; +} + DnnlPostOpsComposer::DnnlPostOpsComposer(const PostOps& postOps, const dnnl::engine& engine, const VectorDims& outputDims, const size_t indexOfOutputChannelDim, const bool isInt8, const int weiScaleMaskPerChannel, - const std::vector& DQScales, - const bool hasBias, + const MemoryArgs& memory, const dnnl::memory::data_type outDataType) : engine(engine), postOps(postOps), @@ -39,6 +87,7 @@ DnnlPostOpsComposer::DnnlPostOpsComposer(const PostOps& postOps, dimsPerOC = dimsPerTensor = VectorDims(outputDims.size(), 1); dimsPerOC[idxOC] = OC; + const auto& DQScales = getDeQuantizedScales(memory); // generalise dq scales, so extra logic is necessary here. if (isINT8) { wei_scale_values = DQScales.empty() ? std::vector{1.0} : DQScales; @@ -49,6 +98,7 @@ DnnlPostOpsComposer::DnnlPostOpsComposer(const PostOps& postOps, updateWeiScales(); // If having the bias, attr weight scale can't be updated for further ops-ops optimization. // ONEDNN 3.x quantization for scheme: QuantizedInput * QuantizedWeight * DQScale + Bias. + const bool hasBias = !memory.at(ARG_BIAS)->getDesc().empty(); weightScaleAvailable = !hasBias; } else if (!DQScales.empty()) { // DQ scale is fused but swiching back to non-INT8 for execution in some cases. 
@@ -325,9 +375,9 @@ static OptimizedFormula updateOptimizedFormula(const FakeQuantizePostOp& postOp, } bool DnnlPostOpsComposer::appendAttrPostOps(const FakeQuantizePostOp& postOp, - bool isLastPostOp, - bool doRounding, - bool allowBinary) { + bool isLastPostOp, + bool doRounding, + bool allowBinary) { DEBUG_LOG("isLastPostOp=", isLastPostOp, ", outDataType=", @@ -541,9 +591,9 @@ bool DnnlPostOpsComposer::appendShift(const std::vector& shift, bool allo } bool DnnlPostOpsComposer::appendLinear(const std::vector& scale, - const std::vector& shift, - bool isLastPostOp, - bool allowBinary) { + const std::vector& shift, + bool isLastPostOp, + bool allowBinary) { if (scale.size() == 1 && shift.size() == 1) { if (shift[0] == 0.0f) return appendScale(scale, isLastPostOp, allowBinary); @@ -599,15 +649,27 @@ static MemoryPtr prepackDecompressionParams(const MemoryCPtr& paramsPtr, if (shape.size() == 1 && shape[0] == 1) { shape.push_back(1); } + if (shape.size() != 2 && shape.size() != 3) - OPENVINO_THROW("DnnlPostOpsComposer cannot prepack decompression params with invalid shape"); + OPENVINO_THROW("DnnlPostOpsComposer cannot prepack decompression params with invalid shape"); - Shape dstShape = needTranspose ? Shape({shape[0], shape[1]}) : Shape({shape[shape.size() - 1], shape[0]}); - DnnlBlockedMemoryDesc dstMemoryDesc(dstShape, DnnlExtensionUtils::ElementTypeToDataType(dstPrc), dnnl::memory::format_tag::io); - auto dstMem = std::make_shared(engine, dstMemoryDesc); + // weights without batch: (OC, G) + // weights with batch: (B, OC, G) + const size_t OC = shape[shape.size() - 2]; + const size_t G = shape[shape.size() - 1]; + + Shape dstShape = Shape({OC, G}); + DnnlBlockedMemoryDesc dstMemoryDesc(dstShape, + DnnlExtensionUtils::ElementTypeToDataType(dstPrc), + dnnl::memory::format_tag::io); + auto dstMem = std::make_shared(engine, dstMemoryDesc); auto srcFormat = needTranspose ? 
dnnl::memory::format_tag::oi : dnnl::memory::format_tag::io; - DnnlBlockedMemoryDesc srcMemoryDesc(dstShape, DnnlExtensionUtils::ElementTypeToDataType(paramsPtr->getDescPtr()->getPrecision()), srcFormat); + + DnnlBlockedMemoryDesc srcMemoryDesc( + dstShape, + DnnlExtensionUtils::ElementTypeToDataType(paramsPtr->getDescPtr()->getPrecision()), + srcFormat); auto srcMem = std::make_shared(engine, srcMemoryDesc, paramsPtr->getData()); dstMem->load(*srcMem); @@ -615,25 +677,32 @@ static MemoryPtr prepackDecompressionParams(const MemoryCPtr& paramsPtr, return dstMem; } -void DnnlPostOpsComposer::appendDecompressionScales(const MemoryCPtr& scales_ptr, bool needTranspose, ov::element::Type dstPrecision) { +void DnnlPostOpsComposer::appendDecompressionScales(const MemoryCPtr& scales_ptr, + bool needTranspose, + ov::element::Type dstPrecision) { if (scales_ptr == nullptr) return; auto scalesMem = prepackDecompressionParams(scales_ptr, needTranspose, dstPrecision, engine); attr.set_scales_dims(DNNL_ARG_WEIGHTS, - DnnlExtensionUtils::convertToDnnlDims(scalesMem->getStaticDims()), DnnlExtensionUtils::ElementTypeToDataType(dstPrecision)); + DnnlExtensionUtils::convertToDnnlDims(scalesMem->getStaticDims()), + DnnlExtensionUtils::ElementTypeToDataType(dstPrecision)); cpuArgs[DNNL_ARG_ATTR_SCALES | DNNL_ARG_WEIGHTS] = std::move(scalesMem); dnnlArgs[DNNL_ARG_ATTR_SCALES | DNNL_ARG_WEIGHTS] = cpuArgs[DNNL_ARG_ATTR_SCALES | DNNL_ARG_WEIGHTS]->getPrimitive(); } -void DnnlPostOpsComposer::appendDecompressionZeroPoints(const MemoryCPtr& zero_points_ptr, bool needTranspose, ov::element::Type dstPrecision) { +void DnnlPostOpsComposer::appendDecompressionZeroPoints(const MemoryCPtr& zero_points_ptr, + bool needTranspose, + ov::element::Type dstPrecision) { if (zero_points_ptr == nullptr) return; - auto zeroPointsMem = prepackDecompressionParams(zero_points_ptr, needTranspose, dstPrecision, engine); + auto zeroPointsMem = + prepackDecompressionParams(zero_points_ptr, needTranspose, 
dstPrecision, engine); attr.set_zero_points_dims(DNNL_ARG_WEIGHTS, - DnnlExtensionUtils::convertToDnnlDims(zeroPointsMem->getStaticDims()), DnnlExtensionUtils::ElementTypeToDataType(dstPrecision)); + DnnlExtensionUtils::convertToDnnlDims(zeroPointsMem->getStaticDims()), + DnnlExtensionUtils::ElementTypeToDataType(dstPrecision)); cpuArgs[DNNL_ARG_ATTR_ZERO_POINTS | DNNL_ARG_WEIGHTS] = zeroPointsMem; dnnlArgs[DNNL_ARG_ATTR_ZERO_POINTS | DNNL_ARG_WEIGHTS] = zeroPointsMem->getPrimitive(); } diff --git a/src/plugins/intel_cpu/src/dnnl_postops_composer.h b/src/plugins/intel_cpu/src/dnnl_postops_composer.h index c07ec0f608b6db..8c2718aaaed4d5 100644 --- a/src/plugins/intel_cpu/src/dnnl_postops_composer.h +++ b/src/plugins/intel_cpu/src/dnnl_postops_composer.h @@ -27,8 +27,7 @@ class DnnlPostOpsComposer { const size_t indexOfOutputChannelDim, const bool isINT8, const int weiScaleMaskPerChannel, - const std::vector& DQScales, - const bool hasBias, + const MemoryArgs& memory, const dnnl::memory::data_type outDataType); DnnlPrimitiveAttrs compose(); void appendDecompressionScales(const MemoryCPtr& scales_ptr, bool needTranspose, ov::element::Type dstPrecision); diff --git a/src/plugins/intel_cpu/src/edge.cpp b/src/plugins/intel_cpu/src/edge.cpp index 82bde8edae2b4a..c49b924477f694 100644 --- a/src/plugins/intel_cpu/src/edge.cpp +++ b/src/plugins/intel_cpu/src/edge.cpp @@ -5,6 +5,7 @@ #include "edge.h" #include "node.h" #include "dnnl_extension_utils.h" +#include "openvino/core/type/element_type.hpp" #include "openvino/util/pp.hpp" using namespace dnnl; @@ -212,6 +213,10 @@ Edge::ReorderStatus Edge::needReorder() { bool optimized = false; auto inputPortDesc = getInputPortDesc(); auto outPortDesc = getOutputPortDesc(); + + if (inputPortDesc->getMemDesc()->getPrecision() == element::undefined) + return ReorderStatus::No; + // Check whether the child node may accept the parent produced tensor if (!outPortDesc->isCompatible(*inputPortDesc)) { // Performance optimization which 
exploit the fact that some tensors do not need actual data reordering to be read using different descriptors @@ -410,6 +415,9 @@ const MemoryDesc& Edge::getOutputDesc() const { } const MemoryDesc& Edge::getDesc() const { + if (getInputDesc().getPrecision() == element::undefined) + return getInputDesc(); + if (!getInputDesc().isCompatible(getOutputDesc())) OPENVINO_THROW("Cannot get descriptor for edge: ", getParent()->getName(), "->", getChild()->getName()); diff --git a/src/plugins/intel_cpu/src/extension.cpp b/src/plugins/intel_cpu/src/extension.cpp index a29282d4af3101..e6dbc04b0ca6a4 100644 --- a/src/plugins/intel_cpu/src/extension.cpp +++ b/src/plugins/intel_cpu/src/extension.cpp @@ -7,6 +7,10 @@ #include "openvino/core/op_extension.hpp" #include "ov_ops/augru_cell.hpp" #include "ov_ops/augru_sequence.hpp" +#include "ov_ops/fully_connected.hpp" +#include "ov_ops/fully_connected_compressed.hpp" +#include "ov_ops/fully_connected_quantized_legacy.hpp" +#include "ov_ops/fully_connected_quantized.hpp" #include "ov_ops/gather_compressed.hpp" #include "ov_ops/multiclass_nms_ie_internal.hpp" #include "ov_ops/nms_ie_internal.hpp" @@ -16,7 +20,6 @@ #include "ov_ops/type_relaxed.hpp" #include "snippets/op/subgraph.hpp" #include "transformations/cpu_opset/common/op/causal_mask_preprocess.hpp" -#include "transformations/cpu_opset/common/op/fully_connected.hpp" #include "transformations/cpu_opset/common/op/leaky_relu.hpp" #include "transformations/cpu_opset/common/op/ngram.hpp" #include "transformations/cpu_opset/common/op/power_static.hpp" @@ -70,7 +73,6 @@ class TypeRelaxedExtension : public ov::OpExtension> { #endif #define CPU_EXTENSIONS \ - OP_EXTENSION(ov::intel_cpu::FullyConnectedNode) \ OP_EXTENSION(ov::intel_cpu::LeakyReluNode) \ OP_EXTENSION(ov::intel_cpu::PowerStaticNode) \ OP_EXTENSION(ov::intel_cpu::CausalMaskPreprocessNode) \ @@ -85,6 +87,10 @@ class TypeRelaxedExtension : public ov::OpExtension> { OP_EXTENSION(ov::op::internal::NmsStaticShapeIE) \ 
OP_EXTENSION(ov::op::internal::RMS) \ OP_EXTENSION(ov::op::internal::RoPE) \ + OP_EXTENSION(ov::op::internal::FullyConnected) \ + OP_EXTENSION(ov::op::internal::FullyConnectedCompressed) \ + OP_EXTENSION(ov::op::internal::FullyConnectedQuantizedLegacy) \ + OP_EXTENSION(ov::op::internal::FullyConnectedQuantized) \ OP_EXTENSION_X64(ov::intel_cpu::MHANode) \ OP_EXTENSION_X64(ov::intel_cpu::InteractionNode) \ OP_EXTENSION_X64(ov::intel_cpu::LLMMLPNode) \ diff --git a/src/plugins/intel_cpu/src/graph_optimizer.cpp b/src/plugins/intel_cpu/src/graph_optimizer.cpp index 61590b8691f4b2..94f54fc4c59b55 100644 --- a/src/plugins/intel_cpu/src/graph_optimizer.cpp +++ b/src/plugins/intel_cpu/src/graph_optimizer.cpp @@ -67,10 +67,6 @@ void GraphOptimizer::ApplyCommonGraphOptimizations(Graph &graph) { FuseConvMatmulFCDeconvAndDQScales(graph); graph.RemoveDroppedNodes(); - OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseFCAndWeightsDecompression"); - FuseFCAndWeightsDecompression(graph); - graph.RemoveDroppedNodes(); - OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseConvolutionAndBias"); FuseConvolutionMatMulDeconvAndBias(graph); graph.RemoveDroppedNodes(); @@ -217,8 +213,7 @@ void GraphOptimizer::FuseConvMatmulFCDeconvAndDQScales(Graph &graph) { auto scaleNode = node->getParentEdgeAt(1)->getParent(); if (!(parentNode->getType() == Type::Convolution || parentNode->getType() == Type::MatMul - || parentNode->getType() == Type::Deconvolution - || parentNode->getType() == Type::FullyConnected)) + || parentNode->getType() == Type::Deconvolution)) return false; if (!scaleNode->isConstant()) return false; @@ -292,257 +287,6 @@ void GraphOptimizer::FuseConvMatmulFCDeconvAndDQScales(Graph &graph) { } } -void GraphOptimizer::FuseFCAndWeightsDecompression(Graph &graph) { - std::set supportedWeightsPrecisions{ - ov::element::u8, ov::element::i8, ov::element::nf4, ov::element::u4, ov::element::i4, ov::element::f4e2m1}; - const std::set supportedDataPrecisions{ov::element::f32, 
ov::element::bf16}; - auto expectedNode = [](NodePtr node, Type expectedType) { - return node->getType() == expectedType && node->getChildEdges().size() == 1; - }; - -#define SKIP_FUSION_FOR_NODE(node) \ - DEBUG_LOG("FuseFCAndWeightsDecompression can't be applied for node ", node->getName()); \ - continue - - if (!impl::cpu::x64::mayiuse(impl::cpu::x64::avx2)) - return; - - auto& graphNodes = graph.GetNodes(); - for (size_t i = 0; i < graphNodes.size(); i++) { - const auto fcNode = std::dynamic_pointer_cast(graphNodes[i]); - if (fcNode == nullptr) - continue; - - auto parent = fcNode->getParentEdgeAt(1)->getParent(); - const bool withTranspose = parent->getType() == Type::Transpose; - const NodePtr transposeNode = withTranspose ? parent : nullptr; - if (transposeNode) - parent = transposeNode->getParentEdgeAt(0)->getParent(); - // Compressed weights can be shared between several FC layers - const bool is_shared_decompression = parent->getChildEdges().size() > 1; - - const bool withReshape = parent->getType() == Type::Reshape; - const auto reshapeNode = withReshape ? 
parent : nullptr; - if (reshapeNode) { - parent = reshapeNode->getParentEdgeAt(0)->getParent(); - } - - const auto multiplyNode = parent; - if (multiplyNode->getType() != Type::Eltwise || multiplyNode->getAlgorithm() != Algorithm::EltwiseMultiply || - !multiplyNode->isConstant()) { - SKIP_FUSION_FOR_NODE(fcNode); - } - - CPU_GRAPH_OPTIMIZER_SCOPE(FuseFCAndWeightsDecompression); - const auto mulParent1 = multiplyNode->getParentEdgeAt(1)->getParent(); - NodePtr multiplyParent, multiplyConvertNode, multiplyConstNode; - multiplyParent = mulParent1; - if (multiplyParent->getType() == Type::Convert) { - multiplyConvertNode = multiplyParent; - multiplyParent = multiplyConvertNode->getParentEdgeAt(0)->getParent(); - } - multiplyConstNode = multiplyParent; - if (multiplyConstNode->getType() != Type::Input) { - SKIP_FUSION_FOR_NODE(fcNode); - } - const bool withMultiplyConvert = multiplyConvertNode != nullptr; - - const auto mulParent0 = multiplyNode->getParentEdgeAt(0)->getParent(); - const bool withSubtract = mulParent0->getAlgorithm() == Algorithm::EltwiseSubtract; - NodePtr subtractNode, subtractConvertNode, subtractConstNode; - if (withSubtract) { - subtractNode = mulParent0; - if (!expectedNode(subtractNode, Type::Eltwise)) { - SKIP_FUSION_FOR_NODE(fcNode); - } - auto subtractParent = subtractNode->getParentEdgeAt(1)->getParent(); - if (subtractParent->getType() == Type::Convert) { - subtractConvertNode = subtractParent; - subtractParent = subtractConvertNode->getParentEdgeAt(0)->getParent(); - } - subtractConstNode = subtractParent; - if (subtractConstNode->getType() != Type::Input) { - SKIP_FUSION_FOR_NODE(fcNode); - } - } - - const bool withSubtractConvert = subtractConvertNode != nullptr; - const auto convertNode = withSubtract ? 
subtractNode->getParentEdgeAt(0)->getParent() : mulParent0; - if (!expectedNode(convertNode, Type::Convert)) { - SKIP_FUSION_FOR_NODE(fcNode); - } - const auto weightsNode = convertNode->getParentEdgeAt(0)->getParent(); - if (weightsNode->getType() != Type::Input) { - SKIP_FUSION_FOR_NODE(fcNode); - } - - // Precision limitations - if (supportedDataPrecisions.find(fcNode->getOriginalInputPrecisionAtPort(0)) == supportedDataPrecisions.end()) { - SKIP_FUSION_FOR_NODE(fcNode); - } - if (supportedWeightsPrecisions.find(weightsNode->getOriginalOutputPrecisionAtPort(0)) == supportedWeightsPrecisions.end()) { - SKIP_FUSION_FOR_NODE(fcNode); - } - if (withSubtract && - !one_of(subtractConstNode->getOriginalOutputPrecisionAtPort(0), weightsNode->getOriginalOutputPrecisionAtPort(0), ov::element::f32)) { - SKIP_FUSION_FOR_NODE(fcNode); - } - - // Shape limitations - const auto weightsShape = weightsNode->getOutputShapeAtPort(0); - if (weightsShape != multiplyNode->getOutputShapeAtPort(0)) { - SKIP_FUSION_FOR_NODE(fcNode); - } - if (reshapeNode && (reshapeNode->getInputShapeAtPort(0).getRank() != 3 || reshapeNode->getOutputShapeAtPort(0).getRank() != 2)) { - SKIP_FUSION_FOR_NODE(fcNode); - } - - VectorDims decompressionConstShape; - const auto fcInputWeightsShape = fcNode->getInputShapeAtPort(1); - int groupNum = 1; - // Ordinary case: one decompression group - if (fcInputWeightsShape.getRank() == weightsShape.getRank()) { - const auto& out_channels = fcInputWeightsShape.getDims()[0]; - decompressionConstShape = withTranspose ? VectorDims{1, out_channels} : VectorDims{out_channels, 1}; - } else { - // Group decompression case: last 3 dimension (there could be also prepending '1's in the beginning) of weights shape must be: - // [N, G, O], if transpose = true - // [O, N, G], otherwise. - // O - output channels - // N - number of groups - // G - group size - const auto& weights_dims = weightsShape.getStaticDims(); - const auto& N = withTranspose ? 
*(weights_dims.rbegin() + 2) : *(weights_dims.rbegin() + 1); - const auto& O = withTranspose ? *weights_dims.rbegin() : *(weights_dims.rbegin() + 2); - // Group decompression is applied by O and N dims - decompressionConstShape = withTranspose ? VectorDims{N, 1, O} : VectorDims{O, N, 1}; - groupNum = N; - } - - auto check_decompression_shape = [&decompressionConstShape](const VectorDims& shape_to_check) { - if (shape_to_check.size() > decompressionConstShape.size()) - return false; - if (std::all_of(shape_to_check.begin(), shape_to_check.end(), [](Dim x) { return x == 1; })) - return true; - const auto comparison_start_pos = decompressionConstShape.size() - shape_to_check.size(); - // in case of different ranks shapes are compared taking into account ranks numpy broadcasting - return std::equal(shape_to_check.begin(), shape_to_check.end(), decompressionConstShape.begin() + comparison_start_pos); - }; - if (!check_decompression_shape(multiplyConstNode->getOutputShapeAtPort(0).getDims())) { - SKIP_FUSION_FOR_NODE(fcNode); - } - if (withSubtract && !check_decompression_shape(subtractConstNode->getOutputShapeAtPort(0).getDims())) { - SKIP_FUSION_FOR_NODE(fcNode); - } - - const size_t OC = fcInputWeightsShape.getDims()[0]; - const size_t IC = fcInputWeightsShape.getDims()[1]; - // HW specific shape limitations - if (impl::cpu::x64::mayiuse(impl::cpu::x64::avx512_core_amx) && - fcNode->getOriginalInputPrecisionAtPort(0) == ov::element::bf16) { - // OneDNN AMX IP implementation has limited shapes support due to performance considerations. As a current solution conditions below are copied - // from OneDNN to make sure correct IP impl will be used since fallback one doesn't support weights decompression feature. 
- size_t simdWidth = 16; - size_t vnniFactor = 2; - size_t maxSize = 512; - auto amxRow = vnniFactor * simdWidth; - - if ((IC <= amxRow && OC <= amxRow) || (IC <= maxSize && OC <= maxSize && IC % amxRow != 0)) { - SKIP_FUSION_FOR_NODE(fcNode); - } - } - - // OneDNN IP primitive provides limited decompression params support - if (IC % groupNum != 0 || IC / groupNum < 4 || OC == 1) { - SKIP_FUSION_FOR_NODE(fcNode); - } - - // Fusion processing - auto *multiplyInputNode = dynamic_cast(multiplyConstNode.get()); - OPENVINO_ASSERT(multiplyInputNode, "Cannot cast ", multiplyConstNode->getName(), " to Input node."); - fcNode->fuseDecompressionMultiply(multiplyInputNode->getMemoryPtr()); - - if (withSubtract) { - auto *subtractInputNode = dynamic_cast(subtractConstNode.get()); - OPENVINO_ASSERT(multiplyInputNode, "Cannot cast ", subtractConstNode->getName(), " to Input node."); - fcNode->fuseDecompressionSubtract(subtractInputNode->getMemoryPtr()); - } - - fcNode->addOriginalLayer(multiplyNode->getOriginalLayers()); - fcNode->addOriginalLayer(convertNode->getOriginalLayers()); - if (withSubtract) - fcNode->addOriginalLayer(subtractNode->getOriginalLayers()); - if (withSubtractConvert) - fcNode->addOriginalLayer(subtractConvertNode->getOriginalLayers()); - if (withMultiplyConvert) - fcNode->addOriginalLayer(multiplyConvertNode->getOriginalLayers()); - - const auto& weightsPrecision = weightsNode->getOriginalOutputPrecisionAtPort(0); - if (withTranspose) { - transposeNode->setOriginalInputPrecisionAtPort(0, weightsPrecision); - transposeNode->setOriginalOutputPrecisionAtPort(0, weightsPrecision); - } - if (withReshape) { - reshapeNode->setOriginalInputPrecisionAtPort(0, weightsPrecision); - reshapeNode->setOriginalOutputPrecisionAtPort(0, weightsPrecision); - } - fcNode->setOriginalInputPrecisionAtPort(1, weightsPrecision); - - // If decompression subgraph is shared with other nodes, it mustn't be removed. 
- // In this case, the current FC is reconnected to the weights - if (is_shared_decompression) { - const auto weights_out_edge = weightsNode->getChildEdges()[0].lock(); - const auto fc_weights_path_edge = withTranspose ? transposeNode->getParentEdgeAt(0) - : fcNode->getParentEdgeAt(1); - const auto inNum = weights_out_edge->getInputNum(); - const auto outNum = fc_weights_path_edge->getOutputNum(); - graph.RemoveEdge(fc_weights_path_edge); - // In case of shared group decompression, Reshape node has to be copied for the current FC - if (withReshape) { - const auto& reshapeOutShape = reshapeNode->getOutputShapeAtPort(0).getStaticDims(); - auto reshapeConst = std::make_shared(ov::element::i32, - ov::Shape{reshapeOutShape.size()}, - reshapeOutShape); - auto reshapeDummyInput = std::make_shared(reshapeNode->getOriginalInputPrecisionAtPort(0), - reshapeNode->getInputShapeAtPort(0).toPartialShape()); - const auto reshape = std::make_shared(reshapeDummyInput, reshapeConst, false); - reshape->set_friendly_name(reshapeNode->getName() + "_copy"); - const auto cpuReshape = std::make_shared(reshape, graph.getGraphContext()); - graph.InsertNode(weightsNode, withTranspose ? transposeNode : fcNode, cpuReshape, inNum, outNum, false); - const auto cpuReshapeConst = std::make_shared(reshapeConst, graph.getGraphContext()); - graph.AddNode(cpuReshapeConst); - graph.CreateEdge(cpuReshapeConst, cpuReshape, 0, 1); - } else { - graph.CreateEdge(weightsNode, withTranspose ? transposeNode : fcNode, inNum, outNum); - } - } else { - // If decompression subgraph is not shared with other nodes, it can be removed - if (withSubtract) - graph.RemoveEdge(subtractNode->getParentEdgeAt(1)); - if (withSubtractConvert) { - // SubtractConvert is removed only if there are no other consumers (e.g. 
CompressedGather) - const auto& restChilds = subtractConvertNode->getChildEdges(); - if (restChilds.empty()) - graph.RemoveEdge(subtractConvertNode->getParentEdgeAt(0)); - } - graph.RemoveEdge(multiplyNode->getParentEdgeAt(1)); - if (withMultiplyConvert) { - // MultiplyConvert is removed only if there are no other consumers (e.g. CompressedGather) - const auto& restChilds = multiplyConvertNode->getChildEdges(); - if (restChilds.empty()) - graph.RemoveEdge(multiplyConvertNode->getParentEdgeAt(0)); - } - - graph.DropNode(convertNode); - if (withSubtract) - graph.DropNode(subtractNode); - graph.DropNode(multiplyNode); - } - DEBUG_LOG("FuseFCAndWeightsDecompression finished for node ", fcNode->getName()); - } -#undef SKIP_FUSION_FOR_NODE -} - void GraphOptimizer::FuseConvolutionMatMulDeconvAndBias(Graph &graph) { auto& graphNodes = graph.GetNodes(); @@ -556,7 +300,7 @@ void GraphOptimizer::FuseConvolutionMatMulDeconvAndBias(Graph &graph) { return false; if (!deconv) - return (one_of(node->getType(), Type::Convolution, Type::MatMul, Type::FullyConnected) && + return (one_of(node->getType(), Type::Convolution, Type::MatMul) && node->getParentEdges().size() == 2); else return deconv->canFuseBias(); @@ -984,9 +728,7 @@ void GraphOptimizer::FuseFCAndTransposeOnWeights(Graph& graph) { auto isSuitablePattern = [](NodePtr parent) { bool res = true && parent->getType() == Type::Transpose && parent->getChildEdges().size() == 1 - && parent->getChildEdgeAt(0)->getOutputNum() == 1 && parent->getChildEdgeAt(0)->getChild()->getType() == Type::FullyConnected - && parent->getOutputShapeAtPort(0).getRank() == 2 && parent->isConstant(); return res; }; diff --git a/src/plugins/intel_cpu/src/graph_optimizer.h b/src/plugins/intel_cpu/src/graph_optimizer.h index 886296a7c0053b..536ef468a09816 100644 --- a/src/plugins/intel_cpu/src/graph_optimizer.h +++ b/src/plugins/intel_cpu/src/graph_optimizer.h @@ -20,7 +20,6 @@ class GraphOptimizer { private: void FuseConvMatmulFCDeconvAndDQScales(Graph 
&graph); - void FuseFCAndWeightsDecompression(Graph &graph); void FuseConvolutionMatMulDeconvAndBias(Graph &graph); void FuseDeconvolutionAndSimpleOperation(Graph &graph); void FuseMultiplyAndAdd(Graph &graph); diff --git a/src/plugins/intel_cpu/src/memory_desc/empty_memory_desc.h b/src/plugins/intel_cpu/src/memory_desc/empty_memory_desc.h index 4b641669262591..1575841cb2be9e 100644 --- a/src/plugins/intel_cpu/src/memory_desc/empty_memory_desc.h +++ b/src/plugins/intel_cpu/src/memory_desc/empty_memory_desc.h @@ -59,7 +59,9 @@ class EmptyMemoryDesc : public MemoryDesc { } MemoryDescPtr cloneWithNewPrecision(const ov::element::Type prec) const override { - OPENVINO_THROW("Clone an empty memory desc with any precision (", prec, ") is prohibited"); + OPENVINO_ASSERT(prec == ov::element::undefined, + "Clone an empty memory desc with defined precision: ", prec, " is prohibited"); + return clone(); } private: diff --git a/src/plugins/intel_cpu/src/node.cpp b/src/plugins/intel_cpu/src/node.cpp index de5c53429138c4..ee0a99c3bba44e 100644 --- a/src/plugins/intel_cpu/src/node.cpp +++ b/src/plugins/intel_cpu/src/node.cpp @@ -6,6 +6,7 @@ #include "cpu_types.h" #include "edge.h" #include "partitioned_mem_blk.h" +#include "openvino/core/type/element_type.hpp" #include #include @@ -1673,7 +1674,7 @@ bool Node::isInputTensorAtPortEmpty(size_t port) const { auto edge = getParentEdgeAt(port); if (one_of(edge->getStatus(), Edge::Status::Allocated, Edge::Status::Validated)) { auto&& mem = edge->getMemory(); - if (mem.isDefined()) { + if (mem.isDefined() && !mem.getDesc().empty()) { return mem.getShape().hasZeroDims(); } } diff --git a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_fullyconnected.cpp b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_fullyconnected.cpp index cc42691950a3ff..9660178e1af4a4 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_fullyconnected.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_fullyconnected.cpp @@ -11,6 +11,7 @@ 
#include "nodes/executors/executor.hpp" #include "nodes/executors/memory_arguments.hpp" #include "utils/debug_capabilities.h" +#include "utils/cpu_utils.hpp" #include "nodes/executors/debug_messages.hpp" #include "nodes/executors/implementation_utils.hpp" #include "nodes/convert.h" @@ -201,9 +202,22 @@ static MemoryPtr prepareWeightMemory(const MemoryArgs &memory, MemoryArgs memoryArgs; memoryArgs[ARG_BIAS] = memory.at(ARG_BIAS); memoryArgs[ARG_WEI] = memory.at(ARG_WEI); + + auto originalWeightsDesc = memory.at(ARG_WEI)->getDescPtr(); + + // normalize weights to 2D + const auto& wgtDims = originalWeightsDesc->getShape().getStaticDims(); + const VectorDims wgtDims2D = reshapeDownToRank<2>(wgtDims); + + originalWeightsDesc = std::make_shared(originalWeightsDesc->getPrecision(), Shape{wgtDims2D}); + + auto dnnlSrcDesc = MemoryDescUtils::convertToDnnlMemoryDesc(originalWeightsDesc); + auto dstDesc = originalWeightsDesc->cloneWithNewPrecision(aclfcAttrs.inputPrecision); + auto dnnlDstDesc = MemoryDescUtils::convertToDnnlMemoryDesc(dstDesc); + if (memory.at(ARG_SRC_0)->getShape().isDynamic()) { const auto& inShape = memory.at(ARG_SRC_0)->getShape(); - const auto& wShape = memory.at(ARG_WEI)->getShape(); + const auto& wShape = originalWeightsDesc->getShape(); const auto& inDymmyDims = makeDummyInputDims(inShape, wShape); const auto& outDymmyDims = makeDummyOutputDims(inDymmyDims, wShape.getStaticDims(), memory.at(ARG_DST)->getShape().getRank()); memoryArgs[ARG_SRC_0] = std::make_shared(context->getEngine(), @@ -214,6 +228,7 @@ static MemoryPtr prepareWeightMemory(const MemoryArgs &memory, memoryArgs[ARG_SRC_0] = memory.at(ARG_SRC_0); memoryArgs[ARG_DST] = memory.at(ARG_DST); } + // TODO: ACLWeightFormatGenerator should be replaced with Reorder executor // that calls ACL NEReorder + NETranspose or dnnl::reorder depending on backend availability auto aclWeightsRepack = std::make_shared(attrs, postOps, memoryArgs); @@ -221,13 +236,6 @@ static MemoryPtr 
prepareWeightMemory(const MemoryArgs &memory, expectedWeightFormat = isNeededReorder ? aclWeightsRepack->getOptImplWeightFormat() : arm_compute::WeightFormat::UNSPECIFIED; weiTensorInfo = aclWeightsRepack->getTensorInfo(ACLArgs::ACL_WEI); - MemoryPtr dstMemPtr = std::make_shared(context->getEngine(), - memory.at(ARG_WEI)->getDescPtr()->cloneWithNewPrecision(aclfcAttrs.inputPrecision)); - auto dstDesc = dstMemPtr->getDescPtr(); - auto dnnlDstDesc = MemoryDescUtils::convertToDnnlMemoryDesc(dstDesc); - auto weiDesc = memory.at(ARG_WEI)->getDescPtr(); - auto dnnlSrcDesc = MemoryDescUtils::convertToDnnlMemoryDesc(weiDesc); - if (isNeededReorder) { dnnl::impl::dim_t o_dim = 0; dnnl::impl::dim_t inner_dim = 1; diff --git a/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_convolution_primitive.cpp b/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_convolution_primitive.cpp index 8f9d7ad0805e41..61aca683a37687 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_convolution_primitive.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_convolution_primitive.cpp @@ -157,8 +157,7 @@ static DnnlPrimitiveAttrs createPrimitiveAttrs(const ConvAttrs& attrs, one_of(srcDesc->getPrecision(), ov::element::u8, ov::element::i8) && weiDesc->getPrecision() == ov::element::i8; auto outputDataType = DnnlExtensionUtils::ElementTypeToDataType(dstDesc->getPrecision()); - DnnlPostOpsComposer - dnnlpoc(postOps, context->getEngine(), dims, 1, isINT8, 1 << 0, {}, attrs.withBias, outputDataType); + DnnlPostOpsComposer dnnlpoc(postOps, context->getEngine(), dims, 1, isINT8, 1 << 0, memory, outputDataType); return dnnlpoc.compose(); } diff --git a/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected_primitive.cpp b/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected_primitive.cpp index fcb70d4753b2ce..780dbb6f2f3f11 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected_primitive.cpp +++ 
b/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected_primitive.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -27,6 +28,7 @@ #include "nodes/executors/executor.hpp" #include "nodes/executors/fullyconnected_config.hpp" #include "nodes/executors/memory_arguments.hpp" +#include "utils/cpu_utils.hpp" #include "utils/debug_capabilities.h" namespace ov { @@ -115,9 +117,10 @@ DnnlMemoryDescPtr DnnlFCPrimitive::makeTransposedWeightDescriptor(const DnnlMemo return srcDesc; const auto& weiDesc = srcDesc->getDnnlDesc(); - const auto reorderedWeiDesc = - dnnl::memory::desc{weiDesc.get_dims(), weiDesc.get_data_type(), dnnl::memory::format_tag::ba}; - const auto transposedWeiDesc = reorderedWeiDesc.reshape(dstDesc->getDnnlDesc().get_dims()); + auto wDims = weiDesc.get_dims(); + dnnl::memory::dims wDims2D = reshapeDownToRank<2>(wDims); + + const auto transposedWeiDesc = dnnl::memory::desc{wDims2D, weiDesc.get_data_type(), dnnl::memory::format_tag::ba}; return DnnlExtensionUtils::makeDescriptor(transposedWeiDesc); } @@ -140,12 +143,11 @@ bool DnnlFCPrimitive::useWeightsDecompressionImpl(const ov::element::Type inputT return false; } -bool DnnlFCPrimitive::useDynamicQuantizationImpl(size_t dqGroupSize, - const MemoryDescPtr srcDesc, - const MemoryDescPtr weightsDesc, - MemoryCPtr scalesPtr, - MemoryCPtr zpPtr, - bool needTranspose) { +static bool useDynamicQuantizationImpl(size_t dqGroupSize, + const MemoryDescPtr srcDesc, + const MemoryDescPtr weightsDesc, + const MemoryArgs& memory, + bool needTranspose) { if (dqGroupSize == 0) return false; @@ -155,6 +157,8 @@ bool DnnlFCPrimitive::useDynamicQuantizationImpl(size_t dqGroupSize, if (srcDesc->getPrecision() != ov::element::f32) return false; + + MemoryCPtr zpPtr = memory.count(ARG_WEI | ARG_ATTR_ZERO_POINTS) ? memory.at(ARG_WEI | ARG_ATTR_ZERO_POINTS) : nullptr; // For dynamic quantization, VNNI accumulation requires weight to be unsigned. 
// To support dynamic quantization with weights symmetrically quantized as i8/i4 // w/o zero-point, we will transform weight to u8/u4 weight with zp 128/8. @@ -177,11 +181,15 @@ bool DnnlFCPrimitive::useDynamicQuantizationImpl(size_t dqGroupSize, if (weightsDesc->getPrecision() == ov::element::u4) { int ic = weightsDesc->getShape().getStaticDims()[1]; int minGroupSize = INT_MAX; + + MemoryCPtr scalesPtr = memory.count(ARG_WEI | ARG_ATTR_SCALES) ? memory.at(ARG_WEI | ARG_ATTR_SCALES) : nullptr; + if (scalesPtr && scalesPtr->getShape().getRank() == 3) { auto scalesDims = scalesPtr->getShape().getStaticDims(); auto groupsNum = needTranspose ? scalesDims[1] : scalesDims[0]; minGroupSize = ic / groupsNum; } + if (zpPtr && zpPtr->getShape().getRank() == 3) { auto zpDims = zpPtr->getShape().getStaticDims(); int groupsNum = needTranspose ? zpDims[1] : zpDims[0]; @@ -196,11 +204,6 @@ bool DnnlFCPrimitive::useDynamicQuantizationImpl(size_t dqGroupSize, return true; } -template -static std::vector normalizeDimsTo2D(const std::vector& dims) { - return {std::accumulate(dims.begin(), dims.end() - 1, (T)1, std::multiplies()), dims[dims.size() - 1]}; -} - static DnnlPrimitiveAttrs createPrimitiveAttrs(const FCAttrs& attrs, const PostOps& postOps, const MemoryArgs& memory, @@ -211,7 +214,7 @@ static DnnlPrimitiveAttrs createPrimitiveAttrs(const FCAttrs& attrs, const auto& dstDesc = memory.at(ARG_DST)->getDescPtr(); const auto& originalDims = dstDesc->getShape().getMinDims(); - const auto& dims = normalizeDimsTo2D(originalDims); + const auto& dims = reshapeDownToRank<2>(originalDims); auto isINT8 = one_of(srcDesc->getPrecision(), ov::element::u8, ov::element::i8) && weiDesc->getPrecision() == ov::element::i8; @@ -223,21 +226,22 @@ static DnnlPrimitiveAttrs createPrimitiveAttrs(const FCAttrs& attrs, dims.size() - 1, isINT8, 1 << 0, - attrs.dequantizationScales, - !memory.at(ARG_BIAS)->getDesc().empty(), + memory, outputDataType); - if (attrs.decompressionMultiplyPtr) { - auto dstPrc 
= attrs.decompressionMultiplyPtr->getPrecision(); + if (memory.count(ARG_WEI | ARG_ATTR_SCALES)) { + auto dstPrc = memory.at(ARG_WEI | ARG_ATTR_SCALES)->getPrecision(); if (dstPrc != f8e8m0 || useDynamicQuantization) dstPrc = ov::element::f32; - dnnlpoc.appendDecompressionScales(attrs.decompressionMultiplyPtr, !attrs.weightsNonTransposed, dstPrc); + dnnlpoc.appendDecompressionScales(memory.at(ARG_WEI | ARG_ATTR_SCALES), !attrs.weightsNonTransposed, dstPrc); } - if (attrs.decompressionSubtractPtr) { + + if (memory.count(ARG_WEI | ARG_ATTR_ZERO_POINTS)) { auto dstPrc = useDynamicQuantization ? ov::element::u8 : ov::element::f32; - dnnlpoc.appendDecompressionZeroPoints(attrs.decompressionSubtractPtr, !attrs.weightsNonTransposed, dstPrc); + dnnlpoc.appendDecompressionZeroPoints(memory.at(ARG_WEI | ARG_ATTR_ZERO_POINTS), !attrs.weightsNonTransposed, dstPrc); } + if (useDynamicQuantization) { auto wei_precision = weiDesc->getPrecision(); bool is_symmetric_weights = (wei_precision == ov::element::i8) || (wei_precision == ov::element::i4); @@ -261,7 +265,7 @@ static dnnl::memory::desc normalizeDescriptor(const dnnl::memory::desc& desc) { const auto& dims = desc.get_dims(); if (dims.size() > 2) - return desc.reshape(normalizeDimsTo2D(dims)); + return desc.reshape(reshapeDownToRank<2>(dims)); return desc; } @@ -276,12 +280,13 @@ static dnnl::inner_product_forward::primitive_desc createDescriptorInternal(cons const bool useWeightsDecompression) { const auto normalizedInputDesc = normalizeDescriptor(inputDesc); const auto normalizedOutputDesc = normalizeDescriptor(outputDesc); + const auto normalizedWeightDesc = normalizeDescriptor(weightDesc); const auto indt = normalizedInputDesc.get_data_type(); auto wdt = indt; if (useWeightsDecompression) { - wdt = weightDesc.get_data_type(); + wdt = normalizedWeightDesc.get_data_type(); // dynamic quantization with symmetric quantized weights needs unsigned weights uint64_t dynQuantGroupSize = 0; @@ -297,8 +302,8 @@ static 
dnnl::inner_product_forward::primitive_desc createDescriptorInternal(cons } const dnnl::memory::desc weightsDesc = - useSparseWeights ? dnnl::memory::desc().sparse_desc(weightDesc.get_dims(), wdt) - : dnnl::memory::desc(weightDesc.get_dims(), wdt, memory::format_tag::any); + useSparseWeights ? dnnl::memory::desc().sparse_desc(normalizedWeightDesc.get_dims(), wdt) + : dnnl::memory::desc(normalizedWeightDesc.get_dims(), wdt, memory::format_tag::any); return dnnl::inner_product_forward::primitive_desc(engine, dnnl::prop_kind::forward_inference, @@ -387,8 +392,7 @@ DnnlShapeAgnosticDataPtr DnnlFCPrimitive::createShapeAgnosticData(const FCAttrs& useWeightsDecompression && useDynamicQuantizationImpl(attrs.dynamicQuantizationGroupSize, srcDesc, weiDesc, - attrs.decompressionMultiplyPtr, - attrs.decompressionSubtractPtr, + memory, !attrs.weightsNonTransposed); const auto postOpData = createPrimitiveAttrs(attrs, postOps, memory, context, useDynamicQuantization); diff --git a/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected_primitive.hpp b/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected_primitive.hpp index 5295b9655066cc..21247f149ca69f 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected_primitive.hpp +++ b/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected_primitive.hpp @@ -75,13 +75,6 @@ class DnnlFCPrimitive { const DnnlShapeAgnosticDataPtr& shapeAgnosticData); private: - static bool useDynamicQuantizationImpl(size_t dqGroupSize, - const MemoryDescPtr srcDesc, - const MemoryDescPtr weightsDesc, - MemoryCPtr scalesPtr, - MemoryCPtr zpPtr, - bool needTranspose); - dnnl::stream m_stream; dnnl::primitive_desc m_primDesc; impl_desc_type m_implType; diff --git a/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_matmul_primitive.cpp b/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_matmul_primitive.cpp index 1b8646c858e532..40c365ee5f4da5 100644 --- 
a/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_matmul_primitive.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_matmul_primitive.cpp @@ -27,6 +27,7 @@ #include "nodes/executors/fullyconnected_config.hpp" #include "nodes/executors/matmul_config.hpp" #include "nodes/executors/memory_arguments.hpp" +#include "utils/cpu_utils.hpp" #include "utils/debug_capabilities.h" namespace ov { @@ -104,10 +105,10 @@ DnnlMemoryDescPtr DnnlMatMulPrimitive::makeTransposedWeightDescriptor(const Dnnl const auto& weiDesc = srcDesc->getDnnlDesc(); auto wDims = weiDesc.get_dims(); auto wDataType = weiDesc.get_data_type(); - std::swap(wDims[wDims.size() - 1], wDims[wDims.size() - 2]); + dnnl::memory::dims wDims2D = reshapeDownToRank<2>(wDims); const auto format = weightsNonTransposed ? dnnl::memory::format_tag::ab : dnnl::memory::format_tag::ba; - const auto transposedWeiDesc = dnnl::memory::desc{wDims, wDataType, format}; + const auto transposedWeiDesc = dnnl::memory::desc{wDims2D, wDataType, format}; return DnnlExtensionUtils::makeDescriptor(transposedWeiDesc); } @@ -134,8 +135,7 @@ static DnnlPrimitiveAttrs createPrimitiveAttrs(const MatMulAttrs& attrs, dims.size() - 1, isINT8, 1 << 0, - attrs.dequantizationScales, - !memory.at(ARG_BIAS)->getDesc().empty(), + memory, outputDataType); return dnnlpoc.compose(); @@ -262,7 +262,7 @@ DnnlShapeAgnosticDataPtr DnnlMatMulPrimitive::createShapeAgnosticData(const FCAt const auto& weiDesc = memory.at(ARG_WEI)->getDescPtr(); const auto& biasDesc = memory.at(ARG_BIAS)->getDescPtr(); auto dstDesc = memory.at(ARG_DST)->getDescPtr(); - MatMulAttrs mmAttrs{false, false, attrs.dequantizationScales}; + MatMulAttrs mmAttrs{false, false}; const auto postOpData = createPrimitiveAttrs(mmAttrs, postOps, memory, context, false); diff --git a/src/plugins/intel_cpu/src/nodes/executors/executor_config.hpp b/src/plugins/intel_cpu/src/nodes/executors/executor_config.hpp index 09b3b33cfe6b2f..d08c4ad8127325 100644 --- 
a/src/plugins/intel_cpu/src/nodes/executors/executor_config.hpp +++ b/src/plugins/intel_cpu/src/nodes/executors/executor_config.hpp @@ -6,7 +6,6 @@ #include "post_ops.hpp" #include "memory_arguments.hpp" -#include "printers.hpp" namespace ov { namespace intel_cpu { diff --git a/src/plugins/intel_cpu/src/nodes/executors/executor_factory.hpp b/src/plugins/intel_cpu/src/nodes/executors/executor_factory.hpp index f12795d5d1eb16..dd05cc58d43c32 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/executor_factory.hpp +++ b/src/plugins/intel_cpu/src/nodes/executors/executor_factory.hpp @@ -19,7 +19,6 @@ namespace ov { namespace intel_cpu { -using namespace executor; template class ExecutorFactory { diff --git a/src/plugins/intel_cpu/src/nodes/executors/fullyconnected_config.hpp b/src/plugins/intel_cpu/src/nodes/executors/fullyconnected_config.hpp index ad6479597c6971..1699a845a3314b 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/fullyconnected_config.hpp +++ b/src/plugins/intel_cpu/src/nodes/executors/fullyconnected_config.hpp @@ -19,13 +19,8 @@ struct FCAttrs { bool withBias = false; bool weightsNonTransposed = false; bool sparseWeights = false; - // @todo only memory descriptors should be a part of attributes - // actual memory should be passed into "execute" or "prepareMemory" calls - std::vector dequantizationScales; - // @todo should be passed as an additional memory input? 
- MemoryCPtr decompressionSubtractPtr; - MemoryCPtr decompressionMultiplyPtr; uint64_t dynamicQuantizationGroupSize; + ov::intel_cpu::Config::ModelType modelType = ov::intel_cpu::Config::ModelType::Unknown; }; diff --git a/src/plugins/intel_cpu/src/nodes/executors/fullyconnected_implementations.cpp b/src/plugins/intel_cpu/src/nodes/executors/fullyconnected_implementations.cpp index 4cf6992985ecd3..10f472ddcd7283 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/fullyconnected_implementations.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/fullyconnected_implementations.cpp @@ -441,8 +441,7 @@ const std::vector>& getImplementations() { const ExecutorContext::CPtr context, std::shared_ptr shareAgnosticData) const { MatMulAttrs matMulAttrs{false, - false, - attrs.dequantizationScales}; + false}; auto primitive = DefaultInstantiator{}( memory, diff --git a/src/plugins/intel_cpu/src/nodes/executors/matmul_config.hpp b/src/plugins/intel_cpu/src/nodes/executors/matmul_config.hpp index 9e484b24a2940e..e42bf3138bce91 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/matmul_config.hpp +++ b/src/plugins/intel_cpu/src/nodes/executors/matmul_config.hpp @@ -12,7 +12,6 @@ namespace intel_cpu { struct MatMulAttrs { bool transposeA; bool transposeB; - std::vector dequantizationScales; }; using MatMulConfig = executor::Config; diff --git a/src/plugins/intel_cpu/src/nodes/executors/memory_arguments.hpp b/src/plugins/intel_cpu/src/nodes/executors/memory_arguments.hpp index c04ca39e845ee1..7150226d27c601 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/memory_arguments.hpp +++ b/src/plugins/intel_cpu/src/nodes/executors/memory_arguments.hpp @@ -14,7 +14,7 @@ namespace intel_cpu { using MemoryDescArgs = std::unordered_map; using MemoryArgs = std::unordered_map; -// @todo add more options +// basic inputs #define ARG_SRC_0 1 #define ARG_SRC ARG_SRC_0 #define ARG_SRC_1 2 @@ -24,6 +24,12 @@ using MemoryArgs = std::unordered_map; #define ARG_WEI_0 33 #define ARG_WEI 
ARG_WEI_0 #define ARG_BIAS 41 +// legacy dequantization scale +#define ARG_DST_DEQ_SCALE 53 +// scaling factors provided at execution time +#define ARG_ATTR_SCALES 4096 +// zero points provided at execution time +#define ARG_ATTR_ZERO_POINTS 8192 } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/executors/mlas/mlas_gemm.cpp b/src/plugins/intel_cpu/src/nodes/executors/mlas/mlas_gemm.cpp index a03bfe2649413a..8fd945b773f262 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/mlas/mlas_gemm.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/mlas/mlas_gemm.cpp @@ -23,6 +23,10 @@ using namespace executor; using namespace dnnl; using namespace ov::element; +static Dim batchDim(const VectorDims& dims) { + return std::accumulate(dims.begin(), dims.end() - 1, 1, std::multiplies()); +} + static MemoryPtr prepareWeightMemory(const MemoryPtr weightsMemory, const ExecutorContext::CPtr context, const bool weightsTransposed) { @@ -31,14 +35,15 @@ static MemoryPtr prepareWeightMemory(const MemoryPtr weightsMemory, // Weights are transposed by MatMulConstTransposesExtraction // K is the IC of weight // the weight is reshaped to [-1, K] in ConvertMatMulToFC - const auto K = wgtDims[1]; - const auto N = wgtDims[0]; + Dim K = wgtDims.back(); + Dim N = batchDim(wgtDims); auto packedBsize = mlas_sgemm_pack_get_size(N, K); auto create = [&]() { float* weightPtr = weightsMemory->getDataAs(); size_t ldb = weightsTransposed ? K : N; + MemoryPtr _ptr = std::make_shared(context->getEngine(), intel_cpu::CpuBlockedMemoryDesc(i8, intel_cpu::Shape{packedBsize})); float* prepackedDst = _ptr->getDataAs(); @@ -66,21 +71,10 @@ bool MlasGemmExecutor::supports(const FCConfig& config) { DEBUG_LOG("MlasGemmExecutor: PostOps are not supported"); return false; } - const auto& weiDesc = config.descs.at(ARG_WEI); - const auto& dstDesc = config.descs.at(ARG_DST); - // MLAS cannot support weight dims > 2, e.g. 
[1,64,9,9] * [10,64,9,9] - const auto& weightsDims = weiDesc->getShape().getStaticDims(); - if (weightsDims.size() > 2) { - if (!std::all_of(weightsDims.begin() + 2, weightsDims.end(), [](const Dim dim) { - return dim == 1; - })) { - DEBUG_LOG("MlasGemmExecutor: weights dims > 2 are not supported"); - return false; - } - } + const auto& dstDesc = config.descs.at(ARG_DST); - if (config.attrs.withBias) { + if (!config.descs.at(ARG_BIAS)->empty()) { const auto& biaDesc = config.descs.at(ARG_BIAS); const auto& biasDims = biaDesc->getShape().getStaticDims(); const auto& outDims = dstDesc->getShape().getDims(); @@ -108,24 +102,17 @@ MlasGemmExecutor::MlasGemmExecutor(const FCAttrs& attrs, const ExecutorContext::CPtr context) : m_attrs(attrs), m_memoryArgs(memory), - packedWeights(prepareWeightMemory(memory.at(ARG_WEI), context, !attrs.weightsNonTransposed)) {} + packedWeights(prepareWeightMemory(memory.at(ARG_WEI), context, !attrs.weightsNonTransposed)), + N(batchDim(memory.at(ARG_WEI)->getStaticDims())), + K(memory.at(ARG_WEI)->getStaticDims().back()) +{} bool MlasGemmExecutor::update(const MemoryArgs& memory) { - const auto& weiDesc = memory.at(ARG_WEI)->getDescPtr(); const auto& dstDesc = memory.at(ARG_DST)->getDescPtr(); - const auto& wgtDims = weiDesc->getShape().getStaticDims(); - // Weights are transposed by MatMulConstTransposesExtraction - // K is the IC of weight - // the weight is reshaped to [-1, K] in ConvertMatMulToFC - K = wgtDims[1]; - N = wgtDims[0]; const auto& outDims = dstDesc->getShape().getStaticDims(); - if (outDims.size() > 2) { - M = std::accumulate(outDims.begin(), outDims.end() - 1, 1, std::multiplies()); - } else { - M = outDims[0]; - } + M = outDims.size() > 2 ? 
batchDim(outDims) : outDims[0]; + return true; } diff --git a/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp b/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp index 31ae4f26cc08a1..0f5c46e8bcd7cd 100644 --- a/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp +++ b/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp @@ -10,6 +10,7 @@ #include "common/cpu_convert.h" #include "common/cpu_memcpy.h" +#include "cpu_types.h" #include "dnnl_extension_utils.h" #include "executors/memory_arguments.hpp" #include "graph_context.h" @@ -19,11 +20,16 @@ #include "memory_desc/cpu_memory_desc_utils.h" #include "nodes/executors/executor.hpp" #include "nodes/executors/fullyconnected_config.hpp" +#include "openvino/core/type.hpp" #include "openvino/core/type/element_type.hpp" #include "openvino/runtime/threading/cpu_message.hpp" +#include "ov_ops/fully_connected.hpp" +#include "ov_ops/fully_connected_quantized.hpp" +#include "ov_ops/fully_connected_quantized_legacy.hpp" +#include "ov_ops/fully_connected_compressed.hpp" #include "post_ops.hpp" #include "shape_inference/custom/fullyconnected.hpp" -#include "transformations/cpu_opset/common/op/fully_connected.hpp" +#include "transformations/utils/utils.hpp" #include "utils/debug_capabilities.h" #include "utils/general_utils.h" @@ -39,25 +45,76 @@ namespace node { bool FullyConnected::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { - const auto fc = std::dynamic_pointer_cast(op); - if (!fc) { - errorMessage = "Only legacy FullyConnected operation is supported"; + if (!ov::is_type(op) && + !ov::is_type(op) && + !ov::is_type(op)) { return false; } - if (fc->get_input_size() == 3 && - std::dynamic_pointer_cast(fc->get_input_node_shared_ptr(BIAS_ID)) == nullptr) { - errorMessage = "Only Constant operation on 'bias' input is supported"; + + if (ov::is_type(op)) { + if (!ov::op::util::is_on_constant_path(op->input_value(BIAS))) { + errorMessage = "Only Constant operation on 'bias' input is 
supported"; + return false; + } + } + + if (ov::is_type(op)) { + if (!ov::op::util::is_on_constant_path(op->input_value(WEIGHT_SCALES)) || + !ov::op::util::is_on_constant_path(op->input_value(WEIGHT_ZERO_POINTS))) { + errorMessage = "Only Constant operation on 'weight scales', and 'weight zero points' inputs is supported"; + return false; + } + } + } catch (...) { + return false; + } + + return true; +} + +// @todo replace 'inferencePrecision' check with 'fc->get_input_element_type(0) == ov::element::bf16' +// after bf16 pipeline is moved to ConvertPrecision +bool FullyConnected::isSupportedCompressedOperation(const std::shared_ptr& op, + size_t IC, + size_t OC, + size_t G, + ov::element::Type inferencePrecision) noexcept { +#if defined(OPENVINO_ARCH_X86_64) + try { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) return false; + + if (!dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx2)) + return false; + + if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_amx) && + inferencePrecision == ov::element::bf16) { + // OneDNN AMX IP implementation has limited shapes support due to performance considerations. As a + // current solution conditions below are copied from OneDNN to make sure correct IP impl will be + // used since fallback one doesn't support weights decompression feature. + size_t simdWidth = 16; + size_t vnniFactor = 2; + size_t maxSize = 512; + auto amxRow = vnniFactor * simdWidth; + + if ((IC <= amxRow && OC <= amxRow) || (IC <= maxSize && OC <= maxSize && IC % amxRow != 0)) { + return false; + } } - const auto weightRank = fc->get_input_partial_shape(WEIGHTS_ID).size(); - if (weightRank != 2) { - errorMessage = "Doesn't support 'weight' input with rank: " + std::to_string(weightRank); + + if (IC % G != 0 || IC / G < 4 || OC == 1) { return false; } + + return true; } catch (...) 
{ return false; } return true; +#else + return false; +#endif } void FullyConnected::initTensorParallelConfig(const GraphContext::CPtr context) { @@ -79,6 +136,31 @@ FullyConnected::FullyConnected(const std::shared_ptr& op, const GraphC initTensorParallelConfig(context); if (!isSupportedOperation(op, errorMessage)) OPENVINO_THROW_NOT_IMPLEMENTED(errorMessage); + + m_atoi[ARG_SRC] = DATA; + m_atoi[ARG_WEI] = WEIGHTS; + m_atoi[ARG_BIAS] = BIAS; + + auto mapArgToInput = [&op](std::unordered_map& argToInput, size_t argId, size_t inputId) { + if (op->get_input_size() > inputId && + op->input(inputId).get_element_type() != ov::element::undefined) { + argToInput[argId] = inputId; + } + }; + + if (ov::is_type(op)) { + mapArgToInput(m_atoi, ARG_WEI | ARG_ATTR_SCALES, WEIGHT_SCALES); + mapArgToInput(m_atoi, ARG_WEI | ARG_ATTR_ZERO_POINTS, WEIGHT_ZERO_POINTS); + algorithm = Algorithm::FullyConnectedCompressed; + } else if (ov::is_type(op)) { + mapArgToInput(m_atoi, ARG_DST_DEQ_SCALE, 3); + algorithm = Algorithm::FullyConnectedQuantizedLegacy; + } else if (ov::is_type(op)) { + algorithm = Algorithm::FullyConnectedQuantized; + OPENVINO_THROW_NOT_IMPLEMENTED("FullyConnectedQuantized is not implemented yet"); + } else { + algorithm = Algorithm::FullyConnectedCommon; + } } bool FullyConnected::canBeExecutedInInt8() const { @@ -220,6 +302,7 @@ void FullyConnected::execTensorParallelSync() { } } } + void FullyConnected::execute(dnnl::stream strm) { initTensorParallelSync(); @@ -366,31 +449,11 @@ static bool useSparseWeightsDecompression(const NodePtr& weightsInput, return sparseRate >= minSparseRate; } -void FullyConnected::needUpdateDQScaleForTensorParallel(std::vector& dequantizationScales) { - if (tp_cfg.enable_tensor_parallel) { - auto split_parts = [](int len, int n) { - int average = len / n; - std::vector parts(n, average); - parts.back() = len - average * (n - 1); - return parts; - }; - auto DQScales = getDQScales(); - auto split_lens = split_parts(DQScales.size(), 
tp_cfg.w_size); - auto split_offset = tp_cfg.w_rank * split_lens[0]; - std::vector newDQScales(split_lens[tp_cfg.w_rank]); - std::copy(DQScales.begin() + split_offset, DQScales.begin() + split_offset + split_lens[tp_cfg.w_rank], newDQScales.begin()); - dequantizationScales = std::move(newDQScales); - } -} - void FullyConnected::initSupportedPrimitiveDescriptors() { - attrs.withBias = getOriginalInputsNumber() == 3; - - attrs.dequantizationScales = getDQScales(); - needUpdateDQScaleForTensorParallel(attrs.dequantizationScales); + attrs.withBias = getOriginalInputPrecisionAtPort(BIAS) != ov::element::undefined; - attrs.sparseWeights = useSparseWeightsDecompression(getParentEdgeAt(WEIGHTS_ID)->getParent(), - getOriginalInputPrecisionAtPort(DATA_ID), + attrs.sparseWeights = useSparseWeightsDecompression(getParentEdgeAt(WEIGHTS)->getParent(), + getOriginalInputPrecisionAtPort(DATA), context->getConfig().fcSparseWeiDecompressionRate); attrs.dynamicQuantizationGroupSize = context->getConfig().fcDynamicQuantizationGroupSize; attrs.modelType = context->getConfig().modelType; @@ -406,6 +469,10 @@ void FullyConnected::initSupportedPrimitiveDescriptors() { VecMemoryDescs srcDescs; const auto& creatorsMap = BlockedDescCreator::getCommonCreators(); for (size_t i = 0; i < srcTypes.size(); i++) { + if (srcTypes[i] == element::undefined) { + srcDescs.push_back(MemoryDescUtils::makeEmptyDesc()); + continue; + } const auto srcDesc = creatorsMap.at(LayoutType::ncsp)->createSharedDesc(srcTypes[i], getInputShapeAtPort(i)); srcDescs.push_back(srcDesc); } @@ -417,23 +484,31 @@ void FullyConnected::initSupportedPrimitiveDescriptors() { } MemoryDescArgs descs{ - {ARG_SRC, srcDescs[0]}, - {ARG_WEI, srcDescs[1]}, - {ARG_BIAS, attrs.withBias ? 
srcDescs[2] : MemoryDescUtils::makeEmptyDesc()}, + {ARG_SRC, srcDescs[DATA]}, + {ARG_WEI, srcDescs[WEIGHTS]}, + {ARG_BIAS, srcDescs[BIAS]}, {ARG_DST, dstDescs[0]}, }; - needUpdateScaleForTensorParallel(); - needUpdateZeroPointForTensorParallel(); - auto executionContext = std::make_shared(context, getImplPriority(), privateWeightCache); factory = std::make_shared>(attrs, postOps, executionContext, descs); const auto nodeDescriptors = factory->getProperMemoryDescriptors(descs); NodeConfig nodeConfig; - nodeConfig.inConfs.emplace_back(nodeDescriptors.at(ARG_SRC)); - nodeConfig.inConfs.emplace_back(nodeDescriptors.at(ARG_WEI)); - if (attrs.withBias) nodeConfig.inConfs.emplace_back(nodeDescriptors.at(ARG_BIAS)); + nodeConfig.inConfs.resize(srcDescs.size()); + + for (const auto& desc : nodeDescriptors) { + if (m_atoi.count(desc.first)) { + nodeConfig.inConfs[m_atoi[desc.first]] = desc.second; + } + } + + // add extra inputs bypassing proper memory descriptors + // @todo pass all the input descriptors to getProperMemoryDescriptors and allow + // to ignore extra input descriptors if necessery + for (size_t i = 3; i < srcDescs.size(); i++) { + nodeConfig.inConfs[i] = srcDescs[i]; + } const int inPlace = canBeInPlace() ? 0 : -1; nodeConfig.outConfs.emplace_back(nodeDescriptors.at(ARG_DST), BlockedMemoryDesc::FULL_MASK, inPlace); @@ -443,11 +518,11 @@ void FullyConnected::initSupportedPrimitiveDescriptors() { void FullyConnected::needSplitMemoryForTensorParallel() { if (tp_cfg.enable_tensor_parallel) { - auto src = getSrcMemoryAtPort(DATA_ID); - auto wgt = getSrcMemoryAtPort(WEIGHTS_ID); + auto src = getSrcMemoryAtPort(DATA); + auto wgt = getSrcMemoryAtPort(WEIGHTS); auto dst = getDstMemoryAtPort(0); // src - memory[ARG_SRC] = getSrcMemoryAtPort(DATA_ID); + memory[ARG_SRC] = getSrcMemoryAtPort(DATA); // wgt // split N direction tp_cfg.cached_splited_weight = attrs.weightsNonTransposed ? 
split_vertical(context->getEngine(), std::move(wgt), 0, tp_cfg.w_rank, tp_cfg.w_size) @@ -455,7 +530,7 @@ void FullyConnected::needSplitMemoryForTensorParallel() { memory[ARG_WEI] = tp_cfg.cached_splited_weight; // bias if (attrs.withBias) { - auto bias = getSrcMemoryAtPort(BIAS_ID); + auto bias = getSrcMemoryAtPort(BIAS); auto select_bias = split_horizontal(context->getEngine(), std::move(bias), 0, tp_cfg.w_rank, tp_cfg.w_size); tp_cfg.cached_splited_bias = std::move(select_bias); } else { @@ -465,6 +540,21 @@ void FullyConnected::needSplitMemoryForTensorParallel() { // dst memory[ARG_DST] = getDstMemoryAtPort(0); tp_cfg.cached_dst = split_horizontal(context->getEngine(), std::move(dst), -1, tp_cfg.w_rank, tp_cfg.w_size, false); + + memory[ARG_DST | ARG_ATTR_SCALES] = split_horizontal(context->getEngine(), memory[ARG_DST | ARG_ATTR_SCALES], 0, tp_cfg.w_rank, tp_cfg.w_size); + + auto scale_mem = std::const_pointer_cast(memory[ARG_WEI | ARG_ATTR_SCALES]); + memory[ARG_WEI | ARG_ATTR_SCALES] = attrs.weightsNonTransposed ? split_vertical(context->getEngine(), scale_mem, 0, tp_cfg.w_rank, tp_cfg.w_size) + : split_horizontal(context->getEngine(), scale_mem, 0, tp_cfg.w_rank, tp_cfg.w_size); + + auto zeropoint_mem = std::const_pointer_cast(memory[ARG_WEI | ARG_ATTR_ZERO_POINTS]); + auto element_num = zeropoint_mem->getSize() / zeropoint_mem->getPrecision().size(); + if (element_num == 1) { + tp_cfg.cached_zeropoint = zeropoint_mem; + } else { + tp_cfg.cached_zeropoint = attrs.weightsNonTransposed ? split_vertical(context->getEngine(), zeropoint_mem, 0, tp_cfg.w_rank, tp_cfg.w_size) + : split_horizontal(context->getEngine(), zeropoint_mem, 0, tp_cfg.w_rank, tp_cfg.w_size); + } } } @@ -473,7 +563,7 @@ void FullyConnected::needUpdateTensorParalelConfig() { // 1. weight shape is dynamic // 2. last dim can be splited. 
if (tp_cfg.enable_tensor_parallel) { - auto& shape = getSrcMemoryAtPort(WEIGHTS_ID)->getShape(); + auto& shape = getSrcMemoryAtPort(WEIGHTS)->getShape(); if (shape.isDynamic()) { tp_cfg.enable_tensor_parallel = false; } else if (shape.getDims()[0] < static_cast(tp_cfg.w_size)) { @@ -481,12 +571,16 @@ void FullyConnected::needUpdateTensorParalelConfig() { } } } + void FullyConnected::createPrimitive() { needUpdateTensorParalelConfig(); - memory[ARG_SRC] = getSrcMemoryAtPort(DATA_ID); - memory[ARG_WEI] = getSrcMemoryAtPort(WEIGHTS_ID); - memory[ARG_BIAS] = attrs.withBias ? getSrcMemoryAtPort(BIAS_ID) : MemoryDescUtils::makeEmptyMemory(context); + for (const auto& entry : m_atoi) { + const auto argumentId = entry.first; + const auto inputId = entry.second; + memory[argumentId] = getSrcMemoryAtPort(inputId); + } + memory[ARG_DST] = getDstMemoryAtPort(0); needSplitMemoryForTensorParallel(); @@ -513,49 +607,6 @@ ov::element::Type FullyConnected::getRuntimePrecision() const { return getMaxPrecision(srcTypes); } -void FullyConnected::needUpdateScaleForTensorParallel() { - if (tp_cfg.enable_tensor_parallel && tp_cfg.cached_scale) { - attrs.decompressionMultiplyPtr = tp_cfg.cached_scale; - } -} - -void FullyConnected::needSplitScaleForTensorParallel(const MemoryCPtr& memory) { - if (tp_cfg.enable_tensor_parallel && !tp_cfg.cached_scale) { - auto scale_mem = std::const_pointer_cast(memory); - tp_cfg.cached_scale = attrs.weightsNonTransposed ? 
split_vertical(context->getEngine(), std::move(scale_mem), 0, tp_cfg.w_rank, tp_cfg.w_size) - : split_horizontal(context->getEngine(), std::move(scale_mem), 0, tp_cfg.w_rank, tp_cfg.w_size); - } -} - -void FullyConnected::needUpdateZeroPointForTensorParallel() { - if (tp_cfg.enable_tensor_parallel && tp_cfg.cached_zeropoint) { - attrs.decompressionSubtractPtr = tp_cfg.cached_zeropoint; - } -} - -void FullyConnected::needSplitZeroPointForTensorParallel(const MemoryCPtr& memory) { - if (tp_cfg.enable_tensor_parallel && !tp_cfg.cached_zeropoint) { - auto zeropoint_mem = std::const_pointer_cast(memory); - auto element_num = memory->getSize() / memory->getPrecision().size(); - if (element_num == 1) { - tp_cfg.cached_zeropoint = std::move(zeropoint_mem); - } else { - tp_cfg.cached_zeropoint = attrs.weightsNonTransposed ? split_vertical(context->getEngine(), zeropoint_mem, 0, tp_cfg.w_rank, tp_cfg.w_size) - : split_horizontal(context->getEngine(), zeropoint_mem, 0, tp_cfg.w_rank, tp_cfg.w_size); - } - } -} - -void FullyConnected::fuseDecompressionMultiply(const MemoryCPtr& memory) { - attrs.decompressionMultiplyPtr = memory; - needSplitScaleForTensorParallel(memory); -} - -void FullyConnected::fuseDecompressionSubtract(const MemoryCPtr& memory) { - attrs.decompressionSubtractPtr = memory; - needSplitZeroPointForTensorParallel(memory); -} - } // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/fullyconnected.h b/src/plugins/intel_cpu/src/nodes/fullyconnected.h index 8c17228e365af4..177edd3d426339 100644 --- a/src/plugins/intel_cpu/src/nodes/fullyconnected.h +++ b/src/plugins/intel_cpu/src/nodes/fullyconnected.h @@ -6,9 +6,11 @@ #include +#include #include #include #include +#include #include #include "cpu_memory.h" @@ -65,6 +67,15 @@ class FullyConnected : public Node { bool canFuse(const NodePtr& node) const override; static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + 
static bool isSupportedCompressedOperation(const std::shared_ptr& op, + size_t IC, + size_t OC, + size_t G, + ov::element::Type inferencePrecision) noexcept; + + bool isExecutable() const override { + return !isInputTensorAtPortEmpty(0); + } void prepareParams() override; void executeDynamicImpl(dnnl::stream strm) override; @@ -80,9 +91,21 @@ class FullyConnected : public Node { void toNumaNodeImpl(int numaID) override; private: - static const size_t DATA_ID = 0; - static const size_t WEIGHTS_ID = 1; - static const size_t BIAS_ID = 2; + enum InputId : size_t { + DATA = 0, + WEIGHTS, + BIAS, + WEIGHT_SCALES, + WEIGHT_ZERO_POINTS, + INPUT_SCALES, + INPUT_ZERO_POINTS, + OUTPUT_SCALES, + OUTPUT_ZERO_POINTS, + }; + + static bool isConstantInput(const std::shared_ptr& op, InputId port); + + std::unordered_map m_atoi; // memory argument id to input id void fuseDecompressionConstant(const MemoryCPtr& memory, MemoryCPtr& decompressionValuesPtr); @@ -92,11 +115,6 @@ class FullyConnected : public Node { void initTensorParallelSync(); void execTensorParallelSync(); void needSplitMemoryForTensorParallel(); - void needSplitScaleForTensorParallel(const MemoryCPtr& memory); - void needUpdateScaleForTensorParallel(); - void needSplitZeroPointForTensorParallel(const MemoryCPtr& memory); - void needUpdateZeroPointForTensorParallel(); - void needUpdateDQScaleForTensorParallel(std::vector& dequantizationScales); FCAttrs attrs; PostOps postOps; diff --git a/src/plugins/intel_cpu/src/nodes/input.cpp b/src/plugins/intel_cpu/src/nodes/input.cpp index 1f650bd8c5de17..4ccdc87ada25f1 100644 --- a/src/plugins/intel_cpu/src/nodes/input.cpp +++ b/src/plugins/intel_cpu/src/nodes/input.cpp @@ -7,7 +7,10 @@ #include "cpu/x64/jit_generator.hpp" #include "nodes/node_config.h" #include "openvino/core/parallel.hpp" +#include "openvino/core/shape.hpp" +#include "openvino/core/type/element_type.hpp" #include "shape_inference/shape_inference_pass_through.hpp" +#include 
"memory_desc/cpu_memory_desc_utils.h" using namespace dnnl; using namespace dnnl::impl::cpu::x64; @@ -228,9 +231,9 @@ Input::Input(const std::shared_ptr& op, const GraphContext::CPtr conte op->get_type_name(), " with name ", op->get_friendly_name()); - constOp = ov::as_type_ptr(op); - if (constOp) { + if (auto constOp = ov::as_type_ptr(op)) { constant = ConstantType::Const; + m_constOp = constOp; cloneBlobIfRequired(); } else { constant = ConstantType::StrictNoConst; @@ -238,8 +241,14 @@ Input::Input(const std::shared_ptr& op, const GraphContext::CPtr conte } void Input::cloneBlobIfRequired() { - Shape shape(constOp->get_shape().empty() ? ov::Shape(1, 1) : constOp->get_shape()); - const auto prec = constOp->get_element_type(); + const auto prec = m_constOp->get_element_type(); + + if (prec == ov::element::undefined && shape_size(m_constOp->get_shape()) == 0) { + memoryPtr = MemoryDescUtils::makeEmptyMemory(context); + return; + } + + Shape shape(m_constOp->get_shape().empty() ? ov::Shape(1, 1) : m_constOp->get_shape()); const size_t size = shape.getElementsCount(); CpuBlockedMemoryDesc memDesc(prec, shape); @@ -258,21 +267,21 @@ void Input::cloneBlobIfRequired() { // oneDNN always allocate 1byte for element type with bitWidth < 8 (u4,u1...) 
// but ngraph Constant uses actual bitWidth for data storage allocation // in that case we make a copy to avoid overflow - if (constOp->get_byte_size() >= memDesc.getCurrentMemSize()) { - if (constOp->get_element_type() == element::string) { - memory = std::make_shared(getEngine(), memDesc, constOp->get_data_ptr()); + if (m_constOp->get_byte_size() >= memDesc.getCurrentMemSize()) { + if (m_constOp->get_element_type() == element::string) { + memory = std::make_shared(getEngine(), memDesc, m_constOp->get_data_ptr()); } else { - memory = std::make_shared(getEngine(), memDesc, constOp->get_data_ptr()); + memory = std::make_shared(getEngine(), memDesc, m_constOp->get_data_ptr()); } } else { - if (constOp->get_element_type() == element::string) { + if (m_constOp->get_element_type() == element::string) { memory = std::make_shared(getEngine(), memDesc); - auto src = constOp->get_data_ptr(); + auto src = m_constOp->get_data_ptr(); auto dst = memory->getDataAs(); std::copy(src, src + size, dst); } else { memory = std::make_shared(getEngine(), memDesc); - memcpy(memory->getData(), constOp->get_data_ptr(), constOp->get_byte_size()); + memcpy(memory->getData(), m_constOp->get_data_ptr(), m_constOp->get_byte_size()); } } @@ -287,22 +296,22 @@ void Input::cloneBlobIfRequired() { return ptr; }; - auto isBlobAligned = [&] () { - bool blobAlignedOnSSE = true; + auto isBlobAligned = [] (const std::shared_ptr& constant) { #if defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64) // Majority of arithmetic and data processing instructions in legacy SSE isa requires // the memory address in the operands must be aligned on 16-byte boundary. To ensure // safely reusing ngraph const blob memory, need to check address alignment. 
- const void *ptr = constOp->get_data_ptr(); - blobAlignedOnSSE = mayiuse(cpu_isa_t::avx2) || ((reinterpret_cast(ptr) & 15) == 0); + const void *ptr = constant->get_data_ptr(); + return mayiuse(cpu_isa_t::avx2) || ((reinterpret_cast(ptr) & 15) == 0); +#else + return true; #endif - return blobAlignedOnSSE; }; // The presence of subnormals is better to determined at IR read time. auto hasSubnormals = [&] () { if (prec == ov::element::f32) { - uint32_t const *u32data = constOp->get_data_ptr(); + uint32_t const *u32data = m_constOp->get_data_ptr(); if (!size) return false; @@ -345,7 +354,7 @@ void Input::cloneBlobIfRequired() { auto blobKey = [&] () { char ptr[32]; - snprintf(ptr, sizeof ptr, "%p", constOp->get_data_ptr()); + snprintf(ptr, sizeof ptr, "%p", m_constOp->get_data_ptr()); return getName() + "_" + std::to_string(size * prec.size()) + "_" + ptr; @@ -356,12 +365,13 @@ void Input::cloneBlobIfRequired() { prec != element::string && // IRs already have all subnormals flushed to zero, but in // read_model scenario with directly loaded original model still can have subnormals - isBlobAligned() && (!needFlushDenormalsToZero || !hasSubnormals()) && + isBlobAligned(m_constOp) && (!needFlushDenormalsToZero || !hasSubnormals()) && // Blob should be cloned in cache only if original weights are stored on other numa node. // This is possible only in multistream case on multisocket machine. // TODO: don't clone blob for multisocket + multistream case if current stream is run on the numa node where original weights are stored. (!weightCache || context->getNumNumaNodes() == 1 || context->getCPUStreamExecutor()->get_streams_num() == 1); - memoryPtr = clone_is_not_needed ? std::make_shared(getEngine(), memDesc, constOp->get_data_ptr()) + + memoryPtr = clone_is_not_needed ? std::make_shared(getEngine(), memDesc, m_constOp->get_data_ptr()) : std::const_pointer_cast( weightCache ? 
*weightCache->findOrCreate(blobKey(), cloneBlob) : cloneBlob()); } diff --git a/src/plugins/intel_cpu/src/nodes/input.h b/src/plugins/intel_cpu/src/nodes/input.h index 4d7febb17ad4b7..e659ea2359aabd 100644 --- a/src/plugins/intel_cpu/src/nodes/input.h +++ b/src/plugins/intel_cpu/src/nodes/input.h @@ -75,7 +75,7 @@ class Input : public Node { void initSupportedPdFromMemDesc(); private: - std::shared_ptr constOp; + std::shared_ptr m_constOp; MemoryCPtr memoryPtr; bool isMeanImage = false; MemoryDescPtr extMemDesc = nullptr; diff --git a/src/plugins/intel_cpu/src/nodes/reference.cpp b/src/plugins/intel_cpu/src/nodes/reference.cpp index 5dc7c8818dd52b..b84836c869deb3 100644 --- a/src/plugins/intel_cpu/src/nodes/reference.cpp +++ b/src/plugins/intel_cpu/src/nodes/reference.cpp @@ -29,7 +29,7 @@ Reference::Reference(const std::shared_ptr& op, : Node(op, context, ReferenceShapeInferFactory(op)), ovCoreNode(op), additionalErrorMessage(errorMessage) { if (!op->has_evaluate()) { OPENVINO_THROW_NOT_IMPLEMENTED( - "Cannot fallback on ngraph reference implementation (Ngraph::Node::evaluate() is not implemented)"); + "Cannot fallback on ngraph reference implementation. 
Ngraph::Node::evaluate() is not implemented for op: ", *op); } setType(Type::Reference); diff --git a/src/plugins/intel_cpu/src/shape_inference/custom/fullyconnected.cpp b/src/plugins/intel_cpu/src/shape_inference/custom/fullyconnected.cpp index 5aef73df1949bd..048b413b61a60b 100644 --- a/src/plugins/intel_cpu/src/shape_inference/custom/fullyconnected.cpp +++ b/src/plugins/intel_cpu/src/shape_inference/custom/fullyconnected.cpp @@ -15,7 +15,7 @@ Result FCShapeInfer::infer( const VectorDims& activationShape = input_shapes[0].get(); const VectorDims& weightShape = input_shapes[1].get(); size_t activationRank = activationShape.size(); - size_t channelRank = weightShape.size() - 1; + size_t channelRank = 1; // activation weight output_shape // NCHW CoCHW NCo @@ -23,7 +23,7 @@ Result FCShapeInfer::infer( // NC CoC NCo VectorDims outputShape(out_rank, 1); // set Co - outputShape.back() = weightShape[0]; + outputShape.back() = std::accumulate(weightShape.begin(), weightShape.end() - 1, 1, std::multiplies()); // set batch dims size_t batchRank = activationRank - channelRank; size_t startIdx = out_rank - batchRank - 1; diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/op/fully_connected.cpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/op/fully_connected.cpp deleted file mode 100644 index a6d97b6a84b613..00000000000000 --- a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/op/fully_connected.cpp +++ /dev/null @@ -1,79 +0,0 @@ -// Copyright (C) 2018-2024 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "fully_connected.hpp" -#include "transformations/itt.hpp" - -ov::intel_cpu::FullyConnectedNode::FullyConnectedNode(const ov::Output& A, - const ov::Output& B, - const ov::Rank& output_rank, - const ov::element::Type output_type) - : Op({A, B}), m_output_rank(output_rank), m_output_type(output_type) { - validate_and_infer_types(); -} - -std::shared_ptr 
ov::intel_cpu::FullyConnectedNode::clone_with_new_inputs(const ov::OutputVector& new_args) const { - INTERNAL_OP_SCOPE(FullyConnectedNode_clone_with_new_inputs); - check_new_args_count(this, new_args); - - return std::make_shared(new_args.at(0), new_args.at(1), m_output_rank, m_output_type); -} - -void ov::intel_cpu::FullyConnectedNode::validate_and_infer_types() { - INTERNAL_OP_SCOPE(FullyConnectedNode_validate_and_infer_types); - const auto input_size = get_input_size(); - NODE_VALIDATION_CHECK(this, - input_size == 2, - "Number of inputs is incorrect. Current value is: ", - input_size, - ", expected: 2."); - - // Weights shape: [O, I1, ..., Im]; - // O - output channels dimensions, Ik - input channels dimensions - const auto weights_pshape = get_input_partial_shape(1); - NODE_VALIDATION_CHECK(this, - weights_pshape.is_static(), - "Weights pshape must be static"); - const auto weights_shape = weights_pshape.to_shape(); - - NODE_VALIDATION_CHECK(this, - weights_pshape.size() > 0, - "Weights rank must be greater than 0"); - - const auto o_channels = weights_pshape[0]; - - // Activations shape: [B1, ..., Bn, I1, ..., Im]; - // Bi - batch dimensions, Ik - input channels dimensions - const auto activations_pshape = get_input_partial_shape(0); - - // Result shape: [B1, ..., Bn, O] - ov::PartialShape output_pshape; - if (activations_pshape.rank().is_static()) { - size_t output_channels_dimensions_count = weights_shape.size() - 1; - for (size_t i = 0; i < activations_pshape.size() - output_channels_dimensions_count; ++i) { - output_pshape.push_back(activations_pshape[i]); - } - output_pshape.push_back(o_channels); - - NODE_VALIDATION_CHECK(this, - m_output_rank.is_static(), - "Output rank must be static if activations rank is static."); - - while (output_pshape.rank().get_length() < m_output_rank.get_length()) { - output_pshape.insert(output_pshape.begin(), 1); - } - } else { - output_pshape = ov::PartialShape::dynamic(); - } - - auto output_type = m_output_type == 
ov::element::undefined ? get_input_element_type(0) : m_output_type; - set_output_type(0, output_type, output_pshape); -} - -bool ov::intel_cpu::FullyConnectedNode::visit_attributes(ov::AttributeVisitor &visitor) { - INTERNAL_OP_SCOPE(FullyConnectedNode_visit_attributes); - visitor.on_attribute("out-rank", m_output_rank); - visitor.on_attribute("out-type", m_output_type); - return true; -} diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/op/fully_connected.hpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/op/fully_connected.hpp deleted file mode 100644 index d992b76cf0b79b..00000000000000 --- a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/op/fully_connected.hpp +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright (C) 2018-2024 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include "openvino/core/node.hpp" -#include "openvino/op/op.hpp" - -namespace ov { -namespace intel_cpu { - -class FullyConnectedNode : public ov::op::Op { -public: - OPENVINO_OP("FullyConnected", "cpu_plugin_opset"); - - FullyConnectedNode() = default; - - FullyConnectedNode(const ov::Output &A, - const ov::Output &B, - const ov::Rank& output_rank, - const ov::element::Type output_type = ov::element::undefined); - - bool visit_attributes(ov::AttributeVisitor &visitor) override; - - void validate_and_infer_types() override; - - std::shared_ptr clone_with_new_inputs(const ov::OutputVector& new_args) const override; - - ov::Rank get_output_rank() const { return m_output_rank; } - ov::element::Type get_output_type() const { return m_output_type; } - -private: - ov::Rank m_output_rank; - ov::element::Type m_output_type; -}; - -} // namespace intel_cpu -} // namespace ov diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/convert_matmul_to_fc.cpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/convert_matmul_to_fc.cpp index f2861843a81110..da25e9aac30240 100644 --- 
a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/convert_matmul_to_fc.cpp +++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/convert_matmul_to_fc.cpp @@ -2,12 +2,12 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "transformations/cpu_opset/common/op/fully_connected.hpp" +#include "openvino/core/type/element_type.hpp" +#include "ov_ops/fully_connected.hpp" #include "convert_matmul_to_fc.hpp" #include "openvino/op/matmul.hpp" #include "openvino/op/convert.hpp" #include "openvino/op/transpose.hpp" -#include "openvino/op/reshape.hpp" #include "openvino/core/rt_info.hpp" #include "openvino/pass/pattern/op/wrap_type.hpp" #include "transformations/utils/utils.hpp" @@ -135,22 +135,6 @@ ov::intel_cpu::ConvertMatMulToFC::ConvertMatMulToFC() { OPENVINO_THROW("MatMul " + matmul->get_friendly_name() + " shapes are inconsistent."); } - // Transferring from MatMul representation: [B, I, K] * [B, K, O] = [B, I, O] - // to FullyConnected representation: [I, K] * [K, O] = [I, O] - - if (rank_b != 2) { - ov::Dimension K = *(shape_b_aligned.rbegin() + 1); - OPENVINO_ASSERT(K.is_static()); - auto k_len = K.get_length(); - auto reshape_shape_values = matmul->get_transpose_b() ? 
std::vector{-1, k_len} : std::vector{k_len, -1}; - auto reshape_shape = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, reshape_shape_values); - fc_input_b = ov::op::util::make_try_fold(fc_input_b, reshape_shape, false); - if (!std::dynamic_pointer_cast(fc_input_b.get_node_shared_ptr())) { - new_ops.push_back(reshape_shape); - } - new_ops.push_back(fc_input_b.get_node_shared_ptr()); - } - // Weights normalization if (!matmul->get_transpose_b()) { fc_input_b = create_transpose(fc_input_b, matmul->get_friendly_name() + "/transpose_b"); @@ -169,10 +153,14 @@ ov::intel_cpu::ConvertMatMulToFC::ConvertMatMulToFC() { fc_input_b = convert; } - // Create FullyConnected - auto output_rank = matmul->get_output_partial_shape(0).rank(); - auto fc = std::make_shared(fc_input_a, fc_input_b, output_rank, - matmul->get_output_element_type(0)); + auto bias = std::make_shared(element::undefined, Shape{0}); + new_ops.push_back(bias); + + auto fc = std::make_shared(fc_input_a, + fc_input_b, + bias, + matmul->get_output_element_type(0)); + fc->set_friendly_name(matmul->get_friendly_name()); ///todo: CVS-130863 Remove after fp16_compression is copyable if (ov::fp16_compression_is_disabled(matmul)) diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/convert_matmul_to_fc.hpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/convert_matmul_to_fc.hpp index 69991802101138..7d75fcc19170d0 100644 --- a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/convert_matmul_to_fc.hpp +++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/convert_matmul_to_fc.hpp @@ -4,7 +4,7 @@ #pragma once -#include "openvino/pass/graph_rewrite.hpp" +#include "openvino/pass/matcher_pass.hpp" namespace ov { namespace intel_cpu { diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/convert_to_power_static.cpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/convert_to_power_static.cpp index 
8079286d1e3ad7..03d9a294bbcab9 100644 --- a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/convert_to_power_static.cpp +++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/convert_to_power_static.cpp @@ -12,7 +12,7 @@ #include "openvino/pass/pattern/op/or.hpp" #include "transformations/rt_info/dequantization_node.hpp" #include "transformations/cpu_opset/common/op/power_static.hpp" -#include "transformations/cpu_opset/common/op/fully_connected.hpp" +#include "ov_ops/fully_connected.hpp" #include "utils/general_utils.h" #include "itt.hpp" @@ -47,16 +47,16 @@ bool isConvertableToPowerStatic(const std::shared_ptr &node) { return ov::shape_size(const_shape) == 1 && input_rank.get_length() >= static_cast(const_shape.size()) && !ov::intel_cpu::one_of(node->get_input_node_shared_ptr(nonConstPort)->get_type_info(), - ov::opset1::NormalizeL2::get_type_info_static(), - ov::opset4::Interpolate::get_type_info_static(), - ov::opset1::Convolution::get_type_info_static(), - ov::opset1::GroupConvolution::get_type_info_static(), - ov::opset1::ConvolutionBackpropData::get_type_info_static(), - ov::opset1::GroupConvolutionBackpropData::get_type_info_static(), - ov::opset1::MatMul::get_type_info_static(), - ov::intel_cpu::FullyConnectedNode::get_type_info_static(), - ov::op::v0::MVN::get_type_info_static(), - ov::opset6::MVN::get_type_info_static()); + ov::opset1::NormalizeL2::get_type_info_static(), + ov::opset4::Interpolate::get_type_info_static(), + ov::opset1::Convolution::get_type_info_static(), + ov::opset1::GroupConvolution::get_type_info_static(), + ov::opset1::ConvolutionBackpropData::get_type_info_static(), + ov::opset1::GroupConvolutionBackpropData::get_type_info_static(), + ov::opset1::MatMul::get_type_info_static(), + ov::op::internal::FullyConnected::get_type_info_static(), + ov::op::v0::MVN::get_type_info_static(), + ov::opset6::MVN::get_type_info_static()); } template <> diff --git 
a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/fc_bias_fusion.cpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/fc_bias_fusion.cpp new file mode 100644 index 00000000000000..d92d2d3627b65b --- /dev/null +++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/fc_bias_fusion.cpp @@ -0,0 +1,79 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "fc_bias_fusion.hpp" + +#include +#include + +#include "itt.hpp" +#include "openvino/core/rt_info.hpp" +#include "openvino/op/add.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/reshape.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" +#include "ov_ops/fully_connected.hpp" +#include "transformations/utils/utils.hpp" + +ov::intel_cpu::FullyConnectedBiasFusion::FullyConnectedBiasFusion() { + MATCHER_SCOPE(FullyConnectedBiasFusion); + + auto input = ov::pass::pattern::any_input(ov::pass::pattern::has_static_rank()); + auto weights = ov::pass::pattern::any_input(ov::pass::pattern::has_static_shape()); + auto bias = ov::pass::pattern::wrap_type(); + auto m_fc = ov::pass::pattern::wrap_type({input, weights, bias}, + ov::pass::pattern::consumers_count(1)); + auto m_bias = ov::pass::pattern::wrap_type(); + auto m_add = ov::pass::pattern::wrap_type({m_fc, m_bias}); + + ov::matcher_pass_callback callback = [=](ov::pass::pattern::Matcher& m) { + auto& pattern_to_output = m.get_pattern_value_map(); + + auto add = pattern_to_output[m_add].get_node_shared_ptr(); + auto bias = pattern_to_output[m_bias].get_node_shared_ptr(); + + auto fc = pattern_to_output[m_fc].get_node_shared_ptr(); + + if (transformation_callback(fc)) { + return false; + } + + ov::Shape bias_shape(bias->get_shape()); + const ov::PartialShape& output_shape = fc->get_output_partial_shape(0); + size_t bias_size = ov::shape_size(bias_shape); + auto rank = output_shape.size(); + if (rank == 0 || output_shape[rank - 1].is_dynamic()) { + return false; 
+ } + + if (bias_shape.empty() || static_cast(bias_shape.back()) != output_shape[rank - 1].get_length() || + bias_shape.back() != bias_size) { + return false; + } + + ov::NodeVector new_ops; + + std::shared_ptr final_bias = bias; + if (bias_shape.size() >= 2) { + auto reshape_const = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{1}, {-1}); + final_bias = ov::op::util::make_try_fold(final_bias, reshape_const, true); + new_ops.push_back(final_bias); + } + + std::shared_ptr fc_with_bias; + + auto fc_node = ov::as_type_ptr(fc); + fc_with_bias = fc_node->clone_with_new_inputs({fc_node->input_value(0), fc_node->input_value(1), final_bias}); + + new_ops.push_back(fc_with_bias); + + fc_with_bias->set_friendly_name(add->get_friendly_name()); + ov::copy_runtime_info({fc, add}, new_ops); + ov::replace_node(add, fc_with_bias); + return true; + }; + + auto m = std::make_shared(m_add, matcher_name); + this->register_matcher(m, callback); +} diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/fc_bias_fusion.hpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/fc_bias_fusion.hpp new file mode 100644 index 00000000000000..b21cf80ad327e6 --- /dev/null +++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/fc_bias_fusion.hpp @@ -0,0 +1,19 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/pass/matcher_pass.hpp" + +namespace ov { +namespace intel_cpu { + +class FullyConnectedBiasFusion : public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("FullyConnectedBiasFusion", "0"); + FullyConnectedBiasFusion(); +}; + +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/move_fc_reshape_to_weights.cpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/move_fc_reshape_to_weights.cpp index e681cd48ce8087..18a54dc45e173f 100644 --- 
a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/move_fc_reshape_to_weights.cpp +++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/move_fc_reshape_to_weights.cpp @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "transformations/cpu_opset/common/op/fully_connected.hpp" +#include "ov_ops/fully_connected.hpp" #include "move_fc_reshape_to_weights.hpp" #include #include @@ -48,7 +48,8 @@ ov::intel_cpu::MoveFCReshapeToWeights::MoveFCReshapeToWeights() { auto weights_input_m = std::make_shared(ov::OutputVector{reshape_m, transpose_m}); auto data_m = any_input(); - auto fully_connected_m = wrap_type({data_m, weights_input_m}); + auto bias_m = any_input(); + auto fully_connected_m = wrap_type({data_m, weights_input_m, bias_m}); ov::matcher_pass_callback callback = [&](ov::pass::pattern::Matcher& m) { const auto fully_connected = m.get_match_root(); diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/split_fc.cpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/split_fc.cpp deleted file mode 100644 index 27207b3e051fdb..00000000000000 --- a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/split_fc.cpp +++ /dev/null @@ -1,207 +0,0 @@ -// Copyright (C) 2018-2024 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "openvino/core/rt_info.hpp" -#include "openvino/pass/pattern/op/wrap_type.hpp" -#include "openvino/pass/constant_folding.hpp" -#include -#include "openvino/op/concat.hpp" -#include "openvino/op/constant.hpp" -#include "openvino/op/convert.hpp" -#include "openvino/op/multiply.hpp" -#include "openvino/op/reshape.hpp" -#include "openvino/op/subtract.hpp" -#include "openvino/op/transpose.hpp" -#include "openvino/op/variadic_split.hpp" -#include "transformations/cpu_opset/common/op/fully_connected.hpp" - -#include "split_fc.hpp" - -#include "itt.hpp" - -ov::intel_cpu::SplitFC::SplitFC(int sub_stream_num) { - MATCHER_SCOPE(SplitFC); - auto 
fc_m = ov::pass::pattern::wrap_type(); - - ov::matcher_pass_callback callback = [=](ov::pass::pattern::Matcher& m) { - const auto& pattern_map = m.get_pattern_value_map(); - - const auto& fc_node = pattern_map.at(fc_m).get_node_shared_ptr(); - auto& rt_info = fc_node->get_rt_info(); - if (rt_info.count("parallelDomain")) { - return false; - } - - const auto src_item = fc_node->get_input_node_shared_ptr(0); - const auto fc_weight_node = fc_node->get_input_node_shared_ptr(1); - - // split happens on the first dimension. - constexpr size_t split_dim = 0; - auto split_dim_node = std::make_shared(ov::element::i32, ov::Shape{}, split_dim); - - // needn't to split fc when the dim is 0. - const auto& wgt_shape = fc_weight_node->get_shape(); - // weight shape size 660000 is a trade-off value, which is summarized and verified by LLMs. - if (wgt_shape[split_dim] <= 1 || ov::shape_size(wgt_shape) < 6600000) { - return false; - } - - // parts will be splited according the sub stream num. - int split_num = sub_stream_num + 1; - - auto split_parts = [](int len, int n) { - int average = len / n; - std::vector parts(n, average); - parts.back() = len - average * (n - 1); - return parts; - }; - - // TODO: support transpose - if (ov::is_type(fc_weight_node)) { - return false; - } - - // 1. If the model is INT4 format, split the INT4 pattern for the FuseFCAndWeightsDecompression. - // 2. If the model is NOT INT4 format, split the weight. - std::vector> wgt_node_vec(split_num); - if (ov::is_type(fc_weight_node) || ov::is_type(fc_weight_node)) { - // INT4 model should consider two patterns, including with Reshape Node and without Reshape Node. - const auto reshape_node = ov::as_type_ptr(fc_weight_node); - const auto multiply_node = reshape_node ? 
reshape_node->get_input_node_shared_ptr(0) : fc_weight_node; - if (!ov::is_type(multiply_node)) { - return false; - } - auto multiply_pattern = multiply_node->get_input_node_shared_ptr(1); - if (!ov::is_type(multiply_pattern)) { - return false; - } - auto subtract_node = multiply_node->get_input_node_shared_ptr(0); - if (!ov::is_type(subtract_node)) { - return false; - } - auto convert_node1 = subtract_node->get_input_node_shared_ptr(1); - if (!ov::is_type(convert_node1)) { - return false; - } - auto convert_node1_const = ov::as_type_ptr(convert_node1->get_input_node_shared_ptr(0)); - if (!convert_node1_const) { - return false; - } - auto convert_node0 = subtract_node->get_input_node_shared_ptr(0); - if (!ov::is_type(convert_node0)) { - return false; - } - auto wgt_item = convert_node0->get_input_node_shared_ptr(0); - auto cvt_prec = convert_node0->get_element_type(); - - auto split_dim_range = wgt_item->get_shape()[split_dim]; - const auto& convert_node1_shape = convert_node1->get_shape(); - bool need_to_split_convert = ov::shape_size(convert_node1_shape) > 1 && - split_dim < convert_node1_shape.size() && - convert_node1_shape[split_dim] == split_dim_range; - - // We should use VariadicSplit to split the input for FC. 
- std::vector> split_reshape_pattern_vec(split_num); - auto fc_dim_vec = split_parts(split_dim_range, split_num); - auto split_length = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{static_cast(split_num)}, fc_dim_vec); - - auto split_constants = [&](const std::shared_ptr& constant) { - static const std::set unsupported_by_split_element_types{ov::element::u4, ov::element::i4, ov::element::nf4}; - const auto& constant_precision = constant->get_output_element_type(0); - if (unsupported_by_split_element_types.count(constant_precision) == 0) { - auto split = std::make_shared(constant, split_dim_node, split_length); - return split->outputs(); - } - - auto convert = std::make_shared(constant, ov::element::i8); - auto split = std::make_shared(convert, split_dim_node, split_length); - ov::OutputVector res(split->get_output_size()); - for (size_t i = 0; i < split->get_output_size(); ++i) { - res[i] = std::make_shared(split->output(i), constant_precision); - } - return res; - }; - - auto split_wgts = split_constants(wgt_item); - auto split_muls = split_constants(multiply_pattern); - ov::OutputVector split_cvts; - if (need_to_split_convert) { - split_cvts = split_constants(convert_node1_const); - } - - if (reshape_node) { - auto reshape_pattern = reshape_node->get_input_node_shared_ptr(1); - auto reshape_const = ov::as_type_ptr(reshape_pattern); - if (!reshape_const) { - return false; - } - const auto reshape_vec = reshape_const->cast_vector(); - for (int i = 0; i < split_num; ++i) { - split_reshape_pattern_vec[i] = {fc_dim_vec[i], reshape_vec[1]}; - } - } - - std::vector> zp_const_vec(split_num); - for (int i = 0; i < split_num; ++i) { - zp_const_vec[i] = need_to_split_convert ? 
split_cvts[i] : convert_node1_const->clone_with_new_inputs({}); - } - - for (int i = 0; i < split_num; ++i) { - auto sub_parent0 = std::make_shared(split_wgts[i], cvt_prec); - auto sub_parent1 = std::make_shared(zp_const_vec[i], cvt_prec); - ov::pass::disable_constant_folding(sub_parent0); - ov::pass::disable_constant_folding(sub_parent1); - auto sub_node = std::make_shared(sub_parent0, sub_parent1); - - auto mul_node = std::make_shared(sub_node, split_muls[i]); - if (reshape_node) { - auto reshape_pattern = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{2}, split_reshape_pattern_vec[i]); - wgt_node_vec[i] = std::make_shared(mul_node, reshape_pattern, reshape_node->get_special_zero()); - } else { - wgt_node_vec[i] = mul_node; - } - } - } else { - // get input - auto wgt_item = fc_node->get_input_node_shared_ptr(1); - - // split weight - auto split_dim_range = wgt_item->get_shape()[split_dim]; - - // We should use VariadicSplit to split input for FC. - auto fc_dim_vec = split_parts(split_dim_range, split_num); - auto split_length = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{static_cast(split_num)}, fc_dim_vec); - auto split_wgts = std::make_shared(wgt_item, - split_dim_node, - split_length); - - wgt_node_vec = split_wgts->outputs(); - } - - // create fc Nodes according to the splited weight or splited pattern. - std::vector> fc_node_vec(split_num); - for (int i = 0; i < split_num; ++i) { - fc_node_vec[i] = fc_node->clone_with_new_inputs(ov::OutputVector{src_item, wgt_node_vec[i]}); - fc_node_vec[i]->get_rt_info()["parallelDomain"] = fc_node->get_name(); - } - - // concat all small fc for result. - ov::NodeVector concat_args(std::move(fc_node_vec)); - // concat happens on the latest dimension. - constexpr size_t concat_dim = -1; - auto concat_node = std::make_shared(concat_args, concat_dim); - - // check the shape after transformation. 
- const auto& out_shape = fc_node->get_output_partial_shape(0); - const auto& concat_shape = concat_node->get_output_partial_shape(0); - if (concat_shape != out_shape) { - return false; - } - ov::replace_node_update_name(fc_node, concat_node); - return true; - }; - - auto m = std::make_shared(fc_m, matcher_name); - this->register_matcher(m, callback); -} diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/split_fc.hpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/split_fc.hpp deleted file mode 100644 index f8434770b278ef..00000000000000 --- a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/split_fc.hpp +++ /dev/null @@ -1,81 +0,0 @@ -// Copyright (C) 2018-2024 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include "openvino/pass/graph_rewrite.hpp" - -namespace ov { -namespace intel_cpu { - -/* - * Description: - * SplitFC detects FC CPU operation with and without compressed weights. - * And then splits the FC into several small FCs by output channel according to sub stream number. - * The goal is that the executor can dispatch the split FCs to different numa nodes in the system. - * As a result, the split FCs can be executed at the parallel level. 
- * - * Before: - * - * +-------+ +-------+ - * | X | | W | - * | | | | - * | | | | - * +-------+ +-------+ - * | | - * | | - * +---------------v---------------------------------v--------------+ - * | | - * | FullyConnected | - * | | - * +------------------------------+---------------------------------+ - * | - * | Output - * v - * - * After: - * - * +-------+ +-------+ - * | X | | W | - * | | | | - * | | | | - * +---+---+ +---+---+ - * | | - * | | - * | +-------v-------+ - * | | | - * | | VariadicSplit | - * | | | - * | +--+---------+--+ - * | | | - * | +------------------------+ | - * | | | - * +---------|------------------------+ | - * | | | | - * +----------v---------v---------+ +-----------v---------v--------+ - * | | | | - * | FullyConnected | | FullyConnected | - * | | | | - * +--------------+---------------+ +--------------+---------------+ - * | | - * | Output | Output - * | | - * +--------------v---------------------------------v---------------+ - * | | - * | Concat | - * | | - * +-------------------------------+--------------------------------+ - * | - * | - * v - */ - -class SplitFC: public ov::pass::MatcherPass { -public: - OPENVINO_RTTI("SplitFC", "0"); - SplitFC(int sub_stream_num); -}; - -} // namespace intel_cpu -} // namespace ov diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/convert_to_cpu_specific_opset.hpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/convert_to_cpu_specific_opset.hpp index 20502f67d3645e..87fa1291bb7141 100644 --- a/src/plugins/intel_cpu/src/transformations/cpu_opset/convert_to_cpu_specific_opset.hpp +++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/convert_to_cpu_specific_opset.hpp @@ -2,36 +2,67 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "openvino/core/type/element_type.hpp" #include "openvino/pass/constant_folding.hpp" -#include "openvino/op/fake_quantize.hpp" #include "openvino/pass/manager.hpp" #include "common/pass/align_matmul_input_ranks.hpp" -#include 
"transformations/common_optimizations/reshape_prelu.hpp" -#include "common/pass/convert_broadcast_to_tiles.hpp" +#include "transformations/common_optimizations/nop_elimination.hpp" #include "common/pass/convert_tile_to_seq_tiles.hpp" #include "common/pass/convert_matmul_to_fc.hpp" #include "common/pass/convert_to_power_static.hpp" #include "common/pass/convert_to_leaky_relu.hpp" #include "common/pass/convert_to_swish_cpu.hpp" #include "common/pass/move_fc_reshape_to_weights.hpp" -#include "common/pass/split_fc.hpp" +#include "common/pass/fc_bias_fusion.hpp" #include "transformations/convert_precision.hpp" -#include "transformations/utils/utils.hpp" +#include "transformations/op_conversions/convert_fc_to_compressed.hpp" +#include "transformations/op_conversions/convert_fc_to_quantized_legacy.hpp" #include "common/pass/rnn_sequences_optimization.hpp" #include "transformations/common_optimizations/reshape_sequence_fusion.hpp" #include "transformations/defs.hpp" +#include "config.h" +#include "nodes/fullyconnected.h" #include "itt.hpp" namespace ov { namespace intel_cpu { -inline void ConvertToCPUSpecificOpset(std::shared_ptr &model) { +inline void ConvertToCPUSpecificOpset(std::shared_ptr &model, const Config& config) { RUN_ON_FUNCTION_SCOPE(ConvertToCPUSpecificOpset); ov::pass::Manager manager("CPU:ConvertToCPUSpecificOpset"); manager.set_per_pass_validation(false); + CPU_REGISTER_PASS_COMMON(manager, ConvertMatMulToFC); + CPU_REGISTER_PASS_COMMON(manager, FullyConnectedBiasFusion); + + std::vector supported_activation_types { + // @todo enable for bf16 as well + // after EnforceInferencePrecision is replaced with ConvertPrecision + ov::element::f32, + }; + + std::vector supported_compressed_weights_types { + ov::element::u8, + ov::element::i8, + ov::element::u4, + ov::element::i4, + ov::element::nf4, + ov::element::f4e2m1, + }; + + CPU_REGISTER_PASS_X64( + manager, + pass::ConvertFullyConnectedToFullyConnectedCompressed, + supported_activation_types, + 
supported_compressed_weights_types, + [&config](const std::shared_ptr& fc, size_t IC, size_t OC, size_t G) { + return ov::intel_cpu::node::FullyConnected::isSupportedCompressedOperation( + fc, IC, OC, G, config.inferencePrecision); + }); + + CPU_REGISTER_PASS_X64(manager, pass::ConvertFCToFCQuantizedLegacy); CPU_REGISTER_PASS_X64(manager, MoveFCReshapeToWeights); CPU_REGISTER_PASS_X64(manager, ov::pass::Validate); CPU_REGISTER_PASS_COMMON(manager, AlignMatMulInputRanks); diff --git a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp index 27afb95a73a1e9..f9fa372030e4cc 100644 --- a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp +++ b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp @@ -21,6 +21,7 @@ // Common transformations #include "transformations/common_optimizations/mark_precision_sensitive_shapeof_subgraphs.hpp" #include "transformations/common_optimizations/add_fake_quantize_fusion.hpp" +#include "transformations/common_optimizations/reshape_prelu.hpp" #include "transformations/fp16_compression/convert_compression_only_to_legacy.hpp" #include "transformations/common_optimizations/convert_quantize_dequantize.hpp" #include "transformations/common_optimizations/lstm_cell_fusion.hpp" @@ -319,7 +320,7 @@ void Transformations::UpToLpt() { void Transformations::CpuSpecificOpSet(void) { CPU_DEBUG_CAP_TRANSFORMATION_SCOPE(this, Specific); - ConvertToCPUSpecificOpset(model); + ConvertToCPUSpecificOpset(model, config); } void Transformations::PreLpt(const std::vector& defaultPrecisions) { diff --git a/src/plugins/intel_cpu/src/transformations/utils.cpp b/src/plugins/intel_cpu/src/transformations/utils.cpp index 3aa74f9ed9a970..63871868713e02 100644 --- a/src/plugins/intel_cpu/src/transformations/utils.cpp +++ b/src/plugins/intel_cpu/src/transformations/utils.cpp @@ -4,7 +4,7 @@ #include "utils.hpp" #include "openvino/opsets/opset1.hpp" 
-#include "cpu_opset/common/op/fully_connected.hpp" +#include "ov_ops/fully_connected.hpp" #include "transformations/rt_info/dequantization_node.hpp" #include "transformations/utils/utils.hpp" @@ -21,7 +21,7 @@ bool has_matmul_with_compressed_weights(const std::shared_ptr& }; for (const auto& op : model->get_ops()) { - if (!ov::is_type(op) && !ov::is_type(op)) + if (!ov::is_type(op) && !ov::is_type(op)) continue; if (!op->get_input_element_type(0).is_real()) diff --git a/src/plugins/intel_cpu/src/utils/cpu_utils.hpp b/src/plugins/intel_cpu/src/utils/cpu_utils.hpp index b6bd36205f985d..8ae9aa67edf9a7 100644 --- a/src/plugins/intel_cpu/src/utils/cpu_utils.hpp +++ b/src/plugins/intel_cpu/src/utils/cpu_utils.hpp @@ -9,6 +9,7 @@ #include #include "general_utils.h" +#include "openvino/core/except.hpp" #include "precision_support.h" namespace ov { @@ -156,5 +157,35 @@ inline std::vector makeAlignedBuffer(size_t targetSize, const std::vector } return alignedBuffer; } + +/** +* @brief Reshape a tensor down to a specific rank +* +* Examples: +* - reshapeToRank<2>({1, 2, 3, 4, 5}) == {1*2*3*4, 5} == {24, 5} +* - reshapeToRank<4>({1, 2, 3, 4, 5}) == {1*2, 3, 4, 5} == {2, 3, 4, 5} +*/ +template +std::vector reshapeDownToRank(const std::vector& dims, size_t rank) { + OPENVINO_ASSERT(rank > 0, "Rank greater than zero is expected"); + + if (dims.size() <= rank) { + return dims; + } + + const auto accEnd = dims.begin() + (dims.size() - rank + 1); + const auto acc = std::accumulate(dims.begin(), accEnd, (T)1, std::multiplies()); + + std::vector result{acc}; + result.insert(result.end(), accEnd, dims.end()); + + return result; +} + +template +std::vector reshapeDownToRank(const std::vector& dims) { + return reshapeDownToRank(dims, rank); +} + } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/utils/debug_capabilities.cpp b/src/plugins/intel_cpu/src/utils/debug_capabilities.cpp index fcc983d84166c5..195d46c70e1c7c 100644 --- 
a/src/plugins/intel_cpu/src/utils/debug_capabilities.cpp +++ b/src/plugins/intel_cpu/src/utils/debug_capabilities.cpp @@ -2,6 +2,7 @@ // Copyright (C) 2018-2024 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // +#include "openvino/core/type/element_type.hpp" #ifdef CPU_DEBUG_CAPS #include "cpu_memory.h" @@ -310,7 +311,7 @@ std::ostream & operator<<(std::ostream & os, const Node &c_node) { void * data = pmem->getData(); auto shape = pmem->getDesc().getShape().getDims(); - if (shape_size(shape) <= 8) { + if (shape_size(shape) <= 8 && pmem->getDesc().getPrecision() != ov::element::undefined) { auto type = pmem->getDesc().getPrecision(); auto tensor = ov::Tensor(type, shape, data); auto constop = std::make_shared(tensor); @@ -663,7 +664,7 @@ std::ostream& operator<<(std::ostream& os, const IMemory& mem) { } return os; } -// @todo remove + void print_dnnl_memory(const dnnl::memory& memory, const size_t size, const int id, const char* message) { const size_t s = memory.get_desc().get_size() / sizeof(float); std::cout << message << " " << id << " size: " << s << ", values: "; diff --git a/src/plugins/intel_cpu/src/utils/debug_capabilities.h b/src/plugins/intel_cpu/src/utils/debug_capabilities.h index 7a1158d259a4a3..2646ba817dca9c 100644 --- a/src/plugins/intel_cpu/src/utils/debug_capabilities.h +++ b/src/plugins/intel_cpu/src/utils/debug_capabilities.h @@ -3,6 +3,7 @@ // #pragma once +#include "cpu_types.h" #include "openvino/util/env_util.hpp" #ifdef CPU_DEBUG_CAPS @@ -94,6 +95,12 @@ class PrintableTimer { } }; +template +std::ostream & operator<<(std::ostream & os, const std::vector vec) { + for (const auto& element : vec) + os << element << "x"; + return os; +} std::ostream & operator<<(std::ostream & os, const PortConfig& desc); std::ostream & operator<<(std::ostream & os, const NodeConfig& desc); std::ostream & operator<<(std::ostream & os, const NodeDesc& desc); diff --git 
a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/arm/matmul.cpp b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/arm/matmul.cpp index 6d827614f80c54..4afdd90427b06e 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/arm/matmul.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/single_layer_tests/instances/arm/matmul.cpp @@ -23,7 +23,6 @@ static const std::vector& filterSpecificParamsFC() { std::vector fusingParamsSet2D_smoke { emptyFusingSpec, fusingBias, - fusingMultiplyPerChannel, fusingRelu, fusingTanh }; @@ -62,7 +61,6 @@ INSTANTIATE_TEST_SUITE_P(smoke_FC_2D_f16, MatMulLayerCPUTest, testParams2D_smoke std::vector fusingParamsSet3D_smoke { emptyFusingSpec, fusingBias, - fusingMultiplyPerChannel, fusingRelu, fusingTanh }; @@ -106,7 +104,6 @@ const std::vector IS = { std::vector fusingParamsSet4D_smoke { emptyFusingSpec, - fusingMultiplyPerChannel, fusingRelu, fusingTanh }; diff --git a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/x64/matmul_weights_decompression.cpp b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/x64/matmul_weights_decompression.cpp index 3643427de3e9b7..9a434943893eed 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/x64/matmul_weights_decompression.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/x64/matmul_weights_decompression.cpp @@ -87,7 +87,7 @@ class MatmulWeightsDecompression : public testing::WithParamInterface() << ":"; + result << configEntry.first << ", " << configEntry.second.as() << "_"; } result << ")"; result << CpuTestWithFusing::getTestCaseName(fusing_params); diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/custom_shape_infer/fullconnect.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/custom_shape_infer/fullconnect.cpp index a5b01a2c3c2f9c..90a2fc9d0b9768 100644 --- 
a/src/plugins/intel_cpu/tests/unit/shape_inference_test/custom_shape_infer/fullconnect.cpp +++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/custom_shape_infer/fullconnect.cpp @@ -4,9 +4,11 @@ #include -#include "openvino/op/parameter.hpp" -#include "transformations/cpu_opset/common/op/fully_connected.hpp" #include "custom_shape_infer.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/parameter.hpp" +#include "ov_ops/fully_connected.hpp" + namespace ov { namespace intel_cpu { namespace unit_test { @@ -16,16 +18,66 @@ using namespace ov; using namespace ov::intel_cpu; TEST(CpuShapeInfer, FC_InputSize_2) { - auto activate = std::make_shared(element::f32, PartialShape{-1, -1 }); + auto activate = std::make_shared(element::f32, PartialShape{-1, -1}); auto weight = std::make_shared(element::f32, PartialShape{5, 6}); - auto op = std::make_shared(activate, weight, ov::Rank(5), element::f32); + auto op = std::make_shared( + activate, + weight, + std::make_shared(ov::element::undefined, ov::Shape{0})); std::vector static_input_shapes = {StaticShape{720, 640}, {5, 6}}; - std::vector static_output_shapes = {StaticShape{1, 1, 1, 720, 5}}; + std::vector static_output_shapes = {StaticShape{720, 5}}; + unit_test::cpu_test_shape_infer(op.get(), static_input_shapes, static_output_shapes); +} + +TEST(CpuShapeInfer, FC_broadcastWeights1) { + auto activate = std::make_shared(element::f32, PartialShape{1, -1, -1}); + auto weight = std::make_shared(element::f32, PartialShape{5, 6}); + auto op = std::make_shared( + activate, + weight, + std::make_shared(ov::element::undefined, ov::Shape{0})); + std::vector static_input_shapes = {StaticShape{1, 720, 6}, {5, 6}}; + std::vector static_output_shapes = {StaticShape{1, 720, 5}}; + unit_test::cpu_test_shape_infer(op.get(), static_input_shapes, static_output_shapes); +} + +TEST(CpuShapeInfer, FC_broadcastWeights2) { + auto activate = std::make_shared(element::f32, PartialShape{-1, -1, -1, -1}); + auto weight = 
std::make_shared(element::f32, PartialShape{5, 6}); + auto op = std::make_shared( + activate, + weight, + std::make_shared(ov::element::undefined, ov::Shape{0})); + std::vector static_input_shapes = {StaticShape{2, 3, 720, 6}, {5, 6}}; + std::vector static_output_shapes = {StaticShape{2, 3, 720, 5}}; + unit_test::cpu_test_shape_infer(op.get(), static_input_shapes, static_output_shapes); +} + +TEST(CpuShapeInfer, FC_broadcastActivations1) { + auto activate = std::make_shared(element::f32, PartialShape{720, -1}); + auto weight = std::make_shared(element::f32, PartialShape{1, 5, 6}); + auto op = std::make_shared( + activate, + weight, + std::make_shared(ov::element::undefined, ov::Shape{0})); + std::vector static_input_shapes = {StaticShape{720, 6}, {1, 5, 6}}; + std::vector static_output_shapes = {StaticShape{1, 720, 5}}; unit_test::cpu_test_shape_infer(op.get(), static_input_shapes, static_output_shapes); } -} // namespace cpu_shape_infer -} // namespace unit_test -} // namespace intel_cpu -} // namespace ov +TEST(CpuShapeInfer, FC_broadcastActivations2) { + auto activate = std::make_shared(element::f32, PartialShape{-1, -1}); + auto weight = std::make_shared(element::f32, PartialShape{1, 1, 5, 6}); + auto op = std::make_shared( + activate, + weight, + std::make_shared(ov::element::undefined, ov::Shape{0})); + std::vector static_input_shapes = {StaticShape{720, 6}, {1, 1, 5, 6}}; + std::vector static_output_shapes = {StaticShape{1, 1, 720, 5}}; + unit_test::cpu_test_shape_infer(op.get(), static_input_shapes, static_output_shapes); +} +} // namespace cpu_shape_infer +} // namespace unit_test +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/tests/unit/transformations/convert_matmul_test.cpp b/src/plugins/intel_cpu/tests/unit/transformations/convert_matmul_test.cpp index cb085920d97dc5..37df1fd6d27910 100644 --- a/src/plugins/intel_cpu/tests/unit/transformations/convert_matmul_test.cpp +++ 
b/src/plugins/intel_cpu/tests/unit/transformations/convert_matmul_test.cpp @@ -4,21 +4,20 @@ #include -#include #include - #include #include #include #include -#include +#include +#include #include #include #include -#include -#include #include "common_test_utils/ov_test_utils.hpp" +#include "openvino/op/constant.hpp" +#include "ov_ops/fully_connected.hpp" #include "transformations/rt_info/decompression.hpp" using namespace testing; @@ -26,25 +25,28 @@ using namespace ov::intel_cpu; TEST_F(TransformationTestsF, ConvertMatMulToFCTest1) { { - auto input1 = std::make_shared(ov::element::f32, ov::Shape{ 3, 2, 2 }); - auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{ 1, 2, 2 }, { 1 }); + auto input1 = std::make_shared(ov::element::f32, ov::Shape{3, 2, 2}); + auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{1, 2, 2}, {1}); auto matmul = std::make_shared(input1, input2, true, false); - model = std::make_shared(ov::NodeVector{ matmul }, ov::ParameterVector{ input1 }); + model = std::make_shared(ov::NodeVector{matmul}, ov::ParameterVector{input1}); manager.register_pass(); } { - auto input1 = std::make_shared(ov::element::f32, ov::Shape{ 3, 2, 2 }); - auto transpose_constant1 = ov::opset1::Constant::create(ov::element::i32, ov::Shape{ 3 }, { 0, 2, 1 }); + auto input1 = std::make_shared(ov::element::f32, ov::Shape{3, 2, 2}); + auto transpose_constant1 = ov::opset1::Constant::create(ov::element::i32, ov::Shape{3}, {0, 2, 1}); auto transpose1 = std::make_shared(input1, transpose_constant1); - auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{ 2, 2 }, { 1 }); - auto transpose_constant2 = ov::opset1::Constant::create(ov::element::i32, ov::Shape{ 2 }, { 1, 0 }); + auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{1, 2, 2}, {1}); + auto transpose_constant2 = ov::opset1::Constant::create(ov::element::i32, ov::Shape{3}, {0, 2, 1}); auto transpose2 = std::make_shared(input2, transpose_constant2); - 
auto matmul = std::make_shared(transpose1, transpose2, ov::Rank(3)); + auto matmul = std::make_shared( + transpose1, + transpose2, + std::make_shared(ov::element::undefined, ov::Shape{0})); - model_ref = std::make_shared(ov::NodeVector{ matmul }, ov::ParameterVector{ input1 }); + model_ref = std::make_shared(ov::NodeVector{matmul}, ov::ParameterVector{input1}); } } @@ -78,7 +80,10 @@ TEST_F(TransformationTestsF, ConvertMatMulToFCTest3) { { auto input1 = std::make_shared(ov::element::f32, ov::Shape{3, 2, 2}); auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{2, 2}, {1}); - auto matmul = std::make_shared(input1, input2, ov::Rank(3)); + auto matmul = std::make_shared( + input1, + input2, + std::make_shared(ov::element::undefined, ov::Shape{0})); model_ref = std::make_shared(ov::NodeVector{matmul}, ov::ParameterVector{input1}); } @@ -96,27 +101,30 @@ TEST_F(TransformationTestsF, ConvertMatMulToFCTest4) { { auto input1 = std::make_shared(ov::element::f32, ov::PartialShape{-1, -1, 2}); auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{2, 2}, {1}); - auto matmul = std::make_shared(input1, input2, ov::Rank(3)); + auto matmul = std::make_shared( + input1, + input2, + std::make_shared(ov::element::undefined, ov::Shape{0})); model_ref = std::make_shared(ov::NodeVector{matmul}, ov::ParameterVector{input1}); } } TEST_F(TransformationTestsF, ConvertMatMulToFCTest5) { - auto input1 = std::make_shared(ov::element::f32, ov::PartialShape{ -1, -1, 2 }); - auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{ 3, 2, 2 }, { 1 }); + auto input1 = std::make_shared(ov::element::f32, ov::PartialShape{-1, -1, 2}); + auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{3, 2, 2}, {1}); auto matmul = std::make_shared(input1, input2, false, true); - model = std::make_shared(ov::NodeVector{ matmul }, ov::ParameterVector{ input1 }); + model = std::make_shared(ov::NodeVector{matmul}, ov::ParameterVector{input1}); 
manager.register_pass(); } TEST_F(TransformationTestsF, ConvertMatMulToFCTest6) { - auto input1 = std::make_shared(ov::element::f32, ov::PartialShape{ -1, -1, 2 }); - auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{ 3, 1, 2 }, { 1 }); + auto input1 = std::make_shared(ov::element::f32, ov::PartialShape{-1, -1, 2}); + auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{3, 1, 2}, {1}); auto matmul = std::make_shared(input1, input2, false, true); - model = std::make_shared(ov::NodeVector{ matmul }, ov::ParameterVector{ input1 }); + model = std::make_shared(ov::NodeVector{matmul}, ov::ParameterVector{input1}); manager.register_pass(); } @@ -132,7 +140,10 @@ TEST_F(TransformationTestsF, ConvertMatMulToFCTest7) { { auto input1 = std::make_shared(ov::element::f32, ov::Shape{3, 2, 2}); auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{3, 2}, {1}); - auto fc = std::make_shared(input1, input2, ov::Rank(2)); + auto fc = std::make_shared( + input1, + input2, + std::make_shared(ov::element::undefined, ov::Shape{0})); model_ref = std::make_shared(ov::NodeVector{fc}, ov::ParameterVector{input1}); } @@ -151,11 +162,14 @@ TEST_F(TransformationTestsF, ConvertMatMulToFCTest8) { auto input1 = std::make_shared(ov::element::f32, ov::PartialShape{-1, -1, 2}); auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{3, 2}, {1}); - auto fc = std::make_shared(input1, input2, ov::Rank(2)); + auto fc = std::make_shared( + input1, + input2, + std::make_shared(ov::element::undefined, ov::Shape{0})); auto a_shape = std::make_shared(input1); auto I = ov::op::util::node_to_get_shape_value_of_indices_from_shape_node(a_shape, {0, 1}); - auto O = ov::opset1::Constant::create(ov::element::i64, { 1 }, { 3 }); + auto O = ov::opset1::Constant::create(ov::element::i64, {1}, {3}); auto output_shape = std::make_shared(ov::OutputVector{I, O}, 0); model_ref = std::make_shared(ov::NodeVector{fc}, ov::ParameterVector{input1}); @@ -174,7 
+188,10 @@ TEST_F(TransformationTestsF, ConvertMatMulToFCTest9) { { auto input1 = std::make_shared(ov::element::f32, ov::Shape{3, 2, 2}); auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{2, 2}, {1}); - auto matmul = std::make_shared(input1, input2, ov::Rank(3)); + auto matmul = std::make_shared( + input1, + input2, + std::make_shared(ov::element::undefined, ov::Shape{0})); model_ref = std::make_shared(ov::NodeVector{matmul}, ov::ParameterVector{input1}); } @@ -182,10 +199,10 @@ TEST_F(TransformationTestsF, ConvertMatMulToFCTest9) { TEST_F(TransformationTestsF, ConvertMatMulToFCTest10) { auto input1 = std::make_shared(ov::element::f32, ov::PartialShape::dynamic()); - auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{ 2, 2 }, { 1 }); + auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{2, 2}, {1}); auto matmul = std::make_shared(input1, input2, false, true); - model = std::make_shared(ov::NodeVector{ matmul }, ov::ParameterVector{ input1 }); + model = std::make_shared(ov::NodeVector{matmul}, ov::ParameterVector{input1}); manager.register_pass(); } @@ -218,8 +235,11 @@ TEST_F(TransformationTestsF, ConvertMatMulToFCTest13) { } { auto input1 = std::make_shared(ov::element::f32, ov::PartialShape{-1, -1, 1}); - auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{80, 1}, {1}); - auto matmul = std::make_shared(input1, input2, ov::Rank(3)); + auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{1, 80, 1}, {1}); + auto matmul = std::make_shared( + input1, + input2, + std::make_shared(ov::element::undefined, ov::Shape{0})); model_ref = std::make_shared(ov::NodeVector{matmul}, ov::ParameterVector{input1}); } @@ -242,8 +262,13 @@ TEST_F(TransformationTestsF, ConvertMatMulToFCTest14) { } { auto input1 = std::make_shared(ov::element::u8, ov::PartialShape{-1, -1, 1}); - auto input2 = ov::opset1::Constant::create(ov::element::i8, ov::Shape{80, 1}, {1}); - auto matmul = 
std::make_shared(input1, input2, ov::Rank(3), ov::element::f32); + auto input2 = ov::opset1::Constant::create(ov::element::i8, ov::Shape{1, 80, 1}, {1}); + + auto matmul = std::make_shared( + input1, + input2, + std::make_shared(ov::element::undefined, ov::Shape{0}), + ov::element::f32); model_ref = std::make_shared(ov::NodeVector{matmul}, ov::ParameterVector{input1}); } @@ -252,7 +277,7 @@ TEST_F(TransformationTestsF, ConvertMatMulToFCTest14) { TEST_F(TransformationTestsF, ConvertMatMulToFCTest_4d_1) { { auto input1 = std::make_shared(ov::element::f32, ov::Shape{2, 3, 4, 5}); - auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{ 6, 5 }, { 1 }); + auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{6, 5}, {1}); auto matmul = std::make_shared(input1, input2, false, true); model = std::make_shared(ov::NodeVector{matmul}, ov::ParameterVector{input1}); @@ -260,8 +285,13 @@ TEST_F(TransformationTestsF, ConvertMatMulToFCTest_4d_1) { } { auto input1 = std::make_shared(ov::element::f32, ov::Shape{2, 3, 4, 5}); - auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{ 6, 5 }, { 1 }); - auto fc = std::make_shared(input1, input2, ov::Rank(4), ov::element::f32); + auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{6, 5}, {1}); + + auto fc = std::make_shared( + input1, + input2, + std::make_shared(ov::element::undefined, ov::Shape{0}), + ov::element::f32); model_ref = std::make_shared(ov::NodeVector{fc}, ov::ParameterVector{input1}); } @@ -278,8 +308,11 @@ TEST_F(TransformationTestsF, ConvertMatMulToFCTest_4d_2) { } { auto input1 = std::make_shared(ov::element::f32, ov::PartialShape{-1, -1, 1, 5}); - auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{10, 5}, {1}); - auto fc = std::make_shared(input1, input2, ov::Rank(4)); + auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{1, 10, 5}, {1}); + auto fc = std::make_shared( + input1, + input2, + 
std::make_shared(ov::element::undefined, ov::Shape{0})); model_ref = std::make_shared(ov::NodeVector{fc}, ov::ParameterVector{input1}); } @@ -288,7 +321,7 @@ TEST_F(TransformationTestsF, ConvertMatMulToFCTest_4d_2) { TEST_F(TransformationTestsF, ConvertMatMulToFCTest_4d_3) { { auto input1 = std::make_shared(ov::element::f32, ov::Shape{2, 4}); - auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{1, 1, 5, 4}, { 1 }); + auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{1, 1, 5, 4}, {1}); auto matmul = std::make_shared(input1, input2, false, true); model = std::make_shared(ov::NodeVector{matmul}, ov::ParameterVector{input1}); @@ -296,8 +329,12 @@ TEST_F(TransformationTestsF, ConvertMatMulToFCTest_4d_3) { } { auto input1 = std::make_shared(ov::element::f32, ov::Shape{2, 4}); - auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{5, 4}, { 1 }); - auto fc = std::make_shared(input1, input2, ov::Rank(4), ov::element::f32); + auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{1, 1, 5, 4}, {1}); + auto fc = std::make_shared( + input1, + input2, + std::make_shared(ov::element::undefined, ov::Shape{0}), + ov::element::f32); model_ref = std::make_shared(ov::NodeVector{fc}, ov::ParameterVector{input1}); } @@ -306,7 +343,7 @@ TEST_F(TransformationTestsF, ConvertMatMulToFCTest_4d_3) { TEST_F(TransformationTestsF, ConvertMatMulToFCTest_4d_4) { { auto input1 = std::make_shared(ov::element::f32, ov::Shape{3, 2, 4}); - auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{1, 1, 5, 4}, { 1 }); + auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{1, 1, 5, 4}, {1}); auto matmul = std::make_shared(input1, input2, false, true); model = std::make_shared(ov::NodeVector{matmul}, ov::ParameterVector{input1}); @@ -314,8 +351,12 @@ TEST_F(TransformationTestsF, ConvertMatMulToFCTest_4d_4) { } { auto input1 = std::make_shared(ov::element::f32, ov::Shape{3, 2, 4}); - auto input2 = 
ov::opset1::Constant::create(ov::element::f32, ov::Shape{5, 4}, { 1 }); - auto fc = std::make_shared(input1, input2, ov::Rank(4), ov::element::f32); + auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{1, 1, 5, 4}, {1}); + auto fc = std::make_shared( + input1, + input2, + std::make_shared(ov::element::undefined, ov::Shape{0}), + ov::element::f32); model_ref = std::make_shared(ov::NodeVector{fc}, ov::ParameterVector{input1}); } @@ -324,7 +365,7 @@ TEST_F(TransformationTestsF, ConvertMatMulToFCTest_4d_4) { TEST_F(TransformationTestsF, ConvertMatMulToFCTest_4d_5) { { auto input1 = std::make_shared(ov::element::f32, ov::Shape{2, 3, 2, 4}); - auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{1, 1, 5, 4}, { 1 }); + auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{1, 1, 5, 4}, {1}); auto matmul = std::make_shared(input1, input2, false, true); model = std::make_shared(ov::NodeVector{matmul}, ov::ParameterVector{input1}); @@ -332,8 +373,12 @@ TEST_F(TransformationTestsF, ConvertMatMulToFCTest_4d_5) { } { auto input1 = std::make_shared(ov::element::f32, ov::Shape{2, 3, 2, 4}); - auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{5, 4}, { 1 }); - auto fc = std::make_shared(input1, input2, ov::Rank(4), ov::element::f32); + auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{1, 1, 5, 4}, {1}); + auto fc = std::make_shared( + input1, + input2, + std::make_shared(ov::element::undefined, ov::Shape{0}), + ov::element::f32); model_ref = std::make_shared(ov::NodeVector{fc}, ov::ParameterVector{input1}); } @@ -350,97 +395,112 @@ TEST_F(TransformationTestsF, ConvertMatMulToFCTest_second_input_rank_adj_1) { } { auto input1 = std::make_shared(ov::element::f32, ov::Shape{5, 2, 3}); - auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{2, 3}, {1}); - auto matmul = std::make_shared(input1, input2, ov::Rank(2)); + auto input2 = 
ov::opset1::Constant::create(ov::element::f32, ov::Shape{1, 2, 3}, {1}); + auto matmul = std::make_shared( + input1, + input2, + std::make_shared(ov::element::undefined, ov::Shape{0})); model_ref = std::make_shared(ov::NodeVector{matmul}, ov::ParameterVector{input1}); } } TEST_F(TransformationTestsF, ConvertMatMulToFCTest_second_input_rank_adj_2) { { - auto input1 = std::make_shared(ov::element::f32, ov::Shape{ 2, 3 }); - auto weights = ov::opset1::Constant::create(ov::element::f32, ov::Shape{ 2, 3 }, { 1 }); + auto input1 = std::make_shared(ov::element::f32, ov::Shape{2, 3}); + auto weights = ov::opset1::Constant::create(ov::element::f32, ov::Shape{2, 3}, {1}); auto matmul = std::make_shared(input1, weights, false, true); - model = std::make_shared(ov::NodeVector{ matmul }, ov::ParameterVector{ input1 }); + model = std::make_shared(ov::NodeVector{matmul}, ov::ParameterVector{input1}); manager.register_pass(); } { - auto input1 = std::make_shared(ov::element::f32, ov::Shape{ 2, 3 }); - auto weights = ov::opset1::Constant::create(ov::element::f32, ov::Shape{ 2, 3 }, { 1 }); - auto matmul = std::make_shared(input1, weights, ov::Rank(2)); + auto input1 = std::make_shared(ov::element::f32, ov::Shape{2, 3}); + auto weights = ov::opset1::Constant::create(ov::element::f32, ov::Shape{2, 3}, {1}); + auto matmul = std::make_shared( + input1, + weights, + std::make_shared(ov::element::undefined, ov::Shape{0})); - model_ref = std::make_shared(ov::NodeVector{ matmul }, ov::ParameterVector{ input1 }); + model_ref = std::make_shared(ov::NodeVector{matmul}, ov::ParameterVector{input1}); } } TEST_F(TransformationTestsF, ConvertMatMulToFCTest_second_input_rank_adj_3) { { - auto input1 = std::make_shared(ov::element::f32, ov::Shape{ 5, 2, 3 }); - auto weights = ov::opset1::Constant::create(ov::element::f32, ov::Shape{ 1, 2, 3 }, { 1 }); + auto input1 = std::make_shared(ov::element::f32, ov::Shape{5, 2, 3}); + auto weights = ov::opset1::Constant::create(ov::element::f32, ov::Shape{1, 
2, 3}, {1}); auto matmul = std::make_shared(input1, weights, false, true); - model = std::make_shared(ov::NodeVector{ matmul }, ov::ParameterVector{ input1 }); + model = std::make_shared(ov::NodeVector{matmul}, ov::ParameterVector{input1}); manager.register_pass(); } { - auto input1 = std::make_shared(ov::element::f32, ov::Shape{ 5, 2, 3 }); + auto input1 = std::make_shared(ov::element::f32, ov::Shape{5, 2, 3}); - auto weights = ov::opset1::Constant::create(ov::element::f32, ov::Shape{ 2, 3 }, { 1 }); - auto matmul = std::make_shared(input1, weights, ov::Rank(3)); - model_ref = std::make_shared(ov::NodeVector{ matmul }, ov::ParameterVector{ input1 }); + auto weights = ov::opset1::Constant::create(ov::element::f32, ov::Shape{1, 2, 3}, {1}); + auto matmul = std::make_shared( + input1, + weights, + std::make_shared(ov::element::undefined, ov::Shape{0})); + model_ref = std::make_shared(ov::NodeVector{matmul}, ov::ParameterVector{input1}); } } TEST_F(TransformationTestsF, ConvertMatMulToFCTest_decompress_convert_0) { { - auto input1 = std::make_shared(ov::element::f32, ov::Shape{ 3, 2, 2 }); - auto input2 = ov::opset1::Constant::create(ov::element::f16, ov::Shape{ 1, 2, 2 }, { 1 }); + auto input1 = std::make_shared(ov::element::f32, ov::Shape{3, 2, 2}); + auto input2 = ov::opset1::Constant::create(ov::element::f16, ov::Shape{1, 2, 2}, {1}); auto convert = std::make_shared(input2, ov::element::f32); ov::mark_as_decompression(convert); auto matmul = std::make_shared(input1, convert, false, false); - model = std::make_shared(ov::NodeVector{ matmul }, ov::ParameterVector{ input1 }); + model = std::make_shared(ov::NodeVector{matmul}, ov::ParameterVector{input1}); manager.register_pass(); } { - auto input1 = std::make_shared(ov::element::f32, ov::Shape{ 3, 2, 2 }); + auto input1 = std::make_shared(ov::element::f32, ov::Shape{3, 2, 2}); - auto input2 = ov::opset1::Constant::create(ov::element::f16, ov::Shape{ 2, 2 }, { 1 }); - auto transpose_constant = 
ov::opset1::Constant::create(ov::element::i32, ov::Shape{ 2 }, { 1, 0 }); + auto input2 = ov::opset1::Constant::create(ov::element::f16, ov::Shape{1, 2, 2}, {1}); + auto transpose_constant = ov::opset1::Constant::create(ov::element::i32, ov::Shape{3}, {0, 2, 1}); auto transpose = std::make_shared(input2, transpose_constant); auto convert = std::make_shared(transpose, ov::element::f32); - auto matmul = std::make_shared(input1, convert, ov::Rank(3)); + auto matmul = std::make_shared( + input1, + convert, + std::make_shared(ov::element::undefined, ov::Shape{0})); - model_ref = std::make_shared(ov::NodeVector{ matmul }, ov::ParameterVector{ input1 }); + model_ref = std::make_shared(ov::NodeVector{matmul}, ov::ParameterVector{input1}); } } TEST_F(TransformationTestsF, ConvertMatMulToFCTest_decompress_convert_1) { { - auto input1 = std::make_shared(ov::element::f32, ov::Shape{ 3, 2, 2 }); - auto input2 = ov::opset1::Constant::create(ov::element::f16, ov::Shape{ 1, 2, 2 }, { 1 }); + auto input1 = std::make_shared(ov::element::f32, ov::Shape{3, 2, 2}); + auto input2 = ov::opset1::Constant::create(ov::element::f16, ov::Shape{1, 2, 2}, {1}); auto convert = std::make_shared(input2, ov::element::f32); ov::mark_as_decompression(convert); auto matmul = std::make_shared(input1, convert, true, false); - model = std::make_shared(ov::NodeVector{ matmul }, ov::ParameterVector{ input1 }); + model = std::make_shared(ov::NodeVector{matmul}, ov::ParameterVector{input1}); manager.register_pass(); } { - auto input1 = std::make_shared(ov::element::f32, ov::Shape{ 3, 2, 2 }); - auto transpose_constant1 = ov::opset1::Constant::create(ov::element::i32, ov::Shape{ 3 }, { 0, 2, 1 }); + auto input1 = std::make_shared(ov::element::f32, ov::Shape{3, 2, 2}); + auto transpose_constant1 = ov::opset1::Constant::create(ov::element::i32, ov::Shape{3}, {0, 2, 1}); auto transpose1 = std::make_shared(input1, transpose_constant1); - auto input2 = ov::opset1::Constant::create(ov::element::f16, ov::Shape{ 2, 2 
}, { 1 }); - auto transpose_constant2 = ov::opset1::Constant::create(ov::element::i32, ov::Shape{ 2 }, { 1, 0 }); + auto input2 = ov::opset1::Constant::create(ov::element::f16, ov::Shape{1, 2, 2}, {1}); + auto transpose_constant2 = ov::opset1::Constant::create(ov::element::i32, ov::Shape{3}, {0, 2, 1}); auto transpose2 = std::make_shared(input2, transpose_constant2); auto convert = std::make_shared(transpose2, ov::element::f32); - auto matmul = std::make_shared(transpose1, convert, ov::Rank(3)); + auto matmul = std::make_shared( + transpose1, + convert, + std::make_shared(ov::element::undefined, ov::Shape{0})); - model_ref = std::make_shared(ov::NodeVector{ matmul }, ov::ParameterVector{ input1 }); + model_ref = std::make_shared(ov::NodeVector{matmul}, ov::ParameterVector{input1}); } } @@ -467,12 +527,13 @@ TEST_F(TransformationTestsF, ConvertMatMulToFCTest_compressed_u8_weights) { auto mul_const = ov::opset1::Constant::create(ov::element::f32, ov::Shape{1, 1, 2}, {1}); auto mul = std::make_shared(sub, mul_const); - auto reshape_const = ov::opset1::Constant::create(ov::element::i32, {2}, {2, -1}); - auto reshape = std::make_shared(mul, reshape_const, false); - auto transpose_const = ov::opset1::Constant::create(ov::element::i32, {2}, {1, 0}); - auto transpose = std::make_shared(reshape, transpose_const); - auto matmul = std::make_shared(data, transpose, ov::Rank(3)); + auto transpose_const = ov::opset1::Constant::create(ov::element::i32, {3}, {0, 2, 1}); + auto transpose = std::make_shared(mul, transpose_const); + auto matmul = std::make_shared( + data, + transpose, + std::make_shared(ov::element::undefined, ov::Shape{0})); - model_ref = std::make_shared(ov::NodeVector{ matmul }, ov::ParameterVector{ data }); + model_ref = std::make_shared(ov::NodeVector{matmul}, ov::ParameterVector{data}); } } diff --git a/src/plugins/intel_cpu/tests/unit/transformations/move_fc_reshape_to_weights.cpp 
b/src/plugins/intel_cpu/tests/unit/transformations/move_fc_reshape_to_weights.cpp index 68241c9169bce7..b3d733aecba27b 100644 --- a/src/plugins/intel_cpu/tests/unit/transformations/move_fc_reshape_to_weights.cpp +++ b/src/plugins/intel_cpu/tests/unit/transformations/move_fc_reshape_to_weights.cpp @@ -11,7 +11,7 @@ #include #include -#include +#include "ov_ops/fully_connected.hpp" #include #include @@ -115,7 +115,12 @@ class MoveFCReshapeToWeightsTests : public TransformationTestsF, public WithPara auto transpose_const = ov::opset1::Constant::create(ov::element::i32, {2}, {1, 0}); weights_path = std::make_shared(weights_path, transpose_const); } - auto fully_connected = std::make_shared(data, weights_path, ov::Rank(3)); + + auto fully_connected = std::make_shared( + data, + weights_path, + std::make_shared(ov::element::undefined, ov::Shape{0})); + return std::make_shared(ov::NodeVector{fully_connected}, ov::ParameterVector{data}); } diff --git a/src/plugins/intel_cpu/tests/unit/transformations/split_fc_test.cpp b/src/plugins/intel_cpu/tests/unit/transformations/split_fc_test.cpp deleted file mode 100644 index 4c955ec5286813..00000000000000 --- a/src/plugins/intel_cpu/tests/unit/transformations/split_fc_test.cpp +++ /dev/null @@ -1,280 +0,0 @@ -// Copyright (C) 2018-2024 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include - -#include -#include - -#include -#include -#include -#include -#include -#include -#include "openvino/core/visibility.hpp" -#include -#include - -#include "common_test_utils/ov_test_utils.hpp" -#include "transformations/rt_info/decompression.hpp" - -using namespace testing; -using namespace ov::intel_cpu; - -#if defined (OPENVINO_ARCH_ARM) && defined(__linux__) -// Ticket: 153166 -TEST_F(TransformationTestsF, DISABLED_SplitFCTest) { -#else -TEST_F(TransformationTestsF, SplitFCTest) { -#endif - disable_rt_info_check(); - { - auto src = std::make_shared(ov::element::f32, ov::Shape{ 3, 4096, 1 }); - auto transpose_constant_src = 
ov::opset1::Constant::create(ov::element::i32, ov::Shape{ 3 }, { 0, 2, 1 }); - auto transpose_src = std::make_shared(src, transpose_constant_src); - - auto wgt = ov::opset1::Constant::create(ov::element::f32, ov::Shape{ 2048, 4096 }, { 12.34 }); - - auto fc = std::make_shared(transpose_src, wgt, ov::Rank(3)); - model = std::make_shared(ov::NodeVector{fc}, ov::ParameterVector{src}); - manager.register_pass(1); - } - { - auto src = std::make_shared(ov::element::f32, ov::Shape{ 3, 4096, 1 }); - auto transpose_constant_src = ov::opset1::Constant::create(ov::element::i32, ov::Shape{ 3 }, { 0, 2, 1 }); - auto transpose_src = std::make_shared(src, transpose_constant_src); - - auto wgt = ov::opset1::Constant::create(ov::element::f32, ov::Shape{ 2048, 4096 }, { 12.34 }); - - auto split_dim_node = std::make_shared(ov::element::i32, ov::Shape{}, 0); - auto split_length = ov::opset1::Constant::create(ov::element::i32, ov::Shape{2}, {1024, 1024}); - auto split_wgts = std::make_shared(wgt, split_dim_node, split_length); - - auto fc0 = std::make_shared(transpose_src, split_wgts->output(0), ov::Rank(3)); - auto fc1 = std::make_shared(transpose_src, split_wgts->output(1), ov::Rank(3)); - - ov::NodeVector concat_args({fc0, fc1}); - constexpr size_t concat_dim = -1; - auto concat = std::make_shared(concat_args, concat_dim); - model_ref = std::make_shared(ov::NodeVector{concat}, ov::ParameterVector{src}); - } -} - -#if defined (OPENVINO_ARCH_ARM) && defined(__linux__) -// Ticket: 153166 -TEST_F(TransformationTestsF, DISABLED_SplitFCTest_int8_weight) { -#else -TEST_F(TransformationTestsF, SplitFCTest_int8_weight) { -#endif - disable_rt_info_check(); - { - auto src = std::make_shared(ov::element::f32, ov::Shape{3, 4096, 1}); - auto transpose_constant_src = ov::opset1::Constant::create(ov::element::i32, ov::Shape{3}, {0, 2, 1}); - auto transpose_src = std::make_shared(src, transpose_constant_src); - - auto wgt = ov::opset1::Constant::create(ov::element::u8, ov::Shape{2048, 4096}, {123}); 
- auto cvt_wgt = std::make_shared(wgt, ov::element::f32); - - auto zp = ov::opset1::Constant::create(ov::element::u8, ov::Shape{2048, 1}, {1}); - auto cvt_zp = std::make_shared(zp, ov::element::f32); - - auto sub = std::make_shared(cvt_wgt, cvt_zp); - - auto mul_const = ov::opset1::Constant::create(ov::element::f32, ov::Shape{2048, 1}, {0.2}); - auto mul = std::make_shared(sub, mul_const); - - auto fc = std::make_shared(transpose_src, mul, ov::Rank(3)); - model = std::make_shared(ov::NodeVector{fc}, ov::ParameterVector{src}); - manager.register_pass(1); - } - { - auto src = std::make_shared(ov::element::f32, ov::Shape{ 3, 4096, 1 }); - auto transpose_constant_src = ov::opset1::Constant::create(ov::element::i32, ov::Shape{ 3 }, { 0, 2, 1 }); - auto transpose_src = std::make_shared(src, transpose_constant_src); - - auto wgt = ov::opset1::Constant::create(ov::element::u8, ov::Shape{ 2048, 4096 }, { 123 }); - auto cvt_wgt = std::make_shared(wgt, ov::element::f32); - - auto split_dim_node = std::make_shared(ov::element::i32, ov::Shape{}, 0); - auto split_length = ov::opset1::Constant::create(ov::element::i32, ov::Shape{2}, {1024, 1024}); - - auto split_wgts = std::make_shared(wgt, split_dim_node, split_length); - auto cvt_wgt0 = std::make_shared(split_wgts->output(0), ov::element::f32); - auto cvt_wgt1 = std::make_shared(split_wgts->output(1), ov::element::f32); - - auto zp = ov::opset1::Constant::create(ov::element::u8, ov::Shape{2048, 1}, {1}); - auto split_zp = std::make_shared(zp, split_dim_node, split_length); - - auto cvt_zp0 = std::make_shared(split_zp->output(0), ov::element::f32); - auto cvt_zp1 = std::make_shared(split_zp->output(1), ov::element::f32); - - auto sub0 = std::make_shared(cvt_wgt0, cvt_zp0); - auto sub1 = std::make_shared(cvt_wgt1, cvt_zp1); - - auto mul_const = ov::opset1::Constant::create(ov::element::f32, ov::Shape{2048, 1}, {0.2}); - auto split_mul_const = std::make_shared(mul_const, split_dim_node, split_length); - - auto mul0 = 
std::make_shared(sub0, split_mul_const->output(0)); - auto mul1 = std::make_shared(sub1, split_mul_const->output(1)); - - auto fc0 = std::make_shared(transpose_src, mul0, ov::Rank(3)); - auto fc1 = std::make_shared(transpose_src, mul1, ov::Rank(3)); - - ov::NodeVector concat_args({fc0, fc1}); - constexpr size_t concat_dim = -1; - auto concat = std::make_shared(concat_args, concat_dim); - model_ref = std::make_shared(ov::NodeVector{concat}, ov::ParameterVector{src}); - } -} - -#if defined (OPENVINO_ARCH_ARM) && defined(__linux__) -// Ticket: 153166 -TEST_F(TransformationTestsF, DISABLED_SplitFCTest_int4_weight) { -#else -TEST_F(TransformationTestsF, SplitFCTest_int4_weight) { -#endif - disable_rt_info_check(); - { - auto src = std::make_shared(ov::element::f32, ov::Shape{3, 4096, 1}); - auto transpose_constant_src = ov::opset1::Constant::create(ov::element::i32, ov::Shape{3}, {0, 2, 1}); - auto transpose_src = std::make_shared(src, transpose_constant_src); - - auto wgt = ov::opset1::Constant::create(ov::element::u4, ov::Shape{2048, 4096}, {12}); - auto cvt_wgt = std::make_shared(wgt, ov::element::f32); - - auto zp = ov::opset1::Constant::create(ov::element::u4, ov::Shape{2048, 1}, {1}); - auto cvt_zp = std::make_shared(zp, ov::element::f32); - - auto sub = std::make_shared(cvt_wgt, cvt_zp); - - auto mul_const = ov::opset1::Constant::create(ov::element::f32, ov::Shape{2048, 1}, {0.2}); - auto mul = std::make_shared(sub, mul_const); - - auto fc = std::make_shared(transpose_src, mul, ov::Rank(3)); - model = std::make_shared(ov::NodeVector{fc}, ov::ParameterVector{src}); - manager.register_pass(1); - } - { - auto src = std::make_shared(ov::element::f32, ov::Shape{3, 4096, 1}); - auto transpose_constant_src = ov::opset1::Constant::create(ov::element::i32, ov::Shape{3}, {0, 2, 1}); - auto transpose_src = std::make_shared(src, transpose_constant_src); - - auto wgt = ov::opset1::Constant::create(ov::element::u4, ov::Shape{2048, 4096}, {12}); - auto cvt_wgt_i8 = 
std::make_shared(wgt, ov::element::i8); - - auto split_dim_node = std::make_shared(ov::element::i32, ov::Shape{}, 0); - auto split_length = ov::opset1::Constant::create(ov::element::i32, ov::Shape{2}, {1024, 1024}); - - auto split_wgts = std::make_shared(cvt_wgt_i8, split_dim_node, split_length); - auto cvt_wgt0_u4 = std::make_shared(split_wgts->output(0), ov::element::u4); - auto cvt_wgt1_u4 = std::make_shared(split_wgts->output(1), ov::element::u4); - auto cvt_wgt0_f32 = std::make_shared(cvt_wgt0_u4, ov::element::f32); - auto cvt_wgt1_f32 = std::make_shared(cvt_wgt1_u4, ov::element::f32); - - auto zp = ov::opset1::Constant::create(ov::element::u4, ov::Shape{2048, 1}, {1}); - auto cvt_zp_i8 = std::make_shared(zp, ov::element::i8); - auto split_zp = std::make_shared(cvt_zp_i8, split_dim_node, split_length); - - auto cvt_zp0_u4 = std::make_shared(split_zp->output(0), ov::element::u4); - auto cvt_zp1_u4 = std::make_shared(split_zp->output(1), ov::element::u4); - auto cvt_zp0_f32 = std::make_shared(cvt_zp0_u4, ov::element::f32); - auto cvt_zp1_f32 = std::make_shared(cvt_zp1_u4, ov::element::f32); - - auto sub0 = std::make_shared(cvt_wgt0_f32, cvt_zp0_f32); - auto sub1 = std::make_shared(cvt_wgt1_f32, cvt_zp1_f32); - - auto mul_const = ov::opset1::Constant::create(ov::element::f32, ov::Shape{2048, 1}, {0.2}); - auto split_mul_const = std::make_shared(mul_const, split_dim_node, split_length); - - auto mul0 = std::make_shared(sub0, split_mul_const->output(0)); - auto mul1 = std::make_shared(sub1, split_mul_const->output(1)); - - auto fc0 = std::make_shared(transpose_src, mul0, ov::Rank(3)); - auto fc1 = std::make_shared(transpose_src, mul1, ov::Rank(3)); - - ov::NodeVector concat_args({fc0, fc1}); - constexpr size_t concat_dim = -1; - auto concat = std::make_shared(concat_args, concat_dim); - model_ref = std::make_shared(ov::NodeVector{concat}, ov::ParameterVector{src}); - } -} - -#if (defined OPENVINO_ARCH_ARM && defined(__linux__)) -// Ticket: 153166 
-TEST_F(TransformationTestsF, DISABLED_SplitFCTest_int4_weight_reshape) { -#else -TEST_F(TransformationTestsF, SplitFCTest_int4_weight_reshape) { -#endif - disable_rt_info_check(); - { - auto src = std::make_shared(ov::element::f32, ov::Shape{ 3, 2048, 1 }); - auto transpose_constant_src = ov::opset1::Constant::create(ov::element::i32, ov::Shape{ 3 }, { 0, 2, 1 }); - auto transpose_src = std::make_shared(src, transpose_constant_src); - - auto wgt = ov::opset1::Constant::create(ov::element::u4, ov::Shape{ 4096, 2, 1024}, { 12 }); - auto cvt_wgt = std::make_shared(wgt, ov::element::f32); - - auto zp = ov::opset1::Constant::create(ov::element::u4, ov::Shape{1}, { 1 }); - auto cvt_zp = std::make_shared(zp, ov::element::f32); - - auto sub = std::make_shared(cvt_wgt, cvt_zp); - - auto mul_const = ov::opset1::Constant::create(ov::element::f32, ov::Shape{4096, 2, 1}, {0.2}); - auto mul = std::make_shared(sub, mul_const); - - auto res_const = ov::opset1::Constant::create(ov::element::i32, ov::Shape{2}, {4096, 2048}); - auto reshape = std::make_shared(mul, res_const, false); - - auto fc = std::make_shared(transpose_src, reshape, ov::Rank(3)); - model = std::make_shared(ov::NodeVector{fc}, ov::ParameterVector{src}); - manager.register_pass(1); - } - { - auto src = std::make_shared(ov::element::f32, ov::Shape{ 3, 2048, 1 }); - auto transpose_constant_src = ov::opset1::Constant::create(ov::element::i32, ov::Shape{ 3 }, { 0, 2, 1 }); - auto transpose_src = std::make_shared(src, transpose_constant_src); - - auto wgt = ov::opset1::Constant::create(ov::element::u4, ov::Shape{ 4096, 2, 1024 }, { 12 }); - auto cvt_wgt_i8 = std::make_shared(wgt, ov::element::i8); - - auto split_dim_node = std::make_shared(ov::element::i32, ov::Shape{}, 0); - auto split_length = ov::opset1::Constant::create(ov::element::i32, ov::Shape{2}, {2048, 2048}); - - auto split_wgts = std::make_shared(cvt_wgt_i8, split_dim_node, split_length); - auto cvt_wgt0_u4 = std::make_shared(split_wgts->output(0), 
ov::element::u4); - auto cvt_wgt1_u4 = std::make_shared(split_wgts->output(1), ov::element::u4); - auto cvt_wgt0_f32 = std::make_shared(cvt_wgt0_u4, ov::element::f32); - auto cvt_wgt1_f32 = std::make_shared(cvt_wgt1_u4, ov::element::f32); - - auto zp = ov::opset1::Constant::create(ov::element::u4, ov::Shape{1}, { 1 }); - auto zp0 = std::make_shared(zp->get_element_type(), zp->get_shape(), zp->get_data_ptr()); - auto zp1 = std::make_shared(zp->get_element_type(), zp->get_shape(), zp->get_data_ptr()); - - auto cvt_zp0 = std::make_shared(zp0, ov::element::f32); - auto cvt_zp1 = std::make_shared(zp1, ov::element::f32); - - auto sub0 = std::make_shared(cvt_wgt0_f32, cvt_zp0); - auto sub1 = std::make_shared(cvt_wgt1_f32, cvt_zp1); - - auto mul_const = ov::opset1::Constant::create(ov::element::f32, ov::Shape{4096, 2, 1}, {0.2}); - auto split_mul_const = std::make_shared(mul_const, split_dim_node, split_length); - - auto mul0 = std::make_shared(sub0, split_mul_const->output(0)); - auto mul1 = std::make_shared(sub1, split_mul_const->output(1)); - - std::vector reshape_pattern_vec = {2048, 2048}; - auto reshape_pattern = std::make_shared(ov::element::i32, ov::Shape{2}, reshape_pattern_vec); - auto reshape0 = std::make_shared(mul0, reshape_pattern, false); - auto reshape1 = std::make_shared(mul1, reshape_pattern, false); - - auto fc0 = std::make_shared(transpose_src, reshape0, ov::Rank(3)); - auto fc1 = std::make_shared(transpose_src, reshape1, ov::Rank(3)); - - ov::NodeVector concat_args({fc0, fc1}); - constexpr size_t concat_dim = -1; - auto concat = std::make_shared(concat_args, concat_dim); - model_ref = std::make_shared(ov::NodeVector{concat}, ov::ParameterVector{src}); - } -} diff --git a/src/tests/test_utils/common_test_utils/src/ov_test_utils.cpp b/src/tests/test_utils/common_test_utils/src/ov_test_utils.cpp index 8ca920d421040f..d781d92b57052a 100644 --- a/src/tests/test_utils/common_test_utils/src/ov_test_utils.cpp +++ 
b/src/tests/test_utils/common_test_utils/src/ov_test_utils.cpp @@ -88,6 +88,7 @@ void TransformationTestsF::TearDown() { ASSERT_TRUE(res.valid) << res.message; comparator.disable(FunctionsComparator::CmpValues::ACCURACY); } + auto res = comparator.compare(model, model_ref); ASSERT_TRUE(res.valid) << res.message; } From 0762993323c509eeffd2cae48492607dac936903 Mon Sep 17 00:00:00 2001 From: Andrzej Kopytko Date: Tue, 10 Dec 2024 08:41:52 +0100 Subject: [PATCH 30/43] Docs Port for sitemap update to master (#27977) ### Details: - *item1* - *...* ### Tickets: - *ticket-id* --- .../openvino_custom_sphinx_sitemap/__init__.py | 2 +- docs/sphinx_setup/_static/js/custom.js | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/openvino_custom_sphinx_sitemap/openvino_custom_sphinx_sitemap/__init__.py b/docs/openvino_custom_sphinx_sitemap/openvino_custom_sphinx_sitemap/__init__.py index 6bdd3288f8069c..c578b82c360a53 100644 --- a/docs/openvino_custom_sphinx_sitemap/openvino_custom_sphinx_sitemap/__init__.py +++ b/docs/openvino_custom_sphinx_sitemap/openvino_custom_sphinx_sitemap/__init__.py @@ -155,6 +155,6 @@ def extract_hierarchy(link): return ';'.join(hierarchy) def format_segment(segment): - if segment == 'c_cpp_api': segment = 'c_c++_api' + if segment == 'c_cpp_api': segment = 'C/C++_api' return ' '.join(word.capitalize() for word in segment.replace('-', ' ').replace('_', ' ').split()) \ No newline at end of file diff --git a/docs/sphinx_setup/_static/js/custom.js b/docs/sphinx_setup/_static/js/custom.js index 241f8895ee1c61..95f9549959e102 100644 --- a/docs/sphinx_setup/_static/js/custom.js +++ b/docs/sphinx_setup/_static/js/custom.js @@ -189,7 +189,7 @@ function getCurrentVersion() { if (wordAfterDomain === 'cn') { wordAfterDomain = link[2]; } - if (["index.html", "404.html", "", "latest"].indexOf(wordAfterDomain) >= 0) { + if (["index.html", "404.html", ""].indexOf(wordAfterDomain) >= 0) { /* * If this landing page, 404 or domain.com we should get 
first version * */ @@ -426,7 +426,7 @@ document.addEventListener('DOMContentLoaded', function () { const searchInterfaceSa = document.querySelector("#sa-search"); const searchInterface = document.querySelector("#search"); const currentVersion = getCurrentVersion(); - + await initializeSearchInterface(searchInterfaceSa, currentVersion); await initializeSearchInterface(searchInterface); From be0ab30ac93be815a34ee20a92348b3220bbf5e1 Mon Sep 17 00:00:00 2001 From: Roman Kazantsev Date: Tue, 10 Dec 2024 11:56:06 +0400 Subject: [PATCH 31/43] [JAX FE] Support square operation (#27978) **Details:** It appears since JAX 0.4.36 **Ticket:** 158994 Signed-off-by: Kazantsev, Roman --- src/frontends/jax/src/op/square.cpp | 28 ++++++++++++++ src/frontends/jax/src/op_table.cpp | 2 + tests/constraints.txt | 6 +-- tests/layer_tests/jax_tests/test_square.py | 44 ++++++++++++++++++++++ 4 files changed, 77 insertions(+), 3 deletions(-) create mode 100644 src/frontends/jax/src/op/square.cpp create mode 100644 tests/layer_tests/jax_tests/test_square.py diff --git a/src/frontends/jax/src/op/square.cpp b/src/frontends/jax/src/op/square.cpp new file mode 100644 index 00000000000000..268debb7992ba8 --- /dev/null +++ b/src/frontends/jax/src/op/square.cpp @@ -0,0 +1,28 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/frontend/jax/node_context.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/power.hpp" +#include "openvino/op/squeeze.hpp" +#include "utils.hpp" + +namespace ov { +namespace frontend { +namespace jax { +namespace op { + +using namespace ov::op; + +OutputVector translate_square(const NodeContext& context) { + num_inputs_check(context, 1, 1); + auto x = context.get_input(0); + auto const_two = create_same_type_const_scalar(x, 2); + return {std::make_shared(x, const_two)}; +}; + +} // namespace op +} // namespace jax +} // namespace frontend +} // namespace ov diff --git 
a/src/frontends/jax/src/op_table.cpp b/src/frontends/jax/src/op_table.cpp index 98f22452c5afab..3ca58745bc1909 100644 --- a/src/frontends/jax/src/op_table.cpp +++ b/src/frontends/jax/src/op_table.cpp @@ -53,6 +53,7 @@ OP_CONVERTER(translate_reduce_window_sum); OP_CONVERTER(translate_reshape); OP_CONVERTER(translate_rsqrt); OP_CONVERTER(translate_slice); +OP_CONVERTER(translate_square); OP_CONVERTER(translate_squeeze); OP_CONVERTER(translate_transpose); @@ -92,6 +93,7 @@ const std::map get_supported_ops_jaxpr() { {"rsqrt", op::translate_rsqrt}, {"reshape", op::translate_reshape}, {"slice", op::translate_slice}, + {"square", op::translate_square}, {"sqrt", op::translate_1to1_match_1_input}, {"squeeze", op::translate_squeeze}, {"stop_gradient", op::skip_node}, diff --git a/tests/constraints.txt b/tests/constraints.txt index 004a2c65b5e474..4f46cd0cc8b2e9 100644 --- a/tests/constraints.txt +++ b/tests/constraints.txt @@ -21,11 +21,11 @@ pytest>=5.0,<8.4 pytest-dependency==0.5.1 pytest-html==4.1.1 pytest-timeout==2.3.1 -jax<=0.4.35 -jaxlib<=0.4.35 +jax<=0.4.36 +jaxlib<=0.4.36 kornia==0.7.0 networkx<=3.3 -flax<=0.10.0 +flax<=0.10.2 --extra-index-url https://download.pytorch.org/whl/cpu torch~=2.5.1; platform_system != "Darwin" or platform_machine != "x86_64" diff --git a/tests/layer_tests/jax_tests/test_square.py b/tests/layer_tests/jax_tests/test_square.py new file mode 100644 index 00000000000000..32e842d182e90e --- /dev/null +++ b/tests/layer_tests/jax_tests/test_square.py @@ -0,0 +1,44 @@ +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import jax +import numpy as np +import pytest +from jax import numpy as jnp + +from jax_layer_test_class import JaxLayerTest + +rng = np.random.default_rng(34455) + + +class TestSquare(JaxLayerTest): + def _prepare_input(self): + if np.issubdtype(self.input_type, np.floating): + x = rng.uniform(-8.0, 8.0, self.input_shape).astype(self.input_type) + elif np.issubdtype(self.input_type, 
np.signedinteger): + x = rng.integers(-8, 8, self.input_shape).astype(self.input_type) + else: + x = rng.integers(0, 8, self.input_shape).astype(self.input_type) + x = jnp.array(x) + return [x] + + def create_model(self, input_shape, input_type): + self.input_shape = input_shape + self.input_type = input_type + + def jax_square(x): + return jax.numpy.square(x) + + return jax_square, None, None + + @pytest.mark.parametrize("input_shape", [[2], [3, 4]]) + @pytest.mark.parametrize("input_type", [np.int8, np.uint8, np.int16, np.uint16, + np.int32, np.uint32, np.int64, np.uint64, + np.float16, np.float32, np.float64]) + @pytest.mark.nightly + @pytest.mark.precommit + @pytest.mark.precommit_jax_fe + def test_square(self, ie_device, precision, ir_version, input_shape, input_type): + self._test(*self.create_model(input_shape, input_type), + ie_device, precision, + ir_version) From 1b480cba7b8bd55a46345868567af6f864e9ff39 Mon Sep 17 00:00:00 2001 From: Oleg Pipikin Date: Tue, 10 Dec 2024 13:23:17 +0100 Subject: [PATCH 32/43] New plugins property to pass mmap buffer (#27981) ### Details: - Replacement for https://github.com/openvinotoolkit/openvino/pull/27644 ### Tickets: - CVS-154602 - CVS-157192 --- .../openvino/runtime/internal_properties.hpp | 7 + src/inference/src/cache_manager.hpp | 6 +- src/inference/src/dev/core_impl.cpp | 5 +- .../tests/functional/caching_test.cpp | 136 ++++++++++++++++++ src/plugins/intel_cpu/src/plugin.cpp | 9 +- src/plugins/intel_cpu/src/utils/serialize.cpp | 13 +- src/plugins/intel_cpu/src/utils/serialize.hpp | 7 +- src/plugins/intel_gpu/src/plugin/plugin.cpp | 6 + 8 files changed, 178 insertions(+), 11 deletions(-) diff --git a/src/inference/dev_api/openvino/runtime/internal_properties.hpp b/src/inference/dev_api/openvino/runtime/internal_properties.hpp index 60d6b66cfda897..bec304104581ac 100644 --- a/src/inference/dev_api/openvino/runtime/internal_properties.hpp +++ b/src/inference/dev_api/openvino/runtime/internal_properties.hpp @@ -9,6 +9,7 
@@ #pragma once +#include "openvino/runtime/aligned_buffer.hpp" #include "openvino/runtime/properties.hpp" #include "openvino/runtime/threading/istreams_executor.hpp" @@ -36,6 +37,12 @@ static constexpr Property, PropertyMutability::RO> cac */ static constexpr Property caching_with_mmap{"CACHING_WITH_MMAP"}; +/** + * @brief Property to get a ov::AlignedBuffer with cached model + * @ingroup ov_dev_api_plugin_api + */ +static constexpr Property, PropertyMutability::RW> cached_model_buffer{"CACHED_MODEL_BUFFER"}; + /** * @brief Allow to create exclusive_async_requests with one executor * @ingroup ov_dev_api_plugin_api diff --git a/src/inference/src/cache_manager.hpp b/src/inference/src/cache_manager.hpp index c441811c3cfd02..82813e5dd4788f 100644 --- a/src/inference/src/cache_manager.hpp +++ b/src/inference/src/cache_manager.hpp @@ -69,7 +69,7 @@ class ICacheManager { /** * @brief Function passing created input stream */ - using StreamReader = std::function; + using StreamReader = std::function)>; /** * @brief Callback when OpenVINO intends to read model from cache @@ -143,10 +143,10 @@ class FileStorageCacheManager final : public ICacheManager { std::make_shared>>(mmap->data(), mmap->size(), mmap); OwningSharedStreamBuffer buf(shared_buffer); std::istream stream(&buf); - reader(stream); + reader(stream, shared_buffer); } else { std::ifstream stream(blob_file_name, std::ios_base::binary); - reader(stream); + reader(stream, nullptr); } } } diff --git a/src/inference/src/dev/core_impl.cpp b/src/inference/src/dev/core_impl.cpp index 244d27b5eebb67..673f6fd569a11e 100644 --- a/src/inference/src/dev/core_impl.cpp +++ b/src/inference/src/dev/core_impl.cpp @@ -1413,7 +1413,7 @@ ov::SoPtr ov::CoreImpl::load_model_from_cache( cacheContent.blobId, coreConfig.get_enable_mmap() && ov::util::contains(plugin.get_property(ov::internal::supported_properties), ov::internal::caching_with_mmap), - [&](std::istream& networkStream) { + [&](std::istream& networkStream, std::shared_ptr 
model_buffer) { OV_ITT_SCOPE(FIRST_INFERENCE, ov::itt::domains::LoadTime, "Core::load_model_from_cache::ReadStreamAndImport"); @@ -1459,6 +1459,9 @@ ov::SoPtr ov::CoreImpl::load_model_from_cache( update_config[ov::weights_path.name()] = weights_path; } } + if (model_buffer) { + update_config[ov::internal::cached_model_buffer.name()] = model_buffer; + } compiled_model = context ? plugin.import_model(networkStream, context, update_config) : plugin.import_model(networkStream, update_config); }); diff --git a/src/inference/tests/functional/caching_test.cpp b/src/inference/tests/functional/caching_test.cpp index 5b01af9a22cde8..6b1c7f938ae731 100644 --- a/src/inference/tests/functional/caching_test.cpp +++ b/src/inference/tests/functional/caching_test.cpp @@ -2424,6 +2424,142 @@ TEST_P(CachingTest, Load_threads) { std::cout << "Caching Load multiple threads test completed. Tried " << index << " times" << std::endl; } +TEST_P(CachingTest, Load_mmap) { + ON_CALL(*mockPlugin, import_model(_, _)).WillByDefault(Invoke([&](std::istream& istr, const ov::AnyMap& config) { + if (m_checkConfigCb) { + m_checkConfigCb(config); + } + std::shared_ptr model_buffer; + if (config.count(ov::internal::cached_model_buffer.name())) + model_buffer = config.at(ov::internal::cached_model_buffer.name()).as>(); + EXPECT_TRUE(model_buffer); + + std::string name; + istr >> name; + char space; + istr.read(&space, 1); + std::lock_guard lock(mock_creation_mutex); + return create_mock_compiled_model(m_models[name], mockPlugin); + })); + + ON_CALL(*mockPlugin, get_property(ov::internal::supported_properties.name(), _)) + .WillByDefault(Invoke([&](const std::string&, const ov::AnyMap&) { + return std::vector{ov::internal::caching_properties.name(), + ov::internal::caching_with_mmap.name()}; + })); + EXPECT_CALL(*mockPlugin, get_property(_, _)).Times(AnyNumber()); + EXPECT_CALL(*mockPlugin, query_model(_, _)).Times(AnyNumber()); + EXPECT_CALL(*mockPlugin, get_property(ov::device::architecture.name(), 
_)).Times(AnyNumber()); + EXPECT_CALL(*mockPlugin, get_property(ov::internal::caching_properties.name(), _)).Times(AnyNumber()); + if (m_remoteContext) { + return; // skip the remote Context test for Multi plugin + } + int index = 0; + m_post_mock_net_callbacks.emplace_back([&](MockICompiledModelImpl& net) { + EXPECT_CALL(net, export_model(_)).Times(1); + }); + MkDirGuard guard(m_cacheDir); + EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, compile_model(A&>(), _)).Times(1); + EXPECT_CALL(*mockPlugin, import_model(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, import_model(_, _)).Times(1); + testLoad([&](ov::Core& core) { + core.set_property({{ov::cache_dir.name(), m_cacheDir}}); + m_testFunction(core); + m_testFunction(core); + }); + std::cout << "Caching Load multiple threads test completed. Tried " << index << " times" << std::endl; +} + +TEST_P(CachingTest, Load_mmap_is_disabled) { + ON_CALL(*mockPlugin, import_model(_, _)).WillByDefault(Invoke([&](std::istream& istr, const ov::AnyMap& config) { + if (m_checkConfigCb) { + m_checkConfigCb(config); + } + std::shared_ptr model_buffer; + if (config.count(ov::internal::cached_model_buffer.name())) + model_buffer = config.at(ov::internal::cached_model_buffer.name()).as>(); + EXPECT_FALSE(model_buffer); + + std::string name; + istr >> name; + char space; + istr.read(&space, 1); + std::lock_guard lock(mock_creation_mutex); + return create_mock_compiled_model(m_models[name], mockPlugin); + })); + ON_CALL(*mockPlugin, get_property(ov::internal::supported_properties.name(), _)) + .WillByDefault(Invoke([&](const std::string&, const ov::AnyMap&) { + return std::vector{ov::internal::caching_properties.name(), + ov::internal::caching_with_mmap.name()}; + })); + EXPECT_CALL(*mockPlugin, get_property(_, _)).Times(AnyNumber()); + EXPECT_CALL(*mockPlugin, query_model(_, _)).Times(AnyNumber()); + EXPECT_CALL(*mockPlugin, get_property(ov::device::architecture.name(), _)).Times(AnyNumber()); + 
EXPECT_CALL(*mockPlugin, get_property(ov::internal::caching_properties.name(), _)).Times(AnyNumber()); + if (m_remoteContext) { + return; // skip the remote Context test for Multi plugin + } + int index = 0; + m_post_mock_net_callbacks.emplace_back([&](MockICompiledModelImpl& net) { + EXPECT_CALL(net, export_model(_)).Times(1); + }); + MkDirGuard guard(m_cacheDir); + EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, compile_model(A&>(), _)).Times(1); + EXPECT_CALL(*mockPlugin, import_model(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, import_model(_, _)).Times(1); + testLoad([&](ov::Core& core) { + core.set_property({{ov::cache_dir.name(), m_cacheDir}}); + core.set_property({ov::enable_mmap(false)}); + m_testFunction(core); + m_testFunction(core); + }); + std::cout << "Caching Load multiple threads test completed. Tried " << index << " times" << std::endl; +} + +TEST_P(CachingTest, Load_mmap_is_not_supported_by_plugin) { + ON_CALL(*mockPlugin, import_model(_, _)).WillByDefault(Invoke([&](std::istream& istr, const ov::AnyMap& config) { + if (m_checkConfigCb) { + m_checkConfigCb(config); + } + std::shared_ptr model_buffer; + if (config.count(ov::internal::cached_model_buffer.name())) + model_buffer = config.at(ov::internal::cached_model_buffer.name()).as>(); + EXPECT_FALSE(model_buffer); + + std::string name; + istr >> name; + char space; + istr.read(&space, 1); + std::lock_guard lock(mock_creation_mutex); + return create_mock_compiled_model(m_models[name], mockPlugin); + })); + EXPECT_CALL(*mockPlugin, get_property(_, _)).Times(AnyNumber()); + EXPECT_CALL(*mockPlugin, query_model(_, _)).Times(AnyNumber()); + EXPECT_CALL(*mockPlugin, get_property(ov::device::architecture.name(), _)).Times(AnyNumber()); + EXPECT_CALL(*mockPlugin, get_property(ov::internal::caching_properties.name(), _)).Times(AnyNumber()); + if (m_remoteContext) { + return; // skip the remote Context test for Multi plugin + } + int index = 0; + 
m_post_mock_net_callbacks.emplace_back([&](MockICompiledModelImpl& net) { + EXPECT_CALL(net, export_model(_)).Times(1); + }); + MkDirGuard guard(m_cacheDir); + EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, compile_model(A&>(), _)).Times(1); + EXPECT_CALL(*mockPlugin, import_model(_, _, _)).Times(0); + EXPECT_CALL(*mockPlugin, import_model(_, _)).Times(1); + testLoad([&](ov::Core& core) { + core.set_property({{ov::cache_dir.name(), m_cacheDir}}); + core.set_property({ov::enable_mmap(true)}); + m_testFunction(core); + m_testFunction(core); + }); + std::cout << "Caching Load multiple threads test completed. Tried " << index << " times" << std::endl; +} + #if defined(ENABLE_OV_IR_FRONTEND) static std::string getTestCaseName(const testing::TestParamInfo>& obj) { diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp index 6fdbf7a4ea4dee..33cb87b337bfef 100644 --- a/src/plugins/intel_cpu/src/plugin.cpp +++ b/src/plugins/intel_cpu/src/plugin.cpp @@ -565,8 +565,16 @@ std::shared_ptr Plugin::import_model(std::istream& model_str decript_from_string = true; } + auto _config = config; + std::shared_ptr model_buffer; + if (_config.count(ov::internal::cached_model_buffer.name())) { + model_buffer = _config.at(ov::internal::cached_model_buffer.name()).as>(); + _config.erase(ov::internal::cached_model_buffer.name()); + } + ModelDeserializer deserializer( model_stream, + model_buffer, [this](const std::shared_ptr& model, const std::shared_ptr& weights) { return get_core()->read_model(model, weights); }, @@ -579,7 +587,6 @@ std::shared_ptr Plugin::import_model(std::istream& model_str Config::ModelType modelType = getModelType(model); conf.applyRtInfo(model); // check ov::loaded_from_cache property and erase it to avoid exception in readProperties. 
- auto _config = config; const auto& it = _config.find(ov::loaded_from_cache.name()); bool loaded_from_cache = false; if (it != _config.end()) { diff --git a/src/plugins/intel_cpu/src/utils/serialize.cpp b/src/plugins/intel_cpu/src/utils/serialize.cpp index 814e8d19311a8c..33d8140fbe4a84 100644 --- a/src/plugins/intel_cpu/src/utils/serialize.cpp +++ b/src/plugins/intel_cpu/src/utils/serialize.cpp @@ -30,8 +30,12 @@ void ModelSerializer::operator<<(const std::shared_ptr& model) { ////////// ModelDeserializer ////////// -ModelDeserializer::ModelDeserializer(std::istream& model_stream, ModelBuilder fn, const CacheDecrypt& decrypt_fn, bool decript_from_string) - : m_istream(model_stream), m_model_builder(std::move(fn)), m_decript_from_string(decript_from_string) { +ModelDeserializer::ModelDeserializer(std::istream& model_stream, + std::shared_ptr model_buffer, + ModelBuilder fn, + const CacheDecrypt& decrypt_fn, + bool decript_from_string) + : m_istream(model_stream), m_model_builder(std::move(fn)), m_decript_from_string(decript_from_string), m_model_buffer(model_buffer) { if (m_decript_from_string) { m_cache_decrypt.m_decrypt_str = decrypt_fn.m_decrypt_str; } else { @@ -42,9 +46,8 @@ ModelDeserializer::ModelDeserializer(std::istream& model_stream, ModelBuilder fn void ModelDeserializer::set_info(pugi::xml_node& root, std::shared_ptr& model) {} void ModelDeserializer::operator>>(std::shared_ptr& model) { - if (auto mmap_buffer = dynamic_cast(m_istream.rdbuf())) { - auto buffer = mmap_buffer->get_buffer(); - process_mmap(model, buffer); + if (m_model_buffer) { + process_mmap(model, m_model_buffer); } else { process_stream(model); } diff --git a/src/plugins/intel_cpu/src/utils/serialize.hpp b/src/plugins/intel_cpu/src/utils/serialize.hpp index 897a2c2e52f092..4dfdd6b22afbd4 100644 --- a/src/plugins/intel_cpu/src/utils/serialize.hpp +++ b/src/plugins/intel_cpu/src/utils/serialize.hpp @@ -31,7 +31,11 @@ class ModelDeserializer { public: typedef std::function(const 
std::shared_ptr&, const std::shared_ptr&)> ModelBuilder; - ModelDeserializer(std::istream& model, ModelBuilder fn, const CacheDecrypt& encrypt_fn, bool decript_from_string); + ModelDeserializer(std::istream& model, + std::shared_ptr model_buffer, + ModelBuilder fn, + const CacheDecrypt& encrypt_fn, + bool decript_from_string); virtual ~ModelDeserializer() = default; @@ -48,6 +52,7 @@ class ModelDeserializer { ModelBuilder m_model_builder; CacheDecrypt m_cache_decrypt; bool m_decript_from_string; + std::shared_ptr m_model_buffer; }; } // namespace intel_cpu diff --git a/src/plugins/intel_gpu/src/plugin/plugin.cpp b/src/plugins/intel_gpu/src/plugin/plugin.cpp index c8839472a6d962..9f9c9692b57b42 100644 --- a/src/plugins/intel_gpu/src/plugin/plugin.cpp +++ b/src/plugins/intel_gpu/src/plugin/plugin.cpp @@ -329,6 +329,12 @@ std::shared_ptr Plugin::import_model(std::istream& model, _orig_config.erase(it); } + std::shared_ptr model_buffer; + if (_orig_config.count(ov::internal::cached_model_buffer.name())) { + model_buffer = _orig_config.at(ov::internal::cached_model_buffer.name()).as>(); + _orig_config.erase(ov::internal::cached_model_buffer.name()); + } + ExecutionConfig config = m_configs_map.at(device_id); config.set_user_property(_orig_config); config.apply_user_properties(context_impl->get_engine().get_device_info()); From 348469ffca697c7b2401814e3938f104c28c31fe Mon Sep 17 00:00:00 2001 From: Evgeny Kotov Date: Tue, 10 Dec 2024 13:25:13 +0100 Subject: [PATCH 33/43] added code to dump start and end manager time (#27884) ### Details: - modify class Profiler, add additional time info dumping ### Tickets: - 157579 Co-authored-by: Ivan Tikhonov --- src/core/src/pass/manager.cpp | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/src/core/src/pass/manager.cpp b/src/core/src/pass/manager.cpp index 9168292f5284c0..a6f1fc287e221c 100644 --- a/src/core/src/pass/manager.cpp +++ b/src/core/src/pass/manager.cpp @@ -104,8 +104,8 @@ class stopwatch 
{ void stop() { if (m_active) { - auto end_time = m_clock.now(); - m_last_time = end_time - m_start_time; + m_end_time = m_clock.now(); + m_last_time = m_end_time - m_start_time; m_active = false; } } @@ -122,9 +122,17 @@ class stopwatch { return std::chrono::duration_cast(get_timer_value()).count(); } + std::chrono::nanoseconds get_start_time() const { + return std::chrono::duration_cast(m_start_time.time_since_epoch()); + } + + std::chrono::nanoseconds get_end_time() const { + return std::chrono::duration_cast(m_end_time.time_since_epoch()); + } + private: std::chrono::high_resolution_clock m_clock; - std::chrono::time_point m_start_time; + std::chrono::time_point m_start_time, m_end_time; bool m_active = false; std::chrono::nanoseconds m_last_time = std::chrono::high_resolution_clock::duration::zero(); }; @@ -221,6 +229,8 @@ class Profiler { if (is_pass_manager) { m_file << "m;" << name << ";" << stopwatch.get_timer_value().count() << ";" << (applied ? "1" : "0") << std::endl; + m_file << "m_start;" << name << ";" << stopwatch.get_start_time().count() << std::endl; + m_file << "m_end;" << name << ";" << stopwatch.get_end_time().count() << std::endl; } else { m_file << "t;" << name << ";" << m_manager_name << ";" << stopwatch.get_timer_value().count() << ";" << (applied ? "1" : "0") << std::endl; From 59188b77a95b50e68265317823668c46927ec1f5 Mon Sep 17 00:00:00 2001 From: Andrii Staikov Date: Tue, 10 Dec 2024 13:48:42 +0100 Subject: [PATCH 34/43] [TRANSFORMATIONS] Allow NOP-elimination for shapes with dynamic 0th dimension (#27939) NOP-elimination doesn't allow the removal of unnecessary data-movement operations stacked on top of each other if their shapes are dynamic, even though certain cases of dynamic shapes can be covered by NOP-elimination. Allow NOP-elimination for dynamic shapes if they have only the 0th dimension dynamic and other static, making this case compatible for such a fusion. 
- Tickets: * CVS-158394 Signed-off-by: Andrii Staikov --------- Signed-off-by: Andrii Staikov --- .../common_optimizations/nop_elimination.cpp | 40 +++++++++++++++---- .../common_optimizations/nop_elimination.cpp | 32 +++++++++++++++ 2 files changed, 64 insertions(+), 8 deletions(-) diff --git a/src/common/transformations/src/transformations/common_optimizations/nop_elimination.cpp b/src/common/transformations/src/transformations/common_optimizations/nop_elimination.cpp index 8b26d74dc2aac6..3883e94b74d33c 100644 --- a/src/common/transformations/src/transformations/common_optimizations/nop_elimination.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/nop_elimination.cpp @@ -132,6 +132,23 @@ static bool eliminate_nop(const shared_ptr& node) { return false; } +// Check if first dim is dynamic, other dims are static +static bool only_first_dim_dynamic(const PartialShape& pshape) { + if (pshape.rank().is_static() && pshape.size() > 0) { + if (pshape[0].is_dynamic()) { + for (size_t i = 1; i < pshape.size(); ++i) { + if (pshape[i].is_dynamic()) { + return false; + } + } + + return true; + } + } + + return false; +} + static bool eliminate_reshape_v1(const shared_ptr& node) { auto input = node->input_value(0); @@ -139,14 +156,17 @@ static bool eliminate_reshape_v1(const shared_ptr& node) { if (input.get_partial_shape().same_scheme(node->get_output_partial_shape(0))) return replace_output_update_name(node->output(0), input); } - // check if reshape is not identity op if (input.get_partial_shape().is_dynamic() || node->get_output_partial_shape(0).is_dynamic()) { - OPENVINO_DEBUG(node, " has dynamic shapes."); - return false; + if (!only_first_dim_dynamic(input.get_partial_shape()) || + !only_first_dim_dynamic(node->get_output_partial_shape(0))) { + OPENVINO_DEBUG(node, " has dynamic shapes with not only 0th dimension dynamic."); + return false; + } } + // remove identity op - if (input.get_shape() == node->get_output_shape(0)) { + if 
(input.get_partial_shape() == node->get_output_partial_shape(0)) { return replace_output_update_name(node->output(0), input); } // eliminate redundant reshape, squeeze, or unsqueeze @@ -156,15 +176,19 @@ static bool eliminate_reshape_v1(const shared_ptr& node) { if (input_node->get_output_target_inputs(0).size() != 1) return false; - auto shape = node->get_output_shape(0); + auto shape = node->get_output_partial_shape(0); // remove interchangeable nodes - if (input_node->get_input_partial_shape(0).is_static() && input_node->get_input_shape(0) == shape) { + if (input_node->get_input_partial_shape(0).is_static() && + input_node->get_input_partial_shape(0) == node->get_output_partial_shape(0)) { return replace_output_update_name(node->output(0), input_node->input_value(0)); } else { vector vi; - vi.assign(shape.begin(), shape.end()); - auto pat = ov::op::v0::Constant::create(element::i64, Shape{vi.size()}, vi); + vi.reserve(shape.size()); + for (const auto& dim : shape) { + vi.push_back(dim.is_dynamic() ? 
-1 : dim.get_length()); + } + auto pat = ov::op::v0::Constant::create(element::i64, Shape{shape.size()}, vi); auto new_reshape = make_shared(input.get_node()->input_value(0), pat, false); new_reshape->set_friendly_name(node->get_friendly_name()); copy_runtime_info({input_node, node}, new_reshape); diff --git a/src/common/transformations/tests/common_optimizations/nop_elimination.cpp b/src/common/transformations/tests/common_optimizations/nop_elimination.cpp index 19b5fefd79b9b0..1245bd26e0d3b2 100644 --- a/src/common/transformations/tests/common_optimizations/nop_elimination.cpp +++ b/src/common/transformations/tests/common_optimizations/nop_elimination.cpp @@ -233,6 +233,38 @@ TEST(nop_elimination, squeeze_unsqueeze_elimination_dynamic_without_squeeze_axis EXPECT_NO_THROW(pass_manager.run_passes(f)); } +TEST_F(TransformationTestsF, reshape_reshape_elimination_v1_dynamic) { + { + auto input = make_shared(element::f32, PartialShape({-1, 32, 1, 128})); + + auto top_reshape_const = op::v0::Constant::create(element::i32, Shape{4}, {-1, 32, 1, 128}); + auto top_reshape = std::make_shared(input, top_reshape_const, false); + + auto bottom_reshape_const = op::v0::Constant::create(element::i32, Shape{2}, {-1, 4096}); + auto bottom_reshape = std::make_shared(top_reshape, bottom_reshape_const, false); + + auto add_param = make_shared(element::f32, PartialShape({-1, 4096})); + auto add = std::make_shared(bottom_reshape, add_param); + model = std::make_shared(NodeVector{add}, ParameterVector{input, add_param}); + } + { + auto input = make_shared(element::f32, PartialShape({-1, 32, 1, 128})); + + auto bottom_reshape_const = op::v0::Constant::create(element::i32, Shape{2}, {-1, 4096}); + auto bottom_reshape = std::make_shared(input, bottom_reshape_const, false); + + auto add_param = make_shared(element::f32, PartialShape({-1, 4096})); + auto add = std::make_shared(bottom_reshape, add_param); + model_ref = std::make_shared(NodeVector{add}, ParameterVector{input, add_param}); + } + 
+ manager.register_pass(); + manager.run_passes(model); + + auto res = comparator.compare(model, model_ref); + ASSERT_TRUE(res.valid) << res.message; +} + TEST(nop_elimination, reshape_elimination_v1_dynamic_negative) { auto arg = std::make_shared(element::i64, PartialShape::dynamic()); auto pattern = make_shared(element::i64, PartialShape::dynamic(1)); From 535f807ea45c9c6e8e6cbe198ba12b215a4d59d8 Mon Sep 17 00:00:00 2001 From: Sebastian Golebiewski Date: Tue, 10 Dec 2024 13:57:16 +0100 Subject: [PATCH 35/43] [DOCS] Updating notebooks (#28002) --- docs/nbdoc/consts.py | 2 +- .../3D-pose-estimation-with-output.rst | 61 +- ...-segmentation-point-clouds-with-output.rst | 6 +- ...entation-point-clouds-with-output_11_1.png | 3 + .../action-recognition-webcam-with-output.rst | 6 +- ...on-recognition-webcam-with-output_22_0.png | 4 +- docs/notebooks/all_notebooks_paths.txt | 12 +- docs/notebooks/animate-anyone-with-output.rst | 263 ++++---- docs/notebooks/async-api-with-output.rst | 63 +- .../async-api-with-output_23_0.png | 4 +- docs/notebooks/auto-device-with-output.rst | 48 +- .../auto-device-with-output_14_0.png | 3 + .../auto-device-with-output_27_0.png | 4 +- .../auto-device-with-output_28_0.png | 4 +- ...visual-language-processing-with-output.rst | 4 +- docs/notebooks/catvton-with-output.rst | 53 +- ...ontrolnet-stable-diffusion-with-output.rst | 7 +- .../convert-to-openvino-with-output.rst | 8 +- .../convnext-classification-with-output.rst | 2 +- ...ss-lingual-books-alignment-with-output.rst | 6 +- ...segmentation-quantize-nncf-with-output.rst | 70 +-- ...ntation-quantize-nncf-with-output_37_1.png | 4 +- ...ddcolor-image-colorization-with-output.rst | 46 +- .../depth-anything-v2-with-output.rst | 52 +- docs/notebooks/depth-anything-with-output.rst | 32 +- .../detectron2-to-openvino-with-output.rst | 152 ++--- ...etectron2-to-openvino-with-output_22_0.jpg | 4 +- ...etectron2-to-openvino-with-output_22_0.png | 4 +- ...etectron2-to-openvino-with-output_32_0.jpg | 4 
+- ...etectron2-to-openvino-with-output_32_0.png | 4 +- ...micrafter-animating-images-with-output.rst | 339 +++++++---- docs/notebooks/efficient-sam-with-output.rst | 88 +-- .../efficient-sam-with-output_17_1.png | 4 +- .../efficient-sam-with-output_25_1.png | 4 +- .../efficient-sam-with-output_36_1.png | 4 +- .../encodec-audio-compression-with-output.rst | 20 +- .../fast-segment-anything-with-output.rst | 37 +- docs/notebooks/florence2-with-output.rst | 38 +- .../florence2-with-output_18_0.png | 4 +- .../freevc-voice-conversion-with-output.rst | 42 +- docs/notebooks/glm-edge-v-with-output.rst | 516 ++++++++++++++++ .../glm-edge-v-with-output_13_1.jpg | 3 + .../glm-edge-v-with-output_13_1.png | 3 + .../grounded-segment-anything-with-output.rst | 26 +- .../notebooks/handwritten-ocr-with-output.rst | 8 +- .../notebooks/hello-detection-with-output.rst | 6 +- .../hello-detection-with-output_11_0.png | 3 + .../hello-segmentation-with-output.rst | 10 +- .../hello-segmentation-with-output_11_1.png | 3 + docs/notebooks/hello-world-with-output.rst | 8 +- .../hello-world-with-output_11_0.png | 3 + .../hugging-face-hub-with-output.rst | 101 +-- ...nyuan-dit-image-generation-with-output.rst | 1 + ...lassification-quantization-with-output.rst | 44 +- docs/notebooks/instant-id-with-output.rst | 2 +- ...anus-multimodal-generation-with-output.rst | 472 +++++++++++++++ ...multimodal-generation-with-output_14_1.jpg | 3 + ...multimodal-generation-with-output_14_1.png | 3 + ...multimodal-generation-with-output_18_0.jpg | 3 + ...multimodal-generation-with-output_18_0.png | 3 + ...classification-to-openvino-with-output.rst | 354 +++++++++++ ...ification-to-openvino-with-output_16_0.jpg | 3 + ...ification-to-openvino-with-output_16_0.png | 3 + .../knowledge-graphs-conve-with-output.rst | 40 +- ...modal-large-language-model-with-output.rst | 30 +- ...-large-language-model-with-output_29_1.jpg | 4 +- ...-large-language-model-with-output_29_1.png | 4 +- 
...-large-language-model-with-output_48_1.png | 4 +- ...l-large-language-model-with-output_8_0.jpg | 4 +- ...l-large-language-model-with-output_8_0.png | 4 +- .../language-quantize-bert-with-output.rst | 67 +- ...a-multimodal-chatbot-genai-with-output.rst | 2 +- ...va-next-multimodal-chatbot-with-output.rst | 6 +- .../llm-agent-rag-llamaindex-with-output.rst | 2 +- .../llm-agent-react-langchain-with-output.rst | 236 ++++---- .../notebooks/llm-agent-react-with-output.rst | 86 ++- .../llm-chatbot-generate-api-with-output.rst | 83 ++- docs/notebooks/llm-chatbot-with-output.rst | 15 +- docs/notebooks/localai-with-output.rst | 220 +++++++ ...a-content-type-recognition-with-output.rst | 2 +- docs/notebooks/meter-reader-with-output.rst | 8 +- ...nicpm-v-multimodal-chatbot-with-output.rst | 2 +- .../mobileclip-video-search-with-output.rst | 97 ++- ...bileclip-video-search-with-output_12_0.png | 3 + .../modelscope-to-openvino-with-output.rst | 560 +++++++++++++++++ ...odelscope-to-openvino-with-output_12_0.jpg | 3 + ...odelscope-to-openvino-with-output_12_0.png | 3 + .../music-generation-with-output.rst | 12 +- ...o-llava-multimodal-chatbot-with-output.rst | 21 +- .../notebooks_with_binder_buttons.txt | 1 - .../notebooks_with_colab_buttons.txt | 2 - .../object-detection-with-output.rst | 14 +- docs/notebooks/omniparser-with-output.rst | 79 ++- docs/notebooks/openvino-api-with-output.rst | 56 +- docs/notebooks/openvoice-with-output.rst | 85 ++- ...ical-character-recognition-with-output.rst | 6 +- .../optimize-preprocessing-with-output.rst | 14 +- .../outetts-text-to-speech-with-output.rst | 367 +++++++++++ .../paddle-ocr-webcam-with-output.rst | 8 +- .../paddle-ocr-webcam-with-output_30_0.png | 4 +- ...to-openvino-classification-with-output.rst | 18 +- .../parler-tts-text-to-speech-with-output.rst | 221 ++++++- .../notebooks/person-tracking-with-output.rst | 133 ++-- .../person-tracking-with-output_25_0.png | 4 +- docs/notebooks/phi-3-vision-with-output.rst | 31 +- 
docs/notebooks/photo-maker-with-output.rst | 41 +- docs/notebooks/pixart-with-output.rst | 48 +- .../pixart-with-output_6_0.jpg | 4 +- .../pixart-with-output_6_0.png | 4 +- docs/notebooks/pixtral-with-output.rst | 65 +- .../notebooks/pose-estimation-with-output.rst | 4 +- .../pose-estimation-with-output_22_0.png | 4 +- .../pytorch-onnx-to-openvino-with-output.rst | 16 +- ...training-quantization-nncf-with-output.rst | 118 ++-- ...uantization-aware-training-with-output.rst | 89 +-- ...on-sparsity-aware-training-with-output.rst | 365 ++++++----- .../pytorch-to-openvino-with-output.rst | 14 +- docs/notebooks/qrcode-monster-with-output.rst | 110 ++-- .../qrcode-monster-with-output_22_1.jpg | 4 +- .../qrcode-monster-with-output_22_1.png | 4 +- .../qrcode-monster-with-output_39_0.png | 4 +- docs/notebooks/qwen2-audio-with-output.rst | 25 +- docs/notebooks/qwen2-vl-with-output.rst | 79 ++- .../rmbg-background-removal-with-output.rst | 6 +- .../segment-anything-2-image-with-output.rst | 39 +- ...ment-anything-2-image-with-output_92_0.png | 3 + .../segment-anything-2-video-with-output.rst | 50 +- ...ment-anything-2-video-with-output_40_1.png | 4 +- ...ment-anything-2-video-with-output_46_0.png | 4 +- ...-shot-image-classification-with-output.rst | 10 +- ...-image-classification-with-output_24_1.png | 4 +- ...tch-to-image-pix2pix-turbo-with-output.rst | 50 +- ...o-image-pix2pix-turbo-with-output_19_0.jpg | 4 +- ...o-image-pix2pix-turbo-with-output_19_0.png | 4 +- .../sparsity-optimization-with-output.rst | 44 +- .../speculative-sampling-with-output.rst | 32 +- ...tion-quantization-wav2vec2-with-output.rst | 136 ++--- ...hbrain-emotion-recognition-with-output.rst | 26 +- ...e-cascade-image-generation-with-output.rst | 12 +- ...cade-image-generation-with-output_29_2.jpg | 4 +- ...cade-image-generation-with-output_29_2.png | 4 +- ...scade-image-generation-with-output_8_2.jpg | 3 + ...scade-image-generation-with-output_8_2.png | 3 + ...table-diffusion-ip-adapter-with-output.rst 
| 38 +- ...-diffusion-ip-adapter-with-output_22_1.png | 4 +- ...-diffusion-ip-adapter-with-output_25_0.png | 4 +- ...-diffusion-ip-adapter-with-output_28_0.png | 4 +- ...fusion-torchdynamo-backend-with-output.rst | 9 +- ...able-diffusion-v3-torch-fx-with-output.rst | 562 +++++++++++++++++ .../stable-diffusion-xl-with-output.rst | 6 +- docs/notebooks/style-transfer-with-output.rst | 6 +- .../style-transfer-with-output_25_0.png | 4 +- .../table-question-answering-with-output.rst | 56 +- ...fication-nncf-quantization-with-output.rst | 573 ++++++++++++++++++ ...ion-nncf-quantization-with-output_10_1.png | 3 + ...ion-nncf-quantization-with-output_27_1.png | 3 + ...tion-nncf-quantization-with-output_9_1.png | 3 + ...classification-to-openvino-with-output.rst | 6 +- ...ification-to-openvino-with-output_19_0.png | 3 + ...e-segmentation-to-openvino-with-output.rst | 10 +- ...mentation-to-openvino-with-output_39_0.png | 4 +- ...ject-detection-to-openvino-with-output.rst | 23 +- ...detection-to-openvino-with-output_38_0.png | 4 +- .../text-to-image-genai-with-output.rst | 62 +- ...tflite-selfie-segmentation-with-output.rst | 34 +- ...e-selfie-segmentation-with-output_33_0.png | 4 +- .../tflite-to-openvino-with-output.rst | 24 +- .../tiny-sd-image-generation-with-output.rst | 199 +++--- ...-detection-and-recognition-with-output.rst | 10 +- .../vision-background-removal-with-output.rst | 22 +- .../vision-monodepth-with-output.rst | 39 +- docs/notebooks/wav2lip-with-output.rst | 64 +- ...isper-subtitles-generation-with-output.rst | 19 +- .../yolov10-optimization-with-output.rst | 11 - ...ov11-instance-segmentation-with-output.rst | 98 ++- ...instance-segmentation-with-output_10_1.jpg | 3 + ...instance-segmentation-with-output_10_1.png | 3 + ...instance-segmentation-with-output_46_0.png | 4 +- ...yolov11-keypoint-detection-with-output.rst | 82 ++- ...11-keypoint-detection-with-output_43_0.png | 4 +- .../yolov11-object-detection-with-output.rst | 109 ++-- 
...ov11-object-detection-with-output_10_1.jpg | 3 + ...ov11-object-detection-with-output_10_1.png | 3 + ...ov11-object-detection-with-output_43_0.png | 4 +- ...tion-with-accuracy-control-with-output.rst | 2 +- docs/notebooks/yolov8-obb-with-output.rst | 1 - .../yolov8-object-detection-with-output.rst | 12 +- .../yolov9-optimization-with-output.rst | 55 +- .../yolov9-optimization-with-output_36_0.png | 4 +- 189 files changed, 6678 insertions(+), 2619 deletions(-) create mode 100644 docs/notebooks/3D-segmentation-point-clouds-with-output_files/3D-segmentation-point-clouds-with-output_11_1.png create mode 100644 docs/notebooks/auto-device-with-output_files/auto-device-with-output_14_0.png create mode 100644 docs/notebooks/glm-edge-v-with-output.rst create mode 100644 docs/notebooks/glm-edge-v-with-output_files/glm-edge-v-with-output_13_1.jpg create mode 100644 docs/notebooks/glm-edge-v-with-output_files/glm-edge-v-with-output_13_1.png create mode 100644 docs/notebooks/hello-detection-with-output_files/hello-detection-with-output_11_0.png create mode 100644 docs/notebooks/hello-segmentation-with-output_files/hello-segmentation-with-output_11_1.png create mode 100644 docs/notebooks/hello-world-with-output_files/hello-world-with-output_11_0.png create mode 100644 docs/notebooks/janus-multimodal-generation-with-output.rst create mode 100644 docs/notebooks/janus-multimodal-generation-with-output_files/janus-multimodal-generation-with-output_14_1.jpg create mode 100644 docs/notebooks/janus-multimodal-generation-with-output_files/janus-multimodal-generation-with-output_14_1.png create mode 100644 docs/notebooks/janus-multimodal-generation-with-output_files/janus-multimodal-generation-with-output_18_0.jpg create mode 100644 docs/notebooks/janus-multimodal-generation-with-output_files/janus-multimodal-generation-with-output_18_0.png create mode 100644 docs/notebooks/jax-classification-to-openvino-with-output.rst create mode 100644 
docs/notebooks/jax-classification-to-openvino-with-output_files/jax-classification-to-openvino-with-output_16_0.jpg create mode 100644 docs/notebooks/jax-classification-to-openvino-with-output_files/jax-classification-to-openvino-with-output_16_0.png create mode 100644 docs/notebooks/localai-with-output.rst create mode 100644 docs/notebooks/mobileclip-video-search-with-output_files/mobileclip-video-search-with-output_12_0.png create mode 100644 docs/notebooks/modelscope-to-openvino-with-output.rst create mode 100644 docs/notebooks/modelscope-to-openvino-with-output_files/modelscope-to-openvino-with-output_12_0.jpg create mode 100644 docs/notebooks/modelscope-to-openvino-with-output_files/modelscope-to-openvino-with-output_12_0.png create mode 100644 docs/notebooks/outetts-text-to-speech-with-output.rst create mode 100644 docs/notebooks/segment-anything-2-image-with-output_files/segment-anything-2-image-with-output_92_0.png create mode 100644 docs/notebooks/stable-cascade-image-generation-with-output_files/stable-cascade-image-generation-with-output_8_2.jpg create mode 100644 docs/notebooks/stable-cascade-image-generation-with-output_files/stable-cascade-image-generation-with-output_8_2.png create mode 100644 docs/notebooks/stable-diffusion-v3-torch-fx-with-output.rst create mode 100644 docs/notebooks/tensorflow-bit-image-classification-nncf-quantization-with-output.rst create mode 100644 docs/notebooks/tensorflow-bit-image-classification-nncf-quantization-with-output_files/tensorflow-bit-image-classification-nncf-quantization-with-output_10_1.png create mode 100644 docs/notebooks/tensorflow-bit-image-classification-nncf-quantization-with-output_files/tensorflow-bit-image-classification-nncf-quantization-with-output_27_1.png create mode 100644 docs/notebooks/tensorflow-bit-image-classification-nncf-quantization-with-output_files/tensorflow-bit-image-classification-nncf-quantization-with-output_9_1.png create mode 100644 
docs/notebooks/tensorflow-classification-to-openvino-with-output_files/tensorflow-classification-to-openvino-with-output_19_0.png create mode 100644 docs/notebooks/yolov11-instance-segmentation-with-output_files/yolov11-instance-segmentation-with-output_10_1.jpg create mode 100644 docs/notebooks/yolov11-instance-segmentation-with-output_files/yolov11-instance-segmentation-with-output_10_1.png create mode 100644 docs/notebooks/yolov11-object-detection-with-output_files/yolov11-object-detection-with-output_10_1.jpg create mode 100644 docs/notebooks/yolov11-object-detection-with-output_files/yolov11-object-detection-with-output_10_1.png diff --git a/docs/nbdoc/consts.py b/docs/nbdoc/consts.py index 1a4d3a13049041..e5c5d4773dce4c 100644 --- a/docs/nbdoc/consts.py +++ b/docs/nbdoc/consts.py @@ -6,7 +6,7 @@ repo_owner = "openvinotoolkit" repo_name = "openvino_notebooks" repo_branch = "tree/main" -artifacts_link = "http://repository.toolbox.iotg.sclab.intel.com/projects/ov-notebook/0.1.0-latest/20241120220806/dist/rst_files/" +artifacts_link = "http://repository.toolbox.iotg.sclab.intel.com/projects/ov-notebook/0.1.0-latest/20241209220902/dist/rst_files/" blacklisted_extensions = ['.xml', '.bin'] notebooks_repo = "https://github.com/openvinotoolkit/openvino_notebooks/blob/latest/" notebooks_binder = "https://mybinder.org/v2/gh/openvinotoolkit/openvino_notebooks/HEAD?filepath=" diff --git a/docs/notebooks/3D-pose-estimation-with-output.rst b/docs/notebooks/3D-pose-estimation-with-output.rst index 9e09d96094fc78..7959bf48a75e45 100644 --- a/docs/notebooks/3D-pose-estimation-with-output.rst +++ b/docs/notebooks/3D-pose-estimation-with-output.rst @@ -113,58 +113,57 @@ Lab instead.** Collecting torch Using cached https://download.pytorch.org/whl/cpu/torch-2.4.1%2Bcpu-cp38-cp38-linux_x86_64.whl (194.9 MB) Collecting tqdm - Using cached tqdm-4.67.0-py3-none-any.whl.metadata (57 kB) - Requirement already satisfied: ipywidgets>=7.2.1 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pythreejs) (8.1.5) + Using cached tqdm-4.67.1-py3-none-any.whl.metadata (57 kB) + Requirement already satisfied: ipywidgets>=7.2.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pythreejs) (8.1.5) Collecting ipydatawidgets>=1.1.1 (from pythreejs) Using cached ipydatawidgets-4.3.5-py2.py3-none-any.whl.metadata (1.4 kB) Collecting numpy (from pythreejs) Using cached numpy-1.24.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.6 kB) - Requirement already satisfied: traitlets in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pythreejs) (5.14.3) + Requirement already satisfied: traitlets in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pythreejs) (5.14.3) Collecting openvino-telemetry>=2023.2.1 (from openvino>=2024.4.0) Using cached openvino_telemetry-2024.5.0-py3-none-any.whl.metadata (2.3 kB) - Requirement already satisfied: packaging in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2024.4.0) (24.2) + Requirement already satisfied: packaging in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2024.4.0) (24.2) Collecting filelock (from torch) Using cached filelock-3.16.1-py3-none-any.whl.metadata (2.9 kB) - Requirement already satisfied: typing-extensions>=4.8.0 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (4.12.2) + Requirement already satisfied: typing-extensions>=4.8.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (4.12.2) Collecting sympy (from torch) Using cached sympy-1.13.3-py3-none-any.whl.metadata (12 kB) Collecting networkx (from torch) Using cached https://download.pytorch.org/whl/networkx-3.2.1-py3-none-any.whl (1.6 MB) - Requirement already satisfied: jinja2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (3.1.4) + Requirement already satisfied: jinja2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (3.1.4) Collecting fsspec (from torch) Using cached fsspec-2024.10.0-py3-none-any.whl.metadata (11 kB) Collecting traittypes>=0.2.0 (from ipydatawidgets>=1.1.1->pythreejs) Using cached traittypes-0.2.1-py2.py3-none-any.whl.metadata (1.0 kB) - Requirement already satisfied: comm>=0.1.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (0.2.2) - Requirement already satisfied: ipython>=6.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (8.12.3) - Requirement already satisfied: widgetsnbextension~=4.0.12 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from 
ipywidgets>=7.2.1->pythreejs) (4.0.13) - Requirement already satisfied: jupyterlab-widgets~=3.0.12 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (3.0.13) - Requirement already satisfied: MarkupSafe>=2.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jinja2->torch) (2.1.5) + Requirement already satisfied: comm>=0.1.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (0.2.2) + Requirement already satisfied: ipython>=6.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (8.12.3) + Requirement already satisfied: widgetsnbextension~=4.0.12 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (4.0.13) + Requirement already satisfied: jupyterlab-widgets~=3.0.12 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipywidgets>=7.2.1->pythreejs) (3.0.13) + Requirement already satisfied: MarkupSafe>=2.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jinja2->torch) (2.1.5) INFO: pip is looking at multiple versions of networkx to determine which version is compatible with other requirements. This could take a while. 
Collecting networkx (from torch) Using cached networkx-3.1-py3-none-any.whl.metadata (5.3 kB) Collecting mpmath<1.4,>=1.1.0 (from sympy->torch) Using cached https://download.pytorch.org/whl/mpmath-1.3.0-py3-none-any.whl (536 kB) - Requirement already satisfied: backcall in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.2.0) - Requirement already satisfied: decorator in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (5.1.1) - Requirement already satisfied: jedi>=0.16 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.19.2) - Requirement already satisfied: matplotlib-inline in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.1.7) - Requirement already satisfied: pickleshare in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.7.5) - Requirement already satisfied: prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (3.0.48) - Requirement already satisfied: pygments>=2.4.0 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (2.18.0) - Requirement already satisfied: stack-data in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.6.3) - Requirement already satisfied: pexpect>4.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (4.9.0) - Requirement already satisfied: parso<0.9.0,>=0.8.4 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jedi>=0.16->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.8.4) - Requirement already satisfied: ptyprocess>=0.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pexpect>4.3->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.7.0) - Requirement already satisfied: wcwidth in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.2.13) - Requirement already satisfied: executing>=1.2.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (2.1.0) - Requirement already satisfied: asttokens>=2.1.0 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (2.4.1) - Requirement already satisfied: pure-eval in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.2.3) - Requirement already satisfied: six>=1.12.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from asttokens>=2.1.0->stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (1.16.0) + Requirement already satisfied: backcall in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.2.0) + Requirement already satisfied: decorator in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (5.1.1) + Requirement already satisfied: jedi>=0.16 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.19.2) + Requirement already satisfied: matplotlib-inline in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.1.7) + Requirement already satisfied: pickleshare in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.7.5) + Requirement already satisfied: prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (3.0.48) + Requirement already satisfied: pygments>=2.4.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (2.18.0) + Requirement already satisfied: stack-data in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.6.3) + Requirement already satisfied: pexpect>4.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (4.9.0) + Requirement already satisfied: parso<0.9.0,>=0.8.4 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jedi>=0.16->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.8.4) + Requirement already satisfied: ptyprocess>=0.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pexpect>4.3->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.7.0) + Requirement already satisfied: wcwidth in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from prompt-toolkit!=3.0.37,<3.1.0,>=3.0.30->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.2.13) + Requirement already satisfied: executing>=1.2.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (2.1.0) + Requirement already satisfied: asttokens>=2.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (3.0.0) + Requirement already satisfied: pure-eval in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from stack-data->ipython>=6.1.0->ipywidgets>=7.2.1->pythreejs) (0.2.3) Using cached pythreejs-2.4.2-py3-none-any.whl (3.4 MB) Using cached openvino-2024.4.0-16579-cp38-cp38-manylinux2014_x86_64.whl (42.6 MB) Using cached opencv_python-4.10.0.84-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (62.5 MB) - Using cached tqdm-4.67.0-py3-none-any.whl (78 kB) + Using cached tqdm-4.67.1-py3-none-any.whl (78 kB) Using cached ipydatawidgets-4.3.5-py2.py3-none-any.whl (271 kB) Using cached numpy-1.24.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.3 MB) Using cached openvino_telemetry-2024.5.0-py3-none-any.whl (23 kB) @@ -174,7 +173,7 @@ Lab instead.** Using cached sympy-1.13.3-py3-none-any.whl (6.2 MB) Using cached traittypes-0.2.1-py2.py3-none-any.whl (8.6 kB) Installing collected packages: openvino-telemetry, mpmath, traittypes, tqdm, sympy, numpy, networkx, fsspec, filelock, torch, openvino, opencv-python, ipydatawidgets, pythreejs - Successfully installed 
filelock-3.16.1 fsspec-2024.10.0 ipydatawidgets-4.3.5 mpmath-1.3.0 networkx-3.1 numpy-1.24.4 opencv-python-4.10.0.84 openvino-2024.4.0 openvino-telemetry-2024.5.0 pythreejs-2.4.2 sympy-1.13.3 torch-2.4.1+cpu tqdm-4.67.0 traittypes-0.2.1 + Successfully installed filelock-3.16.1 fsspec-2024.10.0 ipydatawidgets-4.3.5 mpmath-1.3.0 networkx-3.1 numpy-1.24.4 opencv-python-4.10.0.84 openvino-2024.4.0 openvino-telemetry-2024.5.0 pythreejs-2.4.2 sympy-1.13.3 torch-2.4.1+cpu tqdm-4.67.1 traittypes-0.2.1 Note: you may need to restart the kernel to use updated packages. @@ -250,7 +249,7 @@ Download the model .. parsed-literal:: - model/human-pose-estimation-3d.tar.gz: 0%| | 0.00/17.6M [00:00`__ tackles the task of generating animation sequences from a single character image. It @@ -34,10 +36,14 @@ Learn more in `GitHub repo `__ and `paper `__. -.. warning:: +.. container:: alert alert-warning - This tutorial requires at least **96 GB** of RAM for model conversion and **40 GB** for inference. Changing the values of ``HEIGHT``, ``WIDTH`` and ``VIDEO_LENGTH`` variables will change the memory consumption but will also affect accuracy. + :: +

! WARNING !

+

+ This tutorial requires at least 96 GB of RAM for model conversion and 40 GB for inference. Changing the values of HEIGHT, WIDTH and VIDEO_LENGTH variables will change the memory consumption but will also affect accuracy. +

**Table of contents:** @@ -70,9 +76,6 @@ need a Jupyter server to start. For details, please refer to `Installation Guide `__. -.. |image0| image:: https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/animate-anyone/animate-anyone.gif - - Prerequisites ------------- @@ -110,13 +113,6 @@ Prerequisites %load_ext skip_kernel_extension - -.. parsed-literal:: - - Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. - - Note that we clone a fork of original repo with tweaked forward methods. .. code:: ipython3 @@ -171,9 +167,11 @@ Note that we clone a fork of original repo with tweaked forward methods. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/utils/outputs.py:63: FutureWarning: `torch.utils._pytree._register_pytree_node` is deprecated. Please use `torch.utils._pytree.register_pytree_node` instead. + /home/itrushkin/.virtualenvs/test/lib/python3.10/site-packages/diffusers/utils/outputs.py:63: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. + torch.utils._pytree._register_pytree_node( + /home/itrushkin/.virtualenvs/test/lib/python3.10/site-packages/diffusers/utils/outputs.py:63: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. torch.utils._pytree._register_pytree_node( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/utils/outputs.py:63: FutureWarning: `torch.utils._pytree._register_pytree_node` is deprecated. Please use `torch.utils._pytree.register_pytree_node` instead. 
+ /home/itrushkin/.virtualenvs/test/lib/python3.10/site-packages/diffusers/utils/outputs.py:63: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. torch.utils._pytree._register_pytree_node( @@ -221,13 +219,6 @@ Prepare base model local_dir=local_dir, ) - - -.. parsed-literal:: - - diffusion_pytorch_model.bin: 0%| | 0.00/3.44G [00:00:2: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. - :6: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. 
We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. - :9: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. - - Convert model to OpenVINO IR ---------------------------- @@ -423,7 +337,7 @@ semantic features are extracted through the CLIP image encoder for Cross-Attention. Temporal-Attention operates in the temporal dimension. Finally, the VAE decoder decodes the result into a video clip. -|image01| +.. image:: https://humanaigc.github.io/animate-anyone/static/images/f2_img.png The pipeline contains 6 PyTorch modules: @@ -463,8 +377,6 @@ compression parameters. More details about weights compression can be found in `OpenVINO documentation `__. -.. |image01| image:: https://humanaigc.github.io/animate-anyone/static/images/f2_img.png - .. code:: ipython3 %%skip not $SHOULD_CONVERT @@ -522,12 +434,14 @@ of the pipeline, it will be better to convert them to separate models. .. parsed-literal:: + WARNING:nncf:NNCF provides best results with torch==2.1.2, while current torch version is 2.2.2+cpu. 
If you encounter issues, consider switching to torch==2.1.2 INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ 8 │ 100% (32 / 32) │ 100% (32 / 32) │ - ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + +--------------+---------------------------+-----------------------------------+ + | Num bits (N) | % all parameters (layers) | % ratio-defining parameters | + | | | (layers) | + +==============+===========================+===================================+ + | 8 | 100% (32 / 32) | 100% (32 / 32) | + +--------------+---------------------------+-----------------------------------+ @@ -543,6 +457,14 @@ of the pipeline, it will be better to convert them to separate models. + + + + + + + + .. code:: ipython3 %%skip not $SHOULD_CONVERT @@ -568,11 +490,12 @@ of the pipeline, it will be better to convert them to separate models. .. 
parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ 8 │ 100% (40 / 40) │ 100% (40 / 40) │ - ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + +--------------+---------------------------+-----------------------------------+ + | Num bits (N) | % all parameters (layers) | % ratio-defining parameters | + | | | (layers) | + +==============+===========================+===================================+ + | 8 | 100% (40 / 40) | 100% (40 / 40) | + +--------------+---------------------------+-----------------------------------+ @@ -588,6 +511,14 @@ of the pipeline, it will be better to convert them to separate models. + + + + + + + + Reference UNet ~~~~~~~~~~~~~~ @@ -634,11 +565,12 @@ step. .. parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ 8 │ 100% (270 / 270) │ 100% (270 / 270) │ - ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + +--------------+---------------------------+-----------------------------------+ + | Num bits (N) | % all parameters (layers) | % ratio-defining parameters | + | | | (layers) | + +==============+===========================+===================================+ + | 8 | 100% (270 / 270) | 100% (270 / 270) | + +--------------+---------------------------+-----------------------------------+ @@ -654,6 +586,14 @@ step. + + + + + + + + Denoising UNet ~~~~~~~~~~~~~~ @@ -727,11 +667,12 @@ step. .. 
parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ 8 │ 100% (534 / 534) │ 100% (534 / 534) │ - ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + +--------------+---------------------------+-----------------------------------+ + | Num bits (N) | % all parameters (layers) | % ratio-defining parameters | + | | | (layers) | + +==============+===========================+===================================+ + | 8 | 100% (534 / 534) | 100% (534 / 534) | + +--------------+---------------------------+-----------------------------------+ @@ -747,6 +688,14 @@ step. + + + + + + + + Pose Guider ~~~~~~~~~~~ @@ -773,11 +722,12 @@ efficiently integrate pose control signals into the denoising process. .. parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ 8 │ 100% (8 / 8) │ 100% (8 / 8) │ - ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + +--------------+---------------------------+-----------------------------------+ + | Num bits (N) | % all parameters (layers) | % ratio-defining parameters | + | | | (layers) | + +==============+===========================+===================================+ + | 8 | 100% (8 / 8) | 100% (8 / 8) | + +--------------+---------------------------+-----------------------------------+ @@ -793,6 +743,14 @@ efficiently integrate pose control signals into the denoising process. 
+ + + + + + + + Image Encoder ~~~~~~~~~~~~~ @@ -818,19 +776,19 @@ required for both reference and denoising UNets. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:5006: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead + /home/itrushkin/.virtualenvs/test/lib/python3.10/site-packages/transformers/modeling_utils.py:4225: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead warnings.warn( - `loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`. .. parsed-literal:: INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ 8 │ 100% (146 / 146) │ 100% (146 / 146) │ - ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + +--------------+---------------------------+-----------------------------------+ + | Num bits (N) | % all parameters (layers) | % ratio-defining parameters | + | | | (layers) | + +==============+===========================+===================================+ + | 8 | 100% (146 / 146) | 100% (146 / 146) | + +--------------+---------------------------+-----------------------------------+ @@ -846,6 +804,14 @@ required for both reference and denoising UNets. + + + + + + + + Inference --------- @@ -871,6 +837,15 @@ For starting work, please select inference device from dropdown list. device = device_widget() + + + +.. 
parsed-literal:: + + Dropdown(description='Device:', index=5, options=('CPU', 'GPU.0', 'GPU.1', 'GPU.2', 'GPU.3', 'AUTO'), value='A… + + + .. code:: ipython3 class OVPose2VideoPipeline(Pose2VideoPipeline): @@ -1168,7 +1143,7 @@ Video post-processing .. raw:: html @@ -1242,23 +1217,9 @@ Interactive inference demo = make_demo(fn=generate) try: - demo.queue().launch(debug=False) + demo.queue().launch(debug=True) except Exception: - demo.queue().launch(debug=False, share=True) + demo.queue().launch(debug=True, share=True) # if you are launching remotely, specify server_name and server_port # demo.launch(server_name='your server name', server_port='server port in int') # Read more in the docs: https://gradio.app/docs/" - - -.. parsed-literal:: - - Running on local URL: http://127.0.0.1:7860 - - To create a public link, set `share=True` in `launch()`. - - - - - - - diff --git a/docs/notebooks/async-api-with-output.rst b/docs/notebooks/async-api-with-output.rst index 9f9130a4fe0db2..86ff1bc0aa9b0c 100644 --- a/docs/notebooks/async-api-with-output.rst +++ b/docs/notebooks/async-api-with-output.rst @@ -59,7 +59,14 @@ Imports .. code:: ipython3 %pip install -q "openvino>=2023.1.0" - %pip install -q opencv-python "matplotlib>=3.4" + %pip install -q opencv-python tqdm "matplotlib>=3.4" + + +.. parsed-literal:: + + Note: you may need to restart the kernel to use updated packages. + Note: you may need to restart the kernel to use updated packages. + .. code:: ipython3 @@ -97,26 +104,36 @@ the person in each frame of the video. .. code:: ipython3 + from pathlib import Path + # directory where model will be downloaded base_model_dir = "model" # model name as named in Open Model Zoo model_name = "person-detection-0202" precision = "FP16" - model_path = f"model/intel/{model_name}/{precision}/{model_name}.xml" - download_command = f"omz_downloader " f"--name {model_name} " f"--precision {precision} " f"--output_dir {base_model_dir} " f"--cache_dir {base_model_dir}" - ! 
$download_command + model_path = Path("model") / f"{model_name}.xml" + + base_model_url = "https://storage.openvinotoolkit.org/repositories/open_model_zoo/2023.0/models_bin/1" + + if not Path(model_path).exists(): + utils.download_file(f"{base_model_url}/{model_name}/{precision}/{model_name}.xml", filename=model_path.name, directory=model_path.parent) + utils.download_file( + f"{base_model_url}/{model_name}/{precision}/{model_name}.bin", filename=model_path.name.replace(".xml", ".bin"), directory=model_path.parent + ) + .. parsed-literal:: - ################|| Downloading person-detection-0202 ||################ - - ========== Retrieving model/intel/person-detection-0202/FP16/person-detection-0202.xml from the cache - - ========== Retrieving model/intel/person-detection-0202/FP16/person-detection-0202.bin from the cache - - + person-detection-0202.xml: 0%| | 0.00/249k [00:00=4.30.2", "controlnet-aux>=0.0.6", "gradio>=3.36", + "datasets>=2.14.6", + "nncf>=2.7.0", + "opencv-python", "--extra-index-url", "https://download.pytorch.org/whl/cpu", ) - pip_install("openvino>=2023.1.0", "datasets>=2.14.6", "nncf>=2.7.0", "opencv-python") + pip_install("openvino>=2023.1.0") Instantiating Generation Pipeline --------------------------------- @@ -296,7 +299,7 @@ Now, let us check its result on example image: image_path = Path("example_image.jpg") if not image_path.exists(): - download_file(image_path, filename="example_image.jpg") + download_file(example_url, filename="example_image.jpg") img = Image.open(image_path) pose = pose_estimator(img) diff --git a/docs/notebooks/convert-to-openvino-with-output.rst b/docs/notebooks/convert-to-openvino-with-output.rst index 507dd407eae739..bc5a45f244e376 100644 --- a/docs/notebooks/convert-to-openvino-with-output.rst +++ b/docs/notebooks/convert-to-openvino-with-output.rst @@ -184,10 +184,10 @@ NLP model from Hugging Face and export it in ONNX format: .. 
parsed-literal:: - 2024-11-22 00:16:16.864961: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-11-22 00:16:16.903350: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-12-09 23:09:00.018226: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-12-09 23:09:00.080568: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-11-22 00:16:17.575066: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-12-09 23:09:00.743048: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT .. parsed-literal:: @@ -660,7 +660,7 @@ frameworks conversion guides. .. parsed-literal:: - 2024-11-22 00:16:33.997234: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1956] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform. + 2024-12-09 23:09:17.262024: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1956] Cannot dlopen some GPU libraries. 
Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform. Skipping registering GPU devices... diff --git a/docs/notebooks/convnext-classification-with-output.rst b/docs/notebooks/convnext-classification-with-output.rst index 9466f30c22898e..1204ea2c17f106 100644 --- a/docs/notebooks/convnext-classification-with-output.rst +++ b/docs/notebooks/convnext-classification-with-output.rst @@ -192,7 +192,7 @@ And print results Predicted Class: 281 Predicted Label: n02123045 tabby, tabby cat - Predicted Probability: 0.5919997096061707 + Predicted Probability: 0.5351971983909607 Convert the model to OpenVINO Intermediate representation format diff --git a/docs/notebooks/cross-lingual-books-alignment-with-output.rst b/docs/notebooks/cross-lingual-books-alignment-with-output.rst index b116f0e1f5cda1..68f51ad137ff16 100644 --- a/docs/notebooks/cross-lingual-books-alignment-with-output.rst +++ b/docs/notebooks/cross-lingual-books-alignment-with-output.rst @@ -32,7 +32,7 @@ Prerequisites - ``requests`` - for getting books - ``pysbd`` - for splitting sentences -- ``transformers[torch]`` and ``openvino_dev`` - for getting sentence +- ``transformers[torch]`` and ``openvino`` - for getting sentence embeddings - ``seaborn`` - for alignment matrix visualization - ``ipywidgets`` - for displaying HTML and JS output in the notebook @@ -416,12 +416,12 @@ languages. It has the same architecture as the BERT model but has been trained on a different task: to produce identical embeddings for translation pairs. -|image01| +|image02| This makes LaBSE a great choice for our task and it can be reused for different language pairs still producing good results. -.. |image01| image:: https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/627d3a39-7076-479f-a7b1-392f49a0b83e +.. 
|image02| image:: https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/627d3a39-7076-479f-a7b1-392f49a0b83e .. code:: ipython3 diff --git a/docs/notebooks/ct-segmentation-quantize-nncf-with-output.rst b/docs/notebooks/ct-segmentation-quantize-nncf-with-output.rst index c3d645f1841a17..853da533385284 100644 --- a/docs/notebooks/ct-segmentation-quantize-nncf-with-output.rst +++ b/docs/notebooks/ct-segmentation-quantize-nncf-with-output.rst @@ -154,10 +154,10 @@ Imports .. parsed-literal:: - 2024-11-22 00:16:56.689204: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-11-22 00:16:56.724390: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-12-09 23:09:41.789833: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-12-09 23:09:41.824673: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-11-22 00:16:57.319913: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-12-09 23:09:42.418712: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT .. parsed-literal:: @@ -213,7 +213,7 @@ notebook `__. .. 
parsed-literal:: - pretrained_model/unet_kits19_state_dict.pth: 0%| | 0.00/7.58M [00:00`__. .. parsed-literal:: - /tmp/ipykernel_3514722/1592321960.py:3: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. + /tmp/ipykernel_2165966/1592321960.py:3: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. 
state_dict = torch.load(state_dict_file, map_location=torch.device("cpu")) @@ -444,7 +444,7 @@ this notebook. .. parsed-literal:: [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/monai/networks/nets/basic_unet.py:168: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/monai/networks/nets/basic_unet.py:168: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if x_e.shape[-i - 1] != x_0.shape[-i - 1]: @@ -526,18 +526,18 @@ Convert quantized model to OpenVINO IR model and save it. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/quantization/layers.py:340: TracerWarning: Converting a tensor to a Python number might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/quantization/layers.py:340: TracerWarning: Converting a tensor to a Python number might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! return self._level_low.item() - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/quantization/layers.py:348: TracerWarning: Converting a tensor to a Python number might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/quantization/layers.py:348: TracerWarning: Converting a tensor to a Python number might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! return self._level_high.item() - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/monai/networks/nets/basic_unet.py:168: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/monai/networks/nets/basic_unet.py:168: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if x_e.shape[-i - 1] != x_0.shape[-i - 1]: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 1. of the traced function does not match the corresponding output of the Python function. Detailed error: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 1. of the traced function does not match the corresponding output of the Python function. Detailed error: Tensor-likes are not close! - Mismatched elements: 247214 / 262144 (94.3%) - Greatest absolute difference: 4.1846349239349365 at index (0, 0, 379, 430) (up to 1e-05 allowed) - Greatest relative difference: 15984.079041034269 at index (0, 0, 447, 390) (up to 1e-05 allowed) + Mismatched elements: 250458 / 262144 (95.5%) + Greatest absolute difference: 3.8674159049987793 at index (0, 0, 351, 76) (up to 1e-05 allowed) + Greatest relative difference: 12206.866810726728 at index (0, 0, 144, 31) (up to 1e-05 allowed) _check_trace( @@ -663,7 +663,7 @@ be run in the notebook with ``! benchmark_app`` or [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.LATENCY. 
[Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 8.99 ms + [ INFO ] Read model took 8.90 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] x (node: x) : f32 / [...] / [1,1,512,512] @@ -677,7 +677,7 @@ be run in the notebook with ``! benchmark_app`` or [ INFO ] Model outputs: [ INFO ] ***NO_NAME*** (node: __module.final_conv/aten::_convolution/Add) : f32 / [...] / [1,1,512,512] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 240.78 ms + [ INFO ] Compile model took 264.91 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -714,17 +714,17 @@ be run in the notebook with ``! benchmark_app`` or [ INFO ] Fill input 'x' with random values [Step 10/11] Measuring performance (Start inference synchronously, limits: 15000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). - [ INFO ] First inference took 49.70 ms + [ INFO ] First inference took 48.49 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 425 iterations - [ INFO ] Duration: 15023.51 ms + [ INFO ] Count: 431 iterations + [ INFO ] Duration: 15002.34 ms [ INFO ] Latency: - [ INFO ] Median: 34.55 ms - [ INFO ] Average: 35.13 ms - [ INFO ] Min: 34.21 ms - [ INFO ] Max: 47.23 ms - [ INFO ] Throughput: 28.29 FPS + [ INFO ] Median: 34.52 ms + [ INFO ] Average: 34.59 ms + [ INFO ] Min: 34.20 ms + [ INFO ] Max: 36.19 ms + [ INFO ] Throughput: 28.73 FPS .. code:: ipython3 @@ -750,7 +750,7 @@ be run in the notebook with ``! benchmark_app`` or [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.LATENCY. 
[Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 11.10 ms + [ INFO ] Read model took 10.56 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] x (node: x) : f32 / [...] / [1,1,512,512] @@ -764,7 +764,7 @@ be run in the notebook with ``! benchmark_app`` or [ INFO ] Model outputs: [ INFO ] ***NO_NAME*** (node: __module.final_conv/aten::_convolution/Add) : f32 / [...] / [1,1,512,512] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 251.41 ms + [ INFO ] Compile model took 248.98 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model49 @@ -801,17 +801,17 @@ be run in the notebook with ``! benchmark_app`` or [ INFO ] Fill input 'x' with random values [Step 10/11] Measuring performance (Start inference synchronously, limits: 15000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). - [ INFO ] First inference took 29.68 ms + [ INFO ] First inference took 29.18 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 911 iterations - [ INFO ] Duration: 15009.49 ms + [ INFO ] Count: 908 iterations + [ INFO ] Duration: 15011.20 ms [ INFO ] Latency: - [ INFO ] Median: 15.73 ms - [ INFO ] Average: 16.27 ms - [ INFO ] Min: 15.41 ms - [ INFO ] Max: 24.40 ms - [ INFO ] Throughput: 60.69 FPS + [ INFO ] Median: 15.48 ms + [ INFO ] Average: 16.33 ms + [ INFO ] Min: 15.15 ms + [ INFO ] Max: 28.31 ms + [ INFO ] Throughput: 60.49 FPS Visually Compare Inference Results @@ -905,7 +905,7 @@ seed is displayed to enable reproducing specific runs of this cell. .. parsed-literal:: - Visualizing results with seed 1732231099 + Visualizing results with seed 1733782265 @@ -989,7 +989,7 @@ performs inference, and displays the results on the frames loaded in .. parsed-literal:: Loaded model to AUTO in 0.15 seconds. 
- Total time for 68 frames: 2.31 seconds, fps:29.91 + Total time for 68 frames: 2.32 seconds, fps:29.70 References diff --git a/docs/notebooks/ct-segmentation-quantize-nncf-with-output_files/ct-segmentation-quantize-nncf-with-output_37_1.png b/docs/notebooks/ct-segmentation-quantize-nncf-with-output_files/ct-segmentation-quantize-nncf-with-output_37_1.png index a0c854d6dd33f6..fc10c642d8d2a1 100644 --- a/docs/notebooks/ct-segmentation-quantize-nncf-with-output_files/ct-segmentation-quantize-nncf-with-output_37_1.png +++ b/docs/notebooks/ct-segmentation-quantize-nncf-with-output_files/ct-segmentation-quantize-nncf-with-output_37_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:588fb52eb7dcf0ede69419b9645ad6dc93526e8960af83679e12bac98e6817f6 -size 385527 +oid sha256:52955890ed558e516a361399057b8529ffd5103a7b63ed20a2549062b4d900b5 +size 386283 diff --git a/docs/notebooks/ddcolor-image-colorization-with-output.rst b/docs/notebooks/ddcolor-image-colorization-with-output.rst index cd3bf024065b55..11b92fe4897f5e 100644 --- a/docs/notebooks/ddcolor-image-colorization-with-output.rst +++ b/docs/notebooks/ddcolor-image-colorization-with-output.rst @@ -135,7 +135,7 @@ Prerequisites .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/timm/models/layers/__init__.py:48: FutureWarning: Importing from timm.models.layers is deprecated, please import via timm.layers + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/timm/models/layers/__init__.py:48: FutureWarning: Importing from timm.models.layers is deprecated, please import via timm.layers warnings.warn(f"Importing from {__name__} is deprecated, please import via timm.layers", FutureWarning) @@ -416,10 +416,10 @@ Perform model quantization .. 
parsed-literal:: - 2024-11-22 00:20:47.511999: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-11-22 00:20:47.551328: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-12-09 23:13:28.920989: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-12-09 23:13:28.960154: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-11-22 00:20:47.960841: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-12-09 23:13:29.365051: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -527,7 +527,7 @@ Tool + @@ -314,13 +313,13 @@ loading on device using ``core.complie_model``. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dinov2_layers/patch_embed.py:73: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dinov2_layers/patch_embed.py:73: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert H % patch_H == 0, f"Input image height {H} is not a multiple of patch height {patch_H}" - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dinov2_layers/patch_embed.py:74: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dinov2_layers/patch_embed.py:74: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert W % patch_W == 0, f"Input image width {W} is not a multiple of patch width: {patch_W}" - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dinov2.py:183: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dinov2.py:183: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if npatch == N and w == h: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dpt.py:147: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/Depth-Anything-V2/depth_anything_v2/dpt.py:147: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! out = F.interpolate(out, (int(patch_h * 14), int(patch_w * 14)), mode="bilinear", align_corners=True) @@ -412,7 +411,7 @@ range. .. 
parsed-literal:: - + @@ -429,11 +428,10 @@ Run inference on video VIDEO_FILE = "./Coco Walking in Berkeley.mp4" - if not Path(VIDEO_FILE).exists(): - download_file( - "https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/video/Coco%20Walking%20in%20Berkeley.mp4", - VIDEO_FILE, - ) + download_file( + "https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/video/Coco%20Walking%20in%20Berkeley.mp4", + VIDEO_FILE, + ) # Number of seconds of input video to process. Set `NUM_SECONDS` to 0 to process # the full video. @@ -636,7 +634,7 @@ Run inference on video .. parsed-literal:: - Processed 60 frames in 13.34 seconds. Total FPS (including video processing): 4.50.Inference FPS: 10.65 + Processed 60 frames in 13.15 seconds. Total FPS (including video processing): 4.56.Inference FPS: 10.69 Video saved to 'output/Coco Walking in Berkeley_depth_anything.mp4'. @@ -663,7 +661,7 @@ Run inference on video .. parsed-literal:: Showing video saved at - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/output/Coco Walking in Berkeley_depth_anything.mp4 + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/output/Coco Walking in Berkeley_depth_anything.mp4 If you cannot see the video in your browser, please click on the following link to download the video @@ -797,10 +795,10 @@ quantization code below may take some time. .. parsed-literal:: - 2024-11-22 00:29:02.540402: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. 
- 2024-11-22 00:29:02.574640: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-12-09 23:21:25.394147: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-12-09 23:21:25.427427: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-11-22 00:29:03.160362: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-12-09 23:21:26.001101: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -926,10 +924,10 @@ data. .. parsed-literal:: - Processed 60 frames in 12.91 seconds. Total FPS (including video processing): 4.65.Inference FPS: 12.59 + Processed 60 frames in 12.60 seconds. Total FPS (including video processing): 4.76.Inference FPS: 13.15 Video saved to 'output/Coco Walking in Berkeley_depth_anything_int8.mp4'. 
Showing video saved at - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/output/Coco Walking in Berkeley_depth_anything.mp4 + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/depth-anything/output/Coco Walking in Berkeley_depth_anything.mp4 If you cannot see the video in your browser, please click on the following link to download the video @@ -1009,9 +1007,9 @@ Tool =4.8.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (4.12.2) - Requirement already satisfied: sympy in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (1.13.3) - Requirement already satisfied: networkx in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (3.1) - Requirement already satisfied: jinja2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (3.1.4) - Requirement already satisfied: fsspec in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (2024.9.0) - Requirement already satisfied: numpy in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torchvision) (1.23.5) - Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torchvision) (10.4.0) - Requirement already satisfied: MarkupSafe>=2.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jinja2->torch) (2.1.5) - Requirement already satisfied: mpmath<1.4,>=1.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from sympy->torch) (1.3.0) + Requirement already satisfied: torch in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (2.4.1+cpu) + Requirement already satisfied: torchvision in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (0.19.1+cpu) + Requirement already satisfied: opencv-python in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (4.10.0.84) + Requirement already satisfied: wheel in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (0.45.1) + Requirement already satisfied: filelock in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (3.16.1) + Requirement already satisfied: typing-extensions>=4.8.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (4.12.2) + Requirement already satisfied: sympy in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (1.13.3) + Requirement already satisfied: networkx in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (3.1) + Requirement already satisfied: jinja2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (3.1.4) + Requirement already satisfied: fsspec in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch) (2024.9.0) + Requirement already satisfied: numpy in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torchvision) (1.23.5) + Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torchvision) (10.4.0) + Requirement already satisfied: MarkupSafe>=2.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jinja2->torch) (2.1.5) + Requirement already satisfied: mpmath<1.4,>=1.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from sympy->torch) (1.3.0) Looking in indexes: https://pypi.org/simple, https://download.pytorch.org/whl/cpu Collecting git+https://github.com/facebookresearch/detectron2.git - Cloning https://github.com/facebookresearch/detectron2.git to /tmp/pip-req-build-4klmx21d + Cloning 
https://github.com/facebookresearch/detectron2.git to /tmp/pip-req-build-89enhchj .. parsed-literal:: - Running command git clone --filter=blob:none --quiet https://github.com/facebookresearch/detectron2.git /tmp/pip-req-build-4klmx21d + Running command git clone --filter=blob:none --quiet https://github.com/facebookresearch/detectron2.git /tmp/pip-req-build-89enhchj .. parsed-literal:: @@ -125,73 +125,73 @@ Install required packages for running model Resolved https://github.com/facebookresearch/detectron2.git to commit c69939aa85460e8135f40bce908a6cddaa73065f Preparing metadata (setup.py): started Preparing metadata (setup.py): finished with status 'done' - Requirement already satisfied: Pillow>=7.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (10.4.0) - Requirement already satisfied: black in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (24.3.0) - Requirement already satisfied: cloudpickle in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (3.1.0) - Requirement already satisfied: fvcore<0.1.6,>=0.1.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (0.1.5.post20221221) + Requirement already satisfied: Pillow>=7.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (10.4.0) + Requirement already satisfied: black in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (24.3.0) + Requirement already satisfied: cloudpickle in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (3.1.0) + Requirement already satisfied: fvcore<0.1.6,>=0.1.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (0.1.5.post20221221) Collecting hydra-core>=1.1 (from detectron2==0.6) Using cached hydra_core-1.3.2-py3-none-any.whl.metadata (5.5 kB) Collecting iopath<0.1.10,>=0.1.7 (from detectron2==0.6) Using cached https://download.pytorch.org/whl/iopath-0.1.9-py3-none-any.whl (27 kB) - Requirement already satisfied: matplotlib in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (3.7.5) + Requirement already satisfied: matplotlib in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (3.7.5) Collecting omegaconf<2.4,>=2.1 (from detectron2==0.6) Using cached omegaconf-2.3.0-py3-none-any.whl.metadata (3.9 kB) - Requirement already satisfied: packaging in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (24.2) - Requirement already satisfied: pycocotools>=2.0.2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (2.0.7) - Requirement already satisfied: 
tabulate in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (0.9.0) - Requirement already satisfied: tensorboard in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (2.12.3) - Requirement already satisfied: termcolor>=1.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (2.4.0) - Requirement already satisfied: tqdm>4.29.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (4.67.0) - Requirement already satisfied: yacs>=0.1.8 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (0.1.8) - Requirement already satisfied: numpy in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from fvcore<0.1.6,>=0.1.5->detectron2==0.6) (1.23.5) - Requirement already satisfied: pyyaml>=5.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from fvcore<0.1.6,>=0.1.5->detectron2==0.6) (6.0.2) - Requirement already satisfied: antlr4-python3-runtime==4.9.* in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from hydra-core>=1.1->detectron2==0.6) (4.9.3) - Requirement already satisfied: importlib-resources in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from hydra-core>=1.1->detectron2==0.6) (6.4.5) - Requirement already satisfied: portalocker in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from iopath<0.1.10,>=0.1.7->detectron2==0.6) (3.0.0) - Requirement already satisfied: contourpy>=1.0.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (1.1.1) - Requirement already satisfied: cycler>=0.10 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (0.12.1) - Requirement already satisfied: fonttools>=4.22.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (4.55.0) - Requirement already satisfied: kiwisolver>=1.0.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (1.4.7) - Requirement already satisfied: pyparsing>=2.3.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (3.1.4) - Requirement already satisfied: python-dateutil>=2.7 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (2.9.0.post0) - Requirement already satisfied: click>=8.0.0 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (8.1.7) - Requirement already satisfied: mypy-extensions>=0.4.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (1.0.0) - Requirement already satisfied: pathspec>=0.9.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (0.12.1) - Requirement already satisfied: platformdirs>=2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (4.3.6) - Requirement already satisfied: tomli>=1.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (2.1.0) - Requirement already satisfied: typing-extensions>=4.0.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (4.12.2) - Requirement already satisfied: absl-py>=0.4 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (1.4.0) - Requirement already satisfied: grpcio>=1.48.2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (1.68.0) - Requirement already satisfied: google-auth<3,>=1.6.3 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (2.36.0) - Requirement already satisfied: google-auth-oauthlib<1.1,>=0.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (1.0.0) - Requirement already satisfied: markdown>=2.6.8 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (3.7) - Requirement already satisfied: protobuf>=3.19.6 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (3.20.3) - Requirement already satisfied: requests<3,>=2.21.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (2.32.3) - Requirement already satisfied: setuptools>=41.0.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (44.0.0) - Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (0.7.2) - Requirement already satisfied: werkzeug>=1.0.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (3.0.6) - Requirement already 
satisfied: wheel>=0.26 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (0.45.0) - Requirement already satisfied: cachetools<6.0,>=2.0.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from google-auth<3,>=1.6.3->tensorboard->detectron2==0.6) (5.5.0) - Requirement already satisfied: pyasn1-modules>=0.2.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from google-auth<3,>=1.6.3->tensorboard->detectron2==0.6) (0.4.1) - Requirement already satisfied: rsa<5,>=3.1.4 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from google-auth<3,>=1.6.3->tensorboard->detectron2==0.6) (4.9) - Requirement already satisfied: requests-oauthlib>=0.7.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from google-auth-oauthlib<1.1,>=0.5->tensorboard->detectron2==0.6) (2.0.0) - Requirement already satisfied: zipp>=3.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from importlib-resources->hydra-core>=1.1->detectron2==0.6) (3.20.2) - Requirement already satisfied: importlib-metadata>=4.4 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from markdown>=2.6.8->tensorboard->detectron2==0.6) (8.5.0) - Requirement already satisfied: six>=1.5 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from python-dateutil>=2.7->matplotlib->detectron2==0.6) (1.16.0) - Requirement already satisfied: charset-normalizer<4,>=2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests<3,>=2.21.0->tensorboard->detectron2==0.6) (3.4.0) - Requirement already satisfied: idna<4,>=2.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests<3,>=2.21.0->tensorboard->detectron2==0.6) (3.10) - Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests<3,>=2.21.0->tensorboard->detectron2==0.6) (2.2.3) - Requirement already satisfied: certifi>=2017.4.17 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests<3,>=2.21.0->tensorboard->detectron2==0.6) (2024.8.30) - Requirement already satisfied: MarkupSafe>=2.1.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from werkzeug>=1.0.1->tensorboard->detectron2==0.6) (2.1.5) - Requirement already satisfied: pyasn1<0.7.0,>=0.4.6 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard->detectron2==0.6) (0.6.1) - Requirement already satisfied: oauthlib>=3.0.0 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<1.1,>=0.5->tensorboard->detectron2==0.6) (3.2.2) + Requirement already satisfied: packaging in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (24.2) + Requirement already satisfied: pycocotools>=2.0.2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (2.0.7) + Requirement already satisfied: tabulate in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (0.9.0) + Requirement already satisfied: tensorboard in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (2.12.3) + Requirement already satisfied: termcolor>=1.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (2.4.0) + Requirement already satisfied: tqdm>4.29.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (4.67.1) + Requirement already satisfied: yacs>=0.1.8 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from detectron2==0.6) (0.1.8) + Requirement already satisfied: numpy in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from fvcore<0.1.6,>=0.1.5->detectron2==0.6) (1.23.5) + Requirement already satisfied: pyyaml>=5.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from fvcore<0.1.6,>=0.1.5->detectron2==0.6) (6.0.2) + Requirement already satisfied: antlr4-python3-runtime==4.9.* in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from hydra-core>=1.1->detectron2==0.6) (4.9.3) + Requirement already satisfied: importlib-resources in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from hydra-core>=1.1->detectron2==0.6) (6.4.5) + Requirement already satisfied: portalocker in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from iopath<0.1.10,>=0.1.7->detectron2==0.6) (3.0.0) + Requirement already satisfied: contourpy>=1.0.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (1.1.1) + Requirement already satisfied: cycler>=0.10 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (0.12.1) + Requirement already satisfied: fonttools>=4.22.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (4.55.2) + Requirement already 
satisfied: kiwisolver>=1.0.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (1.4.7) + Requirement already satisfied: pyparsing>=2.3.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (3.1.4) + Requirement already satisfied: python-dateutil>=2.7 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from matplotlib->detectron2==0.6) (2.9.0.post0) + Requirement already satisfied: click>=8.0.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (8.1.7) + Requirement already satisfied: mypy-extensions>=0.4.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (1.0.0) + Requirement already satisfied: pathspec>=0.9.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (0.12.1) + Requirement already satisfied: platformdirs>=2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (4.3.6) + Requirement already satisfied: tomli>=1.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (2.2.1) + Requirement already satisfied: typing-extensions>=4.0.1 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from black->detectron2==0.6) (4.12.2) + Requirement already satisfied: absl-py>=0.4 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (1.4.0) + Requirement already satisfied: grpcio>=1.48.2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (1.68.1) + Requirement already satisfied: google-auth<3,>=1.6.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (2.36.0) + Requirement already satisfied: google-auth-oauthlib<1.1,>=0.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (1.0.0) + Requirement already satisfied: markdown>=2.6.8 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (3.7) + Requirement already satisfied: protobuf>=3.19.6 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (3.20.3) + Requirement already satisfied: requests<3,>=2.21.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (2.32.3) + Requirement already satisfied: setuptools>=41.0.0 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (44.0.0) + Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (0.7.2) + Requirement already satisfied: werkzeug>=1.0.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (3.0.6) + Requirement already satisfied: wheel>=0.26 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from tensorboard->detectron2==0.6) (0.45.1) + Requirement already satisfied: cachetools<6.0,>=2.0.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from google-auth<3,>=1.6.3->tensorboard->detectron2==0.6) (5.5.0) + Requirement already satisfied: pyasn1-modules>=0.2.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from google-auth<3,>=1.6.3->tensorboard->detectron2==0.6) (0.4.1) + Requirement already satisfied: rsa<5,>=3.1.4 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from google-auth<3,>=1.6.3->tensorboard->detectron2==0.6) (4.9) + Requirement already satisfied: requests-oauthlib>=0.7.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from 
google-auth-oauthlib<1.1,>=0.5->tensorboard->detectron2==0.6) (2.0.0) + Requirement already satisfied: zipp>=3.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from importlib-resources->hydra-core>=1.1->detectron2==0.6) (3.20.2) + Requirement already satisfied: importlib-metadata>=4.4 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from markdown>=2.6.8->tensorboard->detectron2==0.6) (8.5.0) + Requirement already satisfied: six>=1.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from python-dateutil>=2.7->matplotlib->detectron2==0.6) (1.17.0) + Requirement already satisfied: charset-normalizer<4,>=2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests<3,>=2.21.0->tensorboard->detectron2==0.6) (3.4.0) + Requirement already satisfied: idna<4,>=2.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests<3,>=2.21.0->tensorboard->detectron2==0.6) (3.10) + Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests<3,>=2.21.0->tensorboard->detectron2==0.6) (2.2.3) + Requirement already satisfied: certifi>=2017.4.17 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests<3,>=2.21.0->tensorboard->detectron2==0.6) (2024.8.30) + Requirement already satisfied: 
MarkupSafe>=2.1.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from werkzeug>=1.0.1->tensorboard->detectron2==0.6) (2.1.5) + Requirement already satisfied: pyasn1<0.7.0,>=0.4.6 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard->detectron2==0.6) (0.6.1) + Requirement already satisfied: oauthlib>=3.0.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<1.1,>=0.5->tensorboard->detectron2==0.6) (3.2.2) Using cached hydra_core-1.3.2-py3-none-any.whl (154 kB) Using cached omegaconf-2.3.0-py3-none-any.whl (79 kB) Building wheels for collected packages: detectron2 Building wheel for detectron2 (setup.py): started Building wheel for detectron2 (setup.py): finished with status 'done' - Created wheel for detectron2: filename=detectron2-0.6-cp38-cp38-linux_x86_64.whl size=8313367 sha256=4eb79589c47d522c993509a8f16dfbf494af0f494c6a73577d9d3668c1ee4a05 - Stored in directory: /tmp/pip-ephem-wheel-cache-mkdcktsx/wheels/19/ac/65/e48e5e4ec2702274d927c5a6efb75709b24014371d3bb778f2 + Created wheel for detectron2: filename=detectron2-0.6-cp38-cp38-linux_x86_64.whl size=8313251 sha256=a744a8ccf54176a60e63af7e14e6a7f431f5b19935a3c1260a7d39f7a7f84bc8 + Stored in directory: /tmp/pip-ephem-wheel-cache-cb2ga2gq/wheels/19/ac/65/e48e5e4ec2702274d927c5a6efb75709b24014371d3bb778f2 Successfully built detectron2 Installing collected packages: omegaconf, iopath, hydra-core, detectron2 Attempting uninstall: omegaconf @@ -203,10 +203,10 @@ Install required packages for running model Uninstalling iopath-0.1.10: Successfully uninstalled iopath-0.1.10 Successfully installed 
detectron2-0.6 hydra-core-1.3.2 iopath-0.1.9 omegaconf-2.3.0 - Requirement already satisfied: openvino>=2023.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (2024.4.0) - Requirement already satisfied: numpy<2.1.0,>=1.16.6 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (1.23.5) - Requirement already satisfied: openvino-telemetry>=2023.2.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (2024.5.0) - Requirement already satisfied: packaging in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (24.2) + Requirement already satisfied: openvino>=2023.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (2024.4.0) + Requirement already satisfied: numpy<2.1.0,>=1.16.6 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (1.23.5) + Requirement already satisfied: openvino-telemetry>=2023.2.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (2024.5.0) + Requirement already satisfied: packaging in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from openvino>=2023.1.0) (24.2) Define helpers for PyTorch model initialization and 
conversion diff --git a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.jpg b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.jpg index 2c18ecdc61719a..9ffd8dbc558859 100644 --- a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.jpg +++ b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:edc1fd6c9bb94b1ff9dde163988de0d5635f35a9cb918138eb058de89fe36b6c -size 58029 +oid sha256:ec1aaa179217e234b7c93d22f9da2f1ac0281e5bf8e4271c4094c3d680793782 +size 58047 diff --git a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.png b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.png index 0890e13959d7b2..0c626a2f115cc3 100644 --- a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.png +++ b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_22_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b54cfa3647ce484120c2dac840789885273b1a61d0fdf6fd1fdb93e78753c114 -size 509016 +oid sha256:5ae2588579f79d5d3e23a9fd9870f28f3bd063b9166da901cd639f16f0f04fca +size 508747 diff --git a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.jpg b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.jpg index d2b1ec1ee92784..6063ffe4fca6ec 100644 --- a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.jpg +++ b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:0ffdd1e786238678562e14aa201c2a602b1733bb7db8b1c175f7d86b3c011fa2 -size 54276 +oid sha256:d414af832026a73bf7d8a8165a202c499989ddbc4db0826e6e0ca1951b2b4605 +size 54234 diff --git a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.png b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.png index d970f117246904..656018b2fa8884 100644 --- a/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.png +++ b/docs/notebooks/detectron2-to-openvino-with-output_files/detectron2-to-openvino-with-output_32_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6b8a9ccae3ca190acfaa9ddaa9be7641e02edae972b15c49f21cf9a8de9ae454 -size 456077 +oid sha256:8aa8664400b8c51d604d23e669d6e44f766d4eb6b9958d38f4757d5e1cbefe88 +size 457666 diff --git a/docs/notebooks/dynamicrafter-animating-images-with-output.rst b/docs/notebooks/dynamicrafter-animating-images-with-output.rst index 13b4c9475f7092..fac46c9f3e6cad 100644 --- a/docs/notebooks/dynamicrafter-animating-images-with-output.rst +++ b/docs/notebooks/dynamicrafter-animating-images-with-output.rst @@ -151,6 +151,13 @@ Prerequisites %pip install -q "openvino>=2024.2.0" "nncf>=2.11.0" "datasets>=2.20.0" %pip install -q "gradio>=4.19" omegaconf einops pytorch_lightning kornia "open_clip_torch==2.22.0" transformers av opencv-python "torch==2.2.2" --extra-index-url https://download.pytorch.org/whl/cpu + +.. parsed-literal:: + + Note: you may need to restart the kernel to use updated packages. + Note: you may need to restart the kernel to use updated packages. + + .. code:: ipython3 from pathlib import Path @@ -168,6 +175,15 @@ Prerequisites ) open("notebook_utils.py", "w").write(r.text) + + + +.. parsed-literal:: + + 24624 + + + .. 
code:: ipython3 from cmd_helper import clone_repo @@ -175,6 +191,15 @@ Prerequisites clone_repo("https://github.com/Doubiiu/DynamiCrafter.git", "26e665cd6c174234238d2ded661e2e56f875d360") + + + +.. parsed-literal:: + + PosixPath('DynamiCrafter') + + + Load and run the original pipeline ---------------------------------- @@ -238,7 +263,7 @@ We will use model for 256x256 resolution as example. Also, models for hf_hub_download(repo_id=REPO_ID, filename="model.ckpt", local_dir="./checkpoints/dynamicrafter_256_v1/", local_dir_use_symlinks=False) ckpt_path = "checkpoints/dynamicrafter_256_v1/model.ckpt" - config_file = "dynamicrafter/configs/inference_256_v1.0.yaml" + config_file = "DynamiCrafter/configs/inference_256_v1.0.yaml" config = OmegaConf.load(config_file) model_config = config.pop("model", OmegaConf.create()) model_config["params"]["unet_config"]["params"]["use_checkpoint"] = False @@ -252,11 +277,56 @@ We will use model for 256x256 resolution as example. Also, models for model = download_model() +.. parsed-literal:: + + Note: switching to '26e665cd6c174234238d2ded661e2e56f875d360'. + + You are in 'detached HEAD' state. You can look around, make experimental + changes and commit them, and you can discard any commits you make in this + state without impacting any branches by switching back to a branch. + + If you want to create a new branch to retain commits you create, you may + do so (now or later) by using -c with the switch command. Example: + + git switch -c + + Or undo this operation with: + + git switch - + + Turn off this advice by setting config variable advice.detachedHead to false + + HEAD is now at 26e665c add dataset + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/huggingface_hub/file_download.py:1204: UserWarning: `local_dir_use_symlinks` parameter is deprecated and will be ignored. 
The process to download files to a local folder has been updated and do not rely on symlinks anymore. You only need to pass a destination folder as`local_dir`. + For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/download#download-files-to-local-folder. + warnings.warn( + + + +.. parsed-literal:: + + model.ckpt: 0%| | 0.00/10.4G [00:00>> model checkpoint loaded. - + Convert the model to OpenVINO IR -------------------------------- @@ -333,6 +403,17 @@ Convert CLIP text encoder del cond_stage_model gc.collect(); + +.. parsed-literal:: + + WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11. + + +.. parsed-literal:: + + WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11. + + Convert CLIP image encoder ~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -355,6 +436,49 @@ resolutions. del model.embedder gc.collect(); + +.. parsed-literal:: + + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/utils/image.py:226: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if input.numel() == 0: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:573: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if size == input_size: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:579: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + antialias = antialias and (max(factors) > 1) + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:581: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if antialias: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:584: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ sigmas = (max((factors[0] - 1.0) / 2.0, 0.001), max((factors[1] - 1.0) / 2.0, 0.001)) + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:589: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + ks = int(max(2.0 * 2 * sigmas[0], 3)), int(max(2.0 * 2 * sigmas[1], 3)) + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/geometry/transform/affwarp.py:589: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + ks = int(max(2.0 * 2 * sigmas[0], 3)), int(max(2.0 * 2 * sigmas[1], 3)) + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/filters/gaussian.py:55: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + sigma = tensor([sigma], device=input.device, dtype=input.dtype) + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/filters/gaussian.py:55: TracerWarning: Converting a tensor to a Python float might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + sigma = tensor([sigma], device=input.device, dtype=input.dtype) + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/core/check.py:78: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if x_shape_to_check[i] != dim: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/filters/kernels.py:92: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + mean = tensor([[mean]], device=sigma.device, dtype=sigma.dtype) + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:101: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ if len(mean.shape) == 0 or mean.shape[0] == 1: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:103: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if len(std.shape) == 0 or std.shape[0] == 1: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:107: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if mean.shape and mean.shape[0] != 1: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:108: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if mean.shape[0] != data.shape[1] and mean.shape[:2] != data.shape[:2]: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:112: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! + if std.shape and std.shape[0] != 1: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:113: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if std.shape[0] != data.shape[1] and std.shape[:2] != data.shape[:2]: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:116: TracerWarning: torch.as_tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + mean = torch.as_tensor(mean, device=data.device, dtype=data.dtype) + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/kornia/enhance/normalize.py:117: TracerWarning: torch.as_tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + std = torch.as_tensor(std, device=data.device, dtype=data.dtype) + + Convert AE encoder ~~~~~~~~~~~~~~~~~~ @@ -377,6 +501,13 @@ Convert AE encoder del model.first_stage_model.encoder gc.collect(); + +.. 
parsed-literal:: + + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/DynamiCrafter/lvdm/modules/networks/ae_modules.py:67: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + w_ = w_ * (int(c)**(-0.5)) + + Convert Diffusion U-Net model ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -414,6 +545,21 @@ Convert Diffusion U-Net model del model.model.diffusion_model gc.collect(); + +.. parsed-literal:: + + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/DynamiCrafter/lvdm/modules/networks/openaimodel3d.py:556: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if l_context == 77 + t*16: ## !!! HARD CODE here + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/DynamiCrafter/lvdm/modules/networks/openaimodel3d.py:205: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ if batch_size: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/DynamiCrafter/lvdm/modules/networks/openaimodel3d.py:232: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if self.use_temporal_conv and batch_size: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/DynamiCrafter/lvdm/modules/networks/openaimodel3d.py:76: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + assert x.shape[1] == self.channels + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/dynamicrafter-animating-images/DynamiCrafter/lvdm/modules/networks/openaimodel3d.py:99: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + assert x.shape[1] == self.channels + + Convert AE decoder ~~~~~~~~~~~~~~~~~~ @@ -785,7 +931,7 @@ Run OpenVINO pipeline inference .. code:: ipython3 - image_path = "dynamicrafter/prompts/256/art.png" + image_path = "DynamiCrafter/prompts/256/art.png" prompt = "man fishing in a boat at sunset" seed = 234 image = Image.open(image_path) @@ -797,15 +943,15 @@ Run OpenVINO pipeline inference .. 
parsed-literal:: Seed set to 234 - /tmp/ipykernel_971108/2451984876.py:25: UserWarning: The given NumPy array is not writable, and PyTorch does not support non-writable tensors. This means writing to this tensor will result in undefined behavior. You may want to copy the array to protect its data or make it writable before converting it to a tensor. This type of warning will be suppressed for the rest of this program. (Triggered internally at ../torch/csrc/utils/tensor_numpy.cpp:206.) + /tmp/ipykernel_2173449/2451984876.py:25: UserWarning: The given NumPy array is not writable, and PyTorch does not support non-writable tensors. This means writing to this tensor will result in undefined behavior. You may want to copy the array to protect its data or make it writable before converting it to a tensor. This type of warning will be suppressed for the rest of this program. (Triggered internally at ../torch/csrc/utils/tensor_numpy.cpp:206.) img_tensor = torch.from_numpy(image).permute(2, 0, 1).float().to(model.device) - + .. parsed-literal:: - start: man fishing in a boat at sunset 2024-08-06 13:54:24 - Saved in man_fishing_in_a_boat_at_sunset.mp4. Time used: 164.28 seconds - + start: man fishing in a boat at sunset 2024-12-09 23:46:36 + Saved in man_fishing_in_a_boat_at_sunset.mp4. Time used: 194.37 seconds + .. code:: ipython3 @@ -828,7 +974,7 @@ Run OpenVINO pipeline inference - + @@ -1000,6 +1146,19 @@ To collect intermediate model inputs for calibration we should customize 0%| | 0/300 [00:00>> model checkpoint loaded. - + .. code:: ipython3 %%skip not $to_quantize.value - image_path = "dynamicrafter/prompts/256/art.png" + image_path = "DynamiCrafter/prompts/256/art.png" prompt = "man fishing in a boat at sunset" seed = 234 image = Image.open(image_path) @@ -1317,13 +1375,13 @@ Let’s run the optimized pipeline .. parsed-literal:: Seed set to 234 - + .. 
parsed-literal:: - start: man fishing in a boat at sunset 2024-08-06 15:09:26 - Saved in man_fishing_in_a_boat_at_sunset.mp4. Time used: 81.47 seconds - + start: man fishing in a boat at sunset 2024-12-10 01:17:34 + Saved in man_fishing_in_a_boat_at_sunset.mp4. Time used: 98.80 seconds + .. code:: ipython3 @@ -1345,7 +1403,7 @@ Let’s run the optimized pipeline - + Compare model file sizes @@ -1373,7 +1431,7 @@ Compare model file sizes encoder_first_stage_ir compression rate: 3.986 embedder_ir compression rate: 3.977 model_ir compression rate: 3.981 - + Compare inference time of the FP32 and INT8 models ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -1427,10 +1485,10 @@ models, we use median inference time on calibration subset. .. parsed-literal:: - FP32 latency: 162.304 - INT8 latency: 79.590 - Performance speed up: 2.039 - + FP32 latency: 193.524 + INT8 latency: 97.073 + Performance speed up: 1.994 + Interactive inference --------------------- @@ -1454,6 +1512,15 @@ to launch the interactive demo. use_quantized_models + + + +.. parsed-literal:: + + Checkbox(value=True, description='Use quantized models') + + + .. code:: ipython3 from functools import partial @@ -1472,9 +1539,23 @@ to launch the interactive demo. demo = make_demo(fn=get_image_fn) try: - demo.queue().launch(debug=True) + demo.queue().launch(debug=False) except Exception: - demo.queue().launch(debug=True, share=True) + demo.queue().launch(debug=False, share=True) # if you are launching remotely, specify server_name and server_port # demo.launch(server_name='your server name', server_port='server port in int') # Read more in the docs: https://gradio.app/docs/ + + +.. parsed-literal:: + + Running on local URL: http://127.0.0.1:7860 + + To create a public link, set `share=True` in `launch()`. 
+ + + + + + + diff --git a/docs/notebooks/efficient-sam-with-output.rst b/docs/notebooks/efficient-sam-with-output.rst index 2341db94e22f68..ce83a3675d1d8c 100644 --- a/docs/notebooks/efficient-sam-with-output.rst +++ b/docs/notebooks/efficient-sam-with-output.rst @@ -82,11 +82,22 @@ Prerequisites .. code:: ipython3 - %pip install -q "openvino>=2023.3.0" "nncf>=2.7.0" opencv-python "gradio>=4.13" "matplotlib>=3.4" torch torchvision tqdm --extra-index-url https://download.pytorch.org/whl/cpu + import platform + + %pip install -q "openvino>=2024.5.0" "nncf>=2.14.0" + %pip install -q "torch>=2.2.0" "torchaudio>=2.2.0" "torchvision>=0.17.0" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q opencv-python "gradio>=4.13" "matplotlib>=3.4" tqdm + + if platform.system() == "Darwin": + %pip install -q "numpy<2.0.0" .. parsed-literal:: + ERROR: Could not find a version that satisfies the requirement openvino>=2024.5.0 (from versions: 2021.3.0, 2021.4.0, 2021.4.1, 2021.4.2, 2022.1.0, 2022.2.0, 2022.3.0, 2022.3.1, 2022.3.2, 2023.0.0.dev20230119, 2023.0.0.dev20230217, 2023.0.0.dev20230407, 2023.0.0.dev20230427, 2023.0.0, 2023.0.1, 2023.0.2, 2023.1.0.dev20230623, 2023.1.0.dev20230728, 2023.1.0.dev20230811, 2023.1.0, 2023.2.0.dev20230922, 2023.2.0, 2023.3.0, 2024.0.0, 2024.1.0, 2024.2.0, 2024.3.0, 2024.4.0, 2024.4.1.dev20240926) + ERROR: No matching distribution found for openvino>=2024.5.0 + Note: you may need to restart the kernel to use updated packages. + Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. @@ -130,7 +141,7 @@ Prerequisites .. 
parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM Load PyTorch model @@ -203,6 +214,13 @@ build PyTorch model pt_model.eval(); + +.. parsed-literal:: + + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:303: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. + state_dict = torch.load(f, map_location="cpu") + + Run PyTorch model inference --------------------------- @@ -385,23 +403,23 @@ disk using ``openvino.save_model``. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:220: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:220: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if ( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:241: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:241: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert ( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:163: TracerWarning: Converting a tensor to a Python float might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:163: TracerWarning: Converting a tensor to a Python float might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! size = int(math.sqrt(xy_num)) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:164: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:164: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert size * size == xy_num - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:166: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:166: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if size != h or size != w: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:251: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam_encoder.py:251: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert x.shape[2] == num_patches - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:85: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:85: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if num_pts > self.decoder_max_num_input_points: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:92: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:92: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! elif num_pts < self.decoder_max_num_input_points: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:126: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam/EfficientSAM/efficient_sam/efficient_sam.py:126: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if output_w > 0 and output_h > 0: @@ -648,10 +666,10 @@ architecture type, we should specify ``transformer`` in ``model_type``. .. parsed-literal:: - 2024-11-22 00:51:57.265752: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-11-22 00:51:57.297997: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-12-10 01:35:21.740526: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-12-10 01:35:21.772231: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. 
- 2024-11-22 00:51:57.938257: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-12-10 01:35:22.412391: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -818,7 +836,7 @@ models, we use ``bencmark_app``. [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 29.71 ms + [ INFO ] Read model took 29.92 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] batched_images (node: batched_images) : f32 / [...] / [?,?,?,?] @@ -838,7 +856,7 @@ models, we use ``bencmark_app``. [ INFO ] ***NO_NAME*** (node: aten::reshape/Reshape_3) : f32 / [...] / [?,?,?,?,?] [ INFO ] ***NO_NAME*** (node: aten::reshape/Reshape_2) : f32 / [...] / [?,?,?] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 1398.31 ms + [ INFO ] Compile model took 1396.28 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -879,17 +897,17 @@ models, we use ``bencmark_app``. [ INFO ] Fill input 'batched_point_labels' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 15000 ms duration) [ INFO ] Benchmarking in full mode (inputs filling are included in measurement loop). 
- [ INFO ] First inference took 793.15 ms + [ INFO ] First inference took 850.98 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 55 iterations - [ INFO ] Duration: 17124.15 ms + [ INFO ] Count: 49 iterations + [ INFO ] Duration: 16117.80 ms [ INFO ] Latency: - [ INFO ] Median: 1829.15 ms - [ INFO ] Average: 1806.67 ms - [ INFO ] Min: 872.57 ms - [ INFO ] Max: 2037.03 ms - [ INFO ] Throughput: 3.21 FPS + [ INFO ] Median: 1890.12 ms + [ INFO ] Average: 1899.68 ms + [ INFO ] Min: 1013.52 ms + [ INFO ] Max: 2315.56 ms + [ INFO ] Throughput: 3.04 FPS .. code:: ipython3 @@ -915,7 +933,7 @@ models, we use ``bencmark_app``. [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 43.85 ms + [ INFO ] Read model took 43.16 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] batched_images (node: batched_images) : f32 / [...] / [?,?,?,?] @@ -935,7 +953,7 @@ models, we use ``bencmark_app``. [ INFO ] ***NO_NAME*** (node: aten::reshape/Reshape_3) : f32 / [...] / [?,?,?,?,?] [ INFO ] ***NO_NAME*** (node: aten::reshape/Reshape_2) : f32 / [...] / [?,?,?] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 1631.76 ms + [ INFO ] Compile model took 1639.65 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -976,17 +994,17 @@ models, we use ``bencmark_app``. [ INFO ] Fill input 'batched_point_labels' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 15000 ms duration) [ INFO ] Benchmarking in full mode (inputs filling are included in measurement loop). 
- [ INFO ] First inference took 583.55 ms + [ INFO ] First inference took 586.73 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 56 iterations - [ INFO ] Duration: 16266.69 ms + [ INFO ] Count: 55 iterations + [ INFO ] Duration: 15880.08 ms [ INFO ] Latency: - [ INFO ] Median: 1710.59 ms - [ INFO ] Average: 1692.97 ms - [ INFO ] Min: 713.08 ms - [ INFO ] Max: 1952.47 ms - [ INFO ] Throughput: 3.44 FPS + [ INFO ] Median: 1710.19 ms + [ INFO ] Average: 1694.56 ms + [ INFO ] Min: 569.82 ms + [ INFO ] Max: 1827.81 ms + [ INFO ] Throughput: 3.46 FPS Interactive segmentation demo @@ -1316,7 +1334,7 @@ Interactive segmentation demo .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/efficient-sam Running on local URL: http://127.0.0.1:7860 To create a public link, set `share=True` in `launch()`. 
diff --git a/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_17_1.png b/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_17_1.png index f9dfb53e3b8796..ee488196e09a35 100644 --- a/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_17_1.png +++ b/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_17_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cffb9233e156bb558299a8c9bd3931dad6999f9bf7f358b208549949411460d1 -size 1259114 +oid sha256:c724c8a2e1ea229d28fc4828d1e0f8e3709b56e66b4568cd5c300123a6b6990b +size 1259642 diff --git a/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_25_1.png b/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_25_1.png index 108e6e0e4564e0..25a70458403cd0 100644 --- a/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_25_1.png +++ b/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_25_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5760726cd720e435c5d3a85315e772a741d583553996d8cfe7833f5d941e79f3 -size 1260778 +oid sha256:8086bb37d6a8400d681ce701a0ccd8aca10ef94cbb1d2fd387ae08f06e26342a +size 1262788 diff --git a/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_36_1.png b/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_36_1.png index c767ab3d6193bd..cb5a9e6e89c825 100644 --- a/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_36_1.png +++ b/docs/notebooks/efficient-sam-with-output_files/efficient-sam-with-output_36_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3909739937c5c50e2b26b3cba0b8b30e98e13fee3eab6c4f382735ec82ae9250 -size 1261525 +oid sha256:a18bb4842ab402d752631d693ed64876b58b8cd3cff35bbb3342ba67b35f2c30 +size 1260902 diff --git a/docs/notebooks/encodec-audio-compression-with-output.rst 
b/docs/notebooks/encodec-audio-compression-with-output.rst index 4cf2479f638656..4d10def61a4a57 100644 --- a/docs/notebooks/encodec-audio-compression-with-output.rst +++ b/docs/notebooks/encodec-audio-compression-with-output.rst @@ -72,8 +72,6 @@ Install required dependencies: .. parsed-literal:: - ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. - torchvision 0.17.2+cpu requires torch==2.2.2, but you have torch 2.4.1+cpu which is incompatible. Note: you may need to restart the kernel to use updated packages. @@ -142,7 +140,7 @@ bandwidth. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:134: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:134: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. WeightNorm.apply(module, name, dim) @@ -302,7 +300,7 @@ similar as possible to the original. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:134: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:134: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. WeightNorm.apply(module, name, dim) @@ -402,13 +400,13 @@ with ``ov.save_model``. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:60: TracerWarning: Converting a tensor to a Python float might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:60: TracerWarning: Converting a tensor to a Python float might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! ideal_length = (math.ceil(n_frames) - 1) * stride + (kernel_size - padding_total) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:85: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:85: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert padding_left >= 0 and padding_right >= 0, (padding_left, padding_right) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:87: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:87: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! max_pad = max(padding_left, padding_right) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:89: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:89: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if length <= max_pad: @@ -428,11 +426,11 @@ with ``ov.save_model``. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/quantization/core_vq.py:358: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/quantization/core_vq.py:358: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. quantized_out = torch.tensor(0.0, device=q_indices.device) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/quantization/core_vq.py:359: TracerWarning: Iterating over a tensor might cause the trace to be incorrect. Passing a tensor of different shape won't change the number of iterations executed (and might lead to errors or silently give incorrect results). 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/quantization/core_vq.py:359: TracerWarning: Iterating over a tensor might cause the trace to be incorrect. Passing a tensor of different shape won't change the number of iterations executed (and might lead to errors or silently give incorrect results). for i, indices in enumerate(q_indices): - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:103: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/encodec/modules/conv.py:103: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert (padding_left + padding_right) <= x.shape[-1] diff --git a/docs/notebooks/fast-segment-anything-with-output.rst b/docs/notebooks/fast-segment-anything-with-output.rst index 9becf2719559bc..0071e2dca60e74 100644 --- a/docs/notebooks/fast-segment-anything-with-output.rst +++ b/docs/notebooks/fast-segment-anything-with-output.rst @@ -85,8 +85,6 @@ Install requirements .. parsed-literal:: - ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. 
- torchaudio 2.4.1+cpu requires torch==2.4.1, but you have torch 2.2.2+cpu which is incompatible. Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. @@ -158,7 +156,9 @@ model and generate a segmentation map. .. parsed-literal:: - 100%|██████████| 138M/138M [00:02<00:00, 48.9MB/s] + 100%|██████████| 138M/138M [00:03<00:00, 46.3MB/s] + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/ultralytics/nn/tasks.py:732: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. + ckpt = torch.load(file, map_location="cpu") @@ -170,8 +170,8 @@ model and generate a segmentation map. .. 
parsed-literal:: - image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/fast-segment-anything/coco_bike.jpg: 768x1024 37 objects, 642.9ms - Speed: 3.9ms preprocess, 642.9ms inference, 771.9ms postprocess per image at shape (1, 3, 768, 1024) + image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/fast-segment-anything/coco_bike.jpg: 768x1024 37 objects, 638.3ms + Speed: 3.4ms preprocess, 638.3ms inference, 500.4ms postprocess per image at shape (1, 3, 768, 1024) The model returns segmentation maps for all the objects on the image. @@ -209,15 +209,15 @@ tracing. The FastSAM model itself is based on YOLOv8 model. .. parsed-literal:: - Ultralytics YOLOv8.2.24 🚀 Python-3.8.10 torch-2.2.2+cpu CPU (Intel Core(TM) i9-10920X 3.50GHz) + Ultralytics YOLOv8.2.24 🚀 Python-3.8.10 torch-2.4.1+cpu CPU (Intel Core(TM) i9-10920X 3.50GHz) PyTorch: starting from 'FastSAM-x.pt' with input shape (1, 3, 1024, 1024) BCHW and output shape(s) ((1, 37, 21504), (1, 32, 256, 256)) (138.3 MB) OpenVINO: starting export with openvino 2024.4.0-16579-c3152d32c9c-releases/2024/4... 
- OpenVINO: export success ✅ 6.1s, saved as 'FastSAM-x_openvino_model/' (276.1 MB) + OpenVINO: export success ✅ 6.2s, saved as 'FastSAM-x_openvino_model/' (276.1 MB) - Export complete (9.1s) - Results saved to /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/fast-segment-anything + Export complete (9.2s) + Results saved to /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/fast-segment-anything Predict: yolo predict task=segment model=FastSAM-x_openvino_model imgsz=1024 Validate: yolo val task=segment model=FastSAM-x_openvino_model imgsz=1024 data=ultralytics/datasets/sa.yaml Visualize: https://netron.app @@ -321,8 +321,8 @@ pipeline. .. parsed-literal:: - image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/fast-segment-anything/coco_bike.jpg: 1024x1024 42 objects, 494.2ms - Speed: 6.6ms preprocess, 494.2ms inference, 30.3ms postprocess per image at shape (1, 3, 1024, 1024) + image 1/1 /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/fast-segment-anything/coco_bike.jpg: 1024x1024 42 objects, 498.5ms + Speed: 6.1ms preprocess, 498.5ms inference, 31.6ms postprocess per image at shape (1, 3, 1024, 1024) One can observe the converted model outputs in the next cell, they is @@ -521,6 +521,11 @@ repo <-with-output.html>`__. preset=nncf.QuantizationPreset.MIXED) +.. parsed-literal:: + + :7: TqdmExperimentalWarning: Using `tqdm.autonotebook.tqdm` in notebook mode. Use `tqdm.tqdm` instead to force console mode (e.g. in jupyter console) + + .. parsed-literal:: INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino @@ -615,8 +620,8 @@ calibration dataset to measure the performance. .. 
parsed-literal:: - Segmented in 72 seconds. - Resulting in 1.78 fps + Segmented in 68 seconds. + Resulting in 1.88 fps .. code:: ipython3 @@ -643,9 +648,9 @@ calibration dataset to measure the performance. .. parsed-literal:: - Segmented in 23 seconds - Resulting in 5.57 fps - That is 3.13 times faster! + Segmented in 21 seconds + Resulting in 6.1 fps + That is 3.24 times faster! Try out the converted pipeline diff --git a/docs/notebooks/florence2-with-output.rst b/docs/notebooks/florence2-with-output.rst index e4ab6fbcbd3a3b..7ec9ce6e6557ca 100644 --- a/docs/notebooks/florence2-with-output.rst +++ b/docs/notebooks/florence2-with-output.rst @@ -100,10 +100,10 @@ available model. By default, we will use .. parsed-literal:: - 2024-11-22 01:05:34.426758: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-11-22 01:05:34.462006: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-12-10 01:48:13.363088: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-12-10 01:48:13.396921: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. 
- 2024-11-22 01:05:35.115966: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-12-10 01:48:14.055295: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -193,31 +193,31 @@ pipeline. .. parsed-literal:: - config.json: 0%| | 0.00/2.43k [00:00 1 or self.sliding_window is not None: /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/chkpt/modeling_florence2.py:1205: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! is_causal = True if self.is_causal and attention_mask is None and tgt_len > 1 else False diff --git a/docs/notebooks/florence2-with-output_files/florence2-with-output_18_0.png b/docs/notebooks/florence2-with-output_files/florence2-with-output_18_0.png index c233468fe95f4e..0ffc56ebd94d65 100644 --- a/docs/notebooks/florence2-with-output_files/florence2-with-output_18_0.png +++ b/docs/notebooks/florence2-with-output_files/florence2-with-output_18_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d15ed97d6e50919caff2aee785bc4c90f91dcfcc9bb248f70e9d79bb203be64f -size 259663 +oid sha256:552934f1e05cf6d598ce249bb662530388c1f3335dc2a6af6c304825c8aa023a +size 259656 diff --git a/docs/notebooks/freevc-voice-conversion-with-output.rst b/docs/notebooks/freevc-voice-conversion-with-output.rst index eb1dffbcf5da08..69a935f4c4f78d 100644 --- a/docs/notebooks/freevc-voice-conversion-with-output.rst +++ b/docs/notebooks/freevc-voice-conversion-with-output.rst @@ -133,8 +133,8 @@ Install extra requirements Downloading... 
From: https://drive.google.com/uc?id=12-cB34qCTvByWT-QtOcZaqwwO21FLSqU&confirm=t&uuid=a703c43c-ccce-436c-8799-c11b88e9e7e4 - To: /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/WavLM-Large.pt - 100%|██████████| 1.26G/1.26G [00:26<00:00, 47.5MB/s] + To: /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/freevc-voice-conversion/FreeVC/wavlm/WavLM-Large.pt + 100%|██████████| 1.26G/1.26G [01:03<00:00, 19.9MB/s] .. code:: ipython3 @@ -153,7 +153,7 @@ Install extra requirements .. parsed-literal:: - checkpoints/freevc.pth: 0%| | 0.00/451M [00:00 - + Your browser does not support the audio element. diff --git a/docs/notebooks/glm-edge-v-with-output.rst b/docs/notebooks/glm-edge-v-with-output.rst new file mode 100644 index 00000000000000..2449d414d82594 --- /dev/null +++ b/docs/notebooks/glm-edge-v-with-output.rst @@ -0,0 +1,516 @@ +Visual-language assistant with GLM-Edge-V and OpenVINO +------------------------------------------------------ + +The +`GLM-Edge `__ +series is `Zhipu `__\ ’s attempt to meet +real-world deployment scenarios for edge devices. It consists of two +sizes of large language dialogue models and multimodal understanding +models (GLM-Edge-1.5B-Chat, GLM-Edge-4B-Chat, GLM-Edge-V-2B, +GLM-Edge-V-5B). Among them, the 1.5B / 2B models are mainly targeted at +platforms like mobile phones and car machines, while the 4B / 5B models +are aimed at platforms like PCs. Based on the technological advancements +of the GLM-4 series, some targeted adjustments have been made to the +model structure and size, balancing model performance, real-world +inference efficiency, and deployment convenience. 
Through deep +collaboration with partner enterprises and relentless efforts in +inference optimization, the GLM-Edge series models can run at extremely +high speeds on some edge platforms. + +In this tutorial we consider how to launch multimodal model GLM-Edge-V +using OpenVINO for creation multimodal chatbot. Additionally, we +optimize model to low precision using +`NNCF `__ + +**Table of contents:** + +- `Prerequisites <#prerequisites>`__ +- `Select Model <#select-model>`__ +- `Convert and Optimize model <#convert-and-optimize-model>`__ + + - `Compress model weights to + 4-bit <#compress-model-weights-to-4-bit>`__ + +- `Select inference device <#select-inference-device>`__ +- `Run OpenVINO model <#run-openvino-model>`__ +- `Interactive demo <#interactive-demo>`__ + +Installation Instructions +~~~~~~~~~~~~~~~~~~~~~~~~~ + +This is a self-contained example that relies solely on its own code. + +We recommend running the notebook in a virtual environment. You only +need a Jupyter server to start. For details, please refer to +`Installation +Guide `__. + +Prerequisites +------------- + + + +install required packages and setup helper functions. + +.. code:: ipython3 + + %pip install -q "torch>=2.1" "torchvision" "protobuf>=3.20" "gradio>=4.26" "Pillow" "accelerate" "tqdm" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "openvino>=2024.5.0" "nncf>=2.14.0" + + +.. parsed-literal:: + + Note: you may need to restart the kernel to use updated packages. 
+ ERROR: Could not find a version that satisfies the requirement openvino>=2024.5.0 (from versions: 2021.3.0, 2021.4.0, 2021.4.1, 2021.4.2, 2022.1.0, 2022.2.0, 2022.3.0, 2022.3.1, 2022.3.2, 2023.0.0.dev20230119, 2023.0.0.dev20230217, 2023.0.0.dev20230407, 2023.0.0.dev20230427, 2023.0.0, 2023.0.1, 2023.0.2, 2023.1.0.dev20230623, 2023.1.0.dev20230728, 2023.1.0.dev20230811, 2023.1.0, 2023.2.0.dev20230922, 2023.2.0, 2023.3.0, 2024.0.0, 2024.1.0, 2024.2.0, 2024.3.0, 2024.4.0, 2024.4.1.dev20240926) + ERROR: No matching distribution found for openvino>=2024.5.0 + Note: you may need to restart the kernel to use updated packages. + + +.. code:: ipython3 + + %pip install -q "git+https://github.com/huggingface/transformers" + + +.. parsed-literal:: + + error: subprocess-exited-with-error + + × Preparing metadata (pyproject.toml) did not run successfully. + │ exit code: 1 + ╰─> [6 lines of output] + + Cargo, the Rust package manager, is not installed or is not on PATH. + This package requires Rust and Cargo to compile extensions. Install it through + the system's package manager or via https://rustup.rs/ + + Checking for Rust toolchain.... + [end of output] + + note: This error originates from a subprocess, and is likely not a problem with pip. + error: metadata-generation-failed + + × Encountered error while generating package metadata. + ╰─> See above for output. + + note: This is an issue with the package mentioned above, not pip. + hint: See above for details. + Note: you may need to restart the kernel to use updated packages. + + +.. 
code:: ipython3 + + import requests + from pathlib import Path + + if not Path("glmv_helper.py").exists(): + r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/glm-edge-v/glmv_helper.py") + open("glmv_helper.py", "w").write(r.text) + + + if not Path("gradio_helper.py").exists(): + r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/glm-edge-v/gradio_helper.py") + open("gradio_helper.py", "w").write(r.text) + + if not Path("notebook_utils.py").exists(): + r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py") + open("notebook_utils.py", "w").write(r.text) + +Select Model +------------ + + + +The tutorial supports the following models from GLM-Edge-V model family: + +- `glm-edge-v-2b `__ +- `glm-edge-v-5b `__ + +You can select one from the provided options below. + +.. code:: ipython3 + + import ipywidgets as widgets + + # Select model + model_ids = [ + "THUDM/glm-edge-v-2b", + "THUDM/glm-edge-v-5b", + ] + + model_dropdown = widgets.Dropdown( + options=model_ids, + value=model_ids[0], + description="Model:", + disabled=False, + ) + + model_dropdown + + + + +.. parsed-literal:: + + Dropdown(description='Model:', options=('THUDM/glm-edge-v-2b', 'THUDM/glm-edge-v-5b'), value='THUDM/glm-edge-v… + + + +Convert and Optimize model +-------------------------- + + + +GLM-Edge-V is a PyTorch model. OpenVINO supports PyTorch models via +conversion to OpenVINO Intermediate Representation (IR). `OpenVINO model +conversion +API `__ +should be used for these purposes. ``ov.convert_model`` function accepts +original PyTorch model instance and example input for tracing and +returns ``ov.Model`` representing this model in OpenVINO framework. +Converted model can be used for saving on disk using ``ov.save_model`` +function or directly loading on device using ``core.compile_model``. 
+ +The script ``glmv_helper.py`` contains a helper function for model +conversion; please check its content if you are interested in conversion +details. + +.. raw:: html + +
+ +Click here for more detailed explanation of conversion steps GLM-Edge-V +is an autoregressive transformer generative model, which means that each next +model step depends on the model output from the previous step. The generation +approach is based on the assumption that the probability distribution of +a word sequence can be decomposed into the product of conditional next +word distributions. In other words, the model predicts the next token in a +loop guided by previously generated tokens until the stop condition is +reached (a generated sequence of maximum length or an end-of-string +token obtained). The way the next token will be selected over predicted +probabilities is driven by the selected decoding methodology. You can +find more information about the most popular decoding methods in this +blog. The entry point for the generation process for models from the +Hugging Face Transformers library is the ``generate`` method. You can +find more information about its parameters and configuration in the +documentation. To preserve flexibility in the selection of the decoding +methodology, we will convert only model inference for one step. + +GLM-Edge-V model consists of 3 parts: + +- **Vision Model** for encoding input images into embedding space. +- **Embedding Model** for converting input text tokens into embedding + space +- **Language Model** for generating an answer based on input embeddings + provided by Image Encoder and Input Embedding models. + +.. raw:: html + +
+ +Compress model weights to 4-bit +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +For reducing memory +consumption, weights compression optimization can be applied using +`NNCF `__. + +.. raw:: html + +
+ +Click here for more details about weight compression Weight compression +aims to reduce the memory footprint of a model. It can also lead to +significant performance improvement for large memory-bound models, such +as Large Language Models (LLMs). LLMs and other models, which require +extensive memory to store the weights during inference, can benefit from +weight compression in the following ways: + +- enabling the inference of exceptionally large models that cannot be + accommodated in the memory of the device; + +- improving the inference performance of the models by reducing the + latency of the memory access when computing the operations with + weights, for example, Linear layers. + +`Neural Network Compression Framework +(NNCF) `__ provides 4-bit / +8-bit mixed weight quantization as a compression method primarily +designed to optimize LLMs. The main difference between weights +compression and full model quantization (post-training quantization) is +that activations remain floating-point in the case of weights +compression, which leads to better accuracy. Weight compression for +LLMs provides a solid inference performance improvement which is on par +with the performance of the full model quantization. In addition, weight +compression is data-free and does not require a calibration dataset, +making it easy to use. + +``nncf.compress_weights`` function can be used for performing weights +compression. The function accepts an OpenVINO model and other +compression parameters. Compared to INT8 compression, INT4 compression +improves performance even more, but introduces a minor drop in +prediction quality. + +More details about weights compression can be found in `OpenVINO +documentation `__. + +.. raw:: html + +
+ +.. code:: ipython3 + + from pathlib import Path + import nncf + from glmv_helper import convert_glmv_model + + + model_id = model_dropdown.value + out_dir = Path("model") / Path(model_id).name / "INT4" + compression_configuration = { + "mode": nncf.CompressWeightsMode.INT4_SYM, + "group_size": 64, + "ratio": 0.6, + } + convert_glmv_model(model_id, out_dir, compression_configuration) + + +.. parsed-literal:: + + INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino + + +.. parsed-literal:: + + 2024-12-10 01:51:54.756921: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-12-10 01:51:54.790860: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. + 2024-12-10 01:51:55.339388: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + + +.. parsed-literal:: + + ⌛ glm-edge-v-2b conversion started. Be patient, it may takes some time. + ⌛ Load Original model + ✅ Original model successfully loaded + ⌛ Convert Input embedding model + WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11. + + +.. parsed-literal:: + + [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:5006: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead + warnings.warn( + `loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`. + + +.. parsed-literal:: + + ✅ Input embedding model successfully converted + ⌛ Convert Image embedding model + + +.. parsed-literal:: + + /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/THUDM/glm-edge-v-2b/30c2bc691c9d46433abfd450e04441458d503f34/siglip.py:48: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + grid_size = int(s**0.5) + /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/THUDM/glm-edge-v-2b/30c2bc691c9d46433abfd450e04441458d503f34/siglip.py:53: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results. + image_emb = torch.cat([self.boi.repeat(len(image_emb), 1, 1), image_emb, self.eoi.repeat(len(image_emb), 1, 1)], dim=1) + + +.. parsed-literal:: + + ✅ Image embedding model successfully converted + ⌛ Convert Language model + + +.. parsed-literal:: + + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/cache_utils.py:458: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. 
Passing a tensor of different shape might lead to errors or silently give incorrect results. + or len(self.key_cache[layer_idx]) == 0 # the layer has no cache + /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/THUDM/glm-edge-v-2b/30c2bc691c9d46433abfd450e04441458d503f34/modeling_glm.py:995: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if sequence_length != 1: + /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/THUDM/glm-edge-v-2b/30c2bc691c9d46433abfd450e04441458d503f34/modeling_glm.py:153: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + rotary_dim = int(q.shape[-1] * partial_rotary_factor) + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/cache_utils.py:443: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results. + elif len(self.key_cache[layer_idx]) == 0: # fills previously skipped layers; checking for tensor causes errors + /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/THUDM/glm-edge-v-2b/30c2bc691c9d46433abfd450e04441458d503f34/modeling_glm.py:249: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! + if attn_output.size() != (bsz, self.num_heads, q_len, self.head_dim): + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:168: UserWarning: The .grad attribute of a Tensor that is not a leaf Tensor is being accessed. Its .grad attribute won't be populated during autograd.backward(). If you indeed want the .grad field to be populated for a non-leaf Tensor, use .retain_grad() on the non-leaf Tensor. If you access the non-leaf Tensor by mistake, make sure you access the leaf Tensor instead. See github.com/pytorch/pytorch/pull/30531 for more informations. (Triggered internally at aten/src/ATen/core/TensorBody.h:489.) + if a.grad is not None: + + +.. parsed-literal:: + + ✅ Language model successfully converted + ⌛ Weights compression with int4_sym mode started + + + +.. parsed-literal:: + + Output() + + + + + + + + + +.. parsed-literal:: + + INFO:nncf:Statistics of the bitwidth distribution: + ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ + │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ + ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ + │ 8 │ 45% (115 / 169) │ 40% (114 / 168) │ + ├────────────────┼─────────────────────────────┼────────────────────────────────────────┤ + │ 4 │ 55% (54 / 169) │ 60% (54 / 168) │ + ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ + + + +.. parsed-literal:: + + Output() + + + + + + + + + +.. parsed-literal:: + + ✅ Weights compression finished + ✅ glm-edge-v-2b model conversion finished. You can find results in model/glm-edge-v-2b/INT4 + + +Select inference device +----------------------- + + + +.. 
code:: ipython3 + + from notebook_utils import device_widget + + device = device_widget(default="AUTO", exclude=["NPU"]) + + device + + + + +.. parsed-literal:: + + Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') + + + +Run OpenVINO model +------------------ + + + +``OvGLMv`` class provides convenient way for running model. It accepts +directory with converted model and inference device as arguments. For +running model we will use ``generate`` method. + +.. code:: ipython3 + + from glmv_helper import OvGLMv + + model = OvGLMv(out_dir, device.value) + +.. code:: ipython3 + + import requests + from PIL import Image + + url = "https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/d5fbbd1a-d484-415c-88cb-9986625b7b11" + image = Image.open(requests.get(url, stream=True).raw) + + query = "Please describe this picture" + + print(f"Question:\n {query}") + image + + +.. parsed-literal:: + + Question: + Please describe this picture + + + + +.. image:: glm-edge-v-with-output_files/glm-edge-v-with-output_13_1.png + + + +.. code:: ipython3 + + from transformers import TextStreamer, AutoImageProcessor, AutoTokenizer + import torch + + messages = [{"role": "user", "content": [{"type": "image"}, {"type": "text", "text": query}]}] + + processor = AutoImageProcessor.from_pretrained(out_dir, trust_remote_code=True) + tokenizer = AutoTokenizer.from_pretrained(out_dir, trust_remote_code=True) + inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_dict=True, tokenize=True, return_tensors="pt").to("cpu") + generate_kwargs = { + **inputs, + "pixel_values": torch.tensor(processor(image).pixel_values).to("cpu"), + "max_new_tokens": 100, + "do_sample": True, + "top_k": 20, + "streamer": TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True), + } + + print("Answer:") + output = model.generate(**generate_kwargs) + + +.. 
parsed-literal:: + + Answer: + The image depicts a cat resting inside a cardboard box placed on a soft carpeted floor. The cat is lying with its head towards the bottom of the box, and its front paws are stretched out with the right one slightly forward, while its back and hind legs are positioned in the box. The box appears to be in partial disassembly, with the flaps folded down and one side raised slightly off the ground. The cat's fur is well-groomed and + + +Interactive demo +---------------- + + + +.. code:: ipython3 + + from gradio_helper import make_demo + + demo = make_demo(model, processor, tokenizer) + + try: + demo.launch(debug=False, height=600) + except Exception: + demo.launch(debug=False, share=True, height=600) + # if you are launching remotely, specify server_name and server_port + # demo.launch(server_name='your server name', server_port='server port in int') + # Read more in the docs: https://gradio.app/docs/ + + +.. parsed-literal:: + + Running on local URL: http://127.0.0.1:7860 + + To create a public link, set `share=True` in `launch()`. 
+ + + + + + + diff --git a/docs/notebooks/glm-edge-v-with-output_files/glm-edge-v-with-output_13_1.jpg b/docs/notebooks/glm-edge-v-with-output_files/glm-edge-v-with-output_13_1.jpg new file mode 100644 index 00000000000000..c6aeec77cd3cb2 --- /dev/null +++ b/docs/notebooks/glm-edge-v-with-output_files/glm-edge-v-with-output_13_1.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fc0d22d75f23474fb4f8aec8c0bf0fdf5d9377f3379e82a3887003e6da47e7e +size 60425 diff --git a/docs/notebooks/glm-edge-v-with-output_files/glm-edge-v-with-output_13_1.png b/docs/notebooks/glm-edge-v-with-output_files/glm-edge-v-with-output_13_1.png new file mode 100644 index 00000000000000..c6673a757ab5dc --- /dev/null +++ b/docs/notebooks/glm-edge-v-with-output_files/glm-edge-v-with-output_13_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c715d8adee4bf7519690de20b57ef2edaa2f914c86a64d107f99a919dcdad218 +size 854224 diff --git a/docs/notebooks/grounded-segment-anything-with-output.rst b/docs/notebooks/grounded-segment-anything-with-output.rst index a51ce8249239f9..6449fb1a6a9507 100644 --- a/docs/notebooks/grounded-segment-anything-with-output.rst +++ b/docs/notebooks/grounded-segment-anything-with-output.rst @@ -201,7 +201,7 @@ Download checkpoints and load PyTorch models .. parsed-literal:: - checkpoints/groundingdino_swint_ogc.pth: 0%| | 0.00/662M [00:00 + @@ -215,7 +215,7 @@ Do Inference .. 
parsed-literal:: - + diff --git a/docs/notebooks/hello-segmentation-with-output_files/hello-segmentation-with-output_11_1.png b/docs/notebooks/hello-segmentation-with-output_files/hello-segmentation-with-output_11_1.png new file mode 100644 index 00000000000000..3677caabff4380 --- /dev/null +++ b/docs/notebooks/hello-segmentation-with-output_files/hello-segmentation-with-output_11_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76113c575caa9c8a8aca45d3ec6ebd7a4b513dadffd8e9e63861a7a041d7e5de +size 249032 diff --git a/docs/notebooks/hello-world-with-output.rst b/docs/notebooks/hello-world-with-output.rst index 5bd1216db29701..94d6dca5798876 100644 --- a/docs/notebooks/hello-world-with-output.rst +++ b/docs/notebooks/hello-world-with-output.rst @@ -98,13 +98,13 @@ Download the Model and data samples .. parsed-literal:: - artifacts/v3-small_224_1.0_float.xml: 0%| | 0.00/294k [00:00=4.33.0" "torch>=2.1.0" %pip install -q ipywidgets - %pip install -q "openvino>=2023.1.0" + %pip install -q "openvino>=2023.1.0" "Pillow" .. parsed-literal:: @@ -132,10 +132,10 @@ tutorials `__. from optimum.intel.openvino import OVModelForSequenceClassification - -.. parsed-literal:: - - huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks... - To disable this warning, you can either: - - Avoid using `tokenizers` before the fork if possible - - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false) - - Initialize and Convert the Model Automatically using OVModel class ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -436,7 +427,7 @@ Full list of supported arguments available via ``--help`` .. 
parsed-literal:: - 2024-11-22 01:15:03.858078: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-12-10 01:57:20.152345: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT usage: optimum-cli export openvino [-h] -m MODEL [--task TASK] [--framework {pt,tf}] [--trust-remote-code] [--weight-format {fp32,fp16,int8,int4,mxfp4,nf4}] @@ -445,8 +436,10 @@ Full list of supported arguments available via ``--help`` [--pad-token-id PAD_TOKEN_ID] [--ratio RATIO] [--sym] [--group-size GROUP_SIZE] + [--backup-precision {none,int8_sym,int8_asym}] [--dataset DATASET] [--all-layers] [--awq] [--scale-estimation] [--gptq] + [--lora-correction] [--sensitivity-metric SENSITIVITY_METRIC] [--num-samples NUM_SAMPLES] [--disable-stateful] @@ -467,20 +460,20 @@ Full list of supported arguments available via ``--help`` --task TASK The task to export the model for. If not specified, the task will be auto-inferred based on the model. Available tasks depend on the model, but are among: - ['audio-xvector', 'image-text-to-text', 'mask- - generation', 'text-generation', 'masked-im', 'image- - classification', 'token-classification', 'question- - answering', 'automatic-speech-recognition', 'multiple- - choice', 'image-segmentation', 'semantic- - segmentation', 'text2text-generation', 'feature- - extraction', 'image-to-text', 'text-to-audio', 'text- - to-image', 'zero-shot-object-detection', 'inpainting', - 'zero-shot-image-classification', 'object-detection', - 'text-classification', 'image-to-image', 'sentence- - similarity', 'audio-frame-classification', 'depth- - estimation', 'audio-classification', 'fill-mask']. For - decoder models, use `xxx-with-past` to export the - model using past key values in the decoder. 
+ ['zero-shot-object-detection', 'multiple-choice', + 'audio-xvector', 'masked-im', 'text2text-generation', + 'inpainting', 'image-segmentation', 'semantic- + segmentation', 'question-answering', 'token- + classification', 'audio-frame-classification', + 'feature-extraction', 'text-to-audio', 'image-to- + image', 'fill-mask', 'automatic-speech-recognition', + 'image-classification', 'text-classification', 'zero- + shot-image-classification', 'object-detection', + 'image-to-text', 'audio-classification', 'sentence- + similarity', 'depth-estimation', 'text-to-image', + 'mask-generation', 'text-generation']. For decoder + models, use `xxx-with-past` to export the model using + past key values in the decoder. --framework {pt,tf} The framework to use for the export. If not provided, will attempt to use the local checkpoint's original framework or what is available in the environment. @@ -514,12 +507,27 @@ Full list of supported arguments available via ``--help`` --group-size GROUP_SIZE The group size to use for quantization. Recommended value is 128 and -1 uses per-column quantization. + --backup-precision {none,int8_sym,int8_asym} + Defines a backup precision for mixed-precision weight + compression. Only valid for int4 weight format. If not + provided, backup precision is int8_asym. 'none' stands + for original floating-point precision of the model + weights, in this case weights are retained in their + original precision without any quantization. + 'int8_sym' stands for 8-bit integer symmetric + quantization without zero point. 'int8_asym' stands + for 8-bit integer asymmetric quantization with zero + points per each quantization group. --dataset DATASET The dataset used for data-aware compression or - quantization with NNCF. You can use the one from the - list ['wikitext2','c4','c4-new'] for language models - or ['conceptual_captions','laion/220k-GPT4Vision- - captions-from-LIVIS','laion/filtered-wit'] for - diffusion models. + quantization with NNCF. 
For language models you can + use the one from the list + ['auto','wikitext2','c4','c4-new']. With 'auto' the + dataset will be collected from model's generations. + For diffusion models it should be on of + ['conceptual_captions','laion/220k-GPT4Vision- + captions-from-LIVIS','laion/filtered-wit']. For visual + language models the dataset must be set to + 'contextual'. --all-layers Whether embeddings and last MatMul layers should be compressed to INT4. If not provided an weight compression is applied, they are compressed to INT8. @@ -527,7 +535,7 @@ Full list of supported arguments available via ``--help`` generation quality of INT4-compressed LLMs, but requires additional time for tuning weights on a calibration dataset. To run AWQ, please also provide a - dataset argument. Note: it's possible that there will + dataset argument. Note: it is possible that there will be no matching patterns in the model to apply AWQ, in such case it will be skipped. --scale-estimation Indicates whether to apply a scale estimation @@ -541,9 +549,15 @@ Full list of supported arguments available via ``--help`` to minimize the difference between activations of a compressed and original layer. Please note, that applying GPTQ takes additional memory and time. + --lora-correction Indicates whether to apply LoRA Correction algorithm. + When enabled, this algorithm introduces low-rank + adaptation layers in the model that can recover + accuracy after weight compression at some cost of + inference latency. Please note, that applying LoRA + Correction algorithm takes additional memory and time. --sensitivity-metric SENSITIVITY_METRIC The sensitivity metric for assigning quantization - precision to layers. Can be one of the following: + precision to layers. It can be one of the following: ['weight_quantization_error', 'hessian_input_activation', 'mean_activation_variance', 'max_activation_variance', @@ -561,7 +575,7 @@ Full list of supported arguments available via ``--help`` performance. 
Use it when you intentionally want to use a stateless model, for example, to be compatible with existing OpenVINO native inference code that expects - kv-cache inputs and outputs in the model. + KV-cache inputs and outputs in the model. --disable-convert-tokenizer Do not add converted tokenizer and detokenizer OpenVINO models. @@ -585,7 +599,7 @@ compression: .. parsed-literal:: - 2024-11-22 01:15:09.417610: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-12-10 01:57:25.755800: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight'] - This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model). - This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model). @@ -636,9 +650,8 @@ OpenVINO `__ 3. `Stable Diffusion v2.1 using Optimum-Intel OpenVINO `__ 4. `Image generation with Stable Diffusion -XL `__ 5. `Instruction following using -Databricks Dolly 2.0 `__ 6. `Create -LLM-powered Chatbot using OpenVINO `__ 7. `Document -Visual Question Answering Using Pix2Struct and -OpenVINO `__ 8. `Automatic speech recognition -using Distil-Whisper and OpenVINO `__ +XL `__ 5. `Create LLM-powered Chatbot using +OpenVINO `__ 6. `Document Visual Question Answering +Using Pix2Struct and OpenVINO `__ 7. 
`Automatic +speech recognition using Distil-Whisper and +OpenVINO `__ diff --git a/docs/notebooks/hunyuan-dit-image-generation-with-output.rst b/docs/notebooks/hunyuan-dit-image-generation-with-output.rst index 01b20ab650824e..61c412fe6f5e62 100644 --- a/docs/notebooks/hunyuan-dit-image-generation-with-output.rst +++ b/docs/notebooks/hunyuan-dit-image-generation-with-output.rst @@ -36,6 +36,7 @@ using OpenVINO. Additionally, we will use `NNCF `__ for optimizing model in low precision. + **Table of contents:** - `Prerequisites <#prerequisites>`__ diff --git a/docs/notebooks/image-classification-quantization-with-output.rst b/docs/notebooks/image-classification-quantization-with-output.rst index 491ca0eed2881a..177ffd97209a57 100644 --- a/docs/notebooks/image-classification-quantization-with-output.rst +++ b/docs/notebooks/image-classification-quantization-with-output.rst @@ -194,7 +194,7 @@ Preprocessing for model obtained from training .. parsed-literal:: - 100%|██████████| 170498071/170498071 [00:07<00:00, 23705445.93it/s] + 100%|██████████| 170498071/170498071 [00:07<00:00, 22538385.96it/s] .. parsed-literal:: @@ -266,10 +266,10 @@ about supported parameters can be found on this .. parsed-literal:: - 2024-11-22 01:15:46.610115: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-11-22 01:15:46.641664: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-12-10 01:58:02.605724: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. 
To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-12-10 01:58:02.638370: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-11-22 01:15:47.181563: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT + 2024-12-10 01:58:03.190744: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT @@ -431,7 +431,7 @@ Tool `__ to speed up pipeline. + **Table of contents:** - `Prerequisites <#prerequisites>`__ @@ -82,7 +83,6 @@ pipeline. pipelines <#compare-inference-time-of-the-fp16-and-int8-pipelines>`__ - `Interactive demo <#interactive-demo>`__ - Installation Instructions ~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/docs/notebooks/janus-multimodal-generation-with-output.rst b/docs/notebooks/janus-multimodal-generation-with-output.rst new file mode 100644 index 00000000000000..a8a5cc599699c6 --- /dev/null +++ b/docs/notebooks/janus-multimodal-generation-with-output.rst @@ -0,0 +1,472 @@ +Multimodal understanding and generation with Janus and OpenVINO +=============================================================== + +Janus is a novel autoregressive framework that unifies multimodal +understanding and generation. It addresses the limitations of previous +approaches by decoupling visual encoding into separate pathways, while +still utilizing a single, unified transformer architecture for +processing. The decoupling not only alleviates the conflict between the +visual encoder’s roles in understanding and generation, but also +enhances the framework’s flexibility. Janus surpasses previous unified +model and matches or exceeds the performance of task-specific models. 
+The simplicity, high flexibility, and effectiveness of Janus make it a +strong candidate for next-generation unified multimodal models. + +More details can be found in the +`paper `__, original +`repository `__ and `model +card `__ + +In this tutorial we consider how to run and optimize Janus using +OpenVINO. + +**Table of contents:** + +- `Prerequisites <#prerequisites>`__ +- `Convert and Optimize model <#convert-and-optimize-model>`__ + + - `Compress model weights to + 4-bit <#compress-model-weights-to-4-bit>`__ + +- `Create Inference Pipeline <#create-inference-pipeline>`__ + + - `Select Inference Device <#select-inference-device>`__ + - `Run visual language chat <#run-visual-language-chat>`__ + - `Run Image generation <#run-image-generation>`__ + +- `Interactive demo <#interactive-demo>`__ + +Installation Instructions +~~~~~~~~~~~~~~~~~~~~~~~~~ + +This is a self-contained example that relies solely on its own code. + +We recommend running the notebook in a virtual environment. You only +need a Jupyter server to start. For details, please refer to +`Installation +Guide `__. + +Prerequisites +------------- + + + +.. code:: ipython3 + + from pathlib import Path + import requests + + utility_files = ["notebook_utils.py"] + local_helpers = ["ov_janus_helper.py", "gradio_helper.py"] + + base_utils_url = "https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/" + base_local_files_url = "https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/janus-multimodal-generation/" + + + for util_path in utility_files: + if not Path(util_path).exists(): + r = requests.get(base_utils_url + util_path) + with open(util_path, "w") as f: + f.write(r.text) + + for util_path in local_helpers: + if not Path(util_path).exists(): + r = requests.get(base_local_files_url + util_path) + with open(util_path, "w") as f: + f.write(r.text) + +.. 
code:: ipython3 + + import platform + + %pip install -q "gradio>=4.19" "torch>=2.2" "torchvision" "safetensors" "transformers>=4.38" "nncf>=2.14" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "git+https://github.com/deepseek-ai/Janus" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -U --pre "openvino>2024.5" --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + + if platform.system() == "Darwin": + %pip install -q "numpy<2.0.0" + +Convert and Optimize model +-------------------------- + + + +Janus is a PyTorch model. OpenVINO supports PyTorch models via conversion +to OpenVINO Intermediate Representation (IR). `OpenVINO model conversion +API `__ +should be used for these purposes. ``ov.convert_model`` function accepts +original PyTorch model instance and example input for tracing and +returns ``ov.Model`` representing this model in OpenVINO framework. +Converted model can be used for saving on disk using ``ov.save_model`` +function or directly loading on device using ``core.compile_model``. + +The script ``ov_janus_helper.py`` contains helper function for model +conversion, please check its content if you are interested in conversion +details. + +.. raw:: html + +
+ +.. raw:: html + + + +Click here for more detailed explanation of conversion steps + +.. raw:: html + + + +Janus is an autoregressive transformer generative model, which means that each +next model step depends on the model output from the previous step. The +generation approach is based on the assumption that the probability +distribution of a token sequence can be decomposed into the product of +conditional next token distributions. In other words, the model predicts the +next token in a loop guided by previously generated tokens until the +stop condition is reached (generated sequence of maximum length +or end of generation token obtained). The way the next token will be +selected over predicted probabilities is driven by the selected decoding +methodology. You can find more information about the most popular +decoding methods in this blog. The entry point for the generation +process for models from the Hugging Face Transformers library is the +``generate`` method. You can find more information about its parameters +and configuration in the documentation. To preserve flexibility in the +selection of the decoding methodology, we will convert only model inference for +one step. + +For both tasks, image understanding and image generation, Janus utilizes +the same basic transformer architecture in ``language_model`` and changes +only the components responsible for preparing input embeddings (joined image +embeddings prepared using ``vision_embeddings_model`` and text +embeddings prepared using ``text_embeddings_model`` for image +understanding and ``text_embeddings_model`` on the first step as initial +prompt embeddings and ``gen_embeddings_model`` for the next) and +converting the final hidden state to token probabilities (``lm_head`` for +text tokens, ``gen_head`` for image tokens). Additionally, for image +generation the model uses ``gen_decoder`` to convert generated image tokens +to images.
+ +To sum up above, model consists of 7 parts: \* **Image Embeddings** for +encoding input images into embedding space in image understanding task. +\* **Text Embedding** for conversion input text tokens into embedding +space \* **Gen Embeddings** for encoding image generation tokens to +embeddings space in image generation task \* **Language Model** for +generation hidden state guided by input embeddings \* **LM Head** for +conversion Language Model hidden state to text generation token +probabilities \* **Gen Head** for conversion Language Model hidden state +to image generation token probabilities \* **Gen Decoder** for decoding +generated image from latent token space to image tensor space. + +For preserving original model flexibility of switching between tasks, we +also should preserve original model partitioning and convert each model +part separately. + +.. raw:: html + +
+ +Compress model weights to 4-bit +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +For reducing memory +consumption, weights compression optimization can be applied using +`NNCF `__. + +.. raw:: html + +
+ +.. raw:: html + + + +Click here for more details about weight compression + +.. raw:: html + + + +Weight compression aims to reduce the memory footprint of a model. It +can also lead to significant performance improvement for large +memory-bound models, such as Large Language Models (LLMs). LLMs and +other models, which require extensive memory to store the weights during +inference, can benefit from weight compression in the following ways: + +- enabling the inference of exceptionally large models that cannot be + accommodated in the memory of the device; + +- improving the inference performance of the models by reducing the + latency of the memory access when computing the operations with + weights, for example, Linear layers. + +`Neural Network Compression Framework +(NNCF) `__ provides 4-bit / +8-bit mixed weight quantization as a compression method primarily +designed to optimize LLMs. The main difference between weights +compression and full model quantization (post-training quantization) is +that activations remain floating-point in the case of weights +compression which leads to a better accuracy. Weight compression for +LLMs provides a solid inference performance improvement which is on par +with the performance of the full model quantization. In addition, weight +compression is data-free and does not require a calibration dataset, +making it easy to use. + +``nncf.compress_weights`` function can be used for performing weights +compression. The function accepts an OpenVINO model and other +compression parameters. Compared to INT8 compression, INT4 compression +improves performance even more, but introduces a minor drop in +prediction quality. + +More details about weights compression, can be found in `OpenVINO +documentation `__. + +.. raw:: html + +
+ +.. code:: ipython3 + + import nncf + from ov_janus_helper import convert_janus_model + + model_id = "deepseek-ai/Janus-1.3B" + model_path = Path(model_id.split("/")[-1] + "-ov") + + compression_configuration = { + "mode": nncf.CompressWeightsMode.INT4_ASYM, + "group_size": 64, + "ratio": 1.0, + } + + # uncomment the line to see model conversion code + # ??convert_janus_model + + +.. parsed-literal:: + + INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, tensorflow, onnx, openvino + + +.. parsed-literal:: + + 2024-11-26 20:09:59.629857: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-26 20:09:59.643309: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered + WARNING: All log messages before absl::InitializeLog() is called are written to STDERR + E0000 00:00:1732637399.658322 1754417 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered + E0000 00:00:1732637399.662894 1754417 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered + 2024-11-26 20:09:59.679869: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. + + +.. parsed-literal:: + + Python version is above 3.10, patching the collections module. + + +.. 
parsed-literal:: + + /home/ea/work/py311/lib/python3.11/site-packages/transformers/models/auto/image_processing_auto.py:520: FutureWarning: The image_processor_class argument is deprecated and will be removed in v4.42. Please use `slow_image_processor_class`, or `fast_image_processor_class` instead + warnings.warn( + + +.. code:: ipython3 + + convert_janus_model(model_id, model_path, compression_configuration) + + +.. parsed-literal:: + + ✅ Janus-1.3B model already converted. You can find results in Janus-1.3B-ov + + +Create Inference Pipeline +------------------------- + + + +``OVJanusModel`` defined in ``ov_janus_helper.py`` provides a unified +interface for running model inference for both text and image +generation. It accepts the model directory and the target device for inference. + +Select Inference Device +~~~~~~~~~~~~~~~~~~~~~~~ + + + +.. code:: ipython3 + + from notebook_utils import device_widget + + device = device_widget("CPU", ["NPU"]) + + device + + + + +.. parsed-literal:: + + Dropdown(description='Device:', options=('CPU', 'AUTO'), value='CPU') + + + +.. code:: ipython3 + + from ov_janus_helper import OVJanusModel + from janus.models import VLChatProcessor + + # uncomment the line to see model inference code + + # ??OVJanusModel + +The ``VLChatProcessor`` class is used for pre- and postprocessing steps in +the original Janus model. Our model is also compatible with the same +processor code and we can reuse it. + +.. code:: ipython3 + + ov_model = OVJanusModel(model_path, device.value) + + processor = VLChatProcessor.from_pretrained(model_path) + + +.. parsed-literal:: + + Some kwargs in processor config are unused and will not have any effect: image_end_tag, sft_format, image_tag, num_image_tokens, add_special_token, mask_prompt, ignore_id, image_start_tag. + + +Run visual language chat +~~~~~~~~~~~~~~~~~~~~~~~~ + + + +..
code:: ipython3 + + from PIL import Image + from io import BytesIO + from janus.utils.io import load_pil_images + + + input_prompt = "Describe image in details" + image_path = Path("cat_in_box.png") + + if not image_path.exists(): + response = requests.get("https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/d5fbbd1a-d484-415c-88cb-9986625b7b11") + image = Image.open(BytesIO(response.content)).convert("RGB") + image.save(image_path) + + conversation = [ + { + "role": "User", + "content": f"{input_prompt}\n", + "images": [str(image_path)], + }, + {"role": "Assistant", "content": ""}, + ] + pil_images = load_pil_images(conversation) + +.. code:: ipython3 + + from transformers import TextStreamer + + prepare_inputs = processor(conversations=conversation, images=pil_images, force_batchify=True) + # run image encoder to get the image embeddings + inputs_embeds = ov_model.prepare_inputs_embeds(**prepare_inputs) + + streamer = TextStreamer(processor.tokenizer, skip_prompt=True, skip_special_tokens=True) + + print(f"Question:\n{input_prompt}") + display(pil_images[0]) + print("Answer:") + + answer_token_ids = ov_model.language_model.generate( + inputs_embeds=inputs_embeds, + attention_mask=prepare_inputs.attention_mask, + pad_token_id=processor.tokenizer.eos_token_id, + bos_token_id=processor.tokenizer.bos_token_id, + eos_token_id=processor.tokenizer.eos_token_id, + max_new_tokens=128, + do_sample=False, + streamer=streamer, + ) + + +.. parsed-literal:: + + Question: + Describe image in details + + + +.. image:: janus-multimodal-generation-with-output_files/janus-multimodal-generation-with-output_14_1.png + + +.. parsed-literal:: + + Answer: + The image depicts a gray and white tabby cat lying comfortably inside a cardboard box. The cat is lying on its back with its legs and paws spread out in a relaxed manner. The cat's eyes are closed, and it appears to be enjoying a nap. 
The box is placed on a light-colored carpet, and in the background, there is a portion of a white couch visible. The lighting in the room is soft and natural, suggesting that the photo was taken during the daytime. The overall scene conveys a sense of tranquility and contentment. + + +Run Image generation +~~~~~~~~~~~~~~~~~~~~ + + + +.. code:: ipython3 + + from ov_janus_helper import generate_image + + # Uncomment the line to see image generation code + # ??generate_image + +.. code:: ipython3 + + from transformers import set_seed + + set_seed(12345) + + images = generate_image( + ov_model, + processor, + "A close-up professional photo of Yorkshire Terrier on beach, extrimely detailed, hyper realistic, full hd", + output_dir=None, + parallel_size=1, + ) + + + +.. parsed-literal:: + + 0%| | 0/576 [00:00`__ is a Python library for +accelerator-oriented array computation and program transformation, +designed for high-performance numerical computing and large-scale +machine learning. JAX provides a familiar NumPy-style API for ease of +adoption by researchers and engineers. + +In this tutorial we will show how to convert JAX +`ViT `__ +and +`Mixer `__ +models in OpenVINO format. + +.. raw:: html + +
+ +.. raw:: html + + + +Click here for more detailed information about the models + +.. raw:: html + + + +Vision Transformer +~~~~~~~~~~~~~~~~~~ + +Overview of the model: authors split an image into fixed-size patches, +linearly embed each of them, add position embeddings, and feed the +resulting sequence of vectors to a standard Transformer encoder. In +order to perform classification, authors use the standard approach of +adding an extra learnable “classification token” to the sequence. + +MLP-Mixer +~~~~~~~~~ + +MLP-Mixer (Mixer for short) consists of per-patch linear embeddings, +Mixer layers, and a classifier head. Mixer layers contain one +token-mixing MLP and one channel-mixing MLP, each consisting of two +fully-connected layers and a GELU nonlinearity. Other components +include: skip-connections, dropout, and linear classifier head. + +.. raw:: html + +
+ + +**Table of contents:** + + +- `Prerequisites <#prerequisites>`__ +- `Load and run the original model and a + sample <#load-and-run-the-original-model-and-a-sample>`__ +- `Convert the model to OpenVINO + IR <#convert-the-model-to-openvino-ir>`__ +- `Compiling the model <#compiling-the-model>`__ +- `Run OpenVINO model inference <#run-openvino-model-inference>`__ + +Installation Instructions +~~~~~~~~~~~~~~~~~~~~~~~~~ + +This is a self-contained example that relies solely on its own code. + +We recommend running the notebook in a virtual environment. You only +need a Jupyter server to start. For details, please refer to +`Installation +Guide `__. + +Prerequisites +------------- + + + +.. code:: ipython3 + + import requests + + + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", + ) + open("notebook_utils.py", "w").write(r.text) + + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/cmd_helper.py", + ) + open("cmd_helper.py", "w").write(r.text) + +.. code:: ipython3 + + from cmd_helper import clone_repo + + + clone_repo("https://github.com/google-research/vision_transformer.git") + +.. code:: ipython3 + + %pip install -q "openvino>=2024.5.0" + %pip install -q Pillow "jax>=0.4.2" "absl-py>=0.12.0" "flax>=0.6.4" "pandas>=1.1.0" "tensorflow-cpu>=2.4.0" tf_keras tqdm "einops>=0.3.0" "ml-collections>=0.1.0" + +.. code:: ipython3 + + import PIL + import jax + import numpy as np + + from vit_jax import checkpoint + from vit_jax import models_vit + from vit_jax import models_mixer + from vit_jax.configs import models as models_config + + import openvino as ov + +.. code:: ipython3 + + import ipywidgets as widgets + + available_models = ["ViT-B_32", "Mixer-B_16"] + + + model_to_use = widgets.Select( + options=available_models, + value=available_models[0], + description="Select model:", + disabled=False, + ) + + model_to_use + + + + +.. 
parsed-literal:: + + Select(description='Select model:', options=('ViT-B_32', 'Mixer-B_16'), value='ViT-B_32') + + + +Load and run the original model and a sample +-------------------------------------------- + + + +Download a pre-trained model. + +.. code:: ipython3 + + from notebook_utils import download_file + + + model_name = model_to_use.value + model_config = models_config.MODEL_CONFIGS[model_name] + + + if model_name.startswith("Mixer"): + # Download model trained on imagenet2012 + model_name_path = download_file(f"https://storage.googleapis.com/mixer_models/imagenet1k/{model_name}.npz", filename=f"{model_name}_imagenet2012.npz") + model = models_mixer.MlpMixer(num_classes=1000, **model_config) + else: + # Download model pre-trained on imagenet21k and fine-tuned on imagenet2012. + model_name_path = download_file( + f"https://storage.googleapis.com/vit_models/imagenet21k+imagenet2012/{model_name}.npz", filename=f"{model_name}_imagenet2012.npz" + ) + model = models_vit.VisionTransformer(num_classes=1000, **model_config) + + + +.. parsed-literal:: + + ViT-B_32_imagenet2012.npz: 0%| | 0.00/337M [00:00`__ +should be used for these purposes. The ``ov.convert_model`` function accepts +the original JAX model instance and example input for tracing and returns +``ov.Model`` representing this model in the OpenVINO framework. The converted +model can be saved on disk using the ``ov.save_model`` function or +directly loaded on a device using ``core.compile_model``. + +Before conversion we need to create the +`Jaxprs `__ +(JAX’s internal intermediate representation (IR) of programs) object by +tracing a Python function using the +`jax.make_jaxpr `__ +function. ``jax.make_jaxpr`` takes a function as an argument that should +perform the forward pass. In our case it is a call of the ``model.apply`` +method. But ``model.apply`` requires not only input data, but also +``params`` and keyword argument ``train=False`` in our case.
To handle +it create a wrapper function ``model_apply`` that calls +``model.apply(params, x, train=False)``. + +.. code:: ipython3 + + from pathlib import Path + + + model_path = Path(f"models/{model_name}.xml") + + + def model_apply(x): + return model.apply(dict(params=params), x, train=False) + + + jaxpr = jax.make_jaxpr(model_apply)((np.array(img) / 128 - 1)[None, ...]) + + converted_model = ov.convert_model(jaxpr) + ov.save_model(converted_model, model_path) + +Compiling the model +------------------- + + + +Select device from dropdown list for running inference using OpenVINO. + +.. code:: ipython3 + + from notebook_utils import device_widget + + + core = ov.Core() + + device = device_widget() + + device + + + + +.. parsed-literal:: + + Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') + + + +.. code:: ipython3 + + compiled_model = core.compile_model(model_path, device.value) + +Run OpenVINO model inference +---------------------------- + +.. code:: ipython3 + + (logits_ov,) = list(compiled_model(data).values())[0] + + preds = np.array(jax.nn.softmax(logits_ov)) + for idx in preds.argsort()[:-11:-1]: + print(f"{preds[idx]:.5f} : {imagenet_labels[idx]}", end="") + + +.. 
parsed-literal:: + + 0.95255 : alp + 0.03881 : valley, vale + 0.00192 : cliff, drop, drop-off + 0.00173 : ski + 0.00059 : lakeside, lakeshore + 0.00049 : promontory, headland, head, foreland + 0.00036 : volcano + 0.00021 : snowmobile + 0.00017 : mountain_bike, all-terrain_bike, off-roader + 0.00017 : mountain_tent + diff --git a/docs/notebooks/jax-classification-to-openvino-with-output_files/jax-classification-to-openvino-with-output_16_0.jpg b/docs/notebooks/jax-classification-to-openvino-with-output_files/jax-classification-to-openvino-with-output_16_0.jpg new file mode 100644 index 00000000000000..4e389f1fcb75af --- /dev/null +++ b/docs/notebooks/jax-classification-to-openvino-with-output_files/jax-classification-to-openvino-with-output_16_0.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b9ce29fc2d800faa2667de9fc47770370f12c829217c22142bfcd1f5e1a2752 +size 33195 diff --git a/docs/notebooks/jax-classification-to-openvino-with-output_files/jax-classification-to-openvino-with-output_16_0.png b/docs/notebooks/jax-classification-to-openvino-with-output_files/jax-classification-to-openvino-with-output_16_0.png new file mode 100644 index 00000000000000..901c02bacbed30 --- /dev/null +++ b/docs/notebooks/jax-classification-to-openvino-with-output_files/jax-classification-to-openvino-with-output_16_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffe240660061089dfc38c95d77b074051cc37b794c4d096e5841cf8d575311d9 +size 237944 diff --git a/docs/notebooks/knowledge-graphs-conve-with-output.rst b/docs/notebooks/knowledge-graphs-conve-with-output.rst index aa8b1a20ea554f..4d01d076afd676 100644 --- a/docs/notebooks/knowledge-graphs-conve-with-output.rst +++ b/docs/notebooks/knowledge-graphs-conve-with-output.rst @@ -196,19 +196,19 @@ Settings: Including path to the serialized model files and input data files .. parsed-literal:: - data/kg_training_entids.txt: 0%| | 0.00/3.79k [00:00`__ .. 
parsed-literal:: - 2024-11-22 01:21:24.800927: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-11-22 01:21:24.825776: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-12-10 02:10:00.149367: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-12-10 02:10:00.174583: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. @@ -373,14 +373,14 @@ Vision model accept ``pixel_values`` and returns ``image_embeds``. .. parsed-literal:: [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:5006: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:5006: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. 
Please use `model.hf_quantizer.is_trainable` instead warnings.warn( `loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:452: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:452: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if not interpolate_pos_encoding and (height != self.image_size or width != self.image_size): - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:519: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:519: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len): - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:559: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:559: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim): @@ -408,7 +408,7 @@ Convert Image To Text Projection model .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:165: UserWarning: The .grad attribute of a Tensor that is not a leaf Tensor is being accessed. Its .grad attribute won't be populated during autograd.backward(). If you indeed want the .grad field to be populated for a non-leaf Tensor, use .retain_grad() on the non-leaf Tensor. If you access the non-leaf Tensor by mistake, make sure you access the leaf Tensor instead. See github.com/pytorch/pytorch/pull/30531 for more informations. 
(Triggered internally at aten/src/ATen/core/TensorBody.h:489.) + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:168: UserWarning: The .grad attribute of a Tensor that is not a leaf Tensor is being accessed. Its .grad attribute won't be populated during autograd.backward(). If you indeed want the .grad field to be populated for a non-leaf Tensor, use .retain_grad() on the non-leaf Tensor. If you access the non-leaf Tensor by mistake, make sure you access the leaf Tensor instead. See github.com/pytorch/pytorch/pull/30531 for more informations. (Triggered internally at aten/src/ATen/core/TensorBody.h:489.) if a.grad is not None: @@ -543,13 +543,13 @@ generated text by ``AutoProcessor``. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:859: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:859: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
if max_pos > self.weights.size(0): - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:1168: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:1168: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if input_shape[-1] > 1: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:975: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:975: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
if attention_mask.size() != (batch_size, 1, seq_length, src_len): - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:1261: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/kosmos2/modeling_kosmos2.py:1261: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if past_key_values_length > 0: @@ -1391,9 +1391,9 @@ pipelines, we use mean inference time on 7 samples. .. 
parsed-literal:: - FP32 pipeline: 2.727 seconds - Optimized pipeline: 1.146 seconds - Performance speed-up: 2.380 + FP32 pipeline: 2.760 seconds + Optimized pipeline: 1.136 seconds + Performance speed-up: 2.430 Interactive inference diff --git a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.jpg b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.jpg index c4966e68a0f7c6..8cbf8c6845558b 100644 --- a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.jpg +++ b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d99c65937fed48b5c1ef214891a3ded6fc4acabbad731ecafdf30d897cd8807b -size 121119 +oid sha256:90eb5c813dbef6b48b4d6e6acca89940550e650f29648178615bc5b73cfbad07 +size 123201 diff --git a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.png b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.png index 717e205ccbaa23..76747126a0b8a7 100644 --- a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.png +++ b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_29_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4e416163b28e55e213c884e64462792c0cb5f9ae1389961c3a5467ef2c1ac101 -size 1150960 +oid sha256:2c680f410cf278d774523ad5338a2a1c4a5fe705113306c7abbec065c2108968 +size 1150690 diff --git 
a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_48_1.png b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_48_1.png index 85633bcfcf04ae..3a29f664a441a1 100644 --- a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_48_1.png +++ b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_48_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7561941945a717b6a4f6e6bda157e86c62c5ff638acad518558c176a0ba21be5 -size 1149449 +oid sha256:39a74767a21f27ea1076d4d999630d18c019b8de712c05c75fca7ef1a7979199 +size 1148499 diff --git a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.jpg b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.jpg index 5aed31c2359d29..6586a554fa5fcc 100644 --- a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.jpg +++ b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:de647e8e1a39e8ee78c7c90a14f373b972e4f381f3348d6b28d0fe18a912eb51 -size 122484 +oid sha256:18799247eb9a64ea7a8828cd7587fcc1b428cc2d5e300dcf64393ce9bd0e4bc9 +size 124329 diff --git a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.png b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.png index 5eb34946e278d0..0193662b0a661b 
100644 --- a/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.png +++ b/docs/notebooks/kosmos2-multimodal-large-language-model-with-output_files/kosmos2-multimodal-large-language-model-with-output_8_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:77941b5ac0c4ca3379b3a66eb94aeaa24b8c68e225f6e9369ca1cb262feaab7a -size 1150730 +oid sha256:ea65e060c07381de785e4c03e02fadd599b89d605a00be7e62987cb582d00d97 +size 1150941 diff --git a/docs/notebooks/language-quantize-bert-with-output.rst b/docs/notebooks/language-quantize-bert-with-output.rst index 2ba6bca451ad0b..e9c92052b26bae 100644 --- a/docs/notebooks/language-quantize-bert-with-output.rst +++ b/docs/notebooks/language-quantize-bert-with-output.rst @@ -101,8 +101,8 @@ Imports .. parsed-literal:: - 2024-11-22 01:28:13.948145: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-11-22 01:28:13.973147: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-12-10 02:16:53.582571: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-12-10 02:16:53.608080: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. 
@@ -149,7 +149,7 @@ Perform the following: .. parsed-literal:: - model/MRPC.zip: 0%| | 0.00/387M [00:00=2.1.0" "torchvision" "torchaudio" --index-url https://download.pytorch.org/whl/cpu + %pip install -q "torch>=2.3.0" "torchvision" "torchaudio" --index-url https://download.pytorch.org/whl/cpu %pip install -q "git+https://github.com/huggingface/optimum-intel.git" --index-url https://download.pytorch.org/whl/cpu %pip install -q "nncf>=2.14.0" "sentencepiece" "tokenizers>=0.12.1" "transformers>=4.45.0" "gradio>=4.36" %pip install -q -U "openvino-tokenizers>=2024.5.0" "openvino>=2024.5.0" "openvino-genai>=2024.5.0"| diff --git a/docs/notebooks/llava-next-multimodal-chatbot-with-output.rst b/docs/notebooks/llava-next-multimodal-chatbot-with-output.rst index dc2a129c207ec5..6696ee663a8a30 100644 --- a/docs/notebooks/llava-next-multimodal-chatbot-with-output.rst +++ b/docs/notebooks/llava-next-multimodal-chatbot-with-output.rst @@ -59,9 +59,9 @@ Prerequisites .. code:: ipython3 - # %pip install -q "nncf>=2.14.0" "torch>=2.1" "transformers>=4.39.1" "accelerate" "pillow" "gradio>=4.26" "datasets>=2.14.6" "tqdm" --extra-index-url https://download.pytorch.org/whl/cpu - # %pip install -q -U "openvino>=2024.5.0" "openvino-tokenizers>=2024.5.0" "openvino-genai>=2024.5" - # %pip install -q "git+https://github.com/hugggingface/optimum-intel.git" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "nncf>=2.14.0" "torch>=2.1" "transformers>=4.39.1" "accelerate" "pillow" "gradio>=4.26" "datasets>=2.14.6" "tqdm" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q -U "openvino>=2024.5.0" "openvino-tokenizers>=2024.5.0" "openvino-genai>=2024.5" + %pip install -q "git+https://github.com/huggingface/optimum-intel.git" --extra-index-url https://download.pytorch.org/whl/cpu .. 
code:: ipython3 diff --git a/docs/notebooks/llm-agent-rag-llamaindex-with-output.rst b/docs/notebooks/llm-agent-rag-llamaindex-with-output.rst index 6aa437b9f2d37a..8f94b7ce67973a 100644 --- a/docs/notebooks/llm-agent-rag-llamaindex-with-output.rst +++ b/docs/notebooks/llm-agent-rag-llamaindex-with-output.rst @@ -230,7 +230,7 @@ code: if repo_name == "OpenVINO": hf_hub.snapshot_download(llm_model_id.value, local_dir=llm_model_path) else: - !optimum_cli(llm_model_id.value, llm_model_path, additional_args=-{"task": "text-generation-with-past", "weight-format": "int4"}) + optimum_cli(llm_model_id.value, llm_model_path, additional_args={"task": "text-generation-with-past", "weight-format": "int4"}) Download Embedding model ~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/docs/notebooks/llm-agent-react-langchain-with-output.rst b/docs/notebooks/llm-agent-react-langchain-with-output.rst index 2b1b289f90db0b..9adb0311542426 100644 --- a/docs/notebooks/llm-agent-react-langchain-with-output.rst +++ b/docs/notebooks/llm-agent-react-langchain-with-output.rst @@ -66,6 +66,29 @@ Prerequisites +.. code:: ipython3 + + import requests + from pathlib import Path + + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", + ) + open("notebook_utils.py", "w").write(r.text) + + if not Path("cmd_helper.py").exists(): + r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/cmd_helper.py") + open("cmd_helper.py", "w", encoding="utf-8").write(r.text) + + + + +.. parsed-literal:: + + 1491 + + + .. 
code:: ipython3 import os @@ -74,16 +97,27 @@ Prerequisites %pip install -Uq pip %pip uninstall -q -y optimum optimum-intel - %pip install --pre -Uq "openvino>=2024.2.0" openvino-tokenizers[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly - %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu \ + %pip install --pre -Uq "openvino>=2024.5.0" openvino-tokenizers[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu "transformers>=4.38.1" "langchain>=0.2.3" "langchain-huggingface>=0.1.2" "langchain-community>=0.2.4" "Wikipedia" \ "torch>=2.1" \ "datasets" \ "accelerate" \ + "pydantic<2.10.0" \ "gradio>=4.19" - %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu "transformers>=4.38.1" "langchain>=0.2.3" "langchain-community>=0.2.4" "Wikipedia" %pip install -q "git+https://github.com/huggingface/optimum-intel.git" \ "git+https://github.com/openvinotoolkit/nncf.git" + +.. parsed-literal:: + + Note: you may need to restart the kernel to use updated packages. + Note: you may need to restart the kernel to use updated packages. + Note: you may need to restart the kernel to use updated packages. + Note: you may need to restart the kernel to use updated packages. + Note: you may need to restart the kernel to use updated packages. + Note: you may need to restart the kernel to use updated packages. + + Create a tools -------------- @@ -178,7 +212,7 @@ previous agent tool invocations and the corresponding tool outputs. .. code:: ipython3 - PREFIX = """[INST]Respond to the human as helpfully and accurately as possible. You have access to the following tools:""" + PREFIX = """Respond to the human as helpfully and accurately as possible. 
You have access to the following tools:""" FORMAT_INSTRUCTIONS = """Use a json blob to specify a tool by providing an action key (tool name) and an action_input key (tool input). @@ -210,10 +244,10 @@ previous agent tool invocations and the corresponding tool outputs. "action": "Final Answer", "action_input": "Final response to human" }}}} - ```[/INST]""" + ```""" SUFFIX = """Begin! Reminder to ALWAYS respond with a valid json blob of a single action. Use tools if necessary. Respond directly if appropriate. Format is Action:```$JSON_BLOB```then Observation:. - Thought:[INST]""" + Thought:""" HUMAN_MESSAGE_TEMPLATE = "{input}\n\n{agent_scratchpad}" @@ -225,18 +259,32 @@ Create LLM Large Language Models (LLMs) are a core component of LangChain. LangChain does not serve its own LLMs, but rather provides a standard interface for interacting with many different LLMs. In this example, we -select ``Mistral-7B-Instruct-v0.3`` as LLM in agent pipeline. - -- **Mistral-7B-Instruct-v0.3** - The Mistral-7B-Instruct-v0.3 Large - Language Model (LLM) is an instruct fine-tuned version of the - Mistral-7B-v0.3. You can find more details about model in the `model - card `__, - `paper `__ and `release blog - post `__. +select following models as LLM in agent pipeline. + +- **qwen2.5-3b-instruct/qwen2.5-7b-instruct/qwen2.5-14b-instruct** - + Qwen2.5 is the latest series of Qwen large language models. Comparing + with Qwen2, Qwen2.5 series brings significant improvements in coding, + mathematics and general knowledge skills. Additionally, it brings + long-context and multiple languages support including Chinese, + English, French, Spanish, Portuguese, German, Italian, Russian, + Japanese, Korean, Vietnamese, Thai, Arabic, and more. For more + details, please refer to + `model_card `__, + `blog `__, + `GitHub `__, and + `Documentation `__. 
+- **llama-3.1-8b-instruct** - The Llama 3.1 instruction tuned text only + models (8B, 70B, 405B) are optimized for multilingual dialogue use + cases and outperform many of the available open source and closed + chat models on common industry benchmarks. More details about model + can be found in `Meta blog + post `__, `model + website `__ and `model + card `__. >\ **Note**: run model with demo, you will need to accept license agreement. >You must be a registered user in Hugging Face Hub. Please visit `HuggingFace model - card `__, + card `__, carefully read terms of usage and click accept button. You will need to use an access token for the code below to run. For more information on access tokens, refer to `this section of the @@ -269,31 +317,52 @@ folder. .. code:: ipython3 - from pathlib import Path + import ipywidgets as widgets + + llm_model_ids = ["Qwen/Qwen2.5-7B-Instruct", "Qwen/Qwen2.5-3B-Instruct", "Qwen/qwen2.5-14b-instruct", "meta-llama/Meta-Llama-3.1-8B-Instruct"] - model_id = "mistralai/Mistral-7B-Instruct-v0.3" - model_path = "Mistral-7B-Instruct-v0.3-ov-int4" + llm_model_id = widgets.Dropdown( + options=llm_model_ids, + value=llm_model_ids[0], + description="Model:", + disabled=False, + ) - if not Path(model_path).exists(): - !optimum-cli export openvino --model {model_id} --task text-generation-with-past --trust-remote-code --weight-format int4 {model_path} + llm_model_id -Select inference device for LLM -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +.. parsed-literal:: + + Dropdown(description='Model:', options=('Qwen/Qwen2.5-7B-Instruct', 'Qwen/Qwen2.5-3B-Instruct', 'Qwen/qwen2.5-… .. 
code:: ipython3 - import requests + from cmd_helper import optimum_cli - r = requests.get( - url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", - ) - open("notebook_utils.py", "w").write(r.text) + llm_model_path = llm_model_id.value.split("/")[-1] + repo_name = llm_model_id.value.split("/")[0] + if not Path(llm_model_path).exists(): + optimum_cli( + llm_model_id.value, llm_model_path, additional_args={"task": "text-generation-with-past", "weight-format": "int4", "group-size": "128", "ratio": "1.0"} + ) + +Select inference device for LLM +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +.. code:: ipython3 + from notebook_utils import device_widget device = device_widget("CPU", exclude=["NPU"]) + + device @@ -312,7 +381,7 @@ information `__. .. code:: ipython3 - from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline + from langchain_huggingface import HuggingFacePipeline from transformers.generation.stopping_criteria import StoppingCriteriaList, StoppingCriteria import openvino.properties as props @@ -346,7 +415,7 @@ information `__. stop_tokens = ["Observation:"] ov_llm = HuggingFacePipeline.from_model_id( - model_id=model_path, + model_id=llm_model_path, task="text-generation", backend="openvino", model_kwargs={ @@ -356,26 +425,16 @@ information `__. }, pipeline_kwargs={"max_new_tokens": 2048}, ) - ov_llm = ov_llm.bind(skip_prompt=True, stop=["Observation:"]) tokenizer = ov_llm.pipeline.tokenizer ov_llm.pipeline._forward_params["stopping_criteria"] = StoppingCriteriaList([StopSequenceCriteria(stop_tokens, tokenizer)]) +.. code:: ipython3 -.. parsed-literal:: - - 2024-06-07 23:17:16.804739: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. 
- 2024-06-07 23:17:16.807973: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used. - 2024-06-07 23:17:16.850235: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered - 2024-06-07 23:17:16.850258: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered - 2024-06-07 23:17:16.850290: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered - 2024-06-07 23:17:16.859334: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. - To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - 2024-06-07 23:17:17.692415: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT - You set `add_prefix_space`. The tokenizer needs to be converted from the slow tokenizers - The argument `trust_remote_code` is to be used along with export=True. It will be ignored. - Compiling the model to GPU ... - + from langchain_huggingface import ChatHuggingFace + + ov_chat = ChatHuggingFace(llm=ov_llm, verbose=True) + ov_chat = ov_chat.bind(skip_prompt=True, stop=["Observation:"]) You can get additional inference speed improvement with `Dynamic Quantization of activations and KV-cache quantization on @@ -409,7 +468,7 @@ outputs back to the agent, and repeats. 
from langchain.agents import AgentExecutor, StructuredChatAgent agent = StructuredChatAgent.from_llm_and_tools( - ov_llm, + ov_chat, tools, prefix=PREFIX, suffix=SUFFIX, @@ -438,57 +497,68 @@ prompt template. > Entering new AgentExecutor chain... - Thought: I can use the exponentiate and add tools to solve the first part, and then use the multiply tool for the second part, and finally the exponentiate tool again to square the result. + Thought: First, we need to take 3 to the fifth power. Then we will find the sum of twelve and three. After that, we multiply the first result by the second result. Finally, we'll square the whole result. Action: ``` { "action": "exponentiate", - "action_input": {"base": 3, "exponent": 5} + "action_input": { + "base": 3, + "exponent": 5 + } } ``` Observation: Observation: 243 - Thought: Now I need to add twelve and three + Thought:Next, let's find the sum of twelve and three. Action: ``` { "action": "add", - "action_input": {"first_int": 12, "second_int": 3} + "action_input": { + "first_int": 12, + "second_int": 3 + } } ``` Observation: Observation: 15 - Thought: Now I need to multiply the result by 243 + Thought:Now, we will multiply the result of \(3^5\) (which is 243) by the sum of 12 and 3 (which is 15). Action: ``` { "action": "multiply", - "action_input": {"first_int": 243, "second_int": 15} + "action_input": { + "first_int": 243, + "second_int": 15 + } } ``` Observation: Observation: 3645 - Thought: Finally, I need to square the result + Thought:Thought: Now, we need to square the result of the multiplication (3645). Action: ``` { "action": "exponentiate", - "action_input": {"base": 3645, "exponent": 2} + "action_input": { + "base": 3645, + "exponent": 2 + } } ``` - Observation: Observation: 13286025 - Thought: I know what to respond + Thought:Thought: I know what to respond Action: ``` { "action": "Final Answer", - "action_input": "The final answer is 13286025" + "action_input": "The final result is 13286025." 
} ``` @@ -500,7 +570,7 @@ prompt template. .. parsed-literal:: {'input': 'Take 3 to the fifth power and multiply that by the sum of twelve and three, then square the whole result', - 'output': 'The final answer is 13286025'} + 'output': 'The final result is 13286025.'} @@ -566,7 +636,7 @@ words generated by agent. .. parsed-literal:: - 'Page: OpenVINO\nSummary: OpenVINO is an open-source software toolkit for optimizing and deploying deep learning models. It enables programmers to develop scalable and efficient AI solutions with relatively few lines of code. It supports several popular model formats and categories, such as large language models, computer vision, and generative AI.\nActively developed by Intel, it prioritizes high-performance inference on Intel hardware but also supports ARM/ARM64 processors and encourages contributors to add new devices to the portfolio.\nBased in C++, it offers the following APIs: C/C++, Python, and Node.js (an early preview).\nOpenVINO is cross-platform and free for use under Apache License 2.0.\n\nPage: Stable Diffusion\nSummary: Stable Diffusion is a deep learning, text-to-image model released in 2022 based on diffusion techniques. It is considered to be a part of the ongoing artificial intelligence boom.\nIt is primarily used to generate detailed images conditioned on text descriptions, t' + 'Page: OpenVINO\nSummary: OpenVINO is an open-source software toolkit for optimizing and deploying deep learning models. It enables programmers to develop scalable and efficient AI solutions with relatively few lines of code. 
It supports several popular model formats and categories, such as large language models, computer vision, and generative AI.\nActively developed by Intel, it prioritizes high-performance inference on Intel hardware but also supports ARM/ARM64 processors and encourages contributors to add new devices to the portfolio.\nBased in C++, it offers the following APIs: C/C++, Python, and Node.js (an early preview).\nOpenVINO is cross-platform and free for use under Apache License 2.0.\n\nPage: Audacity (audio editor)\nSummary: Audacity is a free and open-source digital audio editor and recording application software, available for Windows, macOS, Linux, and other Unix-like operating systems. \nAs of December 6, 2022, Audacity is the most popular download at FossHub, with over 114.' @@ -643,7 +713,7 @@ In this examples, we will create 2 customized tools for .. parsed-literal:: - "{'current_condition': {'temp_C': '9', 'FeelsLikeC': '8', 'humidity': '93', 'weatherDesc': [{'value': 'Sunny'}], 'observation_time': '04:39 AM'}}" + "{'current_condition': {'temp_C': '0', 'FeelsLikeC': '-4', 'humidity': '86', 'weatherDesc': [{'value': 'Clear'}], 'observation_time': '12:16 AM'}}" @@ -657,7 +727,7 @@ Create AI agent demo with Gradio UI tools = [wikipedia, painting, weather] agent = StructuredChatAgent.from_llm_and_tools( - ov_llm, + ov_chat, tools, prefix=PREFIX, suffix=SUFFIX, @@ -703,7 +773,7 @@ Create AI agent demo with Gradio UI def request_cancel(): - ov_llm.pipeline.model.request.cancel() + ov_chat.llm.pipeline.model.request.cancel() .. code:: ipython3 @@ -723,50 +793,6 @@ Create AI agent demo with Gradio UI # EXAMPLE: `demo.launch(server_name='your server name', server_port='server port in int')` # To learn more please refer to the Gradio docs: https://gradio.app/docs/ - -.. parsed-literal:: - - - - > Entering new AgentExecutor chain... 
- Thought: I need to use the weather tool to get the current weather in London, then use the painting tool to generate a picture of Big Ben based on the weather information. - - Action: - ``` - { - "action": "weather", - "action_input": "London" - } - ``` - - Observation: - Observation: {'current_condition': {'temp_C': '9', 'FeelsLikeC': '8', 'humidity': '93', 'weatherDesc': [{'value': 'Sunny'}], 'observation_time': '04:39 AM'}} - Thought: I have the current weather in London. Now I can use the painting tool to generate a picture of Big Ben based on the weather information. - - Action: - ``` - { - "action": "painting", - "action_input": "Big Ben, sunny day" - } - ``` - - Observation: - Observation: {image_url: "https://image.pollinations.ai/prompt/Big%20Ben%2C%20sunny%20day"} - Thought: I have the image URL of Big Ben on a sunny day. Now I can respond to the human with the image URL. - - Action: - ``` - { - "action": "Final Answer", - "action_input": "Here is the image of Big Ben on a sunny day: https://image.pollinations.ai/prompt/Big%20Ben%2C%20sunny%20day" - } - ``` - Observation: - - > Finished chain. - - .. 
code:: ipython3 # please uncomment and run this cell for stopping gradio interface diff --git a/docs/notebooks/llm-agent-react-with-output.rst b/docs/notebooks/llm-agent-react-with-output.rst index aced34d99d90bd..791355276fd2fd 100644 --- a/docs/notebooks/llm-agent-react-with-output.rst +++ b/docs/notebooks/llm-agent-react-with-output.rst @@ -62,22 +62,22 @@ Prerequisites import os import requests - - + + r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", ) open("notebook_utils.py", "w").write(r.text) - + r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/pip_helper.py", ) open("pip_helper.py", "w").write(r.text) - + os.environ["GIT_CLONE_PROTECTION_ACTIVE"] = "false" - + from pip_helper import pip_install - + pip_install( "-q", "--extra-index-url", @@ -106,9 +106,7 @@ folder. Large Language Models (LLMs) are a core component of agent. LlamaIndex does not serve its own LLMs, but rather provides a standard interface for interacting with many different LLMs. In this example, we can select -``Qwen2.5`` as LLM in agent pipeline. - - +``Qwen2.5`` as LLM in agent pipeline. \* **qwen2.5-3b-instruct/qwen2.5-7b-instruct/qwen2.5-14b-instruct** - Qwen2.5 is the latest series of Qwen large language models. Comparing with Qwen2, Qwen2.5 series brings significant improvements in coding, @@ -124,16 +122,16 @@ Vietnamese, Thai, Arabic, and more. For more details, please refer to .. code:: ipython3 import ipywidgets as widgets - + llm_model_ids = ["Qwen/Qwen2.5-3B-Instruct", "Qwen/Qwen2.5-7B-Instruct", "Qwen/qwen2.5-14b-instruct"] - + llm_model_id = widgets.Dropdown( options=llm_model_ids, value=llm_model_ids[0], description="Model:", disabled=False, ) - + llm_model_id @@ -148,9 +146,9 @@ Vietnamese, Thai, Arabic, and more. For more details, please refer to .. 
code:: ipython3 from pathlib import Path - + llm_model_path = llm_model_id.value.split("/")[-1] - + if not Path(llm_model_path).exists(): !optimum-cli export openvino --model {llm_model_id.value} --task text-generation-with-past --trust-remote-code --weight-format int4 --group-size 128 --ratio 1.0 --sym {llm_model_path} @@ -162,9 +160,9 @@ Select inference device for LLM .. code:: ipython3 from notebook_utils import device_widget - + llm_device = device_widget("CPU", exclude=["NPU"]) - + llm_device @@ -228,15 +226,15 @@ guide `__ import openvino.properties as props import openvino.properties.hint as hints import openvino.properties.streams as streams - + import json import json5 import torch - + tokenizer = AutoTokenizer.from_pretrained(llm_model_path, trust_remote_code=True) - + ov_config = {hints.performance_mode(): hints.PerformanceMode.LATENCY, streams.num(): "1", props.cache_dir(): ""} - + llm = OVModelForCausalLM.from_pretrained( llm_model_path, device=llm_device.value, @@ -244,7 +242,7 @@ guide `__ config=AutoConfig.from_pretrained(llm_model_path, trust_remote_code=True), trust_remote_code=True, ) - + llm.generation_config.top_k = 1 llm.generation_config.max_length = 2000 @@ -262,31 +260,31 @@ received from tool calling.. class StopSequenceCriteria(StoppingCriteria): """ This class can be used to stop generation whenever a sequence of tokens is encountered. - + Args: stop_sequences (`str` or `List[str]`): The sequence (or list of sequences) on which to stop execution. tokenizer: The tokenizer used to decode the model outputs. 
""" - + def __init__(self, stop_sequences, tokenizer): if isinstance(stop_sequences, str): stop_sequences = [stop_sequences] self.stop_sequences = stop_sequences self.tokenizer = tokenizer - + def __call__(self, input_ids, scores, **kwargs) -> bool: decoded_output = self.tokenizer.decode(input_ids.tolist()[0]) return any(decoded_output.endswith(stop_sequence) for stop_sequence in self.stop_sequences) - - + + def text_completion(prompt: str, stop_words) -> str: im_end = "<|im_end|>" if im_end not in stop_words: stop_words = stop_words + [im_end] streamer = TextStreamer(tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True) - + stopping_criteria = StoppingCriteriaList([StopSequenceCriteria(stop_words, tokenizer)]) input_ids = torch.tensor([tokenizer.encode(prompt)]) generate_kwargs = dict( @@ -299,7 +297,7 @@ received from tool calling.. output = tokenizer.decode(output, errors="ignore") assert output.startswith(prompt) output = output[len(prompt) :].replace("<|endoftext|>", "").replace(im_end, "") - + for stop_str in stop_words: idx = output.find(stop_str) if idx != -1: @@ -341,13 +339,13 @@ parameter should be a sequence of messages that contains the .. code:: ipython3 TOOL_DESC = """{name_for_model}: Call this tool to interact with the {name_for_human} API. What is the {name_for_human} API useful for? {description_for_model} Parameters: {parameters}""" - + PROMPT_REACT = """Answer the following questions as best you can. You have access to the following APIs: - + {tools_text} - + Use the following format: - + Question: the input question you must answer Thought: you should always think about what to do Action: the action to take, should be one of [{tools_name_text}] @@ -356,9 +354,9 @@ parameter should be a sequence of messages that contains the ... (this Thought/Action/Action Input/Observation can be repeated zero or more times) Thought: I now know the final answer Final Answer: the final answer to the original input question - + Begin! 
- + Question: {query}""" Meanwhile we have to create function for consolidate the tools @@ -383,9 +381,9 @@ information and conversation history into the prompt template. raise NotImplementedError tools_text.append(tool) tools_text = "\n\n".join(tools_text) - + tools_name_text = ", ".join([tool_info["name_for_model"] for tool_info in list_of_tool_info]) - + messages = [{"role": "system", "content": "You are a helpful assistant."}] for i, (query, response) in enumerate(chat_history): if list_of_tool_info: @@ -399,9 +397,9 @@ information and conversation history into the prompt template. messages.append({"role": "user", "content": query}) if response: messages.append({"role": "assistant", "content": response}) - + prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False, return_tensors="pt") - + return prompt Create parser @@ -495,7 +493,7 @@ execute them according to the output of LLM. return str(ret) elif tool_name == "image_gen": import urllib.parse - + tool_args = tool_args.replace("(", "").replace(")", "") prompt = json5.loads(tool_args)["prompt"] prompt = urllib.parse.quote(prompt) @@ -505,11 +503,11 @@ execute them according to the output of LLM. ) else: raise NotImplementedError - - + + def llm_with_tool(prompt: str, history, list_of_tool_info=()): chat_history = [(x["user"], x["bot"]) for x in history] + [(prompt, "")] - + planning_prompt = build_input_text(chat_history, list_of_tool_info) text = "" while True: @@ -524,7 +522,7 @@ execute them according to the output of LLM. 
else: text += output break - + new_history = [] new_history.extend(history) new_history.append({"user": prompt, "bot": text}) @@ -539,7 +537,7 @@ Run agent history = [] query = "get the weather in London, and create a picture of Big Ben based on the weather information" - + response, history = llm_with_tool(prompt=query, history=history, list_of_tool_info=tools) diff --git a/docs/notebooks/llm-chatbot-generate-api-with-output.rst b/docs/notebooks/llm-chatbot-generate-api-with-output.rst index 817a34011fde2d..c09b463ae985d0 100644 --- a/docs/notebooks/llm-chatbot-generate-api-with-output.rst +++ b/docs/notebooks/llm-chatbot-generate-api-with-output.rst @@ -81,9 +81,9 @@ Install required dependencies .. code:: ipython3 import os - + os.environ["GIT_CLONE_PROTECTION_ACTIVE"] = "false" - + %pip install -Uq pip %pip uninstall -q -y optimum optimum-intel %pip install -q -U "openvino>=2024.3.0" openvino-tokenizers[transformers] openvino-genai @@ -103,12 +103,12 @@ Install required dependencies from pathlib import Path import requests import shutil - + # fetch model configuration - + config_shared_path = Path("../../utils/llm_config.py") config_dst_path = Path("llm_config.py") - + if not config_dst_path.exists(): if config_shared_path.exists(): try: @@ -127,7 +127,7 @@ Install required dependencies r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/llm_config.py") with open("llm_config.py", "w", encoding="utf-8") as f: f.write(r.text) - + if not Path("notebook_utils.py").exists(): r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py") open("notebook_utils.py", "w").write(r.text) @@ -238,7 +238,7 @@ Click here to see available models options .. 
code:: python - # login to huggingfacehub to get access to pretrained model + # login to huggingfacehub to get access to pretrained model from huggingface_hub import notebook_login, whoami @@ -270,7 +270,7 @@ Click here to see available models options .. code:: python - # login to huggingfacehub to get access to pretrained model + # login to huggingfacehub to get access to pretrained model from huggingface_hub import notebook_login, whoami @@ -304,7 +304,7 @@ Click here to see available models options .. code:: python - # login to huggingfacehub to get access to pretrained model + # login to huggingfacehub to get access to pretrained model from huggingface_hub import notebook_login, whoami @@ -338,7 +338,7 @@ Click here to see available models options .. code:: python - # login to huggingfacehub to get access to pretrained model + # login to huggingfacehub to get access to pretrained model from huggingface_hub import notebook_login, whoami @@ -399,7 +399,7 @@ Click here to see available models options .. code:: python - # login to huggingfacehub to get access to pretrained model + # login to huggingfacehub to get access to pretrained model from huggingface_hub import notebook_login, whoami @@ -432,7 +432,7 @@ Click here to see available models options .. code:: python - # login to huggingfacehub to get access to pretrained model + # login to huggingfacehub to get access to pretrained model from huggingface_hub import notebook_login, whoami @@ -466,7 +466,7 @@ Click here to see available models options .. code:: python - # login to huggingfacehub to get access to pretrained model + # login to huggingfacehub to get access to pretrained model from huggingface_hub import notebook_login, whoami @@ -500,7 +500,7 @@ Click here to see available models options .. 
code:: python - # login to huggingfacehub to get access to pretrained model + # login to huggingfacehub to get access to pretrained model from huggingface_hub import notebook_login, whoami @@ -531,7 +531,7 @@ Click here to see available models options .. code:: python - # login to huggingfacehub to get access to pretrained model + # login to huggingfacehub to get access to pretrained model from huggingface_hub import notebook_login, whoami @@ -644,9 +644,9 @@ Click here to see available models options .. code:: ipython3 from llm_config import get_llm_selection_widget - + form, lang, model_id_widget, compression_variant, use_preconverted = get_llm_selection_widget() - + form @@ -668,7 +668,7 @@ Click here to see available models options .. parsed-literal:: Selected model qwen2-0.5b-instruct with INT4 compression - + Convert model using Optimum-CLI tool ------------------------------------ @@ -676,7 +676,7 @@ Convert model using Optimum-CLI tool `Optimum Intel `__ -is the interface between the +is the interface between the `Transformers `__ and `Diffusers `__ libraries and OpenVINO to accelerate end-to-end pipelines on Intel architectures. @@ -749,13 +749,12 @@ to make it `symmetric `__ you can add ``--sym``. -For INT4 quantization you can also specify the following arguments: - -- The ``--group-size`` parameter will define the group size to use for - quantization, -1 it will results in per-column quantization. -- The ``--ratio`` parameter controls the ratio between 4-bit and 8-bit - quantization. If set to 0.9, it means that 90% of the layers will be - quantized to int4 while 10% will be quantized to int8. +For INT4 quantization you can also specify the following arguments : - +The ``--group-size`` parameter will define the group size to use for +quantization, -1 it will results in per-column quantization. - The +``--ratio`` parameter controls the ratio between 4-bit and 8-bit +quantization. 
If set to 0.9, it means that 90% of the layers will be +quantized to int4 while 10% will be quantized to int8. Smaller group_size and ratio values usually improve accuracy at the sacrifice of the model size and inference latency. You can enable AWQ to @@ -777,28 +776,28 @@ be additionally applied during model export with INT4 precision using .. code:: ipython3 from llm_config import convert_and_compress_model - + model_dir = convert_and_compress_model(model_id, model_configuration, compression_variant.value, use_preconverted.value) .. parsed-literal:: ✅ INT4 qwen2-0.5b-instruct model already converted and can be found in qwen2/INT4_compressed_weights - + Let’s compare model size for different compression types .. code:: ipython3 from llm_config import compare_model_size - + compare_model_size(model_dir) .. parsed-literal:: Size of model with INT4 compressed weights is 358.86 MB - + Select device for inference --------------------------- @@ -808,9 +807,9 @@ Select device for inference .. code:: ipython3 from notebook_utils import device_widget - + device = device_widget(default="CPU", exclude=["NPU"]) - + device @@ -853,14 +852,14 @@ of the available generation parameters more deeply later. .. code:: ipython3 import openvino_genai as ov_genai - + print(f"Loading model from {model_dir}\n") - - + + pipe = ov_genai.LLMPipeline(str(model_dir), device.value) - + generation_config = pipe.get_generation_config() - + input_prompt = "The Sun is yellow bacause" print(f"Input text: {input_prompt}") print(pipe.generate(input_prompt, max_new_tokens=10)) @@ -869,10 +868,10 @@ of the available generation parameters more deeply later. .. parsed-literal:: Loading model from qwen2/INT4_compressed_weights - + Input text: The Sun is yellow bacause it is made of hydrogen and oxygen atoms. 
The - + Run Chatbot ----------- @@ -1023,11 +1022,11 @@ Click here to see detailed description of advanced options if not Path("gradio_helper_genai.py").exists(): r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/llm-chatbot/gradio_helper_genai.py") open("gradio_helper_genai.py", "w").write(r.text) - + from gradio_helper_genai import make_demo - + demo = make_demo(pipe, model_configuration, model_id, lang.value) - + try: demo.launch(debug=True) except Exception: diff --git a/docs/notebooks/llm-chatbot-with-output.rst b/docs/notebooks/llm-chatbot-with-output.rst index 88dda48053d8ec..0d214f5cccc0fc 100644 --- a/docs/notebooks/llm-chatbot-with-output.rst +++ b/docs/notebooks/llm-chatbot-with-output.rst @@ -655,14 +655,13 @@ to make it `symmetric `__ you can add ``--sym``. -For INT4 quantization you can also specify the following arguments: - -- -The ``--group-size`` parameter will define the group size to use for -quantization, -1 it will results in per-column quantization. - The -``--ratio`` parameter controls the ratio between 4-bit and 8-bit -quantization. If set to 0.9, it means that 90% of the layers will be -quantized to int4 while 10% will be quantized to int8. +For INT4 quantization you can also specify the following arguments : + +- The ``--group-size`` parameter will define the group size to use for + quantization, -1 it will results in per-column quantization. +- The ``--ratio`` parameter controls the ratio between 4-bit and 8-bit + quantization. If set to 0.9, it means that 90% of the layers will be + quantized to int4 while 10% will be quantized to int8. Smaller group_size and ratio values usually improve accuracy at the sacrifice of the model size and inference latency. 
diff --git a/docs/notebooks/localai-with-output.rst b/docs/notebooks/localai-with-output.rst new file mode 100644 index 00000000000000..fac17b8d241d82 --- /dev/null +++ b/docs/notebooks/localai-with-output.rst @@ -0,0 +1,220 @@ +LocalAI and OpenVINO +==================== + +`LocalAI `__ is the free, Open Source OpenAI +alternative. LocalAI act as a drop-in replacement REST API that’s +compatible with OpenAI API specifications for local inferencing. It +allows you to run LLMs, generate images, audio (and not only) locally or +on-prem with consumer grade hardware, supporting multiple model families +and architectures. Does not require GPU. It is created and maintained by +``Ettore Di Giacinto``. + +In this tutorial we show how to prepare a model config and launch an +OpenVINO LLM model with LocalAI in docker container. + + +**Table of contents:** + + +- `Prepare Docker <#prepare-docker>`__ +- `Prepare a model <#prepare-a-model>`__ +- `Run the server <#run-the-server>`__ +- `Send a client request <#send-a-client-request>`__ +- `Stop the server <#stop-the-server>`__ + +Installation Instructions +~~~~~~~~~~~~~~~~~~~~~~~~~ + +This is a self-contained example that relies solely on its own code. + +We recommend running the notebook in a virtual environment. You only +need a Jupyter server to start. For details, please refer to +`Installation +Guide `__. + +Prepare Docker +-------------- + +Install `Docker +Engine `__, including its +`post-installation `__ +steps, on your development system. To verify installation, test it, +using the following command. When it is ready, it will display a test +image and a message. + +.. code:: ipython3 + + !docker run hello-world + + +.. parsed-literal:: + + Unable to find image 'hello-world:latest' locally + latest: Pulling from library/hello-world + + Digest: sha256:305243c734571da2d100c8c8b3c3167a098cab6049c9a5b066b6021a60fcb966 + Status: Downloaded newer image for hello-world:latest + + Hello from Docker! 
+ This message shows that your installation appears to be working correctly. + + To generate this message, Docker took the following steps: + 1. The Docker client contacted the Docker daemon. + 2. The Docker daemon pulled the "hello-world" image from the Docker Hub. + (amd64) + 3. The Docker daemon created a new container from that image which runs the + executable that produces the output you are currently reading. + 4. The Docker daemon streamed that output to the Docker client, which sent it + to your terminal. + + To try something more ambitious, you can run an Ubuntu container with: + $ docker run -it ubuntu bash + + Share images, automate workflows, and more with a free Docker ID: + https://hub.docker.com/ + + For more examples and ideas, visit: + https://docs.docker.com/get-started/ + + + +Prepare a model +~~~~~~~~~~~~~~~ + + + +LocalAI allows to use customized models. For more details you can read +the +`instruction `__ +where you can also find the detailed documentation. We will use one of +the OpenVINO optimized LLMs in the collection on the `collection on +🤗Hugging +Face `__. +In this example we will use +`TinyLlama-1.1B-Chat-v1.0-fp16-ov `__. +First of all we should create a model configuration file: + +.. code:: yaml + + name: TinyLlama-1.1B-Chat-v1.0-fp16-ov + backend: transformers + parameters: + model: OpenVINO/TinyLlama-1.1B-Chat-v1.0-fp16-ov + temperature: 0.2 + top_k: 40 + top_p: 0.95 + max_new_tokens: 32 + + type: OVModelForCausalLM + + template: + chat_message: | + <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}} + {{if .Content}}{{.Content}}{{end}}<|im_end|> + chat: | + {{.Input}} + <|im_start|>assistant + + completion: | + {{.Input}} + + stopwords: + - <|im_end|> + +The fields ``backend``, ``model``, ``type`` you can find in the code +example on the model page (we added the corresponding comments): + +.. 
code:: python + + from transformers import AutoTokenizer # backend + from optimum.intel.openvino import OVModelForCausalLM # type + + model_id = "OpenVINO/TinyLlama-1.1B-Chat-v1.0-fp16-ov" # parameters.model + tokenizer = AutoTokenizer.from_pretrained(model_id) + model = OVModelForCausalLM.from_pretrained(model_id) + +You can choose the name yourself. This name specifies which +model to use on the client side. + +You can create a GitHub gist and modify fields: +`ov.yaml `__ + +Description of the parameters used in config YAML file can be found +`here `__. + +The most important: + +- ``name`` - model name, used to identify the model in API calls. +- ``backend`` - backend to use for computation (like llama-cpp, + diffusers, whisper, transformers). +- ``parameters.model`` - relative to the models path. +- ``temperature``, ``top_k``, ``top_p``, ``max_new_tokens`` - + parameters for the model. +- ``type`` - type of configuration, often related to the type of task + or model architecture. +- ``template`` - templates for various types of model interactions. +- ``stopwords`` - Words or phrases that halt processing. + +Run the server +~~~~~~~~~~~~~~ + + + +Everything is ready for launch. Use +``quay.io/go-skynet/local-ai:v2.23.0-ffmpeg`` image that contains all +required dependencies. For more details read `Run with container +images `__. +If you want to see the output remove the ``-d`` flag and send a client +request from a separate notebook. + +.. code:: ipython3 + + !docker run -d --rm --name="localai" -p 8080:8080 quay.io/go-skynet/local-ai:master-sycl-f16-ffmpeg https://gist.githubusercontent.com/aleksandr-mokrov/f007c8fa6036760a856ddc60f605a0b0/raw/9d24ceeb487f9c058a943113bd0290e8ae565b3e/ov.yaml + + +..
parsed-literal:: + + 67e1a2a8123aa15794c027278aed2c258a04e06883663459bbeaca22ff014740 + docker: Error response from daemon: failed to create task for container: failed to create shim task: OCI runtime create failed: runc create failed: unable to start container process: error during container init: error running hook #1: error running hook: exit status 1, stdout: , stderr: Auto-detected mode as 'legacy' + nvidia-container-cli: requirement error: invalid expression: unknown. + + +Check whether the ``localai`` container is running normally: + +.. code:: ipython3 + + !docker ps | grep localai + +Send a client request +~~~~~~~~~~~~~~~~~~~~~ + + + +Now you can send HTTP requests using the model name +``TinyLlama-1.1B-Chat-v1.0-fp16-ov``. More details how to use `OpenAI +API `__. + +.. code:: ipython3 + + !curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{"model": "TinyLlama-1.1B-Chat-v1.0-fp16-ov", "prompt": "What is OpenVINO?"}' + + +.. parsed-literal:: + + curl: (7) Failed to connect to localhost port 8080: Connection refused + + +Stop the server +~~~~~~~~~~~~~~~ + + + +.. code:: ipython3 + + !docker stop localai + + +.. parsed-literal:: + + Error response from daemon: No such container: localai + diff --git a/docs/notebooks/magika-content-type-recognition-with-output.rst b/docs/notebooks/magika-content-type-recognition-with-output.rst index 383fdc6eebf499..f15167eae183b1 100644 --- a/docs/notebooks/magika-content-type-recognition-with-output.rst +++ b/docs/notebooks/magika-content-type-recognition-with-output.rst @@ -41,8 +41,8 @@ post `__ - `Define model loading class <#define-model-loading-class>`__ diff --git a/docs/notebooks/meter-reader-with-output.rst b/docs/notebooks/meter-reader-with-output.rst index 713c4d68edae6a..0ac9308155d4b7 100644 --- a/docs/notebooks/meter-reader-with-output.rst +++ b/docs/notebooks/meter-reader-with-output.rst @@ -135,7 +135,7 @@ DeepLabV3P pre-trained models from PaddlePaddle community. .. 
parsed-literal:: - model/meter_det_model.tar.gz: 0%| | 0.00/192M [00:00 + diff --git a/docs/notebooks/minicpm-v-multimodal-chatbot-with-output.rst b/docs/notebooks/minicpm-v-multimodal-chatbot-with-output.rst index 7f64dd936292c5..c130f9e0c08d67 100644 --- a/docs/notebooks/minicpm-v-multimodal-chatbot-with-output.rst +++ b/docs/notebooks/minicpm-v-multimodal-chatbot-with-output.rst @@ -205,7 +205,7 @@ documentation [68 lines of output] + ╰─> [92 lines of output] Ignoring numpy: markers 'python_version >= "3.9"' don't match your environment Collecting setuptools Using cached setuptools-75.3.0-py3-none-any.whl.metadata (6.9 kB) Collecting cython<3.0,>=0.25 Using cached Cython-0.29.37-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl.metadata (3.1 kB) Collecting cymem<2.1.0,>=2.0.2 - Using cached cymem-2.0.8-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.4 kB) + Using cached cymem-2.0.10.tar.gz (10 kB) + Installing build dependencies: started + Installing build dependencies: finished with status 'done' + Getting requirements to build wheel: started + Getting requirements to build wheel: finished with status 'done' + Preparing metadata (pyproject.toml): started + Preparing metadata (pyproject.toml): finished with status 'done' Collecting preshed<3.1.0,>=3.0.2 Using cached preshed-3.0.9-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.2 kB) Collecting murmurhash<1.1.0,>=0.28.0 - Using cached murmurhash-1.0.10-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.0 kB) + Using cached murmurhash-1.0.11.tar.gz (13 kB) + Installing build dependencies: started + Installing build dependencies: finished with status 'done' + Getting requirements to build wheel: started + Getting requirements to build wheel: finished with status 'done' + Preparing metadata (pyproject.toml): started + Preparing metadata 
(pyproject.toml): finished with status 'done' Collecting thinc<8.4.0,>=8.3.0 Using cached thinc-8.3.2.tar.gz (193 kB) Installing build dependencies: started @@ -139,16 +151,28 @@ Prerequisites × pip subprocess to install build dependencies did not run successfully. │ exit code: 1 - ╰─> [38 lines of output] + ╰─> [50 lines of output] Ignoring numpy: markers 'python_version >= "3.9"' don't match your environment Collecting setuptools Using cached setuptools-75.3.0-py3-none-any.whl.metadata (6.9 kB) Collecting cython<3.0,>=0.25 Using cached Cython-0.29.37-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl.metadata (3.1 kB) Collecting murmurhash<1.1.0,>=1.0.2 - Using cached murmurhash-1.0.10-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.0 kB) + Using cached murmurhash-1.0.11.tar.gz (13 kB) + Installing build dependencies: started + Installing build dependencies: finished with status 'done' + Getting requirements to build wheel: started + Getting requirements to build wheel: finished with status 'done' + Preparing metadata (pyproject.toml): started + Preparing metadata (pyproject.toml): finished with status 'done' Collecting cymem<2.1.0,>=2.0.2 - Using cached cymem-2.0.8-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.4 kB) + Using cached cymem-2.0.10.tar.gz (10 kB) + Installing build dependencies: started + Installing build dependencies: finished with status 'done' + Getting requirements to build wheel: started + Getting requirements to build wheel: finished with status 'done' + Preparing metadata (pyproject.toml): started + Preparing metadata (pyproject.toml): finished with status 'done' Collecting preshed<3.1.0,>=3.0.2 Using cached preshed-3.0.9-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.2 kB) Collecting blis<1.1.0,>=1.0.0 @@ -164,7 +188,7 @@ Prerequisites Using cached 
setuptools-75.3.0-py3-none-any.whl.metadata (6.9 kB) Collecting cython>=0.25 Using cached Cython-3.0.11-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.2 kB) - ERROR: Ignored the following versions that require a different python version: 1.25.0 Requires-Python >=3.9; 1.25.1 Requires-Python >=3.9; 1.25.2 Requires-Python >=3.9; 1.26.0 Requires-Python <3.13,>=3.9; 1.26.1 Requires-Python <3.13,>=3.9; 1.26.2 Requires-Python >=3.9; 1.26.3 Requires-Python >=3.9; 1.26.4 Requires-Python >=3.9; 2.0.0 Requires-Python >=3.9; 2.0.1 Requires-Python >=3.9; 2.0.2 Requires-Python >=3.9; 2.1.0 Requires-Python >=3.10; 2.1.0rc1 Requires-Python >=3.10; 2.1.1 Requires-Python >=3.10; 2.1.2 Requires-Python >=3.10; 2.1.3 Requires-Python >=3.10; 75.4.0 Requires-Python >=3.9; 75.5.0 Requires-Python >=3.9; 75.6.0 Requires-Python >=3.9 + ERROR: Ignored the following versions that require a different python version: 1.25.0 Requires-Python >=3.9; 1.25.1 Requires-Python >=3.9; 1.25.2 Requires-Python >=3.9; 1.26.0 Requires-Python <3.13,>=3.9; 1.26.1 Requires-Python <3.13,>=3.9; 1.26.2 Requires-Python >=3.9; 1.26.3 Requires-Python >=3.9; 1.26.4 Requires-Python >=3.9; 2.0.0 Requires-Python >=3.9; 2.0.1 Requires-Python >=3.9; 2.0.2 Requires-Python >=3.9; 2.1.0 Requires-Python >=3.10; 2.1.0rc1 Requires-Python >=3.10; 2.1.1 Requires-Python >=3.10; 2.1.2 Requires-Python >=3.10; 2.1.3 Requires-Python >=3.10; 2.2.0 Requires-Python >=3.10; 2.2.0rc1 Requires-Python >=3.10; 75.4.0 Requires-Python >=3.9; 75.5.0 Requires-Python >=3.9; 75.6.0 Requires-Python >=3.9 ERROR: Could not find a version that satisfies the requirement numpy<3.0.0,>=2.0.0 (from versions: 1.3.0, 1.4.1, 1.5.0, 1.5.1, 1.6.0, 1.6.1, 1.6.2, 1.7.0, 1.7.1, 1.7.2, 1.8.0, 1.8.1, 1.8.2, 1.9.0, 1.9.1, 1.9.2, 1.9.3, 1.10.0.post2, 1.10.1, 1.10.2, 1.10.4, 1.11.0, 1.11.1, 1.11.2, 1.11.3, 1.12.0, 1.12.1, 1.13.0, 1.13.1, 1.13.3, 1.14.0, 1.14.1, 1.14.2, 1.14.3, 1.14.4, 1.14.5, 1.14.6, 1.15.0, 1.15.1, 1.15.2, 1.15.3, 1.15.4, 1.16.0, 
1.16.1, 1.16.2, 1.16.3, 1.16.4, 1.16.5, 1.16.6, 1.17.0, 1.17.1, 1.17.2, 1.17.3, 1.17.4, 1.17.5, 1.18.0, 1.18.1, 1.18.2, 1.18.3, 1.18.4, 1.18.5, 1.19.0, 1.19.1, 1.19.2, 1.19.3, 1.19.4, 1.19.5, 1.20.0, 1.20.1, 1.20.2, 1.20.3, 1.21.0, 1.21.1, 1.21.2, 1.21.3, 1.21.4, 1.21.5, 1.21.6, 1.22.0, 1.22.1, 1.22.2, 1.22.3, 1.22.4, 1.23.0, 1.23.1, 1.23.2, 1.23.3, 1.23.4, 1.23.5, 1.24.0, 1.24.1, 1.24.2, 1.24.3, 1.24.4) ERROR: No matching distribution found for numpy<3.0.0,>=2.0.0 @@ -499,25 +523,25 @@ Prepare image gallery .. parsed-literal:: - data/red_panda.png: 0%| | 0.00/50.6k [00:00 1 or self.sliding_window is not None: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:164: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:164: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if past_key_values_length > 0: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/marian/modeling_marian.py:166: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/marian/modeling_marian.py:166: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if ( - Exporting tokenizers to OpenVINO is not supported for tokenizers version > 0.19 and openvino version <= 2024.4. Please downgrade to tokenizers version <= 0.19 to export tokenizers to OpenVINO. + model.safetensors: 0%| | 0.00/312M [00:00 0.19 and openvino version <= 2024.4. Please downgrade to tokenizers version <= 0.19 to export tokenizers to OpenVINO. + model.safetensors: 100%|█████████████████████| 312M/312M [00:04<00:00, 71.1MB/s] .. code:: ipython3 @@ -888,10 +919,10 @@ support searching in Chinese. .. parsed-literal:: - 2024-11-22 01:36:43.187797: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-11-22 01:36:43.213112: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-12-10 02:26:01.092495: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. 
+ 2024-12-10 02:26:01.118195: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/marian/tokenization_marian.py:175: UserWarning: Recommended: pip install sacremoses. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/marian/tokenization_marian.py:175: UserWarning: Recommended: pip install sacremoses. warnings.warn("Recommended: pip install sacremoses.") @@ -1123,13 +1154,13 @@ models can require different optimal threshold for search. .. parsed-literal:: - data/car-detection.mp4: 0%| | 0.00/2.68M [00:00`__ is a +“Model-as-a-Service” (MaaS) platform that seeks to bring together most +advanced machine learning models from the AI community, and to +streamline the process of leveraging AI models in real applications. +Hundreds of models are made publicly available on ModelScope (700+ and +counting), covering the latest development in areas such as NLP, CV, +Audio, Multi-modality, and AI for Science, etc. Many of these models +represent the SOTA in their specific fields, and made their open-sourced +debut on ModelScope. + +This tutorial covers how to use the modelscope ecosystem within +OpenVINO. + +Installation Instructions +~~~~~~~~~~~~~~~~~~~~~~~~~ + +This is a self-contained example that relies solely on its own code. + +We recommend running the notebook in a virtual environment. You only +need a Jupyter server to start. For details, please refer to +`Installation +Guide `__. 
+ + +**Table of contents:** + + +- `Prerequisites <#prerequisites>`__ +- `Convert models from ModelScope using OpenVINO Model Conversion + API <#convert-models-from-modelscope-using-openvino-model-conversion-api>`__ + + - `Select inference device for image + classification <#select-inference-device-for-image-classification>`__ + - `Run Image classification <#run-image-classification>`__ + +- `Convert ModelScope models using Optimum + Intel <#convert-modelscope-models-using-optimum-intel>`__ + + - `Select inference device for text + classification <#select-inference-device-for-text-classification>`__ + - `Perform text classification <#perform-text-classification>`__ + +- `Convert ModelScope models for usage with OpenVINO + GenAI <#convert-modelscope-models-for-usage-with-openvino-genai>`__ + + - `Select inference device for text + generation <#select-inference-device-for-text-generation>`__ + - `Run OpenVINO GenAI pipeline <#run-openvino-genai-pipeline>`__ + +Prerequisites +------------- + + + +.. code:: ipython3 + + import platform + + %pip install -q "torch>=2.1.1" "torchvision" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q modelscope addict oss2 simplejson sortedcontainers pillow opencv-python "datasets<=3.0.0" + %pip install -q "transformers>=4.45" "git+https://github.com/huggingface/optimum-intel.git" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -qU "openvino>=2024.5.0" "openvino-tokenizers>=2024.5.0" "openvino-genai>=2024.5.0" "nncf>=2.14.0" + + if platform.system() == "Darwin": + %pip install -q "numpy<2.0.0" + +.. 
code:: ipython3 + + import requests + from pathlib import Path + + if not Path("notebook_utils.py").exists(): + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", + ) + open("notebook_utils.py", "w").write(r.text) + +Convert models from ModelScope using OpenVINO Model Conversion API +------------------------------------------------------------------ + + + +Modelscope package provides API for initializing a model and loading a +set of pre-trained weights using the model text handle. Discovering a +desired model name is straightforward with `Modelscope models web +page `__, one can choose a model +solving a particular machine learning problem and even sort the models +by popularity and novelty. + +OpenVINO supports various types of models and frameworks via conversion +to OpenVINO Intermediate Representation (IR). `OpenVINO model conversion +API `__ +should be used for these purposes. ``ov.convert_model`` function accepts +original model instance and example input for tracing and returns +``ov.Model`` representing this model in OpenVINO framework. Converted +model can be used for saving on disk using ``ov.save_model`` function or +directly loading on device using ``core.compile_model``. + +As an example, we will use +`tinynas `__ +image classification model. The code below demonstrates how to load +this model using Modelscope pipelines interface, convert it to OpenVINO +IR and then perform image classification on specified device. + +..
code:: ipython3 + + from pathlib import Path + + from modelscope.pipelines import pipeline + from modelscope.utils.constant import Tasks + import openvino as ov + import torch + import gc + + + cls_model_id = "iic/cv_tinynas_classification" + cls_model_path = Path(cls_model_id.split("/")[-1]) / "openvino_model.xml" + + if not cls_model_path.exists(): + # load Modelcope pipeline with model + image_classification = pipeline(Tasks.image_classification, model=cls_model_id) + # convert model to OpenVINO + ov_model = ov.convert_model(image_classification.model, example_input=torch.zeros((1, 3, 224, 224)), input=[1, 3, 224, 224]) + # save OpenVINO model on disk for next usage + ov.save_model(ov_model, cls_model_path) + del ov_model + del image_classification + gc.collect(); + + +.. parsed-literal:: + + 2024-11-12 19:08:10.199148: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-11-12 19:08:10.212253: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered + WARNING: All log messages before absl::InitializeLog() is called are written to STDERR + E0000 00:00:1731424090.226654 1605757 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered + E0000 00:00:1731424090.230976 1605757 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered + 2024-11-12 19:08:10.246563: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. 
+ To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. + + +Select inference device for image classification +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +.. code:: ipython3 + + from notebook_utils import device_widget + + cv_cls_device = device_widget("CPU") + + cv_cls_device + + + + +.. parsed-literal:: + + Dropdown(description='Device:', options=('CPU', 'AUTO'), value='CPU') + + + +Run Image classification +~~~~~~~~~~~~~~~~~~~~~~~~ + + + +Model inference interface remains compatible with pipeline preprocessing +and postprocessing, so you can reuse these parts of the pipeline, but for +providing standalone experience, we will demonstrate how to use model +without pipeline. The code below defines utilities for image +preprocessing and postprocessing. + +.. code:: ipython3 + + from notebook_utils import download_file + from PIL import Image + from torchvision import transforms + + # prepare input data and output labels + img_url = "https://pailitao-image-recog.oss-cn-zhangjiakou.aliyuncs.com/mufan/img_data/maas_test_data/dog.png" + img_path = Path("dog.png") + + labels_url = "https://raw.githubusercontent.com/openvinotoolkit/open_model_zoo/master/data/dataset_classes/imagenet_2012.txt" + + labels_path = Path("imagenet_2012.txt") + + if not img_path.exists(): + download_file(img_url) + + if not labels_path.exists(): + download_file(labels_url) + + image = Image.open(img_path) + imagenet_classes = labels_path.open("r").read().splitlines() + + + # prepare image preprocessing + transforms_normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + transform_list = [ + transforms.Resize(256, interpolation=transforms.InterpolationMode.BICUBIC), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms_normalize, + ] + transformer = transforms.Compose(transform_list) + + # compile model + core = ov.Core() + + ov_model =
core.compile_model(cls_model_path, cv_cls_device.value) + +Now, when we make all necessary preparations, we can run model +inference. + +.. code:: ipython3 + + import numpy as np + + # preprocess input + image_tensor = transformer(image) + + # run model inference + result = ov_model(image_tensor.unsqueeze(0))[0] + + # postprocess results + label_id = np.argmax(result[0]) + score = result[0][label_id] + + label = imagenet_classes[label_id] + + # visualize results + display(image) + print(f"Predicted label: {label}, score {score}") + + + +.. image:: modelscope-to-openvino-with-output_files/modelscope-to-openvino-with-output_12_0.png + + +.. parsed-literal:: + + Predicted label: n02099601 golden retriever, score 8.060977935791016 + + +Convert ModelScope models using Optimum Intel +--------------------------------------------- + + + +For models compatible with the `HuggingFace +Transformers `__ +library, we can use `Optimum +Intel `__ integration +to convert and run model. Optimum Intel is the interface between the +Transformers and Diffusers libraries and the different tools and +libraries provided by Intel to accelerate end-to-end pipelines on Intel +architectures. + +Optimum Intel provides a simple interface for optimizing your +Transformers and Diffusers models, converting them to the OpenVINO +Intermediate Representation (IR) format, and running inference using +OpenVINO Runtime, among other use cases. For running ModelScope models +using this interface we should download model from hub first. There are +several ways how to download models from Modelscope Hub, one of them is +usage of ``modelscope.snapshot_download`` function. This function +accepts model id from hub and optionally local directory (if not +provided, model will be downloaded to cache directory). + +After that, we can load model to Optimum Intel interface replacing the +``AutoModelForXxx`` class from transformers with the corresponding +``OVModelForXxx``. 
Model conversion will be performed on the fly. To +avoid repeating the conversion, we can save the model on disk using the +``save_pretrained`` method and next time pass the directory with the +already converted model as an argument to the ``from_pretrained`` method. We +also specified ``device`` parameter for compiling the model on the +specific device, if not provided, the default device will be used. The +device can be changed later in runtime using ``model.to(device)``, +please note that it may require some time for model compilation on a +newly selected device. In some cases, it can be useful to separate model +initialization and compilation, for example, if you want to reshape the +model using ``reshape`` method, you can postpone compilation, providing +the parameter ``compile=False`` into ``from_pretrained`` method, +compilation can be performed manually using ``compile`` method or will +be performed automatically during first inference run. + +As an example, we will use +`nlp_bert_sentiment-analysis_english-base `__. +This model was trained to classify input text into 3 sentiment +categories: negative, positive and neutral. In transformers, +``AutoModelForSequenceClassification`` should be used for model +initialization, so to use the model with OpenVINO, it is enough to just +replace ``AutoModelForSequenceClassification`` with +``OVModelForSequenceClassification``. + +.. code:: ipython3 + + from modelscope import snapshot_download + + text_model_id = "iic/nlp_bert_sentiment-analysis_english-base" + text_model_path = Path(text_model_id.split("/")[-1]) + ov_text_model_path = text_model_path / "ov" + + + if not text_model_path.exists(): + snapshot_download(text_model_id, local_dir=text_model_path) + +Select inference device for text classification +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +.. code:: ipython3 + + from notebook_utils import device_widget + + text_cls_device = device_widget("CPU", "NPU") + + text_cls_device + + + + +..
parsed-literal:: + + Dropdown(description='Device:', options=('CPU', 'AUTO'), value='CPU') + + + +Perform text classification +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +.. code:: ipython3 + + from transformers import AutoTokenizer + from optimum.intel.openvino import OVModelForSequenceClassification + + + tokenizer = AutoTokenizer.from_pretrained(text_model_path) + + if not ov_text_model_path.exists(): + # model will be automatically exported to OpenVINO format during loading + ov_model = OVModelForSequenceClassification.from_pretrained(text_model_path, text_cls_device.value) + ov_model.save_pretrained(ov_text_model_path) + # save converted model using save_pretrained to avoid conversion next time + tokenizer.save_pretrained(ov_text_model_path) + else: + # load converted model directly if available + ov_model = OVModelForSequenceClassification.from_pretrained(ov_text_model_path, device=text_cls_device.value) + + # prepare input + input_text = "Good night." + input_data = tokenizer(input_text, return_tensors="pt") + + # run model inference + output = ov_model(**input_data) + # postprocess results + predicted_label_id = output.logits[0].argmax().item() + + predicted_label = ov_model.config.id2label[predicted_label_id] + + print(f"predicted label: {predicted_label}") + + +.. parsed-literal:: + + predicted label: Positive + + +Convert ModelScope models for usage with OpenVINO GenAI +------------------------------------------------------- + + + +OpenVINO™ GenAI is a library of the most popular Generative AI model +pipelines, optimized execution methods, and samples that run on top of +highly performant `OpenVINO +Runtime `__. + +This library is friendly to PC and laptop execution, and optimized for +resource consumption. It requires no external dependencies to run +generative models as it already includes all the core functionality +(e.g. tokenization via openvino-tokenizers). 
+ +You can also load and run models from ModelScope with OpenVINO GenAI +`supported +pipelines `__. + +This inference approach is also based on the model representation obtained +using Optimum Intel and also requires downloading the ModelScope model +first. As an example, we will use the +`qwen2.5-1.5b-instruct `__ +model for text generation, which is part of the powerful Qwen2 LLM family. +While the previous chapter focused on using the Python API for +downloading and converting models, in this one we also consider +CLI usage for the same actions. + +Downloading ModelScope models using the CLI can be performed with the following +command: + +.. code:: bash + + modelscope download <model_id> --local_dir <local_dir> + +where ``<model_id>`` is the model id from the Hub and ``<local_dir>`` is the +output directory for saving the model. + +``optimum-cli`` provides a command line interface for exporting models +using Optimum. General OpenVINO export command format: + +.. code:: bash + + optimum-cli export openvino --model <model_id_or_path> --task <task> <output_dir> + +where task is the task to export the model for. Available tasks depend on +the model, but are among: [‘default’, ‘fill-mask’, ‘text-generation’, +‘text2text-generation’, ‘text-classification’, ‘token-classification’, +‘multiple-choice’, ‘object-detection’, ‘question-answering’, +‘image-classification’, ‘image-segmentation’, ‘masked-im’, +‘semantic-segmentation’, ‘automatic-speech-recognition’, +‘audio-classification’, ‘audio-frame-classification’, +‘automatic-speech-recognition’, ‘audio-xvector’, ‘image-to-text’, +‘stable-diffusion’, ‘zero-shot-object-detection’]. + +You can find a mapping between tasks and model classes in Optimum +TaskManager +`documentation `__. + +Additionally, you can specify weights compression using the +``--weight-format`` argument with one of the following options: ``fp32``, +``fp16``, ``int8`` and ``int4``. For int8 and int4, NNCF will be used for +weight compression. For models that require remote code execution, the +``--trust-remote-code`` flag should be provided. 
+ +The full list of supported arguments is available via ``--help`` + +.. code:: ipython3 + + from IPython.display import Markdown, display + + model_id = "Qwen/Qwen2.5-1.5B-Instruct" + + llm_path = Path("Qwen2.5-1.5B-Instruct") + ov_llm_path = llm_path / "ov" + download_command = f"modelscope download {model_id} --local_dir {llm_path}" + display(Markdown("**Download command:**")) + display(Markdown(f"`{download_command}`")) + + if not llm_path.exists(): + !{download_command} + + + +**Download command:** + + + +``modelscope download Qwen/Qwen2.5-1.5B-Instruct --local_dir Qwen2.5-1.5B-Instruct`` + + +.. code:: ipython3 + + export_command = f"optimum-cli export openvino -m {llm_path} --task text-generation-with-past --weight-format int4 {ov_llm_path}" + display(Markdown("**Export command:**")) + display(Markdown(f"`{export_command}`")) + + if not ov_llm_path.exists(): + !{export_command} + + + +**Export command:** + + + +``optimum-cli export openvino -m Qwen2.5-1.5B-Instruct --task text-generation-with-past --weight-format int4 Qwen2.5-1.5B-Instruct/ov`` + + +Select inference device for text generation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +.. code:: ipython3 + + from notebook_utils import device_widget + + llm_device = device_widget("CPU") + + llm_device + + + + +.. parsed-literal:: + + Dropdown(description='Device:', options=('CPU', 'AUTO'), value='CPU') + + + +Run OpenVINO GenAI pipeline +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +To run text generation using OpenVINO GenAI, we should use the +``LLMPipeline`` class, initialized with the converted model +directory and inference device. You can find a more detailed example of how +to use OpenVINO GenAI ``LLMPipeline`` for a chatbot scenario in this +`tutorial `__. + +.. code:: ipython3 + + import openvino_genai as ov_genai + + + def streamer(subword): + print(subword, end="", flush=True) + # The returned flag indicates whether generation should be stopped. + # False means continue generation. 
+ return False + + + llm_pipe = ov_genai.LLMPipeline(ov_llm_path, llm_device.value) + + llm_pipe.generate("The Sun is yellow because", max_new_tokens=200, streamer=streamer) + + +.. parsed-literal:: + + it has a spectrum of colors, and you are also looking at it. What color would the sun be if you could see its light without being able to see any other objects? If we imagine that someone had never seen or heard about the sun before, what would they expect to see? + + 1. **Color of the Sun**: The sun appears yellow when viewed from Earth due to the way our atmosphere scatters sunlight. This phenomenon occurs as follows: + + - **Sunlight Scattering**: When sunlight passes through the Earth's atmosphere, different wavelengths (colors) of light travel at slightly different speeds due to their varying energies. + - **Air Mass Height**: At higher altitudes where air density decreases with altitude, shorter wavelength (blue) photons have more energy and thus escape faster into space compared to longer wavelength (red) photons which remain in the atmosphere longer. + - **Sky Color**: As a result, blue light is scattered more than red light by molecules in the upper layers of the atmosphere + + + +.. parsed-literal:: + + " it has a spectrum of colors, and you are also looking at it. What color would the sun be if you could see its light without being able to see any other objects? If we imagine that someone had never seen or heard about the sun before, what would they expect to see?\n\n1. **Color of the Sun**: The sun appears yellow when viewed from Earth due to the way our atmosphere scatters sunlight. 
This phenomenon occurs as follows:\n\n - **Sunlight Scattering**: When sunlight passes through the Earth's atmosphere, different wavelengths (colors) of light travel at slightly different speeds due to their varying energies.\n - **Air Mass Height**: At higher altitudes where air density decreases with altitude, shorter wavelength (blue) photons have more energy and thus escape faster into space compared to longer wavelength (red) photons which remain in the atmosphere longer.\n - **Sky Color**: As a result, blue light is scattered more than red light by molecules in the upper layers of the atmosphere" + + + +.. code:: ipython3 + + import gc + + del llm_pipe + gc.collect(); diff --git a/docs/notebooks/modelscope-to-openvino-with-output_files/modelscope-to-openvino-with-output_12_0.jpg b/docs/notebooks/modelscope-to-openvino-with-output_files/modelscope-to-openvino-with-output_12_0.jpg new file mode 100644 index 00000000000000..97ae56df8a8721 --- /dev/null +++ b/docs/notebooks/modelscope-to-openvino-with-output_files/modelscope-to-openvino-with-output_12_0.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1745fd9f64ac9914621f7eee3668e86daa8121bc83d1a2c7f27963c85026f104 +size 66633 diff --git a/docs/notebooks/modelscope-to-openvino-with-output_files/modelscope-to-openvino-with-output_12_0.png b/docs/notebooks/modelscope-to-openvino-with-output_files/modelscope-to-openvino-with-output_12_0.png new file mode 100644 index 00000000000000..d1c0d309736c1a --- /dev/null +++ b/docs/notebooks/modelscope-to-openvino-with-output_files/modelscope-to-openvino-with-output_12_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6235ab7dd2cb4318435320004320ffc6de773044c51cadcd581a7996faca313a +size 636558 diff --git a/docs/notebooks/music-generation-with-output.rst b/docs/notebooks/music-generation-with-output.rst index a5bdcbd8049318..2d63515872694f 100644 --- a/docs/notebooks/music-generation-with-output.rst +++ 
b/docs/notebooks/music-generation-with-output.rst @@ -124,8 +124,8 @@ Imports .. parsed-literal:: - 2024-11-22 01:43:50.913766: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-11-22 01:43:50.938403: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-12-10 02:28:39.145741: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-12-10 02:28:39.170431: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. @@ -165,7 +165,7 @@ generate a text-conditioned music sample. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/encodec/modeling_encodec.py:124: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor). 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/encodec/modeling_encodec.py:124: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor). self.register_buffer("padding_total", torch.tensor(kernel_size - stride, dtype=torch.int64), persistent=False) Config of the text_encoder: is overwritten by shared text_encoder config: T5Config { "_name_or_path": "t5-base", @@ -346,7 +346,7 @@ vocabulary. It helps the model understand the context of a sentence. @@ -431,7 +431,7 @@ runtime .. parsed-literal:: [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:5006: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:5006: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead warnings.warn( `loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`. @@ -775,7 +775,7 @@ We can now infer the pipeline backed by OpenVINO models. 
diff --git a/docs/notebooks/nano-llava-multimodal-chatbot-with-output.rst b/docs/notebooks/nano-llava-multimodal-chatbot-with-output.rst index 0bac7af3f39c32..9cefe7216f2076 100644 --- a/docs/notebooks/nano-llava-multimodal-chatbot-with-output.rst +++ b/docs/notebooks/nano-llava-multimodal-chatbot-with-output.rst @@ -204,8 +204,8 @@ documentation 1 or self.sliding_window is not None) and self.is_causal: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/optimum/exporters/onnx/model_patcher.py:306: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/optimum/exporters/onnx/model_patcher.py:306: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if past_key_values_length > 0: /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/qnguyen3/nanoLLaVA/13d60cec183a86755afed64da495fcc2c382ea80/modeling_llava_qwen2.py:939: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
if seq_len > self.max_seq_len_cached: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/cache_utils.py:443: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/cache_utils.py:443: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results. elif len(self.key_cache[layer_idx]) == 0: # fills previously skipped layers; checking for tensor causes errors /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/qnguyen3/nanoLLaVA/13d60cec183a86755afed64da495fcc2c382ea80/modeling_llava_qwen2.py:1499: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attention_mask.size() != (bsz, 1, q_len, kv_seq_len): @@ -530,10 +530,10 @@ image encoder model. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/quantization/quantize_model.py:432: FutureWarning: `CompressWeightsMode.INT8` is deprecated. Please, use `CompressWeightsMode.INT8_ASYM` as value instead. 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/quantization/quantize_model.py:432: FutureWarning: `CompressWeightsMode.INT8` is deprecated. Please, use `CompressWeightsMode.INT8_ASYM` as value instead. warning_deprecated( - 2024-11-22 01:48:49.764790: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-11-22 01:48:49.789684: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-12-10 02:33:42.983675: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-12-10 02:33:43.008813: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. @@ -742,8 +742,7 @@ can use the same tokenizer and image processor that provided with model. Question: Describe this image in detail Answer: - This image features a cute, white lama, possibly a llama, which is depicted in a playful pose. The llama is surrounded by a fire, indicating it's being set on a burner. The flame appears to be a bright, bright yellow, and there are several tiny flames, possibly from the llama's actions. - The llama itself is quite detailed. It has a small brown nose and dark eyes that are expressive. 
The face of the llama is quite detailed as well, with a pair of ears that are also light brown. The llama's mouth is open, revealing its pink lips. There are also small pink spots on its face, + The image features a white, fluffy lamb with a big, bright smile, standing next to a fire. The lamb's face is detailed, with black eyes that are slightly squinty, and a mouth that's slightly open. It seems to be enjoying the heat from the fire, as it is seen looking down. The lamb's legs are also visible, and they appear to be furry. The lamb's tail is long and appears to be fluffy as well. The lamb's ears are also visible and are pink. The lamb's face is depicted in detail, with small black eyes and black nostrils. The lamb's nose is also Interactive demo diff --git a/docs/notebooks/notebooks_with_binder_buttons.txt b/docs/notebooks/notebooks_with_binder_buttons.txt index ce9cb50da47907..58f31aaae508c8 100644 --- a/docs/notebooks/notebooks_with_binder_buttons.txt +++ b/docs/notebooks/notebooks_with_binder_buttons.txt @@ -7,7 +7,6 @@ convert-to-openvino cross-lingual-books-alignment depth-anything detectron2-to-openvino -distilbert-sequence-classification fast-segment-anything handwritten-ocr hello-detection diff --git a/docs/notebooks/notebooks_with_colab_buttons.txt b/docs/notebooks/notebooks_with_colab_buttons.txt index 59b3348a4c90f7..2361fbe9a19c69 100644 --- a/docs/notebooks/notebooks_with_colab_buttons.txt +++ b/docs/notebooks/notebooks_with_colab_buttons.txt @@ -1,5 +1,4 @@ 3D-segmentation-point-clouds -amused-lightweight-text-to-image async-api auto-device clip-language-saliency-map @@ -8,7 +7,6 @@ cross-lingual-books-alignment depth-anything depth-anything-v2 detectron2-to-openvino -distilbert-sequence-classification explainable-ai-1-basic explainable-ai-2-deep-dive explainable-ai-3-map-interpretation diff --git a/docs/notebooks/object-detection-with-output.rst b/docs/notebooks/object-detection-with-output.rst index 5debc4e7ed88d4..fc055f6e7ae63e 100644 --- 
a/docs/notebooks/object-detection-with-output.rst +++ b/docs/notebooks/object-detection-with-output.rst @@ -84,7 +84,7 @@ Install requirements .. parsed-literal:: - 24717 + 24624 @@ -136,21 +136,21 @@ Download and convert the Model .. parsed-literal:: - 100%|██████████| 6.25M/6.25M [00:00<00:00, 26.9MB/s] + 100%|██████████| 6.25M/6.25M [00:00<00:00, 26.8MB/s] .. parsed-literal:: - Ultralytics 8.3.0 🚀 Python-3.8.10 torch-2.2.2+cpu CPU (Intel Core(TM) i9-10920X 3.50GHz) + Ultralytics 8.3.0 🚀 Python-3.8.10 torch-2.4.1+cpu CPU (Intel Core(TM) i9-10920X 3.50GHz) YOLOv8n summary (fused): 168 layers, 3,151,904 parameters, 0 gradients, 8.7 GFLOPs PyTorch: starting from 'yolov8n.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) (1, 84, 8400) (6.2 MB) OpenVINO: starting export with openvino 2024.4.0-16579-c3152d32c9c-releases/2024/4... - OpenVINO: export success ✅ 1.4s, saved as 'yolov8n_openvino_model/' (6.4 MB) + OpenVINO: export success ✅ 1.3s, saved as 'yolov8n_openvino_model/' (6.4 MB) - Export complete (1.6s) - Results saved to /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/object-detection-webcam + Export complete (1.5s) + Results saved to /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/object-detection-webcam Predict: yolo predict task=detect model=yolov8n_openvino_model imgsz=640 half Validate: yolo val task=detect model=yolov8n_openvino_model imgsz=640 data=coco.yaml half Visualize: https://netron.app @@ -222,7 +222,7 @@ best performance. For that purpose, just use ``AUTO``. .. parsed-literal:: - Ultralytics 8.3.0 🚀 Python-3.8.10 torch-2.2.2+cpu CPU (Intel Core(TM) i9-10920X 3.50GHz) + Ultralytics 8.3.0 🚀 Python-3.8.10 torch-2.4.1+cpu CPU (Intel Core(TM) i9-10920X 3.50GHz) Loading yolov8n_openvino_model for OpenVINO inference... Using OpenVINO LATENCY mode for batch=1 inference... 
diff --git a/docs/notebooks/omniparser-with-output.rst b/docs/notebooks/omniparser-with-output.rst index 28676a03a84ba7..e22ce49105f78d 100644 --- a/docs/notebooks/omniparser-with-output.rst +++ b/docs/notebooks/omniparser-with-output.rst @@ -20,7 +20,6 @@ repo `__ and `model card `__. In this tutorial we consider how to run OmniParser using OpenVINO. - **Table of contents:** - `Prerequisites <#prerequisites>`__ @@ -72,9 +71,14 @@ Prerequisites .. code:: ipython3 - %pip install -q "torch>=2.1" easyocr torchvision accelerate "supervision==0.18.0" accelerate timm "einops==0.8.0" "ultralytics==8.1.24" pillow opencv-python "gradio>=4.19" --extra-index-url https://download.pytorch.org/whl/cpu + import platform + + %pip install -q "torch>=2.1" easyocr torchvision accelerate "supervision==0.18.0" "transformers>=4.45" timm "einops==0.8.0" "ultralytics==8.1.24" pillow opencv-python "gradio>=4.19" --extra-index-url https://download.pytorch.org/whl/cpu %pip install -q "openvino>=2024.4.0" + if platform.system() == "Darwin": + %pip install -q "numpy<2.0" + .. 
parsed-literal:: @@ -89,16 +93,21 @@ Prerequisites notebook_utils_path = Path("notebook_utils.py") florence_helper_path = Path("ov_florence2_helper.py") + omniparser_helper_path = Path("ov_omniparser_helper.py") if not notebook_utils_path.exists(): r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", ) - notebook_utils_path.open("w").write(r.text) + notebook_utils_path.open("w", encoding="utf-8").write(r.text) if not florence_helper_path.exists(): r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/florence2/ov_florence2_helper.py") - florence_helper_path.open("w").write(r.text) + florence_helper_path.open("w", encoding="utf-8").write(r.text) + + if not omniparser_helper_path.exists(): + r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/omniparser/ov_omniparser_helper.py") + omniparser_helper_path.open("w", encoding="utf-8").write(r.text) Prepare models -------------- @@ -155,21 +164,21 @@ API. You can find more examples of this API usage in these .. parsed-literal:: - 2024-11-22 01:51:07.385705: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-11-22 01:51:07.410345: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-12-10 02:35:42.631431: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. 
+ 2024-12-10 02:35:42.657651: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. .. parsed-literal:: - weights/icon_detect/best.pt: 0%| | 0.00/11.7M [00:00=1.4.0, which is not installed. - mobileclip 0.1.0 requires torchvision==0.14.1, but you have torchvision 0.17.2+cpu which is incompatible. tensorflow 2.12.0 requires keras<2.13,>=2.12.0, but you have keras 2.13.1 which is incompatible. tensorflow 2.12.0 requires numpy<1.24,>=1.22, but you have numpy 1.24.4 which is incompatible. tensorflow 2.12.0 requires tensorboard<2.13,>=2.12, but you have tensorboard 2.13.0 which is incompatible. tensorflow 2.12.0 requires tensorflow-estimator<2.13,>=2.12.0, but you have tensorflow-estimator 2.13.0 which is incompatible. tensorflow-cpu 2.13.1 requires numpy<=1.24.3,>=1.22, but you have numpy 1.24.4 which is incompatible. tensorflow-cpu 2.13.1 requires typing-extensions<4.6.0,>=3.6.6, but you have typing-extensions 4.12.2 which is incompatible. - torchvision 0.17.2+cpu requires torch==2.2.2, but you have torch 2.4.1 which is incompatible. Note: you may need to restart the kernel to use updated packages. @@ -250,9 +247,9 @@ True .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:134: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:134: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. WeightNorm.apply(module, name, dim) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/api.py:36: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/api.py:36: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. 
This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. checkpoint_dict = torch.load(ckpt_path, map_location=torch.device(self.device)) @@ -266,9 +263,9 @@ True .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/wavmark/__init__.py:16: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/wavmark/__init__.py:16: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. 
It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. checkpoint = torch.load(resume_path, map_location=torch.device('cpu')) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/api.py:36: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/api.py:36: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. checkpoint_dict = torch.load(ckpt_path, map_location=torch.device(self.device)) @@ -418,38 +415,40 @@ documentation 0 - No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda' - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/attentions.py:283: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/attentions.py:283: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert ( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/attentions.py:346: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/attentions.py:346: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! pad_length = max(length - (self.window_size + 1), 0) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/attentions.py:347: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/attentions.py:347: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! slice_start_position = max((self.window_size + 1) - length, 0) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/attentions.py:349: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/attentions.py:349: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if pad_length > 0: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:114: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:114: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
if torch.min(inputs) < left or torch.max(inputs) > right: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:119: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:119: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if min_bin_width * num_bins > 1.0: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:121: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:121: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
if min_bin_height * num_bins > 1.0: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:171: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/openvoice/OpenVoice/openvoice/transforms.py:171: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert (discriminant >= 0).all() - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Trace had nondeterministic nodes. Did you forget call .eval() on your model? 
Nodes: - %3293 : Float(1, 2, 43, strides=[86, 43, 1], requires_grad=0, device=cpu) = aten::randn(%3288, %3289, %3290, %3291, %3292) # /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/dynamic_graph/wrappers.py:86:0 - %5559 : Float(1, 192, 153, strides=[29376, 1, 192], requires_grad=0, device=cpu) = aten::randn_like(%m_p, %5554, %5555, %5556, %5557, %5558) # /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/dynamic_graph/wrappers.py:86:0 + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Trace had nondeterministic nodes. Did you forget call .eval() on your model? Nodes: + %3293 : Float(1, 2, 43, strides=[86, 43, 1], requires_grad=0, device=cpu) = aten::randn(%3288, %3289, %3290, %3291, %3292) # /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/dynamic_graph/wrappers.py:86:0 + %5559 : Float(1, 192, 150, strides=[28800, 1, 192], requires_grad=0, device=cpu) = aten::randn_like(%m_p, %5554, %5555, %5556, %5557, %5558) # /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/dynamic_graph/wrappers.py:86:0 This may cause errors in trace checking. To disable trace checking, pass check_trace=False to torch.jit.trace() _check_trace( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 1. 
of the traced function does not match the corresponding output of the Python function. Detailed error: - The values for attribute 'shape' do not match: torch.Size([1, 1, 39680]) != torch.Size([1, 1, 38400]). - _check_trace( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 2. of the traced function does not match the corresponding output of the Python function. Detailed error: - The values for attribute 'shape' do not match: torch.Size([1, 1, 155, 43]) != torch.Size([1, 1, 150, 43]). + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 1. of the traced function does not match the corresponding output of the Python function. Detailed error: + Tensor-likes are not close! + + Mismatched elements: 38094 / 39424 (96.6%) + Greatest absolute difference: 0.7026380896568298 at index (0, 0, 4174) (up to 1e-05 allowed) + Greatest relative difference: 43899.56701030928 at index (0, 0, 2529) (up to 1e-05 allowed) _check_trace( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 3. of the traced function does not match the corresponding output of the Python function. Detailed error: - The values for attribute 'shape' do not match: torch.Size([1, 1, 155]) != torch.Size([1, 1, 150]). + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 2. of the traced function does not match the corresponding output of the Python function. Detailed error: + Tensor-likes are not close! 
+ + Mismatched elements: 42 / 6622 (0.6%) + Greatest absolute difference: 1.0 at index (0, 0, 7, 1) (up to 1e-05 allowed) + Greatest relative difference: inf at index (0, 0, 7, 2) (up to 1e-05 allowed) _check_trace( @@ -483,16 +482,16 @@ documentation )`. (Triggered internally at ../aten/src/ATen/native/SpectralOps.cpp:836.) + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/modules/module.py:1562: UserWarning: A window was not provided. A rectangular window will be applied,which is known to cause spectral leakage. Other windows such as torch.hann_window or torch.hamming_window can are recommended to reduce spectral leakage.To suppress this warning and use a rectangular window, explicitly set `window=torch.ones(n_fft, device=)`. (Triggered internally at ../aten/src/ATen/native/SpectralOps.cpp:836.) return forward_call(\*args, \*\*kwargs) @@ -720,7 +719,7 @@ Load speaker embeddings .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/functional.py:666: UserWarning: stft with return_complex=False is deprecated. In a future pytorch release, stft will return complex tensors for all inputs, and return_complex=False will raise an error. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/functional.py:666: UserWarning: stft with return_complex=False is deprecated. In a future pytorch release, stft will return complex tensors for all inputs, and return_complex=False will raise an error. Note: you can still call torch.view_as_real on the complex output to recover the old return format. (Triggered internally at ../aten/src/ATen/native/SpectralOps.cpp:873.) 
return _VF.stft(input, n_fft, hop_length, win_length, window, # type: ignore[attr-defined] @@ -875,7 +874,7 @@ And finally, run voice tone conversion with OpenVINO optimized model @@ -893,7 +892,7 @@ And finally, run voice tone conversion with OpenVINO optimized model @@ -1082,7 +1081,7 @@ voice tone conversion online. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/gradio/components/dropdown.py:100: UserWarning: The `max_choices` parameter is ignored when `multiselect` is False. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/gradio/components/dropdown.py:100: UserWarning: The `max_choices` parameter is ignored when `multiselect` is False. warnings.warn( diff --git a/docs/notebooks/optical-character-recognition-with-output.rst b/docs/notebooks/optical-character-recognition-with-output.rst index 7dae2290312e68..764bad414c61e9 100644 --- a/docs/notebooks/optical-character-recognition-with-output.rst +++ b/docs/notebooks/optical-character-recognition-with-output.rst @@ -131,13 +131,13 @@ again. .. parsed-literal:: - model/horizontal-text-detection-0001/FP16/horizontal-text-detection-0001.bin: 0%| | 0.00/3.70M [00:… + horizontal-text-detection-0001.bin: 0%| | 0.00/3.70M [00:00 + @@ -375,7 +375,7 @@ may be specified is input data .. parsed-literal:: - + @@ -413,7 +413,7 @@ then such conversion will be added explicitly. .. parsed-literal:: - + @@ -575,7 +575,7 @@ Compare results on one image .. parsed-literal:: - data/imagenet_2012.txt: 0%| | 0.00/30.9k [00:00= 3.10. 
Please make + sure that your environment fulfills this requirement before running + it + +`OuteTTS-0.1-350M `__ is +a novel text-to-speech synthesis model that leverages pure language +modeling without external adapters or complex architectures, built upon +the LLaMa architecture. It demonstrates that high-quality speech +synthesis is achievable through a straightforward approach using crafted +prompts and audio tokens. + +More details about the model can be found in the `original +repo `__. + +In this tutorial we consider how to run the OuteTTS pipeline using OpenVINO. + + +**Table of contents:** + + +- `Prerequisites <#prerequisites>`__ +- `Convert model <#convert-model>`__ +- `Run model inference <#run-model-inference>`__ + + - `Text-to-Speech generation <#text-to-speech-generation>`__ + - `Text-to-Speech generation with Voice + Cloning <#text-to-speech-generation-with-voice-cloning>`__ + +- `Interactive demo <#interactive-demo>`__ + +Installation Instructions +~~~~~~~~~~~~~~~~~~~~~~~~~ + +This is a self-contained example that relies solely on its own code. + +We recommend running the notebook in a virtual environment. You only +need a Jupyter server to start. For details, please refer to +`Installation +Guide `__. + +Prerequisites +------------- + + + +.. code:: ipython3 + + import platform + + %pip install -q "torch>=2.1" "torchaudio" "einops" "transformers>=4.46.1" "loguru" "inflect" "pesq" "torchcrepe" "natsort" "polars" uroman mecab-python3 unidic-lite --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q "gradio>=4.19" "openvino>=2024.4.0" "tqdm" "pyyaml" "librosa" "soundfile" + %pip install -q "git+https://github.com/huggingface/optimum-intel.git" --extra-index-url https://download.pytorch.org/whl/cpu + + if platform.system() == "Darwin": + %pip install -q "numpy<2.0.0" + +..
code:: ipython3 + + import requests + from pathlib import Path + + utility_files = ["cmd_helper.py", "notebook_utils.py"] + base_utility_url = "https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/" + + for utility_file in utility_files: + if not Path(utility_file).exists(): + r = requests.get(base_utility_url + utility_file) + with Path(utility_file).open("w") as f: + f.write(r.text) + + + helper_files = ["gradio_helper.py", "ov_outetts_helper.py"] + base_helper_url = "https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/outetts-text-to-speech" + + for helper_file in helper_files: + if not Path(helper_file).exists(): + r = requests.get(base_helper_url + helper_file) + with Path(helper_file).open("w") as f: + f.write(r.text) + +.. code:: ipython3 + + from cmd_helper import clone_repo + + repo_path = clone_repo("https://github.com/edwko/OuteTTS.git") + + interface_path = repo_path / "outetts/version/v1/interface.py" + + updated_version = interface_path.exists() + + if not updated_version: + interface_pth = repo_path / "outetts/v0_1/interface.py" + orig_interface_path = interface_path.parent / "_orig_interface.py" + + if not updated_version and not orig_interface_path.exists(): + interface_path.rename(orig_interface_path) + # sounddevice requires to install manually additional libraries, as we do not plan to use it for audio playing + # move it closer to its usage for avoid errors + with orig_interface_path.open("r") as in_file: + content = in_file.read() + upd_content = content.replace("import sounddevice as sd", "") + upd_content = upd_content.replace("sd.play", "import sounddevice as sd\n sd.play") + with interface_path.open("w") as out_file: + out_file.write(upd_content) + + %pip install -q {repo_path} --extra-index-url https://download.pytorch.org/whl/cpu + +Convert model +------------- + + + +OpenVINO supports PyTorch models via conversion to OpenVINO Intermediate +Representation format. 
For convenience, we will use OpenVINO integration +with HuggingFace Optimum. `Optimum +Intel `__ is the +interface between the Transformers and Diffusers libraries and the +different tools and libraries provided by Intel to accelerate end-to-end +pipelines on Intel architectures. + +Among other use cases, Optimum Intel provides a simple interface to +optimize your Transformers and Diffusers models, convert them to the +OpenVINO Intermediate Representation (IR) format and run inference using +OpenVINO Runtime. ``optimum-cli`` provides a command line interface for +model conversion and optimization. + +General command format: + +.. code:: bash + + optimum-cli export openvino --model --task + +where task is the task to export the model for; if not specified, the task +will be auto-inferred based on the model. You can find a mapping between +tasks and model classes in Optimum TaskManager +`documentation `__. +Additionally, you can specify weights compression using +``--weight-format`` argument with one of the following options: ``fp32``, +``fp16``, ``int8`` and ``int4``. For int8 and int4 +`nncf `__ will be used for +weight compression. More details about model export are provided in `Optimum +Intel +documentation `__. + +As OuteTTS utilizes a pure language modeling approach, the model conversion +process remains the same as for the LLaMa model family used for text +generation purposes. + +.. code:: ipython3 + + from cmd_helper import optimum_cli + + model_id = "OuteAI/OuteTTS-0.1-350M" + model_dir = Path(model_id.split("/")[-1] + "-ov") + + if not model_dir.exists(): + optimum_cli(model_id, model_dir, additional_args={"task": "text-generation-with-past"}) + +Run model inference +------------------- + + + +OpenVINO integration with Optimum Intel provides ready-to-use API for +model inference that can be used for smooth integration with +transformers-based solutions.
To load the model, we will use the +``OVModelForCausalLM`` class, which has an interface compatible with the +Transformers LLaMa implementation. For loading a model, the +``from_pretrained`` method should be used. It accepts a path to the model +directory or a model_id from the HuggingFace hub (if the model is not converted to +OpenVINO format, conversion will be triggered automatically). +Additionally, we can provide an inference device, quantization config +(if model has not been quantized yet) and device-specific OpenVINO +Runtime configuration. More details about model inference with Optimum +Intel can be found in +`documentation `__. +We will use ``OVModelForCausalLM`` as a replacement for the original +``AutoModelForCausalLM`` in ``InterfaceHF``. + +.. code:: ipython3 + + from notebook_utils import device_widget + + device = device_widget(exclude=["NPU"]) + + device + + + + +.. parsed-literal:: + + Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') + + + +.. code:: ipython3 + + from ov_outetts_helper import InterfaceOV, OVHFModel # noqa: F401 + + # Uncomment these lines to see pipeline details + # ??InterfaceOV + # ??OVHFModel + + +.. parsed-literal:: + + 2024-11-29 11:48:51.975233: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
+ 2024-11-29 11:48:51.989550: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered + WARNING: All log messages before absl::InitializeLog() is called are written to STDERR + E0000 00:00:1732866532.005718 2314480 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered + E0000 00:00:1732866532.010517 2314480 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered + 2024-11-29 11:48:52.027376: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. + + +.. code:: ipython3 + + interface = InterfaceOV(model_dir, device.value) + + +.. parsed-literal:: + + making attention of type 'vanilla' with 768 in_channels + + +Text-to-Speech generation +~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +Now let’s see model in action. Providing input text to ``generate`` +method of interface, model returns tensor that represents output audio +with random speaker characteristics. + +.. code:: ipython3 + + output = interface.generate(text="Hello, I'm working!", temperature=0.1, repetition_penalty=1.1, max_length=4096) + + +.. parsed-literal:: + + The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results. + Setting `pad_token_id` to `eos_token_id`:None for open-end generation. + The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. 
Please pass your input's `attention_mask` to obtain reliable results. + + +.. code:: ipython3 + + import IPython.display as ipd + + ipd.Audio(output.audio[0].numpy(), rate=output.sr) + + + + +.. raw:: html + + + + + + + +Text-to-Speech generation with Voice Cloning +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +Additionally, we can specify reference voice for generation by providing +reference audio and transcript for it. ``interface.create_speaker`` +processes reference audio and text to set of features used for audio +description. + +.. code:: ipython3 + + from notebook_utils import download_file + + ref_audio_url = "https://huggingface.co/OuteAI/OuteTTS-0.1-350M/resolve/main/samples/2.wav" + + file_path = download_file(ref_audio_url) + + +.. parsed-literal:: + + '2.wav' already exists. + + +.. code:: ipython3 + + ipd.Audio(file_path) + + + + +.. raw:: html + + + + + + + +.. code:: ipython3 + + speaker = interface.create_speaker(file_path, "Hello, I can speak pretty well, but sometimes I make some mistakes.") + + # Save the speaker to a file + interface.save_speaker(speaker, "speaker.pkl") + + # Load the speaker from a file + speaker = interface.load_speaker("speaker.pkl") + + # Generate TTS with the custom voice + output = interface.generate(text="This is a cloned voice speaking", speaker=speaker, temperature=0.1, repetition_penalty=1.1, max_length=4096) + + +.. parsed-literal:: + + The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results. + Setting `pad_token_id` to `eos_token_id`:None for open-end generation. + + +.. code:: ipython3 + + ipd.Audio(output.audio[0].numpy(), rate=output.sr) + + + + +.. raw:: html + + + + + + + +Interactive demo +---------------- + + + +.. 
code:: ipython3 + + from gradio_helper import make_demo + + demo = make_demo(interface) + + try: + demo.launch(debug=True) + except Exception: + demo.launch(share=True, debug=True) diff --git a/docs/notebooks/paddle-ocr-webcam-with-output.rst b/docs/notebooks/paddle-ocr-webcam-with-output.rst index 3fae2e47d99b24..aa054a40e73a07 100644 --- a/docs/notebooks/paddle-ocr-webcam-with-output.rst +++ b/docs/notebooks/paddle-ocr-webcam-with-output.rst @@ -214,7 +214,7 @@ Download the Model for Text **Detection** .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-no… + ch_PP-OCRv3_det_infer.tar: 0%| | 0.00/3.65M [00:00 + @@ -439,7 +439,7 @@ Note that many optimizations are possible to improve the performance. .. parsed-literal:: - PaddlePaddle model on CPU: 0.0069 seconds per image, FPS: 144.32 + PaddlePaddle model on CPU: 0.0071 seconds per image, FPS: 141.67 PaddlePaddle result: Labrador retriever, 0.75138 @@ -500,7 +500,7 @@ select device from dropdown list for running inference using OpenVINO .. parsed-literal:: - OpenVINO IR model in OpenVINO Runtime (AUTO): 0.0026 seconds per image, FPS: 380.57 + OpenVINO IR model in OpenVINO Runtime (AUTO): 0.0027 seconds per image, FPS: 376.00 OpenVINO result: Labrador retriever, 0.74909 diff --git a/docs/notebooks/parler-tts-text-to-speech-with-output.rst b/docs/notebooks/parler-tts-text-to-speech-with-output.rst index 323959aa17e8ef..2be3c2a4a2c7ed 100644 --- a/docs/notebooks/parler-tts-text-to-speech-with-output.rst +++ b/docs/notebooks/parler-tts-text-to-speech-with-output.rst @@ -9,7 +9,7 @@ with synthetic annotations `__ by Dan Lyth and Simon King, from Stability AI and Edinburgh University respectively. -.. 
image:: https://images.squarespace-cdn.com/content/v1/657816dfbefe0533e8a69d9a/30c96e25-acc5-4019-acdd-648da6142c4c/architecture_v3.png?format=2500w +|image0| Text-to-speech models trained on large-scale datasets have demonstrated impressive in-context learning capabilities and naturalness. However, @@ -53,6 +53,8 @@ need a Jupyter server to start. For details, please refer to `Installation Guide `__. +.. |image0| image:: https://images.squarespace-cdn.com/content/v1/657816dfbefe0533e8a69d9a/30c96e25-acc5-4019-acdd-648da6142c4c/architecture_v3.png?format=2500w + Prerequisites ------------- @@ -64,8 +66,32 @@ Prerequisites os.environ["GIT_CLONE_PROTECTION_ACTIVE"] = "false" + %pip uninstall -q -y torch torchvision torchaudio %pip install -q "openvino>=2024.2.0" - %pip install -q git+https://github.com/huggingface/parler-tts.git "gradio>=4.19" transformers "torch>=2.2" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -q git+https://github.com/huggingface/parler-tts.git "gradio>=4.19" transformers "torch>=2.2" "torchaudio" --extra-index-url https://download.pytorch.org/whl/cpu + + +.. parsed-literal:: + + Note: you may need to restart the kernel to use updated packages. + Note: you may need to restart the kernel to use updated packages. + ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. + easyocr 1.7.2 requires torchvision>=0.5, which is not installed. + mobileclip 0.1.0 requires clip-benchmark>=1.4.0, which is not installed. + mobileclip 0.1.0 requires torchvision==0.14.1, which is not installed. + open-clip-torch 2.22.0 requires torchvision, which is not installed. + timm 1.0.12 requires torchvision, which is not installed. + ultralytics 8.1.24 requires torchvision>=0.9.0, which is not installed. + open-clip-torch 2.22.0 requires protobuf<4, but you have protobuf 4.25.5 which is incompatible. 
+ tensorflow 2.12.0 requires keras<2.13,>=2.12.0, but you have keras 2.13.1 which is incompatible. + tensorflow 2.12.0 requires numpy<1.24,>=1.22, but you have numpy 1.24.4 which is incompatible. + tensorflow 2.12.0 requires tensorboard<2.13,>=2.12, but you have tensorboard 2.13.0 which is incompatible. + tensorflow 2.12.0 requires tensorflow-estimator<2.13,>=2.12.0, but you have tensorflow-estimator 2.13.0 which is incompatible. + tensorflow-cpu 2.13.1 requires numpy<=1.24.3,>=1.22, but you have numpy 1.24.4 which is incompatible. + tensorflow-cpu 2.13.1 requires typing-extensions<4.6.0,>=3.6.6, but you have typing-extensions 4.12.2 which is incompatible. + tensorflow-metadata 1.14.0 requires protobuf<4.21,>=3.20.3, but you have protobuf 4.25.5 which is incompatible. + Note: you may need to restart the kernel to use updated packages. + Load the original model and inference ------------------------------------- @@ -95,6 +121,135 @@ Load the original model and inference audio_arr = generation.cpu().numpy().squeeze() sf.write("parler_tts_out.wav", audio_arr, model.config.sampling_rate) + +.. parsed-literal:: + + 2024-12-10 02:43:30.030324: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-12-10 02:43:30.055592: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+ Flash attention 2 is not installed + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/nn/utils/weight_norm.py:134: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`. + WeightNorm.apply(module, name, dim) + Config of the text_encoder: is overwritten by shared text_encoder config: T5Config { + "_name_or_path": "google/flan-t5-base", + "architectures": [ + "T5ForConditionalGeneration" + ], + "classifier_dropout": 0.0, + "d_ff": 2048, + "d_kv": 64, + "d_model": 768, + "decoder_start_token_id": 0, + "dense_act_fn": "gelu_new", + "dropout_rate": 0.1, + "eos_token_id": 1, + "feed_forward_proj": "gated-gelu", + "initializer_factor": 1.0, + "is_encoder_decoder": true, + "is_gated_act": true, + "layer_norm_epsilon": 1e-06, + "model_type": "t5", + "n_positions": 512, + "num_decoder_layers": 12, + "num_heads": 12, + "num_layers": 12, + "output_past": true, + "pad_token_id": 0, + "relative_attention_max_distance": 128, + "relative_attention_num_buckets": 32, + "task_specific_params": { + "summarization": { + "early_stopping": true, + "length_penalty": 2.0, + "max_length": 200, + "min_length": 30, + "no_repeat_ngram_size": 3, + "num_beams": 4, + "prefix": "summarize: " + }, + "translation_en_to_de": { + "early_stopping": true, + "max_length": 300, + "num_beams": 4, + "prefix": "translate English to German: " + }, + "translation_en_to_fr": { + "early_stopping": true, + "max_length": 300, + "num_beams": 4, + "prefix": "translate English to French: " + }, + "translation_en_to_ro": { + "early_stopping": true, + "max_length": 300, + "num_beams": 4, + "prefix": "translate English to Romanian: " + } + }, + "tie_word_embeddings": false, + "transformers_version": "4.46.1", + "use_cache": true, + "vocab_size": 32128 + } + + Config of the audio_encoder: is overwritten by shared audio_encoder config: DACConfig { + 
"_name_or_path": "ylacombe/dac_44khZ_8kbps", + "architectures": [ + "DACModel" + ], + "codebook_size": 1024, + "frame_rate": 86, + "latent_dim": 1024, + "model_bitrate": 8, + "model_type": "dac_on_the_hub", + "num_codebooks": 9, + "sampling_rate": 44100, + "torch_dtype": "float32", + "transformers_version": "4.46.1" + } + + Config of the decoder: is overwritten by shared decoder config: ParlerTTSDecoderConfig { + "_name_or_path": "/fsx/yoach/tmp/artefacts/decoder_400M/", + "activation_dropout": 0.0, + "activation_function": "gelu", + "add_cross_attention": true, + "architectures": [ + "ParlerTTSForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 1025, + "codebook_weights": null, + "cross_attention_implementation_strategy": null, + "dropout": 0.1, + "eos_token_id": 1024, + "ffn_dim": 4096, + "hidden_size": 1024, + "initializer_factor": 0.02, + "is_decoder": true, + "layerdrop": 0.0, + "max_position_embeddings": 4096, + "model_type": "parler_tts_decoder", + "num_attention_heads": 16, + "num_codebooks": 9, + "num_cross_attention_key_value_heads": 16, + "num_hidden_layers": 24, + "num_key_value_heads": 16, + "pad_token_id": 1024, + "rope_embeddings": false, + "rope_theta": 10000.0, + "scale_embedding": false, + "tie_word_embeddings": false, + "torch_dtype": "float32", + "transformers_version": "4.46.1", + "use_cache": true, + "use_fused_lm_heads": false, + "vocab_size": 1088 + } + + You set `add_prefix_space`. The tokenizer needs to be converted from the slow tokenizers + The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results. + + .. code:: ipython3 import IPython.display as ipd @@ -108,10 +263,10 @@ Load the original model and inference - + @@ -159,6 +314,20 @@ and Decoder (``ParlerTTSDecoder``). Lets convert them one by one. 
text_encoder_ov_model = convert(model.text_encoder, TEXT_ENCODER_OV_PATH, example_input) + +.. parsed-literal:: + + WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11. + + +.. parsed-literal:: + + [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:5006: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead + warnings.warn( + `loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`. + + The Decoder Model performs in generation pipeline and we can separate it into two stage. In the first stage the model generates ``past_key_values`` into output for the second stage. In the second @@ -193,6 +362,17 @@ stage the model produces tokens during several runs. decoder_1_ov_model = convert(DecoderStage1Wrapper(model.decoder.model.decoder), DECODER_STAGE_1_OV_PATH, example_input) + +.. parsed-literal:: + + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/parler_tts/modeling_parler_tts.py:367: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ if seq_len > self.weights.size(0): + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/parler_tts/modeling_parler_tts.py:1713: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if sequence_length != 1: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/parler_tts/modeling_parler_tts.py:916: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + if attn_output.size() != (bsz, self.num_heads, tgt_len, self.head_dim): + + .. code:: ipython3 DECODER_STAGE_2_OV_PATH = Path("models/decoder_stage_2_ir.xml") @@ -231,6 +411,15 @@ stage the model produces tokens during several runs. decoder_2_ov_model = convert(DecoderStage2Wrapper(model.decoder.model.decoder), DECODER_STAGE_2_OV_PATH, example_input) + +.. parsed-literal:: + + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/cache_utils.py:458: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results. 
+ or len(self.key_cache[layer_idx]) == 0 # the layer has no cache + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/cache_utils.py:443: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results. + elif len(self.key_cache[layer_idx]) == 0: # fills previously skipped layers; checking for tensor causes errors + + Compiling models and inference ------------------------------ @@ -258,7 +447,7 @@ Select device from dropdown list for running inference using OpenVINO. .. parsed-literal:: - Dropdown(description='Device:', index=4, options=('CPU', 'GPU.0', 'GPU.1', 'GPU.2', 'AUTO'), value='AUTO') + Dropdown(description='Device:', index=1, options=('CPU', 'AUTO'), value='AUTO') @@ -360,10 +549,10 @@ and run inference. - + @@ -406,13 +595,27 @@ Interactive inference demo = make_demo(fn=infer) try: - demo.queue().launch(debug=True) + demo.queue().launch(debug=False) except Exception: - demo.queue().launch(share=True, debug=True) + demo.queue().launch(share=True, debug=False) # if you are launching remotely, specify server_name and server_port # demo.launch(server_name='your server name', server_port='server port in int') # Read more in the docs: https://gradio.app/docs/ + +.. parsed-literal:: + + Running on local URL: http://127.0.0.1:7860 + + To create a public link, set `share=True` in `launch()`. + + + + + + + + .. 
code:: ipython3 # please uncomment and run this cell for stopping gradio interface diff --git a/docs/notebooks/person-tracking-with-output.rst b/docs/notebooks/person-tracking-with-output.rst index 653a9b376edf7e..6ac8ff43e05ab2 100644 --- a/docs/notebooks/person-tracking-with-output.rst +++ b/docs/notebooks/person-tracking-with-output.rst @@ -148,7 +148,7 @@ Imports import collections from pathlib import Path import time - + import numpy as np import cv2 from IPython import display @@ -158,17 +158,17 @@ Imports .. code:: ipython3 # Import local modules - + if not Path("./notebook_utils.py").exists(): # Fetch `notebook_utils` module import requests - + r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", ) - + open("notebook_utils.py", "w").write(r.text) - + import notebook_utils as utils from deepsort_utils.tracker import Tracker from deepsort_utils.nn_matching import NearestNeighborDistanceMetric @@ -200,36 +200,36 @@ by the cosine distance. .. code:: ipython3 from notebook_utils import download_ir_model - + # A directory where the model will be downloaded. base_model_dir = "model" precision = "FP16" # The name of the model from Open Model Zoo detection_model_name = "person-detection-0202" - - + + download_det_model_url = ( f"https://storage.openvinotoolkit.org/repositories/open_model_zoo/2023.0/models_bin/1/{detection_model_name}/{precision}/{detection_model_name}.xml" ) - + detection_model_path = download_ir_model(download_det_model_url, Path(base_model_dir) / detection_model_name / precision) - + reidentification_model_name = "person-reidentification-retail-0287" download_reid_model_url = f"https://storage.openvinotoolkit.org/repositories/open_model_zoo/2023.0/models_bin/1/{reidentification_model_name}/{precision}/{reidentification_model_name}.xml" - + reidentification_model_path = download_ir_model(download_reid_model_url, Path(base_model_dir) / reidentification_model_name / precision) .. 
parsed-literal:: - model/person-detection-0202/FP16/person-detection-0202.bin: 0%| | 0.00/3.47M [00:00 200: processing_times.popleft() - + _, f_width = frame.shape[:2] # Mean processing time [ms]. processing_time = np.mean(processing_times) * 1100 fps = 1000 / processing_time - + # Get poses from detection results. bbox_xywh, score, label = process_results(h, w, results=output) - + img_crops = [] for box in bbox_xywh: x1, y1, x2, y2 = xywh_to_xyxy(box, h, w) img = frame[y1:y2, x1:x2] img_crops.append(img) - + # Get reidentification feature of each person. if img_crops: # preprocess @@ -615,17 +614,17 @@ video file. features = extractor.predict(img_batch) else: features = np.array([]) - + # Wrap the detection and reidentification results together bbox_tlwh = xywh_to_tlwh(bbox_xywh) detections = [Detection(bbox_tlwh[i], features[i]) for i in range(features.shape[0])] - + # predict the position of tracking target tracker.predict() - + # update tracker tracker.update(detections) - + # update bbox identities outputs = [] for track in tracker.tracks: @@ -637,14 +636,14 @@ video file. outputs.append(np.array([x1, y1, x2, y2, track_id], dtype=np.int32)) if len(outputs) > 0: outputs = np.stack(outputs, axis=0) - + # draw box for visualization if len(outputs) > 0: bbox_tlwh = [] bbox_xyxy = outputs[:, :4] identities = outputs[:, -1] frame = draw_boxes(frame, bbox_xyxy, identities) - + cv2.putText( img=frame, text=f"Inference time: {processing_time:.1f}ms ({fps:.1f} FPS)", @@ -655,7 +654,7 @@ video file. thickness=1, lineType=cv2.LINE_AA, ) - + if use_popup: cv2.imshow(winname=title, mat=frame) key = cv2.waitKey(1) @@ -670,7 +669,7 @@ video file. # Display the image in this notebook. display.clear_output(wait=True) display.display(i) - + # ctrl-c except KeyboardInterrupt: print("Interrupted") @@ -724,11 +723,11 @@ will work. .. 
code:: ipython3 USE_WEBCAM = False - + cam_id = 0 video_file = "https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/video/people.mp4" source = cam_id if USE_WEBCAM else video_file - + run_person_tracking(source=source, flip=USE_WEBCAM, use_popup=False) diff --git a/docs/notebooks/person-tracking-with-output_files/person-tracking-with-output_25_0.png b/docs/notebooks/person-tracking-with-output_files/person-tracking-with-output_25_0.png index f827c9c1094e46..972cc9e5977684 100644 --- a/docs/notebooks/person-tracking-with-output_files/person-tracking-with-output_25_0.png +++ b/docs/notebooks/person-tracking-with-output_files/person-tracking-with-output_25_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5dffde5665ae619cc99fddef72befb32d1002becce56dfccf50e7577f1fab020 -size 218904 +oid sha256:1c04ed0e53cb210bd7853d3daa7f77a0a087b8e08099b837d3237b025c223b5d +size 218593 diff --git a/docs/notebooks/phi-3-vision-with-output.rst b/docs/notebooks/phi-3-vision-with-output.rst index 71981daac13be4..dc588206768c93 100644 --- a/docs/notebooks/phi-3-vision-with-output.rst +++ b/docs/notebooks/phi-3-vision-with-output.rst @@ -260,8 +260,8 @@ documentation 1 or self.sliding_window is not None) and self.is_causal: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:164: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:164: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if past_key_values_length > 0: /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/microsoft/Phi-3.5-vision-instruct/4a0d683eba9f1d0cbfb6151705d1ee73c25a80ca/modeling_phi3_v.py:444: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! seq_len = seq_len or torch.max(position_ids) + 1 /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/microsoft/Phi-3.5-vision-instruct/4a0d683eba9f1d0cbfb6151705d1ee73c25a80ca/modeling_phi3_v.py:445: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if seq_len > self.original_max_position_embeddings: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/dynamic_graph/wrappers.py:86: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/nncf/torch/dynamic_graph/wrappers.py:86: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect. op1 = operator(\*args, \*\*kwargs) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/cache_utils.py:443: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/cache_utils.py:443: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results. elif len(self.key_cache[layer_idx]) == 0: # fills previously skipped layers; checking for tensor causes errors /opt/home/k8sworker/.cache/huggingface/modules/transformers_modules/microsoft/Phi-3.5-vision-instruct/4a0d683eba9f1d0cbfb6151705d1ee73c25a80ca/modeling_phi3_v.py:683: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
if attn_weights.size() != (bsz, self.num_heads, q_len, kv_seq_len): @@ -374,7 +365,7 @@ documentation =4.0.0, but you have protobuf 3.20.3 which is incompatible. + parler-tts 0.2.2 requires protobuf>=4.0.0, but you have protobuf 3.20.3 which is incompatible. tensorflow 2.12.0 requires keras<2.13,>=2.12.0, but you have keras 2.13.1 which is incompatible. tensorflow 2.12.0 requires numpy<1.24,>=1.22, but you have numpy 1.24.4 which is incompatible. tensorflow 2.12.0 requires tensorboard<2.13,>=2.12, but you have tensorboard 2.13.0 which is incompatible. @@ -210,8 +210,8 @@ PhotoMaker to generate the original PhotoMaker pipeline. .. parsed-literal:: - 2024-11-22 02:03:50.933677: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-11-22 02:03:50.958255: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-12-10 02:49:18.726948: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-12-10 02:49:18.751780: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. @@ -230,6 +230,12 @@ PhotoMaker to generate the original PhotoMaker pipeline. 
Loading pipeline components...: 0%| | 0/7 [00:00 1 or self.sliding_window is not None: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:164: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:164: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if past_key_values_length > 0: @@ -587,15 +584,15 @@ original Stable Diffusion XL model. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/unets/unet_2d_condition.py:1103: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/unets/unet_2d_condition.py:1103: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! if dim % default_overall_up_factor != 0: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/downsampling.py:136: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/downsampling.py:136: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert hidden_states.shape[1] == self.channels - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/downsampling.py:145: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/downsampling.py:145: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
assert hidden_states.shape[1] == self.channels - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:146: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:146: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert hidden_states.shape[1] == self.channels - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
if hidden_states.shape[0] >= 64: diff --git a/docs/notebooks/pixart-with-output.rst b/docs/notebooks/pixart-with-output.rst index 517191e17501ef..fed1f6b3dada41 100644 --- a/docs/notebooks/pixart-with-output.rst +++ b/docs/notebooks/pixart-with-output.rst @@ -118,8 +118,8 @@ directly in latent space, achieving super fast inference with few steps. .. parsed-literal:: - 2024-11-22 02:11:50.540718: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-11-22 02:11:50.565755: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-12-10 02:57:23.724286: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-12-10 02:57:23.749610: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. @@ -132,6 +132,8 @@ directly in latent space, achieving super fast inference with few steps. .. parsed-literal:: You are using the default legacy behaviour of the . This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. 
This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565 + Some weights of the model checkpoint were not used when initializing PixArtTransformer2DModel: + ['caption_projection.y_embedding'] @@ -140,12 +142,6 @@ directly in latent space, achieving super fast inference with few steps. Loading checkpoint shards: 0%| | 0/4 [00:00= 64: @@ -452,7 +448,7 @@ And insert wrappers instances in the pipeline: .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'PixArtAlphaPipeline' object attribute is deprecated. Please access '_execution_device' over 'PixArtAlphaPipeline's config object instead, e.g. 'scheduler.config._execution_device'. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'PixArtAlphaPipeline' object attribute is deprecated. Please access '_execution_device' over 'PixArtAlphaPipeline's config object instead, e.g. 'scheduler.config._execution_device'. deprecate("direct config name access", "1.0.0", deprecation_message, standard_warn=False) @@ -567,7 +563,7 @@ To collect intermediate model inputs for calibration we should customize .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'PixArtAlphaPipeline' object attribute is deprecated. 
Please access '_execution_device' over 'PixArtAlphaPipeline's config object instead, e.g. 'scheduler.config._execution_device'. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/configuration_utils.py:140: FutureWarning: Accessing config attribute `_execution_device` directly via 'PixArtAlphaPipeline' object attribute is deprecated. Please access '_execution_device' over 'PixArtAlphaPipeline's config object instead, e.g. 'scheduler.config._execution_device'. deprecate("direct config name access", "1.0.0", deprecation_message, standard_warn=False) @@ -1625,16 +1621,16 @@ pipelines. Loading pipeline components...: 0%| | 0/5 [00:00 0.19 and openvino version <= 2024.4. Please downgrade to tokenizers version <= 0.19 to export tokenizers to OpenVINO. - INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ 8 │ 6% (1 / 281) │ 0% (0 / 280) │ - ├────────────────┼─────────────────────────────┼────────────────────────────────────────┤ - │ 4 │ 94% (280 / 281) │ 100% (280 / 280) │ - ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ - Applying Weight Compression ━━━━━━━━━━━━━━━━━━━━━━━━━━━ 100% • 0:05:31 • 0:00:00 - INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ 8 │ 6% (3 / 172) │ 0% (0 / 169) │ - ├────────────────┼─────────────────────────────┼────────────────────────────────────────┤ - │ 4 
│ 94% (169 / 172) │ 100% (169 / 169) │ - ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ - Applying Weight Compression ━━━━━━━━━━━━━━━━━━━━━━━━━━━ 100% • 0:00:12 • 0:00:00 - INFO:nncf:Statistics of the bitwidth distribution: - ┍━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┑ - │ Num bits (N) │ % all parameters (layers) │ % ratio-defining parameters (layers) │ - ┝━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┥ - │ 8 │ 100% (1 / 1) │ 0% (0 / 0) │ - ┕━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┙ - Applying Weight Compression ━━━━━━━━━━━━━━━━━━━━━━━━━━━ 100% • 0:00:05 • 0:00:00 - + Traceback (most recent call last): + File "/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/bin/optimum-cli", line 10, in + sys.exit(main()) + File "/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/optimum/commands/optimum_cli.py", line 208, in main + service.run() + File "/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/optimum/commands/export/openvino.py", line 390, in run + main_export( + File "/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/optimum/exporters/openvino/__main__.py", line 476, in main_export + _weight_only_quantization(submodel, quantization_config) + File "/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/optimum/intel/openvino/quantization.py", line 938, in _weight_only_quantization + return nncf.compress_weights( + TypeError: 
compress_weights() got an unexpected keyword argument 'backup_mode' + Run model inference ------------------- @@ -541,8 +528,8 @@ Intel can be found in .. parsed-literal:: - 2024-11-22 03:06:17.214277: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-11-22 03:06:17.240005: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-12-10 03:48:41.700649: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-12-10 03:48:41.726260: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. @@ -587,7 +574,7 @@ Intel can be found in .. parsed-literal:: - The unusual aspect of this image is that the cat is lying inside a cardboard box, which is not a typical setting for a cat. Cats are often known for their affinity for boxes, but it is still considered unusual to see a cat comfortably resting inside a box in a living room setting. The cat appears relaxed and content, which adds to the charm of the scene. The presence of a sofa in the background further emphasizes the domestic and cozy atmosphere of the image. + The unusual aspect of this image is that the cat is lying on its back inside a cardboard box. 
This is not a typical position for a cat, as they usually prefer to curl up or lie on their sides when resting. Additionally, cats are known for their love of small, enclosed spaces, but it is less common to see a cat lying on its back in such a setting. The image captures a playful and relaxed moment, highlighting the cat's comfort and curiosity. Interactive demo diff --git a/docs/notebooks/pose-estimation-with-output.rst b/docs/notebooks/pose-estimation-with-output.rst index e827bd19acfd34..112b6037d4907f 100644 --- a/docs/notebooks/pose-estimation-with-output.rst +++ b/docs/notebooks/pose-estimation-with-output.rst @@ -126,13 +126,13 @@ precision in the code below. .. parsed-literal:: - model/intel/human-pose-estimation-0001/FP16-INT8/human-pose-estimation-0001.xml: 0%| | 0.00/474k [0… + human-pose-estimation-0001.xml: 0%| | 0.00/474k [00:00 target_length: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/cache_utils.py:443: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/cache_utils.py:443: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results. 
elif len(self.key_cache[layer_idx]) == 0: # fills previously skipped layers; checking for tensor causes errors diff --git a/docs/notebooks/qwen2-vl-with-output.rst b/docs/notebooks/qwen2-vl-with-output.rst index d9c51a151e5926..ea0541fe1610a7 100644 --- a/docs/notebooks/qwen2-vl-with-output.rst +++ b/docs/notebooks/qwen2-vl-with-output.rst @@ -55,10 +55,8 @@ In this tutorial we consider how to convert and optimize Qwen2VL model for creating multimodal chatbot. Additionally, we demonstrate how to apply stateful transformation on LLM part and model optimization techniques like weights compression using -`NNCF `__ - - -**Table of contents:** +`NNCF `__ #### Table of +contents: - `Prerequisites <#prerequisites>`__ - `Select model <#select-model>`__ @@ -106,11 +104,11 @@ Prerequisites from pathlib import Path import requests - + if not Path("ov_qwen2_vl.py").exists(): r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/qwen2-vl/ov_qwen2_vl.py") open("ov_qwen2_vl.py", "w").write(r.text) - + if not Path("notebook_utils.py").exists(): r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py") open("notebook_utils.py", "w").write(r.text) @@ -128,9 +126,9 @@ using widget bellow: .. code:: ipython3 from ov_qwen2_vl import model_selector - + model_id = model_selector() - + model_id @@ -141,8 +139,8 @@ using widget bellow: .. parsed-literal:: - 2024-11-22 04:16:41.832996: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-11-22 04:16:41.858520: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. 
+ 2024-12-10 05:00:06.245590: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-12-10 05:00:06.272261: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. @@ -287,20 +285,20 @@ documentation target_length: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/cache_utils.py:443: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/cache_utils.py:443: TracerWarning: Using len to get tensor shape might cause the trace to be incorrect. Recommended usage would be tensor.shape[0]. Passing a tensor of different shape might lead to errors or silently give incorrect results. elif len(self.key_cache[layer_idx]) == 0: # fills previously skipped layers; checking for tensor causes errors @@ -442,7 +431,7 @@ Intel `__ .. code:: ipython3 from ov_qwen2_vl import OVQwen2VLModel - + # Uncomment below lines to see the model inference class code # OVQwen2VLModel?? @@ -454,9 +443,9 @@ Select inference device .. 
code:: ipython3 from notebook_utils import device_widget - + device = device_widget(default="AUTO", exclude=["NPU"]) - + device @@ -483,25 +472,25 @@ Run model inference from transformers import AutoProcessor, AutoTokenizer from qwen_vl_utils import process_vision_info from transformers import TextStreamer - - + + min_pixels = 256 * 28 * 28 max_pixels = 1280 * 28 * 28 processor = AutoProcessor.from_pretrained(model_dir, min_pixels=min_pixels, max_pixels=max_pixels) - + if processor.chat_template is None: tok = AutoTokenizer.from_pretrained(model_dir) processor.chat_template = tok.chat_template - + example_image_url = "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg" example_image_path = Path("demo.jpeg") - + if not example_image_path.exists(): Image.open(requests.get(example_image_url, stream=True).raw).save(example_image_path) - + image = Image.open(example_image_path) question = "Describe this image." - + messages = [ { "role": "user", @@ -514,7 +503,7 @@ Run model inference ], } ] - + # Preparation for inference text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) image_inputs, video_inputs = process_vision_info(messages) @@ -525,12 +514,12 @@ Run model inference padding=True, return_tensors="pt", ) - + display(image) print("Question:") print(question) print("Answer:") - + generated_ids = model.generate(**inputs, max_new_tokens=100, streamer=TextStreamer(processor.tokenizer, skip_prompt=True, skip_special_tokens=True)) @@ -573,10 +562,10 @@ click ``Submit`` to start communication. .. code:: ipython3 from gradio_helper import make_demo - - + + demo = make_demo(model, processor) - + try: demo.launch(debug=False) except Exception: @@ -589,9 +578,9 @@ click ``Submit`` to start communication. .. parsed-literal:: Running on local URL: http://127.0.0.1:7860 - + Thanks for being a Gradio user! 
If you have questions or feedback, please join our Discord server and chat with us: https://discord.gg/feTf9x3ZSB - + To create a public link, set `share=True` in `launch()`. diff --git a/docs/notebooks/rmbg-background-removal-with-output.rst b/docs/notebooks/rmbg-background-removal-with-output.rst index c2e7286cc35cb4..0961afb2bf1ef5 100644 --- a/docs/notebooks/rmbg-background-removal-with-output.rst +++ b/docs/notebooks/rmbg-background-removal-with-output.rst @@ -112,8 +112,8 @@ it may take some time. .. parsed-literal:: - 2024-11-22 04:19:11.305790: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-11-22 04:19:11.330949: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-12-10 05:02:42.657474: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-12-10 05:02:42.682685: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. @@ -240,7 +240,7 @@ function or directly loading on device using ``core.complie_model``. .. parsed-literal:: [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. 
- /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:5006: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:5006: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead warnings.warn( `loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`. diff --git a/docs/notebooks/segment-anything-2-image-with-output.rst b/docs/notebooks/segment-anything-2-image-with-output.rst index 1e938df4a9763a..d9b24bf720325b 100644 --- a/docs/notebooks/segment-anything-2-image-with-output.rst +++ b/docs/notebooks/segment-anything-2-image-with-output.rst @@ -120,24 +120,20 @@ Prerequisites .. parsed-literal:: - ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. - mobileclip 0.1.0 requires clip-benchmark>=1.4.0, which is not installed. - mobileclip 0.1.0 requires torchvision==0.14.1, but you have torchvision 0.19.1+cpu which is incompatible. - parler-tts 0.2.1 requires protobuf>=4.0.0, but you have protobuf 3.20.3 which is incompatible. Note: you may need to restart the kernel to use updated packages. 
Collecting iopath>=0.1.10 Using cached iopath-0.1.10-py3-none-any.whl - Requirement already satisfied: pillow>=9.4.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (10.4.0) - Requirement already satisfied: hydra-core>=1.3.2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (1.3.2) - Requirement already satisfied: tqdm in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from iopath>=0.1.10) (4.67.0) - Requirement already satisfied: typing-extensions in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from iopath>=0.1.10) (4.12.2) - Requirement already satisfied: portalocker in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from iopath>=0.1.10) (3.0.0) - Requirement already satisfied: omegaconf<2.4,>=2.2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from hydra-core>=1.3.2) (2.3.0) - Requirement already satisfied: antlr4-python3-runtime==4.9.* in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from hydra-core>=1.3.2) (4.9.3) - Requirement already satisfied: packaging in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from hydra-core>=1.3.2) (24.2) - Requirement already satisfied: importlib-resources in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from hydra-core>=1.3.2) (6.4.5) - Requirement already satisfied: PyYAML>=5.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from omegaconf<2.4,>=2.2->hydra-core>=1.3.2) (6.0.2) - Requirement already satisfied: zipp>=3.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from importlib-resources->hydra-core>=1.3.2) (3.20.2) + Requirement already satisfied: pillow>=9.4.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (10.4.0) + Requirement already satisfied: hydra-core>=1.3.2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (1.3.2) + Requirement already satisfied: tqdm in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from iopath>=0.1.10) (4.67.1) + Requirement already satisfied: typing-extensions in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from iopath>=0.1.10) (4.12.2) + Requirement already satisfied: portalocker in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from iopath>=0.1.10) (3.0.0) + Requirement already satisfied: omegaconf<2.4,>=2.2 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from hydra-core>=1.3.2) (2.3.0) + Requirement already satisfied: antlr4-python3-runtime==4.9.* in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from hydra-core>=1.3.2) (4.9.3) + Requirement already satisfied: packaging in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from hydra-core>=1.3.2) (24.2) + Requirement already satisfied: importlib-resources in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from hydra-core>=1.3.2) (6.4.5) + Requirement already satisfied: PyYAML>=5.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from omegaconf<2.4,>=2.2->hydra-core>=1.3.2) (6.0.2) + Requirement already satisfied: zipp>=3.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from importlib-resources->hydra-core>=1.3.2) (3.20.2) Installing collected packages: iopath Attempting uninstall: iopath Found existing installation: iopath 0.1.9 @@ -190,10 +186,10 @@ Clone and install segment-anything-2 .. 
parsed-literal:: env: SAM2_BUILD_CUDA=0 - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/sam2-image-segmentation/sam2 + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/sam2-image-segmentation/sam2 ERROR: Package 'sam-2' requires a different Python: 3.8.10 not in '>=3.10.0' Note: you may need to restart the kernel to use updated packages. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/sam2-image-segmentation + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/sam2-image-segmentation .. code:: ipython3 @@ -397,7 +393,8 @@ Mask prediction will be includes two models: * **Prompt Encoder** - Encoder for segmentation condition. As a condition can be used points, boxes or segmentation mask. -* **Mask Decoder** - The mask decoder efficiently maps the image embedding, prompt embeddings, and an output +* **Mask Decoder** - The mask decoder + efficiently maps the image embedding, prompt embeddings, and an output token to a mask. Combined prompt encoder and mask decoder model has following list of @@ -488,12 +485,6 @@ Example Image image = cv2.imread("truck.jpg") image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) - -.. parsed-literal:: - - 'truck.jpg' already exists. - - .. 
code:: ipython3 plt.figure(figsize=(10, 10)) diff --git a/docs/notebooks/segment-anything-2-image-with-output_files/segment-anything-2-image-with-output_92_0.png b/docs/notebooks/segment-anything-2-image-with-output_files/segment-anything-2-image-with-output_92_0.png new file mode 100644 index 00000000000000..343e5ecc49fc50 --- /dev/null +++ b/docs/notebooks/segment-anything-2-image-with-output_files/segment-anything-2-image-with-output_92_0.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:520c7390de98864c4ae6b24b940230e83f2b5fc0b1723d58ed9941cc2d9bc70f +size 469439 diff --git a/docs/notebooks/segment-anything-2-video-with-output.rst b/docs/notebooks/segment-anything-2-video-with-output.rst index 20aae9f8a5e3f9..dec5f3d63f341e 100644 --- a/docs/notebooks/segment-anything-2-video-with-output.rst +++ b/docs/notebooks/segment-anything-2-video-with-output.rst @@ -110,18 +110,18 @@ Prerequisites .. parsed-literal:: Note: you may need to restart the kernel to use updated packages. 
- Requirement already satisfied: iopath>=0.1.10 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (0.1.10) - Requirement already satisfied: pillow>=9.4.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (10.4.0) - Requirement already satisfied: hydra-core>=1.3.2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (1.3.2) - Requirement already satisfied: tqdm in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from iopath>=0.1.10) (4.67.0) - Requirement already satisfied: typing-extensions in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from iopath>=0.1.10) (4.12.2) - Requirement already satisfied: portalocker in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from iopath>=0.1.10) (3.0.0) - Requirement already satisfied: omegaconf<2.4,>=2.2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from hydra-core>=1.3.2) (2.3.0) - Requirement already satisfied: antlr4-python3-runtime==4.9.* in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from hydra-core>=1.3.2) (4.9.3) - Requirement already satisfied: packaging in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from hydra-core>=1.3.2) (24.2) - Requirement already satisfied: importlib-resources in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from hydra-core>=1.3.2) (6.4.5) - Requirement already satisfied: PyYAML>=5.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from omegaconf<2.4,>=2.2->hydra-core>=1.3.2) (6.0.2) - Requirement already satisfied: zipp>=3.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from importlib-resources->hydra-core>=1.3.2) (3.20.2) + Requirement already satisfied: iopath>=0.1.10 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (0.1.10) + Requirement already satisfied: pillow>=9.4.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (10.4.0) + Requirement already satisfied: hydra-core>=1.3.2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (1.3.2) + Requirement already satisfied: tqdm in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from iopath>=0.1.10) (4.67.1) + Requirement already satisfied: typing-extensions in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from 
iopath>=0.1.10) (4.12.2) + Requirement already satisfied: portalocker in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from iopath>=0.1.10) (3.0.0) + Requirement already satisfied: omegaconf<2.4,>=2.2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from hydra-core>=1.3.2) (2.3.0) + Requirement already satisfied: antlr4-python3-runtime==4.9.* in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from hydra-core>=1.3.2) (4.9.3) + Requirement already satisfied: packaging in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from hydra-core>=1.3.2) (24.2) + Requirement already satisfied: importlib-resources in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from hydra-core>=1.3.2) (6.4.5) + Requirement already satisfied: PyYAML>=5.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from omegaconf<2.4,>=2.2->hydra-core>=1.3.2) (6.0.2) + Requirement already satisfied: zipp>=3.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from importlib-resources->hydra-core>=1.3.2) (3.20.2) Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. @@ -174,7 +174,7 @@ Clone and install segment-anything-2 .. 
parsed-literal:: env: SAM2_BUILD_CUDA=0 - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/sam2-video-segmentation/sam2 + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/sam2-video-segmentation/sam2 .. parsed-literal:: @@ -203,7 +203,7 @@ Clone and install segment-anything-2 ERROR: Package 'sam-2' requires a different Python: 3.8.10 not in '>=3.10.0' Note: you may need to restart the kernel to use updated packages. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/sam2-video-segmentation + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/sam2-video-segmentation .. code:: ipython3 @@ -601,14 +601,14 @@ Prepare data .. parsed-literal:: - data/coco.mp4: 0%| | 0.00/877k [00:00 - + Your browser does not support the video tag. @@ -840,7 +840,7 @@ Example with box .. parsed-literal:: - frame loading (JPEG): 100%|██████████| 50/50 [00:00<00:00, 52.72it/s] + frame loading (JPEG): 100%|██████████| 25/25 [00:00<00:00, 54.66it/s] @@ -877,7 +877,7 @@ Example with box .. parsed-literal:: - propagate in video: 100%|██████████| 50/50 [07:47<00:00, 9.35s/it] + propagate in video: 100%|██████████| 25/25 [03:37<00:00, 8.71s/it] .. code:: ipython3 @@ -894,7 +894,7 @@ Example with box .. raw:: html @@ -927,7 +927,7 @@ Run Interactive For Video Segmentation with Gradio .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/gradio/analytics.py:106: UserWarning: IMPORTANT: You are using gradio version 4.40.0, however version 4.44.1 is available, please upgrade. 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/gradio/analytics.py:106: UserWarning: IMPORTANT: You are using gradio version 4.40.0, however version 4.44.1 is available, please upgrade. -------- warnings.warn( diff --git a/docs/notebooks/segment-anything-2-video-with-output_files/segment-anything-2-video-with-output_40_1.png b/docs/notebooks/segment-anything-2-video-with-output_files/segment-anything-2-video-with-output_40_1.png index 5721f78113b9a5..8b2efbd6f030df 100644 --- a/docs/notebooks/segment-anything-2-video-with-output_files/segment-anything-2-video-with-output_40_1.png +++ b/docs/notebooks/segment-anything-2-video-with-output_files/segment-anything-2-video-with-output_40_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dce79554325cf25434872511d2c96b1361ab4a3e14f23a936d227177ee98836f -size 193590 +oid sha256:bdf6f36d230ce5b74e070f0abb2e3672a1ae3f31094c2444a0e0623b95f1bf35 +size 193591 diff --git a/docs/notebooks/segment-anything-2-video-with-output_files/segment-anything-2-video-with-output_46_0.png b/docs/notebooks/segment-anything-2-video-with-output_files/segment-anything-2-video-with-output_46_0.png index e18f213004313f..65df892bd6e8c2 100644 --- a/docs/notebooks/segment-anything-2-video-with-output_files/segment-anything-2-video-with-output_46_0.png +++ b/docs/notebooks/segment-anything-2-video-with-output_files/segment-anything-2-video-with-output_46_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:00bfe0191dd680f768ea740c00fc4e6d9054e72c250971fc8e12807159a26644 -size 190195 +oid sha256:6b3a974acb951d94d941f150b640a1dcce172f6974085774adbf06e22adeb386 +size 190202 diff --git a/docs/notebooks/siglip-zero-shot-image-classification-with-output.rst b/docs/notebooks/siglip-zero-shot-image-classification-with-output.rst index a38b7c56a2ec8a..a1738642568a2b 100644 --- 
a/docs/notebooks/siglip-zero-shot-image-classification-with-output.rst +++ b/docs/notebooks/siglip-zero-shot-image-classification-with-output.rst @@ -120,8 +120,8 @@ tokenizer and preparing the images. .. parsed-literal:: - 2024-11-22 04:41:05.723109: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-11-22 04:41:05.748466: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-12-10 05:15:56.596890: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-12-10 05:15:56.621776: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. @@ -258,7 +258,7 @@ object ready to load on the device and start making predictions. .. parsed-literal:: [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:5006: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. 
Please use `model.hf_quantizer.is_trainable` instead + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:5006: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead warnings.warn( `loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`. @@ -604,7 +604,7 @@ model are similar to the PyTorch model. .. parsed-literal:: - [{'dog': 0.99}, {'horse': 0.0}, {'cat': 0.0}, {'wolf': 0.0}, {'tiger': 0.0}] + [{'dog': 0.99}, {'horse': 0.0}, {'cat': 0.0}, {'wolf': 0.0}, {'frog': 0.0}] @@ -679,7 +679,7 @@ approximately estimate the speed up of the dynamic quantized models. .. parsed-literal:: - Performance speed up: 2.016 + Performance speed up: 1.907 Interactive inference diff --git a/docs/notebooks/siglip-zero-shot-image-classification-with-output_files/siglip-zero-shot-image-classification-with-output_24_1.png b/docs/notebooks/siglip-zero-shot-image-classification-with-output_files/siglip-zero-shot-image-classification-with-output_24_1.png index 611278a49d1583..6e5afc5acf92a6 100644 --- a/docs/notebooks/siglip-zero-shot-image-classification-with-output_files/siglip-zero-shot-image-classification-with-output_24_1.png +++ b/docs/notebooks/siglip-zero-shot-image-classification-with-output_files/siglip-zero-shot-image-classification-with-output_24_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2f15546e58fac808ed62a6fcc29f2b58b48a974070a9d8c0b5c199c533b747d0 -size 580998 +oid sha256:3ebc30e695ed16710b909a552137d214ca9defb109984e4da59e8b684ce59427 +size 581000 diff --git a/docs/notebooks/sketch-to-image-pix2pix-turbo-with-output.rst b/docs/notebooks/sketch-to-image-pix2pix-turbo-with-output.rst index e9fcfb3f8baa9f..a5b31e15d97ec2 100644 --- 
a/docs/notebooks/sketch-to-image-pix2pix-turbo-with-output.rst +++ b/docs/notebooks/sketch-to-image-pix2pix-turbo-with-output.rst @@ -61,8 +61,8 @@ and install required packages. ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. modelscope-studio 0.5.2 requires gradio<6.0,>=4.0, but you have gradio 3.43.1 which is incompatible. - parler-tts 0.2.1 requires protobuf>=4.0.0, but you have protobuf 3.20.3 which is incompatible. - parler-tts 0.2.1 requires transformers<=4.46.1,>=4.46.1, but you have transformers 4.46.3 which is incompatible. + parler-tts 0.2.2 requires protobuf>=4.0.0, but you have protobuf 3.20.3 which is incompatible. + parler-tts 0.2.2 requires transformers<=4.46.1,>=4.46.1, but you have transformers 4.46.3 which is incompatible. Note: you may need to restart the kernel to use updated packages. @@ -121,7 +121,7 @@ and install required packages. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/sketch-to-image-pix2pix-turbo/img2img-turbo + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/sketch-to-image-pix2pix-turbo/img2img-turbo Load PyTorch model @@ -381,10 +381,10 @@ diagram indicate trainable layers. Semi-transparent layers are frozen. .. parsed-literal:: - 2024-11-22 04:46:27.445712: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-11-22 04:46:27.471919: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. 
+ 2024-12-10 05:21:48.209793: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-12-10 05:21:48.234621: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/utils/outputs.py:63: FutureWarning: `torch.utils._pytree._register_pytree_node` is deprecated. Please use `torch.utils._pytree.register_pytree_node` instead. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/utils/outputs.py:63: FutureWarning: `torch.utils._pytree._register_pytree_node` is deprecated. Please use `torch.utils._pytree.register_pytree_node` instead. torch.utils._pytree._register_pytree_node( @@ -402,7 +402,7 @@ diagram indicate trainable layers. Semi-transparent layers are frozen. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/huggingface_hub/file_download.py:1142: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`. 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/huggingface_hub/file_download.py:1142: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`. warnings.warn( @@ -413,8 +413,8 @@ diagram indicate trainable layers. Semi-transparent layers are frozen. .. parsed-literal:: - 100%|██████████| 525M/525M [18:17<00:00, 478kiB/s] - /tmp/ipykernel_3576883/2531017353.py:172: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. + 100%|██████████| 525M/525M [07:34<00:00, 1.15MiB/s] + /tmp/ipykernel_2241734/2531017353.py:172: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). 
In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. sd = torch.load(p_ckpt, map_location="cpu") @@ -473,30 +473,30 @@ on disk using ``ov.save_model`` in compressed to FP16 format. .. parsed-literal:: [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:5006: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:5006: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead warnings.warn( `loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:88: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:88: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if input_shape[-1] > 1 or self.sliding_window is not None: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:164: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:164: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if past_key_values_length > 0: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/downsampling.py:135: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/downsampling.py:135: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert hidden_states.shape[1] == self.channels - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/downsampling.py:144: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/downsampling.py:144: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert hidden_states.shape[1] == self.channels - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/unet_2d_condition.py:915: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/unet_2d_condition.py:915: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if dim % default_overall_up_factor != 0: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:149: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:149: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert hidden_states.shape[1] == self.channels - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:165: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:165: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if hidden_states.shape[0] >= 64: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/schedulers/scheduling_ddpm.py:433: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/schedulers/scheduling_ddpm.py:433: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if model_output.shape[1] == sample.shape[1] * 2 and self.variance_type in ["learned", "learned_range"]: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/schedulers/scheduling_ddpm.py:440: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/schedulers/scheduling_ddpm.py:440: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! alpha_prod_t_prev = self.alphas_cumprod[prev_t] if prev_t >= 0 else self.one - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/schedulers/scheduling_ddpm.py:479: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/schedulers/scheduling_ddpm.py:479: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if t > 0: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/schedulers/scheduling_ddpm.py:330: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/schedulers/scheduling_ddpm.py:330: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! alpha_prod_t_prev = self.alphas_cumprod[prev_t] if prev_t >= 0 else self.one @@ -676,17 +676,17 @@ Download results using download button .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/sketch-to-image-pix2pix-turbo/gradio_helper.py:225: GradioDeprecationWarning: 'scale' value should be an integer. Using 0.4 will cause issues. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/sketch-to-image-pix2pix-turbo/gradio_helper.py:225: GradioDeprecationWarning: 'scale' value should be an integer. Using 0.4 will cause issues. with gr.Column(elem_id="column_process", min_width=50, scale=0.4): - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/gradio/utils.py:776: UserWarning: Expected 1 arguments for function . at 0x7f22fbf5a550>, received 0. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/gradio/utils.py:776: UserWarning: Expected 1 arguments for function . at 0x7fafe0603c10>, received 0. warnings.warn( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/gradio/utils.py:780: UserWarning: Expected at least 1 arguments for function . 
at 0x7f22fbf5a550>, received 0. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/gradio/utils.py:780: UserWarning: Expected at least 1 arguments for function . at 0x7fafe0603c10>, received 0. warnings.warn( .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/sketch-to-image-pix2pix-turbo + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/sketch-to-image-pix2pix-turbo Running on local URL: http://127.0.0.1:7860 To create a public link, set `share=True` in `launch()`. diff --git a/docs/notebooks/sketch-to-image-pix2pix-turbo-with-output_files/sketch-to-image-pix2pix-turbo-with-output_19_0.jpg b/docs/notebooks/sketch-to-image-pix2pix-turbo-with-output_files/sketch-to-image-pix2pix-turbo-with-output_19_0.jpg index 9d8436d4e8894f..a054eb11c32455 100644 --- a/docs/notebooks/sketch-to-image-pix2pix-turbo-with-output_files/sketch-to-image-pix2pix-turbo-with-output_19_0.jpg +++ b/docs/notebooks/sketch-to-image-pix2pix-turbo-with-output_files/sketch-to-image-pix2pix-turbo-with-output_19_0.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6c28978a7a34769c09d64918686fb69f0239eb9f6499e590a86af16ca1a416d4 -size 23636 +oid sha256:7f92cbd6bb14242b47d354389a04e3413c94c46d233e71b73e305bfb73085a10 +size 23649 diff --git a/docs/notebooks/sketch-to-image-pix2pix-turbo-with-output_files/sketch-to-image-pix2pix-turbo-with-output_19_0.png b/docs/notebooks/sketch-to-image-pix2pix-turbo-with-output_files/sketch-to-image-pix2pix-turbo-with-output_19_0.png index cacdc0c183ea23..336a9ae38fa096 100644 --- a/docs/notebooks/sketch-to-image-pix2pix-turbo-with-output_files/sketch-to-image-pix2pix-turbo-with-output_19_0.png +++ 
b/docs/notebooks/sketch-to-image-pix2pix-turbo-with-output_files/sketch-to-image-pix2pix-turbo-with-output_19_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e5acfcf35473541de444c0a2edbfec36423f37335fecf6844a179c65530a6b54 -size 303319 +oid sha256:179009716266de8c220bfe9b7b3d64410061f8ae8bf74a08305655c020cde76f +size 303164 diff --git a/docs/notebooks/sparsity-optimization-with-output.rst b/docs/notebooks/sparsity-optimization-with-output.rst index 8d3779621fb2ec..038a8db6aec1b1 100644 --- a/docs/notebooks/sparsity-optimization-with-output.rst +++ b/docs/notebooks/sparsity-optimization-with-output.rst @@ -82,8 +82,8 @@ Imports .. parsed-literal:: - 2024-11-22 05:06:26.947305: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-11-22 05:06:26.972806: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-12-10 05:31:08.167081: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-12-10 05:31:08.192294: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. @@ -202,7 +202,7 @@ as an example. It is recommended to tune based on your applications. [ WARNING ] Performance hint was not explicitly specified in command line. 
Device(CPU) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 68.94 ms + [ INFO ] Read model took 72.79 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] input_ids (node: input_ids) : i64 / [...] / [?,?] @@ -213,7 +213,7 @@ as an example. It is recommended to tune based on your applications. [Step 5/11] Resizing model to match image sizes and given batch [ INFO ] Model batch size: 1 [ INFO ] Reshaping model: 'input_ids': [1,64], 'attention_mask': [1,64], 'token_type_ids': [1,64] - [ INFO ] Reshape model took 28.06 ms + [ INFO ] Reshape model took 27.96 ms [Step 6/11] Configuring input of the model [ INFO ] Model inputs: [ INFO ] input_ids (node: input_ids) : i64 / [...] / [1,64] @@ -222,7 +222,7 @@ as an example. It is recommended to tune based on your applications. [ INFO ] Model outputs: [ INFO ] logits (node: logits) : f32 / [...] / [1,2] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 999.63 ms + [ INFO ] Compile model took 1082.12 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: torch_jit @@ -254,17 +254,17 @@ as an example. It is recommended to tune based on your applications. [ INFO ] Fill input 'token_type_ids' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 4 inference requests, limits: 60000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). 
- [ INFO ] First inference took 27.20 ms + [ INFO ] First inference took 28.08 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] [ INFO ] Count: 9176 iterations - [ INFO ] Duration: 60047.45 ms + [ INFO ] Duration: 60033.51 ms [ INFO ] Latency: [ INFO ] Median: 25.83 ms - [ INFO ] Average: 25.91 ms - [ INFO ] Min: 24.30 ms - [ INFO ] Max: 37.67 ms - [ INFO ] Throughput: 152.81 FPS + [ INFO ] Average: 25.92 ms + [ INFO ] Min: 23.43 ms + [ INFO ] Max: 42.58 ms + [ INFO ] Throughput: 152.85 FPS Benchmark quantized sparse inference performance @@ -321,7 +321,7 @@ for which a layer will be enabled. [ WARNING ] Performance hint was not explicitly specified in command line. Device(CPU) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 71.97 ms + [ INFO ] Read model took 75.90 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] input_ids (node: input_ids) : i64 / [...] / [?,?] @@ -332,7 +332,7 @@ for which a layer will be enabled. [Step 5/11] Resizing model to match image sizes and given batch [ INFO ] Model batch size: 1 [ INFO ] Reshaping model: 'input_ids': [1,64], 'attention_mask': [1,64], 'token_type_ids': [1,64] - [ INFO ] Reshape model took 28.33 ms + [ INFO ] Reshape model took 28.30 ms [Step 6/11] Configuring input of the model [ INFO ] Model inputs: [ INFO ] input_ids (node: input_ids) : i64 / [...] / [1,64] @@ -341,7 +341,7 @@ for which a layer will be enabled. [ INFO ] Model outputs: [ INFO ] logits (node: logits) : f32 / [...] / [1,2] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 1001.30 ms + [ INFO ] Compile model took 1011.04 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: torch_jit @@ -373,17 +373,17 @@ for which a layer will be enabled. 
[ INFO ] Fill input 'token_type_ids' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 4 inference requests, limits: 60000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). - [ INFO ] First inference took 28.02 ms + [ INFO ] First inference took 27.34 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 9216 iterations - [ INFO ] Duration: 60030.33 ms + [ INFO ] Count: 9152 iterations + [ INFO ] Duration: 60027.24 ms [ INFO ] Latency: - [ INFO ] Median: 25.92 ms - [ INFO ] Average: 25.94 ms - [ INFO ] Min: 23.04 ms - [ INFO ] Max: 31.17 ms - [ INFO ] Throughput: 153.52 FPS + [ INFO ] Median: 25.91 ms + [ INFO ] Average: 25.97 ms + [ INFO ] Min: 23.89 ms + [ INFO ] Max: 41.37 ms + [ INFO ] Throughput: 152.46 FPS When this might be helpful diff --git a/docs/notebooks/speculative-sampling-with-output.rst b/docs/notebooks/speculative-sampling-with-output.rst index 4d5656cb99645c..868fbe9beccf9e 100644 --- a/docs/notebooks/speculative-sampling-with-output.rst +++ b/docs/notebooks/speculative-sampling-with-output.rst @@ -214,7 +214,23 @@ generation is finished, we will write streamer function. pipe = ov_genai.LLMPipeline(target_model_path, device.value) config = ov_genai.GenerationConfig() - config.max_new_tokens = 100 + config.max_new_tokens = 330 + prompt = ''' + + def prime_fib(n: int): + """ + prime_fib returns n-th number that is a Fibonacci number and it's also prime. + >>> prime_fib(1) + 2 + >>> prime_fib(2) + 3 + >>> prime_fib(3) + 5 + >>> prime_fib(4) + 13 + >>> prime_fib(5) + 89 + """''' def streamer(subword): @@ -225,7 +241,7 @@ generation is finished, we will write streamer function. 
start_time = time.perf_counter() - pipe.generate(["Sun is yellow because"], config, streamer=streamer) + pipe.generate(prompt, config, streamer=streamer) end_time = time.perf_counter() @@ -239,7 +255,7 @@ generation is finished, we will write streamer function. print(f"Generation time: {end_time - start_time:.2f}s") del pipe - gc.collect(); + gc.collect() .. parsed-literal:: @@ -282,17 +298,19 @@ stops the current token generation iteration is not yet reached. scheduler_config = ov_genai.SchedulerConfig() # cache params - scheduler_config.cache_size = 2 + scheduler_config.cache_size = 0 + scheduler_config.num_kv_blocks = 2048 // 8 + scheduler_config.max_num_batched_tokens = 2048 draft_model = ov_genai.draft_model(draft_model_path, device.value) pipe = ov_genai.LLMPipeline(target_model_path, device.value, draft_model=draft_model, scheduler_config=scheduler_config) config = ov_genai.GenerationConfig() - config.max_new_tokens = 100 - config.num_assistant_tokens = 3 + config.max_new_tokens = 330 + config.num_assistant_tokens = 5 start_time = time.perf_counter() - result = pipe.generate(["Sun is yellow because"], config, streamer=streamer) + result = pipe.generate(prompt, config, streamer=streamer) end_time = time.perf_counter() diff --git a/docs/notebooks/speech-recognition-quantization-wav2vec2-with-output.rst b/docs/notebooks/speech-recognition-quantization-wav2vec2-with-output.rst index 0b9b8db99880b6..27fad907b62fd6 100644 --- a/docs/notebooks/speech-recognition-quantization-wav2vec2-with-output.rst +++ b/docs/notebooks/speech-recognition-quantization-wav2vec2-with-output.rst @@ -57,47 +57,47 @@ Guide =0.11.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (1.5.2) - Requirement already satisfied: torch>=2.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages 
(2.4.1+cpu) - Requirement already satisfied: filelock in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from datasets) (3.16.1) - Requirement already satisfied: numpy>=1.17 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from datasets) (1.24.4) - Requirement already satisfied: pyarrow>=15.0.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from datasets) (17.0.0) - Requirement already satisfied: dill<0.3.9,>=0.3.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from datasets) (0.3.8) - Requirement already satisfied: pandas in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from datasets) (2.0.3) - Requirement already satisfied: requests>=2.32.2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from datasets) (2.32.3) - Requirement already satisfied: tqdm>=4.66.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from datasets) (4.67.0) - Requirement already satisfied: xxhash in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from datasets) (3.5.0) - Requirement already satisfied: multiprocess in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from datasets) (0.70.16) - Requirement already satisfied: fsspec<=2024.6.1,>=2023.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from fsspec[http]<=2024.6.1,>=2023.1.0->datasets) (2024.6.1) - Requirement already satisfied: aiohttp in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from datasets) (3.10.11) - Requirement already satisfied: huggingface-hub>=0.22.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from datasets) (0.25.2) - Requirement already satisfied: packaging in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from datasets) (24.2) - Requirement already satisfied: pyyaml>=5.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from datasets) (6.0.2) - Requirement already satisfied: lightning-utilities>=0.8.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torchmetrics>=0.11.0) (0.11.9) - Requirement already satisfied: typing-extensions in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torchmetrics>=0.11.0) (4.12.2) - Requirement already satisfied: sympy in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1.0) (1.13.3) - Requirement already satisfied: networkx in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1.0) (3.1) - Requirement already satisfied: jinja2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1.0) (3.1.4) - Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from aiohttp->datasets) (2.4.3) - Requirement already satisfied: aiosignal>=1.1.2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from aiohttp->datasets) (1.3.1) - Requirement already satisfied: attrs>=17.3.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from aiohttp->datasets) (24.2.0) - Requirement already satisfied: frozenlist>=1.1.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from aiohttp->datasets) (1.5.0) - Requirement already satisfied: multidict<7.0,>=4.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from aiohttp->datasets) (6.1.0) - Requirement already satisfied: yarl<2.0,>=1.12.0 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from aiohttp->datasets) (1.15.2) - Requirement already satisfied: async-timeout<6.0,>=4.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from aiohttp->datasets) (5.0.1) - Requirement already satisfied: setuptools in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from lightning-utilities>=0.8.0->torchmetrics>=0.11.0) (75.3.0) - Requirement already satisfied: charset-normalizer<4,>=2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.32.2->datasets) (3.4.0) - Requirement already satisfied: idna<4,>=2.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.32.2->datasets) (3.10) - Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.32.2->datasets) (2.2.3) - Requirement already satisfied: certifi>=2017.4.17 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.32.2->datasets) (2024.8.30) - Requirement already satisfied: MarkupSafe>=2.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jinja2->torch>=2.1.0) (2.1.5) - Requirement already satisfied: python-dateutil>=2.8.2 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pandas->datasets) (2.9.0.post0) - Requirement already satisfied: pytz>=2020.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pandas->datasets) (2024.2) - Requirement already satisfied: tzdata>=2022.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pandas->datasets) (2024.2) - Requirement already satisfied: mpmath<1.4,>=1.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from sympy->torch>=2.1.0) (1.3.0) - Requirement already satisfied: six>=1.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from python-dateutil>=2.8.2->pandas->datasets) (1.16.0) - Requirement already satisfied: propcache>=0.2.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from yarl<2.0,>=1.12.0->aiohttp->datasets) (0.2.0) + Requirement already satisfied: datasets in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (3.0.0) + Requirement already satisfied: torchmetrics>=0.11.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (1.5.2) + Requirement already satisfied: torch>=2.1.0 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (2.4.1+cpu) + Requirement already satisfied: filelock in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from datasets) (3.16.1) + Requirement already satisfied: numpy>=1.17 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from datasets) (1.24.4) + Requirement already satisfied: pyarrow>=15.0.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from datasets) (17.0.0) + Requirement already satisfied: dill<0.3.9,>=0.3.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from datasets) (0.3.8) + Requirement already satisfied: pandas in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from datasets) (2.0.3) + Requirement already satisfied: requests>=2.32.2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from datasets) (2.32.3) + Requirement already satisfied: tqdm>=4.66.3 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from datasets) (4.67.1) + Requirement already satisfied: xxhash in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from datasets) (3.5.0) + Requirement already satisfied: 
multiprocess in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from datasets) (0.70.16) + Requirement already satisfied: fsspec<=2024.6.1,>=2023.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from fsspec[http]<=2024.6.1,>=2023.1.0->datasets) (2024.6.1) + Requirement already satisfied: aiohttp in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from datasets) (3.10.11) + Requirement already satisfied: huggingface-hub>=0.22.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from datasets) (0.25.2) + Requirement already satisfied: packaging in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from datasets) (24.2) + Requirement already satisfied: pyyaml>=5.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from datasets) (6.0.2) + Requirement already satisfied: lightning-utilities>=0.8.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torchmetrics>=0.11.0) (0.11.9) + Requirement already satisfied: typing-extensions in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torchmetrics>=0.11.0) (4.12.2) + Requirement already satisfied: sympy in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1.0) (1.13.3) + Requirement already satisfied: networkx in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1.0) (3.1) + Requirement already satisfied: jinja2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from torch>=2.1.0) (3.1.4) + Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from aiohttp->datasets) (2.4.4) + Requirement already satisfied: aiosignal>=1.1.2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from aiohttp->datasets) (1.3.1) + Requirement already satisfied: attrs>=17.3.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from aiohttp->datasets) (24.2.0) + Requirement already satisfied: frozenlist>=1.1.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from aiohttp->datasets) (1.5.0) + Requirement already satisfied: multidict<7.0,>=4.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from aiohttp->datasets) (6.1.0) + Requirement already satisfied: yarl<2.0,>=1.12.0 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from aiohttp->datasets) (1.15.2) + Requirement already satisfied: async-timeout<6.0,>=4.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from aiohttp->datasets) (5.0.1) + Requirement already satisfied: setuptools in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from lightning-utilities>=0.8.0->torchmetrics>=0.11.0) (75.3.0) + Requirement already satisfied: charset-normalizer<4,>=2 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.32.2->datasets) (3.4.0) + Requirement already satisfied: idna<4,>=2.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.32.2->datasets) (3.10) + Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.32.2->datasets) (2.2.3) + Requirement already satisfied: certifi>=2017.4.17 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from requests>=2.32.2->datasets) (2024.8.30) + Requirement already satisfied: MarkupSafe>=2.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from jinja2->torch>=2.1.0) (2.1.5) + Requirement already satisfied: python-dateutil>=2.8.2 in 
/opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pandas->datasets) (2.9.0.post0) + Requirement already satisfied: pytz>=2020.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pandas->datasets) (2024.2) + Requirement already satisfied: tzdata>=2022.1 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from pandas->datasets) (2024.2) + Requirement already satisfied: mpmath<1.4,>=1.1.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from sympy->torch>=2.1.0) (1.3.0) + Requirement already satisfied: six>=1.5 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from python-dateutil>=2.8.2->pandas->datasets) (1.17.0) + Requirement already satisfied: propcache>=0.2.0 in /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages (from yarl<2.0,>=1.12.0->aiohttp->datasets) (0.2.0) Note: you may need to restart the kernel to use updated packages. Note: you may need to restart the kernel to use updated packages. @@ -119,8 +119,8 @@ Imports .. parsed-literal:: - 2024-11-22 05:08:52.722966: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. 
- 2024-11-22 05:08:52.748262: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-12-10 05:33:33.150578: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-12-10 05:33:33.175323: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. @@ -177,10 +177,10 @@ IR). .. parsed-literal:: [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:5006: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:5006: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead warnings.warn( `loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`. 
- /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/wav2vec2/modeling_wav2vec2.py:872: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/wav2vec2/modeling_wav2vec2.py:872: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attn_output.size() != (bsz, self.num_heads, tgt_len, self.head_dim): @@ -507,7 +507,7 @@ quantized model. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torchmetrics/utilities/prints.py:62: FutureWarning: Importing `WordErrorRate` from `torchmetrics` was deprecated and will be removed in 2.0. Import `WordErrorRate` from `torchmetrics.text` instead. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torchmetrics/utilities/prints.py:62: FutureWarning: Importing `WordErrorRate` from `torchmetrics` was deprecated and will be removed in 2.0. Import `WordErrorRate` from `torchmetrics.text` instead. _future_warning( @@ -577,7 +577,7 @@ models. [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. 
[Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 18.23 ms + [ INFO ] Read model took 17.68 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] input_values , 45 (node: input_values) : f32 / [...] / [?,?] @@ -586,14 +586,14 @@ models. [Step 5/11] Resizing model to match image sizes and given batch [ INFO ] Model batch size: 1 [ INFO ] Reshaping model: '45': [1,30480] - [ INFO ] Reshape model took 4.39 ms + [ INFO ] Reshape model took 4.15 ms [Step 6/11] Configuring input of the model [ INFO ] Model inputs: [ INFO ] input_values , 45 (node: input_values) : f32 / [...] / [1,30480] [ INFO ] Model outputs: [ INFO ] logits (node: __module.lm_head/aten::linear/Add) : f32 / [...] / [1,95,32] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 493.97 ms + [ INFO ] Compile model took 492.85 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -630,17 +630,17 @@ models. [ INFO ] Fill input '45' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 120000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). - [ INFO ] First inference took 69.21 ms + [ INFO ] First inference took 70.68 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 5430 iterations - [ INFO ] Duration: 120128.75 ms + [ INFO ] Count: 5424 iterations + [ INFO ] Duration: 120129.30 ms [ INFO ] Latency: - [ INFO ] Median: 130.74 ms - [ INFO ] Average: 132.58 ms - [ INFO ] Min: 66.32 ms - [ INFO ] Max: 307.29 ms - [ INFO ] Throughput: 45.20 FPS + [ INFO ] Median: 130.69 ms + [ INFO ] Average: 132.71 ms + [ INFO ] Min: 66.95 ms + [ INFO ] Max: 336.57 ms + [ INFO ] Throughput: 45.15 FPS .. code:: ipython3 @@ -667,7 +667,7 @@ models. [ WARNING ] Performance hint was not explicitly specified in command line. 
Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 25.21 ms + [ INFO ] Read model took 24.12 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] input_values , 45 (node: input_values) : f32 / [...] / [?,?] @@ -676,14 +676,14 @@ models. [Step 5/11] Resizing model to match image sizes and given batch [ INFO ] Model batch size: 1 [ INFO ] Reshaping model: '45': [1,30480] - [ INFO ] Reshape model took 6.04 ms + [ INFO ] Reshape model took 6.07 ms [Step 6/11] Configuring input of the model [ INFO ] Model inputs: [ INFO ] input_values , 45 (node: input_values) : f32 / [...] / [1,30480] [ INFO ] Model outputs: [ INFO ] logits (node: __module.lm_head/aten::linear/Add) : f32 / [...] / [1,95,32] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 1188.53 ms + [ INFO ] Compile model took 1216.49 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: Model0 @@ -720,15 +720,15 @@ models. [ INFO ] Fill input '45' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 120000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). 
- [ INFO ] First inference took 55.48 ms + [ INFO ] First inference took 54.72 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 8046 iterations - [ INFO ] Duration: 120134.14 ms + [ INFO ] Count: 8016 iterations + [ INFO ] Duration: 120075.42 ms [ INFO ] Latency: - [ INFO ] Median: 88.11 ms - [ INFO ] Average: 89.43 ms - [ INFO ] Min: 71.74 ms - [ INFO ] Max: 270.18 ms - [ INFO ] Throughput: 66.98 FPS + [ INFO ] Median: 88.25 ms + [ INFO ] Average: 89.73 ms + [ INFO ] Min: 39.47 ms + [ INFO ] Max: 249.83 ms + [ INFO ] Throughput: 66.76 FPS diff --git a/docs/notebooks/speechbrain-emotion-recognition-with-output.rst b/docs/notebooks/speechbrain-emotion-recognition-with-output.rst index 23857ad92d4fa2..0f2b2a55f67169 100644 --- a/docs/notebooks/speechbrain-emotion-recognition-with-output.rst +++ b/docs/notebooks/speechbrain-emotion-recognition-with-output.rst @@ -63,9 +63,9 @@ Installations detectron2 0.6 requires iopath<0.1.10,>=0.1.7, but you have iopath 0.1.10 which is incompatible. mobileclip 0.1.0 requires torchvision==0.14.1, but you have torchvision 0.19.1+cpu which is incompatible. modelscope-studio 0.5.2 requires gradio<6.0,>=4.0, but you have gradio 3.43.1 which is incompatible. - parler-tts 0.2.1 requires protobuf>=4.0.0, but you have protobuf 3.20.3 which is incompatible. - parler-tts 0.2.1 requires transformers<=4.46.1,>=4.46.1, but you have transformers 4.46.3 which is incompatible. - pydantic 2.10.0 requires typing-extensions>=4.12.2, but you have typing-extensions 4.9.0 which is incompatible. + parler-tts 0.2.2 requires protobuf>=4.0.0, but you have protobuf 3.20.3 which is incompatible. + parler-tts 0.2.2 requires transformers<=4.46.1,>=4.46.1, but you have transformers 4.46.3 which is incompatible. + pydantic 2.10.3 requires typing-extensions>=4.12.2, but you have typing-extensions 4.9.0 which is incompatible. tensorflow 2.12.0 requires keras<2.13,>=2.12.0, but you have keras 2.13.1 which is incompatible. 
tensorflow 2.12.0 requires numpy<1.24,>=1.22, but you have numpy 1.24.4 which is incompatible. tensorflow 2.12.0 requires tensorboard<2.13,>=2.12, but you have tensorboard 2.13.0 which is incompatible. @@ -95,7 +95,7 @@ Imports .. parsed-literal:: - INFO:speechbrain.utils.quirks:Applied quirks (see `speechbrain.utils.quirks`): [allow_tf32, disable_jit_profiling] + INFO:speechbrain.utils.quirks:Applied quirks (see `speechbrain.utils.quirks`): [disable_jit_profiling, allow_tf32] INFO:speechbrain.utils.quirks:Excluded quirks specified by the `SB_DISABLE_QUIRKS` environment (comma-separated list): [] @@ -135,8 +135,8 @@ SpeechBrain codebase. INFO:speechbrain.utils.fetching:Fetch hyperparams.yaml: Fetching from HuggingFace Hub 'speechbrain/emotion-recognition-wav2vec2-IEMOCAP' if not cached INFO:speechbrain.utils.fetching:Fetch custom_interface.py: Fetching from HuggingFace Hub 'speechbrain/emotion-recognition-wav2vec2-IEMOCAP' if not cached - 2024-11-22 05:15:27.494190: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-11-22 05:15:27.518517: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. + 2024-12-10 05:40:05.072169: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-12-10 05:40:05.097896: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. 
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. @@ -148,7 +148,7 @@ SpeechBrain codebase. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/configuration_utils.py:306: UserWarning: Passing `gradient_checkpointing` to a config initialization is deprecated and will be removed in v5 Transformers. Using `model.gradient_checkpointing_enable()` instead, or if you are using the `Trainer` API, pass `gradient_checkpointing=True` in your `TrainingArguments`. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/configuration_utils.py:306: UserWarning: Passing `gradient_checkpointing` to a config initialization is deprecated and will be removed in v5 Transformers. Using `model.gradient_checkpointing_enable()` instead, or if you are using the `Trainer` API, pass `gradient_checkpointing=True` in your `TrainingArguments`. warnings.warn( @@ -175,7 +175,7 @@ SpeechBrain codebase. INFO:speechbrain.utils.fetching:Fetch model.ckpt: Fetching from HuggingFace Hub 'speechbrain/emotion-recognition-wav2vec2-IEMOCAP' if not cached INFO:speechbrain.utils.fetching:Fetch label_encoder.txt: Fetching from HuggingFace Hub 'speechbrain/emotion-recognition-wav2vec2-IEMOCAP' if not cached INFO:speechbrain.utils.parameter_transfer:Loading pretrained files for: wav2vec2, model, label_encoder - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/speechbrain/utils/checkpoints.py:200: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. 
It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/speechbrain/utils/checkpoints.py:200: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. state_dict = torch.load(path, map_location=device) @@ -263,13 +263,19 @@ Step 2: Convert model to OpenVINO IR .. 
parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:5006: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:5006: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead warnings.warn( `loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/wav2vec2/modeling_wav2vec2.py:872: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/wav2vec2/modeling_wav2vec2.py:872: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if attn_output.size() != (bsz, self.num_heads, tgt_len, self.head_dim): + +.. 
parsed-literal:: + + model.safetensors: 0%| | 0.00/380M [00:00 1 or self.sliding_window is not None: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:164: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:164: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if past_key_values_length > 0: @@ -306,7 +306,7 @@ here, we always use fixed shapes in conversion by using an .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/unets/unet_stable_cascade.py:548: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/unets/unet_stable_cascade.py:548: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if skip is not None and (x.size(-1) != skip.size(-1) or x.size(-2) != skip.size(-2)): diff --git a/docs/notebooks/stable-cascade-image-generation-with-output_files/stable-cascade-image-generation-with-output_29_2.jpg b/docs/notebooks/stable-cascade-image-generation-with-output_files/stable-cascade-image-generation-with-output_29_2.jpg index c26f6d2e4e6256..a09f1e5356f98d 100644 --- a/docs/notebooks/stable-cascade-image-generation-with-output_files/stable-cascade-image-generation-with-output_29_2.jpg +++ b/docs/notebooks/stable-cascade-image-generation-with-output_files/stable-cascade-image-generation-with-output_29_2.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8f1d0c9a1548ea1728d293d5e9864b85f3f438666fb647d5d98ce4a08bd9d494 -size 81141 +oid sha256:a7c11f26f9dc1eb3286c357bb147d12c812786f1796a8b62a5012075afe6de12 +size 83987 diff --git a/docs/notebooks/stable-cascade-image-generation-with-output_files/stable-cascade-image-generation-with-output_29_2.png b/docs/notebooks/stable-cascade-image-generation-with-output_files/stable-cascade-image-generation-with-output_29_2.png index 8d36ff65c9eca3..eece770ac13fce 100644 --- a/docs/notebooks/stable-cascade-image-generation-with-output_files/stable-cascade-image-generation-with-output_29_2.png +++ b/docs/notebooks/stable-cascade-image-generation-with-output_files/stable-cascade-image-generation-with-output_29_2.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a807ec75efd8572779d2c5de64bec882d23a29b52449e0a2df13fb67b527beae -size 1575960 +oid sha256:a32e4037dd5a34d227f3ef5a892121797617a3becd465227678a6ef6d7f8a090 +size 1608106 diff --git a/docs/notebooks/stable-cascade-image-generation-with-output_files/stable-cascade-image-generation-with-output_8_2.jpg 
b/docs/notebooks/stable-cascade-image-generation-with-output_files/stable-cascade-image-generation-with-output_8_2.jpg new file mode 100644 index 00000000000000..57b41a7f8d9bbe --- /dev/null +++ b/docs/notebooks/stable-cascade-image-generation-with-output_files/stable-cascade-image-generation-with-output_8_2.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb107211ea8c2d5b7f376c7896193df1b1b5c4b1ba4014e10734d5401848fada +size 92085 diff --git a/docs/notebooks/stable-cascade-image-generation-with-output_files/stable-cascade-image-generation-with-output_8_2.png b/docs/notebooks/stable-cascade-image-generation-with-output_files/stable-cascade-image-generation-with-output_8_2.png new file mode 100644 index 00000000000000..e718da40df51ae --- /dev/null +++ b/docs/notebooks/stable-cascade-image-generation-with-output_files/stable-cascade-image-generation-with-output_8_2.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87b04c8ee319ce2d23bd4cf76666af100b2852bb0dd4ba558d978698f871f581 +size 1591012 diff --git a/docs/notebooks/stable-diffusion-ip-adapter-with-output.rst b/docs/notebooks/stable-diffusion-ip-adapter-with-output.rst index 7f23c866161568..d5cbb62354f4fc 100644 --- a/docs/notebooks/stable-diffusion-ip-adapter-with-output.rst +++ b/docs/notebooks/stable-diffusion-ip-adapter-with-output.rst @@ -193,8 +193,8 @@ Additionally, LCM requires using LCMScheduler for efficient generation. .. parsed-literal:: - 2024-11-22 05:28:32.243878: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. - 2024-11-22 05:28:32.268737: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. 
+ 2024-12-10 05:53:08.894939: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. + 2024-12-10 05:53:08.920444: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. @@ -206,7 +206,7 @@ Additionally, LCM requires using LCMScheduler for efficient generation. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/feature_extraction_clip.py:28: FutureWarning: The class CLIPFeatureExtractor is deprecated and will be removed in version 5 of Transformers. Please use CLIPImageProcessor instead. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/feature_extraction_clip.py:28: FutureWarning: The class CLIPFeatureExtractor is deprecated and will be removed in version 5 of Transformers. Please use CLIPImageProcessor instead. warnings.warn( @@ -288,10 +288,10 @@ extractor as input and returns image embeddings. .. parsed-literal:: [ WARNING ] Please fix your imports. Module %s has been moved to %s. The old module will be deleted in version %s. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:5006: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. 
Please use `model.hf_quantizer.is_trainable` instead + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_utils.py:5006: FutureWarning: `_is_quantized_training_enabled` is going to be deprecated in transformers 4.39.0. Please use `model.hf_quantizer.is_trainable` instead warnings.warn( `loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`. - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:243: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/models/clip/modeling_clip.py:243: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if not interpolate_pos_encoding and (height != self.image_size or width != self.image_size): @@ -353,17 +353,17 @@ Model predicts the ``sample`` state for the next step. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/unets/unet_2d_condition.py:1111: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. 
We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/unets/unet_2d_condition.py:1111: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if dim % default_overall_up_factor != 0: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/embeddings.py:1801: FutureWarning: You have passed a tensor as `image_embeds`.This is deprecated and will be removed in a future release. Please make sure to update your script to pass `image_embeds` as a list of tensors to suppress this warning. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/embeddings.py:1801: FutureWarning: You have passed a tensor as `image_embeds`.This is deprecated and will be removed in a future release. Please make sure to update your script to pass `image_embeds` as a list of tensors to suppress this warning. deprecate("image_embeds not a list", "1.0.0", deprecation_message, standard_warn=False) - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/downsampling.py:136: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/downsampling.py:136: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert hidden_states.shape[1] == self.channels - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/downsampling.py:145: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/downsampling.py:145: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert hidden_states.shape[1] == self.channels - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:147: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! 
+ /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:147: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! assert hidden_states.shape[1] == self.channels - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/models/upsampling.py:162: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if hidden_states.shape[0] >= 64: @@ -441,16 +441,16 @@ image in pipeline, we can discuss it in inference examples. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Trace had nondeterministic nodes. Did you forget call .eval() on your model? 
Nodes: - %2506 : Float(1, 4, 64, 64, strides=[16384, 4096, 64, 1], requires_grad=0, device=cpu) = aten::randn(%2500, %2501, %2502, %2503, %2504, %2505) # /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/utils/torch_utils.py:81:0 + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Trace had nondeterministic nodes. Did you forget call .eval() on your model? Nodes: + %2506 : Float(1, 4, 64, 64, strides=[16384, 4096, 64, 1], requires_grad=0, device=cpu) = aten::randn(%2500, %2501, %2502, %2503, %2504, %2505) # /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/diffusers/utils/torch_utils.py:81:0 This may cause errors in trace checking. To disable trace checking, pass check_trace=False to torch.jit.trace() _check_trace( - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 1. of the traced function does not match the corresponding output of the Python function. Detailed error: + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/torch/jit/_trace.py:1303: TracerWarning: Output nr 1. of the traced function does not match the corresponding output of the Python function. Detailed error: Tensor-likes are not close! 
- Mismatched elements: 10391 / 16384 (63.4%) - Greatest absolute difference: 0.000982522964477539 at index (0, 1, 0, 60) (up to 1e-05 allowed) - Greatest relative difference: 0.014704568038430557 at index (0, 3, 63, 59) (up to 1e-05 allowed) + Mismatched elements: 10463 / 16384 (63.9%) + Greatest absolute difference: 0.001137852668762207 at index (0, 2, 0, 6) (up to 1e-05 allowed) + Greatest relative difference: 0.006470232386295268 at index (0, 3, 63, 59) (up to 1e-05 allowed) _check_trace( @@ -496,9 +496,9 @@ hidden states. .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:88: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:88: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if input_shape[-1] > 1 or self.sliding_window is not None: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:164: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. 
This means that the trace might not generalize to other inputs! + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/.venv/lib/python3.8/site-packages/transformers/modeling_attn_mask_utils.py:164: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs! if past_key_values_length > 0: diff --git a/docs/notebooks/stable-diffusion-ip-adapter-with-output_files/stable-diffusion-ip-adapter-with-output_22_1.png b/docs/notebooks/stable-diffusion-ip-adapter-with-output_files/stable-diffusion-ip-adapter-with-output_22_1.png index c5cde5597bba55..475b4dd8ea40b4 100644 --- a/docs/notebooks/stable-diffusion-ip-adapter-with-output_files/stable-diffusion-ip-adapter-with-output_22_1.png +++ b/docs/notebooks/stable-diffusion-ip-adapter-with-output_files/stable-diffusion-ip-adapter-with-output_22_1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:541f65736f11c59692c577b1d85c0f6b9ab6ab51e8a9fdf6abf15063d06e2036 -size 965452 +oid sha256:f41e9dd669351422cfb30a6a5458431b699453f0934b43e199a0d4684dd4da85 +size 975310 diff --git a/docs/notebooks/stable-diffusion-ip-adapter-with-output_files/stable-diffusion-ip-adapter-with-output_25_0.png b/docs/notebooks/stable-diffusion-ip-adapter-with-output_files/stable-diffusion-ip-adapter-with-output_25_0.png index 61d61f6001a527..ba0e885cf44c5a 100644 --- a/docs/notebooks/stable-diffusion-ip-adapter-with-output_files/stable-diffusion-ip-adapter-with-output_25_0.png +++ b/docs/notebooks/stable-diffusion-ip-adapter-with-output_files/stable-diffusion-ip-adapter-with-output_25_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6162787bd52816b379097b9ec5284c2b65dc1178be5be7936240895f9de5285b -size 956477 +oid 
sha256:c5f0746a06f6d81be16e808107174009b68510b2e826885fe3f78021079b2a12 +size 945107 diff --git a/docs/notebooks/stable-diffusion-ip-adapter-with-output_files/stable-diffusion-ip-adapter-with-output_28_0.png b/docs/notebooks/stable-diffusion-ip-adapter-with-output_files/stable-diffusion-ip-adapter-with-output_28_0.png index 937356ce2c1a55..baae1d818321e1 100644 --- a/docs/notebooks/stable-diffusion-ip-adapter-with-output_files/stable-diffusion-ip-adapter-with-output_28_0.png +++ b/docs/notebooks/stable-diffusion-ip-adapter-with-output_files/stable-diffusion-ip-adapter-with-output_28_0.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d9fe298508ac791d7d0901af522504878a7fd98675bad80cfd4b60cafd2a49c0 -size 592390 +oid sha256:76b9fd3519e90a6fa4b39a5749633ffc0031a5141f3698920b724205b304e9f3 +size 595645 diff --git a/docs/notebooks/stable-diffusion-torchdynamo-backend-with-output.rst b/docs/notebooks/stable-diffusion-torchdynamo-backend-with-output.rst index e03a4ab614c769..a50a47392fb029 100644 --- a/docs/notebooks/stable-diffusion-torchdynamo-backend-with-output.rst +++ b/docs/notebooks/stable-diffusion-torchdynamo-backend-with-output.rst @@ -114,15 +114,18 @@ script. It speeds up PyTorch code by JIT-compiling it into optimized kernels. By default, Torch code runs in eager-mode, but with the use of torch.compile it goes through the following steps: -1. Graph acquisition - the model is rewritten as blocks of subgraphs that are either: +1. Graph acquisition + - the model is rewritten as blocks of subgraphs that are either: - compiled by TorchDynamo and “flattened”, - - falling back to the eager-mode, due to unsupported Python constructs (like control-flow + - falling back to the + eager-mode, due to unsupported Python constructs (like control-flow code). 2. Graph lowering - all PyTorch operations are decomposed into their constituent kernels specific to the chosen backend. -3. Graph compilation - the kernels call their corresponding low-level +3. 
Graph + compilation - the kernels call their corresponding low-level device-specific operations. Select device for inference and enable or disable saving the optimized diff --git a/docs/notebooks/stable-diffusion-v3-torch-fx-with-output.rst b/docs/notebooks/stable-diffusion-v3-torch-fx-with-output.rst new file mode 100644 index 00000000000000..2eee517599af7c --- /dev/null +++ b/docs/notebooks/stable-diffusion-v3-torch-fx-with-output.rst @@ -0,0 +1,562 @@ +Image generation with Torch.FX Stable Diffusion v3 and OpenVINO +=============================================================== + +Stable Diffusion V3 is the next generation of the Stable Diffusion family +of latent diffusion image models. It outperforms state-of-the-art text-to-image +generation systems in typography and prompt adherence, based on human +preference evaluations. In comparison with previous versions, it is based +on the Multimodal Diffusion Transformer (MMDiT) text-to-image model that +features greatly improved performance in image quality, typography, +complex prompt understanding, and resource-efficiency. + +.. figure:: https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/dd079427-89f2-4d28-a10e-c80792d750bf + :alt: mmdit.png + + mmdit.png + +More details about the model can be found in the `model +card `__, +`research +paper `__ +and `Stability.AI blog +post `__. In this +tutorial, we will demonstrate how to optimize Stable Diffusion 3 in a Torch +FX representation using +`NNCF `__ for model +optimization. Additionally, we will accelerate the pipeline further by +running with torch.compile using the openvino backend.
If you want to +run previous Stable Diffusion versions, please check our other +notebooks: + +- `Stable Diffusion `__ +- `Stable Diffusion v2 `__ +- `Stable Diffusion v3 `__ +- `Stable Diffusion XL `__ +- `LCM Stable + Diffusion `__ +- `Turbo SDXL `__ +- `Turbo SD `__ + +Installation Instructions +~~~~~~~~~~~~~~~~~~~~~~~~~ + +This is a self-contained example that relies solely on its own code. + +We recommend running the notebook in a virtual environment. You only +need a Jupyter server to start. For details, please refer to +`Installation +Guide `__. + + +**Table of contents:** + + +- `Prerequisites <#prerequisites>`__ +- `Build PyTorch pipeline <#build-pytorch-pipeline>`__ + + - `Store the Configs <#store-the-configs>`__ + +- `Run FP Inference <#run-fp-inference>`__ +- `Convert models to Torch FX <#convert-models-to-torch-fx>`__ +- `Quantization <#quantization>`__ + + - `Collect Calibration Dataset <#collect-calibration-dataset>`__ + - `Compress and Quantize models <#compress-and-quantize-models>`__ + - `Create Optimized Pipeline <#create-optimized-pipeline>`__ + - `Check File Size <#check-file-size>`__ + - `Optimized pipeline inference <#optimized-pipeline-inference>`__ + - `Visualize Results <#visualize-results>`__ + +- `Interactive demo <#interactive-demo>`__ + +Prerequisites +------------- + + + +.. code:: ipython3 + + %pip install -q "gradio>=4.19" "torch>=2.5" "torchvision>=0.20" "numpy<2.0" "transformers" "datasets>=2.14.6" "opencv-python" "pillow" "peft>=0.7.0" "diffusers>=0.31.0" --extra-index-url https://download.pytorch.org/whl/cpu + %pip install -qU "openvino>=2024.3.0" + %pip install -q "nncf>=2.14.0" "typing_extensions>=4.11" + +.. 
code:: ipython3 + + from pathlib import Path + + import requests + + if not Path("sd3_torch_fx_helper.py").exists(): + r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/stable-diffusion-v3/sd3_torch_fx_helper.py") + open("sd3_torch_fx_helper.py", "w").write(r.text) + + if not Path("gradio_helper.py").exists(): + r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/stable-diffusion-v3/gradio_helper.py") + open("gradio_helper.py", "w").write(r.text) + + if not Path("notebook_utils.py").exists(): + r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py") + open("notebook_utils.py", "w").write(r.text) + +Build PyTorch pipeline +---------------------- + + + + **Note**: run model with notebook, you will need to accept license + agreement. You must be a registered user in Hugging Face Hub. + Please visit `HuggingFace model + card `__, + carefully read terms of usage and click accept button. You will need + to use an access token for the code below to run. For more + information on access tokens, refer to `this section of the + documentation `__. + You can login on Hugging Face Hub in notebook environment, using + following code: + +.. code:: ipython3 + + # uncomment these lines to login to huggingfacehub to get access to pretrained model + + # from huggingface_hub import notebook_login, whoami + + # try: + # whoami() + # print('Authorization token already provided') + # except OSError: + # notebook_login() + +.. code:: ipython3 + + from sd3_torch_fx_helper import get_sd3_pipeline, init_pipeline + + pipe = get_sd3_pipeline() + pipe.to("cpu") + +Store the Configs +~~~~~~~~~~~~~~~~~ + + + +This will be used later when wrapping the Torch FX models to insert back +into the pipeline + +.. 
code:: ipython3 + + configs_dict = {} + configs_dict["text_encoder"] = pipe.text_encoder.config + configs_dict["text_encoder_2"] = pipe.text_encoder_2.config + configs_dict["transformer"] = pipe.transformer.config + configs_dict["vae"] = pipe.vae.config + + pipe_config = pipe.config + +Run FP Inference +---------------- + + + +.. code:: ipython3 + + import numpy as np + import torch + + generator = torch.Generator(device="cpu").manual_seed(42) + prompt = "A raccoon trapped inside a glass jar full of colorful candies, the background is steamy with vivid colors" + num_inference_steps = 28 + with torch.no_grad(): + image = pipe( + prompt=prompt, + negative_prompt="", + num_inference_steps=num_inference_steps, + generator=generator, + guidance_scale=5, + ).images[0] + image.resize( + ( + 512, + 512, + ) + ) + +.. code:: ipython3 + + from notebook_utils import device_widget + + device = device_widget() + + device + +Convert models to Torch FX +-------------------------- + + + +This step converts the PyTorch models in the Hugging Face pipeline to Torch FX +representation using the ``torch.export.export_for_training()`` function. + +The pipeline consists of four important parts: + +- Clip and T5 Text Encoders to create the condition to generate an image + from a text prompt. +- Transformer for step-by-step denoising latent image representation. +- Autoencoder (VAE) for decoding latent space to image. + +.. 
code:: ipython3 + + import torch + from nncf.torch.dynamic_graph.patch_pytorch import disable_patching + + text_encoder_input = torch.ones((1, 77), dtype=torch.long) + text_encoder_kwargs = {} + text_encoder_kwargs["output_hidden_states"] = True + + vae_encoder_input = torch.ones((1, 3, 128, 128)) + vae_decoder_input = torch.ones((1, 16, 128, 128)) + + unet_kwargs = {} + unet_kwargs["hidden_states"] = torch.ones((2, 16, 128, 128)) + unet_kwargs["timestep"] = torch.from_numpy(np.array([1, 2], dtype=np.float32)) + unet_kwargs["encoder_hidden_states"] = torch.ones((2, 154, 4096)) + unet_kwargs["pooled_projections"] = torch.ones((2, 2048)) + + with torch.no_grad(): + with disable_patching(): + text_encoder = torch.export.export_for_training( + pipe.text_encoder.eval(), + args=(text_encoder_input,), + kwargs=(text_encoder_kwargs), + ).module() + text_encoder_2 = torch.export.export_for_training( + pipe.text_encoder_2.eval(), + args=(text_encoder_input,), + kwargs=(text_encoder_kwargs), + ).module() + pipe.vae.decoder = torch.export.export_for_training(pipe.vae.decoder.eval(), args=(vae_decoder_input,)).module() + pipe.vae.encoder = torch.export.export_for_training(pipe.vae.encoder.eval(), args=(vae_encoder_input,)).module() + vae = pipe.vae + transformer = torch.export.export_for_training(pipe.transformer.eval(), args=(), kwargs=(unet_kwargs)).module() + models_dict = {} + models_dict["transformer"] = transformer + models_dict["vae"] = vae + models_dict["text_encoder"] = text_encoder + models_dict["text_encoder_2"] = text_encoder_2 + del unet_kwargs + del vae_encoder_input + del vae_decoder_input + del text_encoder_input + del text_encoder_kwargs + del pipe + +Quantization +------------ + + + +`NNCF `__ enables +post-training quantization by adding quantization layers into model +graph and then using a subset of the training dataset to initialize the +parameters of these additional quantization layers. 
Quantized operations +are executed in ``INT8`` instead of ``FP32``/``FP16``, making model +inference faster. + +According to the ``StableDiffusion3Pipeline`` structure, the ``transformer`` +model takes up a significant portion of the overall pipeline execution +time. Now we will show you how to optimize the transformer part using +`NNCF `__ to reduce +computation cost and speed up the pipeline. Quantizing the rest of the +pipeline does not significantly improve inference performance but can +lead to a substantial degradation of accuracy. That’s why we use 8-bit +weight compression for the rest of the pipeline to reduce memory +footprint. + +Please select below whether you would like to run quantization to +improve model inference speed. + + **NOTE**: Quantization is a time- and memory-consuming operation. + Running the quantization code below may take some time. + +.. code:: ipython3 + + from notebook_utils import quantization_widget + + to_quantize = quantization_widget() + + to_quantize + +Let’s load the ``skip magic`` extension to skip quantization if +``to_quantize`` is not selected. + +.. code:: ipython3 + + # Fetch `skip_kernel_extension` module + import requests + + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/skip_kernel_extension.py", + ) + open("skip_kernel_extension.py", "w").write(r.text) + + %load_ext skip_kernel_extension + +Collect Calibration Dataset +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +.. 
code:: ipython3 + + %%skip not $to_quantize.value + + from typing import Any, Dict, List + + import datasets + from diffusers.models.transformers.transformer_sd3 import SD3Transformer2DModel + from tqdm.notebook import tqdm + + + def disable_progress_bar(pipeline, disable=True): + if not hasattr(pipeline, "_progress_bar_config"): + pipeline._progress_bar_config = {"disable": disable} + else: + pipeline._progress_bar_config["disable"] = disable + + + class UNetWrapper(SD3Transformer2DModel): + def __init__(self, transformer, config): + super().__init__(**config) + self.transformer = transformer + self.captured_args = [] + + def forward(self, *args, **kwargs): + del kwargs["joint_attention_kwargs"] + del kwargs["return_dict"] + self.captured_args.append((*args, *tuple(kwargs.values()))) + return self.transformer(*args, **kwargs) + + + def collect_calibration_data( + pipe, calibration_dataset_size: int, num_inference_steps: int + ) -> List[Dict]: + + original_unet = pipe.transformer + calibration_data = [] + disable_progress_bar(pipe) + + dataset = datasets.load_dataset( + "google-research-datasets/conceptual_captions", + split="train", + trust_remote_code=True, + ).shuffle(seed=42) + + transformer_config = dict(pipe.transformer.config) + del transformer_config["model"] + wrapped_unet = UNetWrapper(pipe.transformer.model, transformer_config) + pipe.transformer = wrapped_unet + # Run inference for data collection + pbar = tqdm(total=calibration_dataset_size) + for i, batch in enumerate(dataset): + prompt = batch["caption"] + if len(prompt) > pipe.tokenizer.model_max_length: + continue + # Run the pipeline + pipe(prompt, num_inference_steps=num_inference_steps) + calibration_data.extend(wrapped_unet.captured_args) + wrapped_unet.captured_args = [] + pbar.update(len(calibration_data) - pbar.n) + if pbar.n >= calibration_dataset_size: + break + + disable_progress_bar(pipe, disable=False) + pipe.transformer = original_unet + return calibration_data + + + if to_quantize: + 
pipe = init_pipeline(models_dict, configs_dict) + calibration_dataset_size = 300 + unet_calibration_data = collect_calibration_data( + pipe, calibration_dataset_size=calibration_dataset_size, num_inference_steps=28 + ) + del pipe + +Compress and Quantize models +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +.. code:: ipython3 + + %%skip not $to_quantize.value + + import nncf + from nncf.quantization.advanced_parameters import AdvancedSmoothQuantParameters + from nncf.quantization.range_estimator import RangeEstimatorParametersSet + + text_encoder = models_dict["text_encoder"] + text_encoder_2 = models_dict["text_encoder_2"] + vae_encoder = models_dict["vae"].encoder + vae_decoder = models_dict["vae"].decoder + original_transformer = models_dict["transformer"] + if to_quantize: + with disable_patching(): + with torch.no_grad(): + nncf.compress_weights(text_encoder) + nncf.compress_weights(text_encoder_2) + nncf.compress_weights(vae_encoder) + nncf.compress_weights(vae_decoder) + quantized_transformer = nncf.quantize( + model=original_transformer, + calibration_dataset=nncf.Dataset(unet_calibration_data), + subset_size=len(unet_calibration_data), + model_type=nncf.ModelType.TRANSFORMER, + ignored_scope=nncf.IgnoredScope(names=["conv2d"]), + advanced_parameters=nncf.AdvancedQuantizationParameters( + weights_range_estimator_params=RangeEstimatorParametersSet.MINMAX, + activations_range_estimator_params=RangeEstimatorParametersSet.MINMAX, + ), + ) + + optimized_models_dict = {} + optimized_models_dict["transformer"] = quantized_transformer + optimized_models_dict["vae"] = vae + optimized_models_dict["text_encoder"] = text_encoder + optimized_models_dict["text_encoder_2"] = text_encoder_2 + del models_dict + +.. 
code:: ipython3 + + %%skip not $to_quantize.value + import openvino.torch + + optimized_models_dict["text_encoder"] = torch.compile( + optimized_models_dict["text_encoder"], backend="openvino" + ) + optimized_models_dict["text_encoder_2"] = torch.compile( + optimized_models_dict["text_encoder_2"], backend="openvino" + ) + optimized_models_dict["vae"].encoder = torch.compile( + optimized_models_dict["vae"].encoder, backend="openvino" + ) + optimized_models_dict["vae"].decoder = torch.compile( + optimized_models_dict["vae"].decoder, backend="openvino" + ) + optimized_models_dict["transformer"] = torch.compile( + optimized_models_dict["transformer"], backend="openvino" + ) + +Create Optimized Pipeline +~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +Initialize the optimized pipeline using the optimized models + +.. code:: ipython3 + + %%skip not $to_quantize.value + + opt_pipe = init_pipeline(optimized_models_dict, configs_dict) + +Check File Size +~~~~~~~~~~~~~~~ + + + +.. code:: ipython3 + + %%skip not $to_quantize.value + + + def get_model_size(models): + total_size = 0 + for model in models: + param_size = 0 + for param in model.parameters(): + param_size += param.nelement() * param.element_size() + buffer_size = 0 + for buffer in model.buffers(): + buffer_size += buffer.nelement() * buffer.element_size() + + model_size_mb = (param_size + buffer_size) / 1024**2 + + total_size += model_size_mb + return total_size + + + optimized_model_size = get_model_size([opt_pipe.transformer]) + original_model_size = get_model_size([original_transformer]) + + print(f"Original Transformer Size: {original_model_size} MB") + print(f"Optimized Transformer Size: {optimized_model_size} MB") + print(f"Compression Rate: {original_model_size / optimized_model_size:.3f}") + +Optimized pipeline inference +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +Run inference with single step to compile the model. + +.. 
code:: ipython3 + + %%skip not $to_quantize.value + + # Warmup the model for initial compile + with torch.no_grad(): + image = opt_pipe( + prompt=prompt, negative_prompt="", num_inference_steps=1, generator=generator + ).images[0] + +Visualize Results +~~~~~~~~~~~~~~~~~ + + + +.. code:: ipython3 + + %%skip not $to_quantize.value + + from sd3_torch_fx_helper import visualize_results + + generator = torch.Generator(device="cpu").manual_seed(42) + opt_image = opt_pipe( + prompt, + negative_prompt="", + num_inference_steps=28, + guidance_scale=5, + generator=generator, + ).images[0] + + visualize_results(image, opt_image) + +Interactive demo +---------------- + + + +Please select below whether you would like to use the quantized models +to launch the interactive demo. + +.. code:: ipython3 + + use_quantized_models = quantization_widget() + + use_quantized_models + +.. code:: ipython3 + + from gradio_helper import make_demo + + fx_pipe = init_pipeline(models_dict if not to_quantize.value else optimized_models_dict, configs_dict) + demo = make_demo(fx_pipe, False) + + # if you are launching remotely, specify server_name and server_port + # demo.launch(server_name='your server name', server_port='server port in int') + # if you have any issue to launch on your platform, you can pass share=True to launch method: + # demo.launch(share=True) + # it creates a publicly shareable link for the interface. Read more in the docs: https://gradio.app/docs/ + try: + demo.launch(debug=True) + except Exception: + demo.launch(debug=True, share=True) diff --git a/docs/notebooks/stable-diffusion-xl-with-output.rst b/docs/notebooks/stable-diffusion-xl-with-output.rst index 54a43191c229a4..7ec1c0c81eeb20 100644 --- a/docs/notebooks/stable-diffusion-xl-with-output.rst +++ b/docs/notebooks/stable-diffusion-xl-with-output.rst @@ -100,9 +100,9 @@ Install prerequisites .. 
code:: ipython3 - # %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu "torch>=2.1" "torchvision" "diffusers>=0.24.0" "invisible-watermark>=0.2.0" "transformers>=4.33.0" "accelerate" "onnx!=1.16.2" "peft>=0.6.2" - # %pip install -q "git+https://github.com/huggingface/optimum-intel.git" - # %pip install -q "openvino>=2023.1.0" "gradio>=4.19" "nncf>=2.9.0" + %pip install -q --extra-index-url https://download.pytorch.org/whl/cpu "torch>=2.1" "torchvision" "diffusers>=0.24.0" "invisible-watermark>=0.2.0" "transformers>=4.33.0" "accelerate" "onnx!=1.16.2" "peft>=0.6.2" + %pip install -q "git+https://github.com/huggingface/optimum-intel.git" + %pip install -q "openvino>=2023.1.0" "gradio>=4.19" "nncf>=2.9.0" SDXL Base model --------------- diff --git a/docs/notebooks/style-transfer-with-output.rst b/docs/notebooks/style-transfer-with-output.rst index b123ca215cbbfc..c228604aee32f9 100644 --- a/docs/notebooks/style-transfer-with-output.rst +++ b/docs/notebooks/style-transfer-with-output.rst @@ -96,7 +96,7 @@ Install requirements .. parsed-literal:: - 24717 + 24624 @@ -186,14 +186,14 @@ OpenVINO Intermediate Representation (IR) with ``FP16`` precision. .. parsed-literal:: - model/mosaic-9.onnx: 0%| | 0.00/6.42M [00:00`__. It uses +`BiT-M-R50x1/1 `__ +model, which is trained on ImageNet-21k. Big Transfer is a recipe for +pre-training image classification models on large supervised datasets +and efficiently fine-tuning them on any given target task. The recipe +achieves excellent performance on a wide variety of tasks, even when +using very few labeled examples from the target dataset. This tutorial +uses OpenVINO backend for performing model quantization in NNCF. 
+ + +**Table of contents:** + + +- `Prepare Dataset <#prepare-dataset>`__ +- `Plotting data samples <#plotting-data-samples>`__ +- `Model Fine-tuning <#model-fine-tuning>`__ +- `Perform model optimization (IR) + step <#perform-model-optimization-ir-step>`__ +- `Compute accuracy of the TF + model <#compute-accuracy-of-the-tf-model>`__ +- `Compute accuracy of the OpenVINO + model <#compute-accuracy-of-the-openvino-model>`__ +- `Quantize OpenVINO model using + NNCF <#quantize-openvino-model-using-nncf>`__ +- `Compute accuracy of the quantized + model <#compute-accuracy-of-the-quantized-model>`__ +- `Compare FP32 and INT8 accuracy <#compare-fp32-and-int8-accuracy>`__ +- `Compare inference results on one + picture <#compare-inference-results-on-one-picture>`__ + +Installation Instructions +~~~~~~~~~~~~~~~~~~~~~~~~~ + +This is a self-contained example that relies solely on its own code. + +We recommend running the notebook in a virtual environment. You only +need a Jupyter server to start. For details, please refer to +`Installation +Guide `__. + +.. code:: ipython3 + + import platform + + %pip install -q "tensorflow-macos>=2.5; sys_platform == 'darwin' and platform_machine == 'arm64' and python_version > '3.8'" # macOS M1 and M2 + %pip install -q "tensorflow>=2.5; sys_platform == 'darwin' and platform_machine != 'arm64' and python_version > '3.8'" # macOS x86 + %pip install -q "tensorflow>=2.5; sys_platform != 'darwin' and python_version > '3.8'" + + %pip install -q "openvino>=2024.0.0" "nncf>=2.7.0" "tensorflow-hub>=0.15.0" tf_keras + %pip install -q "scikit-learn>=1.3.2" + + if platform.system() != "Windows": + %pip install -q "matplotlib>=3.4" "tensorflow_datasets>=4.9.0" + else: + %pip install -q "matplotlib>=3.4" "tensorflow_datasets>=4.9.0,<4.9.3" + +.. 
code:: ipython3 + + import os + import numpy as np + from pathlib import Path + + from openvino.runtime import Core + import openvino as ov + import nncf + import logging + + from nncf.common.logging.logger import set_log_level + + set_log_level(logging.ERROR) + + from sklearn.metrics import accuracy_score + + os.environ["TF_USE_LEGACY_KERAS"] = "1" + os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2" + os.environ["TFHUB_CACHE_DIR"] = str(Path("./tfhub_modules").resolve()) + + import tensorflow as tf + import tensorflow_datasets as tfds + import tensorflow_hub as hub + + tfds.core.utils.gcs_utils._is_gcs_disabled = True + os.environ["NO_GCE_CHECK"] = "true" + + import requests + + r = requests.get( + url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", + ) + open("notebook_utils.py", "w").write(r.text) + +.. code:: ipython3 + + core = Core() + tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) + + + # For top 5 labels. + MAX_PREDS = 1 + TRAINING_BATCH_SIZE = 128 + BATCH_SIZE = 1 + IMG_SIZE = (256, 256) # Default Imagenet image size + NUM_CLASSES = 10 # For Imagenette dataset + FINE_TUNING_STEPS = 1 + LR = 1e-5 + + MEAN_RGB = (0.485 * 255, 0.456 * 255, 0.406 * 255) # From Imagenet dataset + STDDEV_RGB = (0.229 * 255, 0.224 * 255, 0.225 * 255) # From Imagenet dataset + +Prepare Dataset +~~~~~~~~~~~~~~~ + + + +.. code:: ipython3 + + datasets, datasets_info = tfds.load( + "imagenette/160px", + shuffle_files=True, + as_supervised=True, + with_info=True, + read_config=tfds.ReadConfig(shuffle_seed=0), + ) + train_ds, validation_ds = datasets["train"], datasets["validation"] + +.. 
code:: ipython3 + + def preprocessing(image, label): + image = tf.image.resize(image, IMG_SIZE) + image = tf.cast(image, tf.float32) / 255.0 + label = tf.one_hot(label, NUM_CLASSES) + return image, label + + + train_dataset = train_ds.map(preprocessing, num_parallel_calls=tf.data.experimental.AUTOTUNE).batch(TRAINING_BATCH_SIZE).prefetch(tf.data.experimental.AUTOTUNE) + validation_dataset = ( + validation_ds.map(preprocessing, num_parallel_calls=tf.data.experimental.AUTOTUNE).batch(TRAINING_BATCH_SIZE).prefetch(tf.data.experimental.AUTOTUNE) + ) + +.. code:: ipython3 + + # Class labels dictionary with imagenette sample names and classes + lbl_dict = dict( + n01440764="tench", + n02102040="English springer", + n02979186="cassette player", + n03000684="chain saw", + n03028079="church", + n03394916="French horn", + n03417042="garbage truck", + n03425413="gas pump", + n03445777="golf ball", + n03888257="parachute", + ) + + # Imagenette samples name index + class_idx_dict = [ + "n01440764", + "n02102040", + "n02979186", + "n03000684", + "n03028079", + "n03394916", + "n03417042", + "n03425413", + "n03445777", + "n03888257", + ] + + + def label_func(key): + return lbl_dict[key] + +Plotting data samples +~~~~~~~~~~~~~~~~~~~~~ + + + +.. code:: ipython3 + + import matplotlib.pyplot as plt + + # Get the class labels from the dataset info + class_labels = datasets_info.features["label"].names + + # Display labels along with the examples + num_examples_to_display = 4 + fig, axes = plt.subplots(nrows=1, ncols=num_examples_to_display, figsize=(10, 5)) + + for i, (image, label_index) in enumerate(train_ds.take(num_examples_to_display)): + label_name = class_labels[label_index.numpy()] + + axes[i].imshow(image.numpy()) + axes[i].set_title(f"{label_func(label_name)}") + axes[i].axis("off") + plt.tight_layout() + plt.show() + + +.. 
parsed-literal:: + + 2024-01-26 10:40:54.747316: W tensorflow/core/kernels/data/cache_dataset_ops.cc:854] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead. + + + +.. image:: tensorflow-bit-image-classification-nncf-quantization-with-output_files/tensorflow-bit-image-classification-nncf-quantization-with-output_9_1.png + + +.. code:: ipython3 + + # Get the class labels from the dataset info + class_labels = datasets_info.features["label"].names + + # Display labels along with the examples + num_examples_to_display = 4 + fig, axes = plt.subplots(nrows=1, ncols=num_examples_to_display, figsize=(10, 5)) + + for i, (image, label_index) in enumerate(validation_ds.take(num_examples_to_display)): + label_name = class_labels[label_index.numpy()] + + axes[i].imshow(image.numpy()) + axes[i].set_title(f"{label_func(label_name)}") + axes[i].axis("off") + plt.tight_layout() + plt.show() + + +.. parsed-literal:: + + 2024-01-26 10:40:57.011386: W tensorflow/core/kernels/data/cache_dataset_ops.cc:854] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead. + + + +.. image:: tensorflow-bit-image-classification-nncf-quantization-with-output_files/tensorflow-bit-image-classification-nncf-quantization-with-output_10_1.png + + +Model Fine-tuning +~~~~~~~~~~~~~~~~~ + + + +.. 
code:: ipython3 + + # Load the Big Transfer model + bit_model_url = "https://www.kaggle.com/models/google/bit/frameworks/TensorFlow2/variations/m-r50x1/versions/1" + bit_m = hub.KerasLayer(bit_model_url, trainable=True) + + tf_model_dir = Path("bit_tf_model") + + # Customize the model for the new task + model = tf.keras.Sequential([bit_m, tf.keras.layers.Dense(NUM_CLASSES, activation="softmax")]) + + # Compile the model + model.compile( + optimizer=tf.keras.optimizers.Adam(learning_rate=LR), + loss="categorical_crossentropy", + metrics=["accuracy"], + ) + + # Fine-tune the model + model.fit( + train_dataset.take(3000), + epochs=FINE_TUNING_STEPS, + validation_data=validation_dataset.take(1000), + ) + model.save(tf_model_dir, save_format="tf") + + +.. parsed-literal:: + + 101/101 [==============================] - 472s 4s/step - loss: 0.4904 - accuracy: 0.8806 - val_loss: 0.0810 - val_accuracy: 0.9840 + + +Perform model optimization (IR) step +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +.. code:: ipython3 + + ir_path = Path("bit_ov_model/bit_m_r50x1_1.xml") + if not ir_path.exists(): + print("Initiating model optimization..!!!") + ov_model = ov.convert_model("./bit_tf_model") + ov.save_model(ov_model, ir_path) + else: + print(f"IR model {ir_path} already exists.") + + +.. parsed-literal:: + + Initiating model optimization..!!! + + +Compute accuracy of the TF model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +.. 
code:: ipython3 + + tf_model = tf.keras.models.load_model(tf_model_dir) + + tf_predictions = [] + gt_label = [] + + for _, label in validation_dataset: + for cls_label in label: + l_list = cls_label.numpy().tolist() + gt_label.append(l_list.index(1)) + + for img_batch, label_batch in validation_dataset: + tf_result_batch = tf_model.predict(img_batch, verbose=0) + for i in range(len(img_batch)): + tf_result = tf_result_batch[i] + tf_result = tf.reshape(tf_result, [-1]) + top5_label_idx = np.argsort(tf_result)[-MAX_PREDS::][::-1] + tf_predictions.append(top5_label_idx) + + # Convert the lists to NumPy arrays for accuracy calculation + tf_predictions = np.array(tf_predictions) + gt_label = np.array(gt_label) + + tf_acc_score = accuracy_score(tf_predictions, gt_label) + + +.. parsed-literal:: + + 2024-01-26 10:51:24.539777: W tensorflow/core/common_runtime/graph_constructor.cc:839] Node 're_lu_48/PartitionedCall' has 1 outputs but the _output_shapes attribute specifies shapes for 2 outputs. Output shapes may be inaccurate. + 2024-01-26 10:51:24.539856: W tensorflow/core/common_runtime/graph_constructor.cc:839] Node 'global_average_pooling2d/PartitionedCall' has 1 outputs but the _output_shapes attribute specifies shapes for 3 outputs. Output shapes may be inaccurate. + + +Compute accuracy of the OpenVINO model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +Select device for inference: + +.. code:: ipython3 + + from notebook_utils import device_widget + + device = device_widget() + + device + +.. 
code:: ipython3 + + core = ov.Core() + + ov_fp32_model = core.read_model(ir_path) + ov_fp32_model.reshape([1, IMG_SIZE[0], IMG_SIZE[1], 3]) + + # Target device set to CPU (Other options Ex: AUTO/GPU/dGPU/) + compiled_model = ov.compile_model(ov_fp32_model, device.value) + output = compiled_model.outputs[0] + + ov_predictions = [] + for img_batch, _ in validation_dataset: + for image in img_batch: + image = tf.expand_dims(image, axis=0) + pred = compiled_model(image)[output] + ov_result = tf.reshape(pred, [-1]) + top_label_idx = np.argsort(ov_result)[-MAX_PREDS::][::-1] + ov_predictions.append(top_label_idx) + + fp32_acc_score = accuracy_score(ov_predictions, gt_label) + +Quantize OpenVINO model using NNCF +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +Model Quantization using NNCF + +1. Preprocessing and preparing validation samples for NNCF calibration +2. Perform NNCF Quantization on OpenVINO FP32 model +3. Serialize Quantized OpenVINO INT8 model + +.. code:: ipython3 + + def nncf_preprocessing(image, label): + image = tf.image.resize(image, IMG_SIZE) + image = image - MEAN_RGB + image = image / STDDEV_RGB + return image + + + int8_ir_path = Path("bit_ov_int8_model/bit_m_r50x1_1_ov_int8.xml") + val_ds = validation_ds.map(nncf_preprocessing, num_parallel_calls=tf.data.experimental.AUTOTUNE).batch(1).prefetch(tf.data.experimental.AUTOTUNE) + + calibration_dataset = nncf.Dataset(val_ds) + + ov_fp32_model = core.read_model(ir_path) + + ov_int8_model = nncf.quantize(ov_fp32_model, calibration_dataset, fast_bias_correction=False) + + ov.save_model(ov_int8_model, int8_ir_path) + + + +.. parsed-literal:: + + Output() + + + + + + + + + + + + + + + + + + +.. parsed-literal:: + + Output() + + + + + + + + + + + + + + + + + +Compute accuracy of the quantized model +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +.. 
code:: ipython3 + + nncf_quantized_model = core.read_model(int8_ir_path) + nncf_quantized_model.reshape([1, IMG_SIZE[0], IMG_SIZE[1], 3]) + + # Target device set to CPU by default + compiled_model = ov.compile_model(nncf_quantized_model, device.value) + output = compiled_model.outputs[0] + + ov_predictions = [] + inp_tensor = nncf_quantized_model.inputs[0] + out_tensor = nncf_quantized_model.outputs[0] + + for img_batch, _ in validation_dataset: + for image in img_batch: + image = tf.expand_dims(image, axis=0) + pred = compiled_model(image)[output] + ov_result = tf.reshape(pred, [-1]) + top_label_idx = np.argsort(ov_result)[-MAX_PREDS::][::-1] + ov_predictions.append(top_label_idx) + + int8_acc_score = accuracy_score(ov_predictions, gt_label) + +Compare FP32 and INT8 accuracy +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +.. code:: ipython3 + + print(f"Accuracy of the tensorflow model (fp32): {tf_acc_score * 100: .2f}%") + print(f"Accuracy of the OpenVINO optimized model (fp32): {fp32_acc_score * 100: .2f}%") + print(f"Accuracy of the OpenVINO quantized model (int8): {int8_acc_score * 100: .2f}%") + accuracy_drop = fp32_acc_score - int8_acc_score + print(f"Accuracy drop between OV FP32 and INT8 model: {accuracy_drop * 100:.1f}% ") + + +.. parsed-literal:: + + Accuracy of the tensorflow model (fp32): 98.40% + Accuracy of the OpenVINO optimized model (fp32): 98.40% + Accuracy of the OpenVINO quantized model (int8): 98.00% + Accuracy drop between OV FP32 and INT8 model: 0.4% + + +Compare inference results on one picture +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + + +.. 
code:: ipython3 + + # Accessing validation sample + sample_idx = 50 + vds = datasets["validation"] + + if len(vds) > sample_idx: + sample = vds.take(sample_idx + 1).skip(sample_idx).as_numpy_iterator().next() + else: + print("Dataset does not have enough samples...!!!") + + # Image data + sample_data = sample[0] + + # Label info + sample_label = sample[1] + + # Image data pre-processing + image = tf.image.resize(sample_data, IMG_SIZE) + image = tf.expand_dims(image, axis=0) + image = tf.cast(image, tf.float32) / 255.0 + + + # OpenVINO inference + def ov_inference(model: ov.Model, image) -> str: + compiled_model = ov.compile_model(model, device.value) + output = compiled_model.outputs[0] + pred = compiled_model(image)[output] + ov_result = tf.reshape(pred, [-1]) + pred_label = np.argsort(ov_result)[-MAX_PREDS::][::-1] + return pred_label + + + # OpenVINO FP32 model + ov_fp32_model = core.read_model(ir_path) + ov_fp32_model.reshape([1, IMG_SIZE[0], IMG_SIZE[1], 3]) + + # OpenVINO INT8 model + ov_int8_model = core.read_model(int8_ir_path) + ov_int8_model.reshape([1, IMG_SIZE[0], IMG_SIZE[1], 3]) + + # OpenVINO FP32 model inference + ov_fp32_pred_label = ov_inference(ov_fp32_model, image) + + print(f"Predicted label for the sample picture by float (fp32) model: {label_func(class_idx_dict[int(ov_fp32_pred_label)])}\n") + + # OpenVINO FP32 model inference + ov_int8_pred_label = ov_inference(ov_int8_model, image) + print(f"Predicted label for the sample picture by qunatized (int8) model: {label_func(class_idx_dict[int(ov_int8_pred_label)])}\n") + + # Plotting the image sample with ground truth + plt.figure() + plt.imshow(sample_data) + plt.title(f"Ground truth: {label_func(class_idx_dict[sample_label])}") + plt.axis("off") + plt.show() + + +.. parsed-literal:: + + Predicted label for the sample picture by float (fp32) model: gas pump + + Predicted label for the sample picture by qunatized (int8) model: gas pump + + + + +.. 
image:: tensorflow-bit-image-classification-nncf-quantization-with-output_files/tensorflow-bit-image-classification-nncf-quantization-with-output_27_1.png + diff --git a/docs/notebooks/tensorflow-bit-image-classification-nncf-quantization-with-output_files/tensorflow-bit-image-classification-nncf-quantization-with-output_10_1.png b/docs/notebooks/tensorflow-bit-image-classification-nncf-quantization-with-output_files/tensorflow-bit-image-classification-nncf-quantization-with-output_10_1.png new file mode 100644 index 00000000000000..71aa7443a92cd8 --- /dev/null +++ b/docs/notebooks/tensorflow-bit-image-classification-nncf-quantization-with-output_files/tensorflow-bit-image-classification-nncf-quantization-with-output_10_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7b53b19fd375df2b53791482fa4f76ec9d376be865f1298f4ea5aa0acdb1f35 +size 224517 diff --git a/docs/notebooks/tensorflow-bit-image-classification-nncf-quantization-with-output_files/tensorflow-bit-image-classification-nncf-quantization-with-output_27_1.png b/docs/notebooks/tensorflow-bit-image-classification-nncf-quantization-with-output_files/tensorflow-bit-image-classification-nncf-quantization-with-output_27_1.png new file mode 100644 index 00000000000000..38f050c05e472a --- /dev/null +++ b/docs/notebooks/tensorflow-bit-image-classification-nncf-quantization-with-output_files/tensorflow-bit-image-classification-nncf-quantization-with-output_27_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:048e8ff7b7ac7fa5f9cb66251d618f1ae941f26255f62c725d6223abd63e6fb7 +size 335047 diff --git a/docs/notebooks/tensorflow-bit-image-classification-nncf-quantization-with-output_files/tensorflow-bit-image-classification-nncf-quantization-with-output_9_1.png b/docs/notebooks/tensorflow-bit-image-classification-nncf-quantization-with-output_files/tensorflow-bit-image-classification-nncf-quantization-with-output_9_1.png new file mode 100644 index 
00000000000000..a8d02fcbd58c16 --- /dev/null +++ b/docs/notebooks/tensorflow-bit-image-classification-nncf-quantization-with-output_files/tensorflow-bit-image-classification-nncf-quantization-with-output_9_1.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3bf1b651f79891da47103dcc27259f890451c392325a712ff4c1b1cace7cb4be +size 296205 diff --git a/docs/notebooks/tensorflow-classification-to-openvino-with-output.rst b/docs/notebooks/tensorflow-classification-to-openvino-with-output.rst index 2e4f5ffe25369c..9ab3ae90d2fd3e 100644 --- a/docs/notebooks/tensorflow-classification-to-openvino-with-output.rst +++ b/docs/notebooks/tensorflow-classification-to-openvino-with-output.rst @@ -249,7 +249,7 @@ network. .. parsed-literal:: - data/coco.jpg: 0%| | 0.00/202k [00:00 + @@ -649,7 +649,7 @@ Zoo `__: .. parsed-literal:: - data/coco_91cl.txt: 0%| | 0.00/421 [00:00 + @@ -683,16 +678,10 @@ Zoo `__: -.. parsed-literal:: - - data/coco_91cl.txt: 0%| | 0.00/421 [00:00`__. -2. Run inference using the `Text to Image -pipeline `__ +2. Run inference using the `Text-to-Image Generation +pipeline `__ from OpenVINO GenAI. 
+ **Table of contents:** + - `Prerequisites <#prerequisites>`__ - `Convert model using Optimum-CLI tool <#convert-model-using-optimum-cli-tool>`__ @@ -57,19 +59,19 @@ Prerequisites import platform import requests - - + + %pip install -q "git+https://github.com/huggingface/optimum-intel.git" %pip install -q -U "openvino>=2024.5" "openvino-tokenizers>=2024.5" "openvino-genai>=2024.5" %pip install -q Pillow "diffusers>=0.30.3" "gradio>=4.19" "typing_extensions>=4.9" if platform.system() == "Darwin": %pip install -q "numpy<2.0.0" - + r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", ) open("notebook_utils.py", "w").write(r.text) - + r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/cmd_helper.py", ) @@ -81,7 +83,7 @@ Convert model using Optimum-CLI tool `Optimum Intel `__ -is the interface between the +is the interface between the `Transformers `__ and `Diffusers `__ libraries and OpenVINO to accelerate end-to-end pipelines on Intel architectures. @@ -116,12 +118,12 @@ wrapper over cli-command. .. code:: ipython3 from pathlib import Path - + from cmd_helper import optimum_cli - - + + model_dir = Path("dreamlike_anime_1_0_ov") - + if not model_dir.exists(): optimum_cli("dreamlike-art/dreamlike-anime-1.0", model_dir) @@ -135,8 +137,8 @@ select device from dropdown list for running inference using OpenVINO .. 
code:: ipython3 from notebook_utils import device_widget - - + + device = device_widget("CPU", exclude=["NPU"]) device @@ -161,27 +163,27 @@ That’s it:) import openvino as ov from PIL import Image import torch - - + + class Generator(ov_genai.Generator): def __init__(self, seed): ov_genai.Generator.__init__(self) self.generator = torch.Generator(device="cpu").manual_seed(seed) - + def next(self): return torch.randn(1, generator=self.generator, dtype=torch.float32).item() - + def randn_tensor(self, shape: ov.Shape): torch_tensor = torch.randn(list(shape), generator=self.generator, dtype=torch.float32) return ov.Tensor(torch_tensor.numpy()) - - + + random_generator = Generator(42) # openvino_genai.CppStdGenerator can be used to have same images as C++ sample pipe = ov_genai.Text2ImagePipeline(model_dir, device.value) prompt = "anime, masterpiece, high quality, a green snowman with a happy smiling face in the snows" - + image_tensor = pipe.generate(prompt, width=512, height=512, num_inference_steps=20, num_images_per_prompt=1, generator=random_generator) - + image = Image.fromarray(image_tensor.data[0]) .. code:: ipython3 @@ -228,20 +230,20 @@ from command line: def prepare_adapter_config(adapters): adapter_config = ov_genai.AdapterConfig() - + # Multiple LoRA adapters applied simultaneously are supported, parse them all and corresponding alphas from cmd parameters: for i in range(int(len(adapters) / 2)): adapter = ov_genai.Adapter(adapters[2 * i]) alpha = float(adapters[2 * i + 1]) adapter_config.add(adapter, alpha) - + return adapter_config - - + + adapter_config = prepare_adapter_config(["soulcard.safetensors", 0.5]) - + pipe = ov_genai.Text2ImagePipeline(model_dir, device.value, adapters=adapter_config) - + image_tensor = pipe.generate(prompt, generator=Generator(42), width=512, height=512, num_inference_steps=20) image = Image.fromarray(image_tensor.data[0]) @@ -268,10 +270,10 @@ Interactive demo .. 
code:: ipython3 from gradio_helper import make_demo - - + + demo = make_demo(pipe, Generator, adapter_config) - + try: demo.launch(debug=True) except Exception: diff --git a/docs/notebooks/tflite-selfie-segmentation-with-output.rst b/docs/notebooks/tflite-selfie-segmentation-with-output.rst index 7f613016c47019..8691da62b77526 100644 --- a/docs/notebooks/tflite-selfie-segmentation-with-output.rst +++ b/docs/notebooks/tflite-selfie-segmentation-with-output.rst @@ -117,8 +117,7 @@ Download pretrained model and test image tflite_model_path = Path("selfie_multiclass_256x256.tflite") tflite_model_url = "https://storage.googleapis.com/mediapipe-models/image_segmenter/selfie_multiclass_256x256/float32/latest/selfie_multiclass_256x256.tflite" - if not tflite_model_path.exists(): - download_file(tflite_model_url, tflite_model_path) + download_file(tflite_model_url, tflite_model_path) @@ -127,6 +126,14 @@ Download pretrained model and test image selfie_multiclass_256x256.tflite: 0%| | 0.00/15.6M [00:00`__. .. code:: ipython3 - image = load_image("https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/image/coco_bricks.png") + image = load_image("coco_bricks.png", "https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/image/coco_bricks.png") # load_image reads the image in BGR format, [:,:,::-1] reshape transfroms it to RGB image = Image.fromarray(image[:, :, ::-1]) resized_image = image.resize((224, 224)) @@ -274,7 +274,7 @@ GPU. [ WARNING ] Performance hint was not explicitly specified in command line. Device(AUTO) performance hint will be set to PerformanceMode.THROUGHPUT. [Step 4/11] Reading model files [ INFO ] Loading model files - [ INFO ] Read model took 9.66 ms + [ INFO ] Read model took 9.35 ms [ INFO ] Original model I/O parameters: [ INFO ] Model inputs: [ INFO ] images (node: images) : f32 / [...] / [1,224,224,3] @@ -288,7 +288,7 @@ GPU. [ INFO ] Model outputs: [ INFO ] Softmax (node: 61) : f32 / [...] 
/ [1,1000] [Step 7/11] Loading the model to the device - [ INFO ] Compile model took 147.18 ms + [ INFO ] Compile model took 166.78 ms [Step 8/11] Querying optimal runtime parameters [ INFO ] Model: [ INFO ] NETWORK_NAME: TensorFlow_Lite_Frontend_IR @@ -325,15 +325,15 @@ GPU. [ INFO ] Fill input 'images' with random values [Step 10/11] Measuring performance (Start inference asynchronously, 6 inference requests, limits: 15000 ms duration) [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). - [ INFO ] First inference took 7.26 ms + [ INFO ] First inference took 7.31 ms [Step 11/11] Dumping statistics report [ INFO ] Execution Devices:['CPU'] - [ INFO ] Count: 16578 iterations - [ INFO ] Duration: 15004.17 ms + [ INFO ] Count: 17460 iterations + [ INFO ] Duration: 15005.01 ms [ INFO ] Latency: - [ INFO ] Median: 5.30 ms - [ INFO ] Average: 5.29 ms - [ INFO ] Min: 2.92 ms - [ INFO ] Max: 17.62 ms - [ INFO ] Throughput: 1104.89 FPS + [ INFO ] Median: 4.99 ms + [ INFO ] Average: 5.02 ms + [ INFO ] Min: 2.99 ms + [ INFO ] Max: 17.05 ms + [ INFO ] Throughput: 1163.61 FPS diff --git a/docs/notebooks/tiny-sd-image-generation-with-output.rst b/docs/notebooks/tiny-sd-image-generation-with-output.rst index 2c4126b8aefc4c..090b8ff5f63378 100644 --- a/docs/notebooks/tiny-sd-image-generation-with-output.rst +++ b/docs/notebooks/tiny-sd-image-generation-with-output.rst @@ -96,9 +96,9 @@ First, load the pre-trained weights of all components of the model. import gc from diffusers import StableDiffusionPipeline - + model_id = "segmind/tiny-sd" - + pipe = StableDiffusionPipeline.from_pretrained(model_id).to("cpu") text_encoder = pipe.text_encoder text_encoder.eval() @@ -106,7 +106,7 @@ First, load the pre-trained weights of all components of the model. unet.eval() vae = pipe.vae vae.eval() - + del pipe gc.collect() @@ -164,10 +164,10 @@ hidden states. 
from pathlib import Path import torch import openvino as ov - + TEXT_ENCODER_OV_PATH = Path("text_encoder.xml") - - + + def convert_encoder(text_encoder: torch.nn.Module, ir_path: Path): """ Convert Text Encoder mode. @@ -181,7 +181,7 @@ hidden states. input_ids = torch.ones((1, 77), dtype=torch.long) # switch model to inference mode text_encoder.eval() - + # disable gradients calculation for reducing memory consumption with torch.no_grad(): # Export model to IR format @@ -195,13 +195,13 @@ hidden states. ov.save_model(ov_model, ir_path) del ov_model print(f"Text Encoder successfully converted to IR and saved to {ir_path}") - - + + if not TEXT_ENCODER_OV_PATH.exists(): convert_encoder(text_encoder, TEXT_ENCODER_OV_PATH) else: print(f"Text encoder will be loaded from {TEXT_ENCODER_OV_PATH}") - + del text_encoder gc.collect(); @@ -223,12 +223,12 @@ Model predicts the ``sample`` state for the next step. import numpy as np from openvino import PartialShape, Type - + UNET_OV_PATH = Path("unet.xml") - + dtype_mapping = {torch.float32: Type.f32, torch.float64: Type.f64} - - + + def convert_unet(unet: torch.nn.Module, ir_path: Path): """ Convert U-net model to IR format. @@ -250,15 +250,15 @@ Model predicts the ``sample`` state for the next step. shape = PartialShape(tuple(input_tensor.shape)) element_type = dtype_mapping[input_tensor.dtype] input_info.append((shape, element_type)) - + unet.eval() with torch.no_grad(): ov_model = ov.convert_model(unet, example_input=dummy_inputs, input=input_info) ov.save_model(ov_model, ir_path) del ov_model print(f"Unet successfully converted to IR and saved to {ir_path}") - - + + if not UNET_OV_PATH.exists(): convert_unet(unet, UNET_OV_PATH) gc.collect() @@ -292,8 +292,8 @@ of the pipeline, it will be better to convert them to separate models. .. code:: ipython3 VAE_ENCODER_OV_PATH = Path("vae_encodr.xml") - - + + def convert_vae_encoder(vae: torch.nn.Module, ir_path: Path): """ Convert VAE model for encoding to IR format. 
@@ -305,15 +305,15 @@ of the pipeline, it will be better to convert them to separate models. Returns: None """ - + class VAEEncoderWrapper(torch.nn.Module): def __init__(self, vae): super().__init__() self.vae = vae - + def forward(self, image): return self.vae.encode(x=image)["latent_dist"].sample() - + vae_encoder = VAEEncoderWrapper(vae) vae_encoder.eval() image = torch.zeros((1, 3, 512, 512)) @@ -322,16 +322,16 @@ of the pipeline, it will be better to convert them to separate models. ov.save_model(ov_model, ir_path) del ov_model print(f"VAE encoder successfully converted to IR and saved to {ir_path}") - - + + if not VAE_ENCODER_OV_PATH.exists(): convert_vae_encoder(vae, VAE_ENCODER_OV_PATH) else: print(f"VAE encoder will be loaded from {VAE_ENCODER_OV_PATH}") - + VAE_DECODER_OV_PATH = Path("vae_decoder.xml") - - + + def convert_vae_decoder(vae: torch.nn.Module, ir_path: Path): """ Convert VAE model for decoding to IR format. @@ -343,31 +343,31 @@ of the pipeline, it will be better to convert them to separate models. Returns: None """ - + class VAEDecoderWrapper(torch.nn.Module): def __init__(self, vae): super().__init__() self.vae = vae - + def forward(self, latents): return self.vae.decode(latents) - + vae_decoder = VAEDecoderWrapper(vae) latents = torch.zeros((1, 4, 64, 64)) - + vae_decoder.eval() with torch.no_grad(): ov_model = ov.convert_model(vae_decoder, example_input=latents, input=[((1, 4, 64, 64),)]) ov.save_model(ov_model, ir_path) del ov_model print(f"VAE decoder successfully converted to IR and saved to {ir_path}") - - + + if not VAE_DECODER_OV_PATH.exists(): convert_vae_decoder(vae, VAE_DECODER_OV_PATH) else: print(f"VAE decoder will be loaded from {VAE_DECODER_OV_PATH}") - + del vae gc.collect(); @@ -426,20 +426,20 @@ of the variational auto encoder. 
import inspect from typing import List, Optional, Union, Dict - + import PIL import cv2 - + from transformers import CLIPTokenizer from diffusers.pipelines.pipeline_utils import DiffusionPipeline from diffusers.schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler - - + + def scale_fit_to_window(dst_width: int, dst_height: int, image_width: int, image_height: int): """ Preprocessing helper function for calculating image size for resize with peserving original aspect ratio and fitting image to specific window size - + Parameters: dst_width (int): destination window width dst_height (int): destination window height @@ -451,15 +451,15 @@ of the variational auto encoder. """ im_scale = min(dst_height / image_height, dst_width / image_width) return int(im_scale * image_width), int(im_scale * image_height) - - + + def preprocess(image: PIL.Image.Image): """ Image preprocessing function. Takes image in PIL.Image format, resizes it to keep aspect ration and fits to model input window 512x512, then converts it to np.ndarray and adds padding with zeros on right or bottom side of image (depends from aspect ratio), after that converts data to float32 data type and change range of values from [0, 255] to [-1, 1], finally, converts data layout from planar NHWC to NCHW. The function returns preprocessed input tensor and padding size, which can be used in postprocessing. - + Parameters: image (PIL.Image.Image): input image Returns: @@ -477,8 +477,8 @@ of the variational auto encoder. image = 2.0 * image - 1.0 image = image.transpose(0, 3, 1, 2) return image, {"padding": pad, "src_width": src_width, "src_height": src_height} - - + + class OVStableDiffusionPipeline(DiffusionPipeline): def __init__( self, @@ -518,7 +518,7 @@ of the variational auto encoder. self.height = 512 self.width = 512 self.tokenizer = tokenizer - + def __call__( self, prompt: Union[str, List[str]], @@ -567,7 +567,7 @@ of the variational auto encoder. 
""" if seed is not None: np.random.seed(seed) - + img_buffer = [] do_classifier_free_guidance = guidance_scale > 1.0 # get prompt text embeddings @@ -576,20 +576,20 @@ of the variational auto encoder. do_classifier_free_guidance=do_classifier_free_guidance, negative_prompt=negative_prompt, ) - + # set timesteps accepts_offset = "offset" in set(inspect.signature(self.scheduler.set_timesteps).parameters.keys()) extra_set_kwargs = {} if accepts_offset: extra_set_kwargs["offset"] = 1 - + self.scheduler.set_timesteps(num_inference_steps, **extra_set_kwargs) timesteps, num_inference_steps = self.get_timesteps(num_inference_steps, strength) latent_timestep = timesteps[:1] - + # get the initial random noise unless the user supplied it latents, meta = self.prepare_latents(image, latent_timestep) - + # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers. # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502 @@ -598,19 +598,19 @@ of the variational auto encoder. extra_step_kwargs = {} if accepts_eta: extra_step_kwargs["eta"] = eta - + for i, t in enumerate(self.progress_bar(timesteps)): # expand the latents if you are doing classifier free guidance latent_model_input = np.concatenate([latents] * 2) if do_classifier_free_guidance else latents latent_model_input = self.scheduler.scale_model_input(latent_model_input, t) - + # predict the noise residual noise_pred = self.unet([latent_model_input, t, text_embeddings])[self._unet_output] # perform guidance if do_classifier_free_guidance: noise_pred_uncond, noise_pred_text = noise_pred[0], noise_pred[1] noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond) - + # compute the previous noisy sample x_t -> x_t-1 latents = self.scheduler.step( torch.from_numpy(noise_pred), @@ -622,13 +622,13 @@ of the variational auto encoder. 
image = self.vae_decoder(latents * (1 / 0.18215))[self._vae_d_output] image = self.postprocess_image(image, meta, output_type) img_buffer.extend(image) - + # scale and decode the image latents with vae image = self.vae_decoder(latents * (1 / 0.18215))[self._vae_d_output] - + image = self.postprocess_image(image, meta, output_type) return {"sample": image, "iterations": img_buffer} - + def _encode_prompt( self, prompt: Union[str, List[str]], @@ -638,7 +638,7 @@ of the variational auto encoder. ): """ Encodes the prompt into text encoder hidden states. - + Parameters: prompt (str or list(str)): prompt to be encoded num_images_per_prompt (int): number of images that should be generated per prompt @@ -648,7 +648,7 @@ of the variational auto encoder. text_embeddings (np.ndarray): text encoder hidden states """ batch_size = len(prompt) if isinstance(prompt, list) else 1 - + # tokenize input prompts text_inputs = self.tokenizer( prompt, @@ -658,15 +658,15 @@ of the variational auto encoder. return_tensors="np", ) text_input_ids = text_inputs.input_ids - + text_embeddings = self.text_encoder(text_input_ids)[self._text_encoder_output] - + # duplicate text embeddings for each generation per prompt if num_images_per_prompt != 1: bs_embed, seq_len, _ = text_embeddings.shape text_embeddings = np.tile(text_embeddings, (1, num_images_per_prompt, 1)) text_embeddings = np.reshape(text_embeddings, (bs_embed * num_images_per_prompt, seq_len, -1)) - + # get unconditional embeddings for classifier free guidance if do_classifier_free_guidance: uncond_tokens: List[str] @@ -684,25 +684,25 @@ of the variational auto encoder. 
truncation=True, return_tensors="np", ) - + uncond_embeddings = self.text_encoder(uncond_input.input_ids)[self._text_encoder_output] - + # duplicate unconditional embeddings for each generation per prompt, using mps friendly method seq_len = uncond_embeddings.shape[1] uncond_embeddings = np.tile(uncond_embeddings, (1, num_images_per_prompt, 1)) uncond_embeddings = np.reshape(uncond_embeddings, (batch_size * num_images_per_prompt, seq_len, -1)) - + # For classifier free guidance, we need to do two forward passes. # Here we concatenate the unconditional and text embeddings into a single batch # to avoid doing two forward passes text_embeddings = np.concatenate([uncond_embeddings, text_embeddings]) - + return text_embeddings - + def prepare_latents(self, image: PIL.Image.Image = None, latent_timestep: torch.Tensor = None): """ Function for getting initial latents for starting generation - + Parameters: image (PIL.Image.Image, *optional*, None): Input image for generation, if not provided randon noise will be used as starting point @@ -723,12 +723,12 @@ of the variational auto encoder. latents = self.vae_encoder(input_image)[self._vae_e_output] * 0.18215 latents = self.scheduler.add_noise(torch.from_numpy(latents), torch.from_numpy(noise), latent_timestep).numpy() return latents, meta - + def postprocess_image(self, image: np.ndarray, meta: Dict, output_type: str = "pil"): """ Postprocessing for decoded image. Takes generated image decoded by VAE decoder, unpad it to initila image size (if required), normalize and convert to [0, 255] pixels range. Optionally, convertes it from np.ndarray to PIL.Image format - + Parameters: image (np.ndarray): Generated image @@ -760,12 +760,12 @@ of the variational auto encoder. 
orig_height, orig_width = meta["src_height"], meta["src_width"] image = [cv2.resize(img, (orig_width, orig_width)) for img in image] return image - + def get_timesteps(self, num_inference_steps: int, strength: float): """ Helper function for getting scheduler timesteps for generation In case of image-to-image generation, it updates number of steps according to strength - + Parameters: num_inference_steps (int): number of inference steps for generation @@ -775,10 +775,10 @@ of the variational auto encoder. """ # get the original timestep using init_timestep init_timestep = min(int(num_inference_steps * strength), num_inference_steps) - + t_start = max(num_inference_steps - init_timestep, 0) timesteps = self.scheduler.timesteps[t_start:] - + return timesteps, num_inference_steps - t_start Configure Inference Pipeline @@ -797,16 +797,16 @@ Select device from dropdown list for running inference using OpenVINO. .. code:: ipython3 import requests - + r = requests.get( url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py", ) open("notebook_utils.py", "w").write(r.text) - + from notebook_utils import device_widget - + device = device_widget() - + device @@ -821,7 +821,7 @@ Select device from dropdown list for running inference using OpenVINO. .. code:: ipython3 core = ov.Core() - + text_enc = core.compile_model(TEXT_ENCODER_OV_PATH, device.value) Calibrate UNet for GPU inference @@ -839,28 +839,28 @@ operations to be executed in full precision. 
import pickle import requests import os - + # Fetch `model_upcast_utils` which helps to restore accuracy when inferred on GPU r = requests.get("https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/model_upcast_utils.py") with open("model_upcast_utils.py", "w") as f: f.write(r.text) - + # Fetch an example input for UNet model needed for upcasting calibration process r = requests.get("https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/pkl/unet_calibration_example_input.pkl") with open("unet_calibration_example_input.pkl", "wb") as f: f.write(r.content) - + from model_upcast_utils import ( is_model_partially_upcasted, partially_upcast_nodes_to_fp32, ) - + unet_model = core.read_model(UNET_OV_PATH) if "GPU" in core.available_devices and not is_model_partially_upcasted(unet_model): with open("unet_calibration_example_input.pkl", "rb") as f: example_input = pickle.load(f) unet_model = partially_upcast_nodes_to_fp32(unet_model, example_input, upcast_ratio=0.7, operation_types=["Convolution"]) - + ov.save_model(unet_model, UNET_OV_PATH.with_suffix("._tmp.xml")) del unet_model os.remove(UNET_OV_PATH) @@ -875,7 +875,7 @@ operations to be executed in full precision. .. code:: ipython3 ov_config = {"INFERENCE_PRECISION_HINT": "f32"} if device.value != "CPU" else {} - + vae_decoder = core.compile_model(VAE_DECODER_OV_PATH, device.value, ov_config) vae_encoder = core.compile_model(VAE_ENCODER_OV_PATH, device.value, ov_config) @@ -886,10 +886,10 @@ Let us define them and put all components together from transformers import CLIPTokenizer from diffusers.schedulers import LMSDiscreteScheduler - + lms = LMSDiscreteScheduler(beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear") tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14") - + ov_pipe = OVStableDiffusionPipeline( tokenizer=tokenizer, text_encoder=text_enc, @@ -923,7 +923,7 @@ Now, let’s see model in action .. 
parsed-literal:: Pipeline settings - Input text: RAW studio photo of An intricate forest minitown landscape trapped in a bottle, atmospheric oliva lighting, on the table, intricate details, dark shot, soothing tones, muted colors + Input text: RAW studio photo of An intricate forest minitown landscape trapped in a bottle, atmospheric oliva lighting, on the table, intricate details, dark shot, soothing tones, muted colors Seed: 431 Number of steps: 20 @@ -961,7 +961,7 @@ Now is show time! .. parsed-literal:: Input text: - RAW studio photo of An intricate forest minitown landscape trapped in a bottle, atmospheric oliva lighting, on the table, intricate details, dark shot, soothing tones, muted colors + RAW studio photo of An intricate forest minitown landscape trapped in a bottle, atmospheric oliva lighting, on the table, intricate details, dark shot, soothing tones, muted colors @@ -1014,10 +1014,11 @@ found in this .. code:: ipython3 from diffusers.utils import load_image - + default_image_url = "https://user-images.githubusercontent.com/29454499/260418860-69cc443a-9ee6-493c-a393-3a97af080be7.jpg" + image_name = "default.jpg" # read uploaded image - image = load_image(default_image_url) + image = load_image(image_name, default_image_url) print("Pipeline settings") print(f"Input positive prompt: \n\t{text_prompt_i2i}") print(f"Input negative prompt: \n\t{negative_prompt_i2i}") @@ -1039,9 +1040,9 @@ found in this .. parsed-literal:: Pipeline settings - Input positive prompt: + Input positive prompt: professional photo portrait of woman, highly detailed, hyper realistic, cinematic effects, soft lighting - Input negative prompt: + Input negative prompt: blurry, poor quality, low res, worst quality, cropped, ugly, poorly drawn face, without eyes, mutation, unreal, animate, poorly drawn eyes Seed: 82698152 Number of steps: 40 @@ -1090,13 +1091,13 @@ Interactive Demo .. 
code:: ipython3 import gradio as gr - - + + def generate_from_text(text, negative_text, seed, num_steps, _=gr.Progress(track_tqdm=True)): result = ov_pipe(text, negative_prompt=negative_text, num_inference_steps=num_steps, seed=seed) return result["sample"][0] - - + + def generate_from_image(img, text, negative_text, seed, num_steps, strength, _=gr.Progress(track_tqdm=True)): result = ov_pipe( text, @@ -1113,11 +1114,11 @@ Interactive Demo if not Path("gradio_helper.py").exists(): r = requests.get(url="https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/tiny-sd-image-generation/gradio_helper.py") open("gradio_helper.py", "w").write(r.text) - + from gradio_helper import make_demo - + demo = make_demo(text_to_text_fn=generate_from_text, image_to_image_fn=generate_from_image) - + try: demo.queue().launch(debug=False) except Exception: diff --git a/docs/notebooks/vehicle-detection-and-recognition-with-output.rst b/docs/notebooks/vehicle-detection-and-recognition-with-output.rst index 30204d6bd11ded..fab72fb0725d03 100644 --- a/docs/notebooks/vehicle-detection-and-recognition-with-output.rst +++ b/docs/notebooks/vehicle-detection-and-recognition-with-output.rst @@ -152,25 +152,25 @@ model is already downloaded, this step is skipped. .. parsed-literal:: - model/vehicle-detection-0200.xml: 0%| | 0.00/181k [00:00 - 100%|██████████| 4.68M/4.68M [00:00<00:00, 34.1MB/s] + 100%|██████████| 4.68M/4.68M [00:00<00:00, 17.2MB/s] .. parsed-literal:: @@ -215,13 +215,13 @@ next cell loads the model and the pre-trained weights. .. parsed-literal:: - Loading model weights from: 'model/u2net_lite/u2net_lite.pth' + /tmp/ipykernel_2254056/1036642300.py:7: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. 
It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. + net.load_state_dict(state_dict=torch.load(model_path, map_location="cpu")) .. parsed-literal:: - /tmp/ipykernel_3590581/1036642300.py:7: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. - net.load_state_dict(state_dict=torch.load(model_path, map_location="cpu")) + Loading model weights from: 'model/u2net_lite/u2net_lite.pth' @@ -247,7 +247,7 @@ OpenVINO IR format. Executing the following command may take a while. .. 
parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/vision-background-removal/model/u2net.py:23: UserWarning: `nn.functional.upsample` is deprecated. Use `nn.functional.interpolate` instead. + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/vision-background-removal/model/u2net.py:23: UserWarning: `nn.functional.upsample` is deprecated. Use `nn.functional.interpolate` instead. src = F.upsample(src,size=tar.shape[2:],mode='bilinear') @@ -273,12 +273,13 @@ repository `__ and multiplied by .. code:: ipython3 IMAGE_URI = "https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/image/coco_hollywood.jpg" + IMAGE_NAME = "coco_hollywood.jpg" input_mean = np.array([123.675, 116.28, 103.53]).reshape(1, 3, 1, 1) input_scale = np.array([58.395, 57.12, 57.375]).reshape(1, 3, 1, 1) image = cv2.cvtColor( - src=load_image(IMAGE_URI), + src=load_image(IMAGE_NAME, IMAGE_URI), code=cv2.COLOR_BGR2RGB, ) @@ -336,7 +337,7 @@ Load the OpenVINO IR model to OpenVINO Runtime and do inference. .. parsed-literal:: - Inference finished. Inference time: 0.107 seconds, FPS: 9.35. + Inference finished. Inference time: 0.109 seconds, FPS: 9.19. Visualize Results @@ -389,12 +390,13 @@ background pixels a value of 0. Replace the background image as follows: .. 
code:: ipython3 - BACKGROUND_FILE = "https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/image/wall.jpg" + BACKGROUND_IMAGE_URL = "https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/image/wall.jpg" + BACKGROUND_IMAGE_NAME = "wall.jpg" OUTPUT_DIR = "output" os.makedirs(name=OUTPUT_DIR, exist_ok=True) - background_image = cv2.cvtColor(src=load_image(BACKGROUND_FILE), code=cv2.COLOR_BGR2RGB) + background_image = cv2.cvtColor(src=load_image(BACKGROUND_IMAGE_NAME, BACKGROUND_IMAGE_URL), code=cv2.COLOR_BGR2RGB) background_image = cv2.resize(src=background_image, dsize=(image.shape[1], image.shape[0])) # Set all the foreground pixels from the result to 0 @@ -403,7 +405,7 @@ background pixels a value of 0. Replace the background image as follows: new_image = background_image + bg_removed_result # Save the generated image. - new_image_path = Path(f"{OUTPUT_DIR}/{Path(IMAGE_URI).stem}-{Path(BACKGROUND_FILE).stem}.jpg") + new_image_path = Path(f"{OUTPUT_DIR}/{Path(IMAGE_URI).stem}-{BACKGROUND_IMAGE_NAME}") cv2.imwrite(filename=str(new_image_path), img=cv2.cvtColor(new_image, cv2.COLOR_RGB2BGR)) # Display the original image and the image with the new background side by side diff --git a/docs/notebooks/vision-monodepth-with-output.rst b/docs/notebooks/vision-monodepth-with-output.rst index 736e0238989f8d..cf232c30fc9c73 100644 --- a/docs/notebooks/vision-monodepth-with-output.rst +++ b/docs/notebooks/vision-monodepth-with-output.rst @@ -146,11 +146,8 @@ format. 
ir_model_name_xml = "MiDaS_small.xml" ir_model_name_bin = "MiDaS_small.bin" - - if not (model_folder / ir_model_name_xml).exists(): - download_file(ir_model_url + ir_model_name_xml, filename=ir_model_name_xml, directory=model_folder) - if not (model_folder / ir_model_name_bin).exists(): - download_file(ir_model_url + ir_model_name_bin, filename=ir_model_name_bin, directory=model_folder) + download_file(ir_model_url + ir_model_name_xml, filename=ir_model_name_xml, directory=model_folder) + download_file(ir_model_url + ir_model_name_bin, filename=ir_model_name_bin, directory=model_folder) model_xml_path = model_folder / ir_model_name_xml @@ -158,13 +155,13 @@ format. .. parsed-literal:: - model/MiDaS_small.xml: 0%| | 0.00/268k [00:00 #0:0 (mpeg4 (native) -> h264 (libx264)) Stream #0:0 -> #0:1 (pcm_s16le (native) -> aac (native)) Press [q] to stop, [?] for help - [libx264 @ 0x556392e25840] -qscale is ignored, -crf is recommended. - [libx264 @ 0x556392e25840] using SAR=1/1 - [libx264 @ 0x556392e25840] using cpu capabilities: MMX2 SSE2Fast SSSE3 SSE4.2 AVX FMA3 BMI2 AVX2 AVX512 - [libx264 @ 0x556392e25840] profile High, level 3.1 - [libx264 @ 0x556392e25840] 264 - core 155 r2917 0a84d98 - H.264/MPEG-4 AVC codec - Copyleft 2003-2018 - http://www.videolan.org/x264.html - options: cabac=1 ref=3 deblock=1:0:0 analyse=0x3:0x113 me=hex subme=7 psy=1 psy_rd=1.00:0.00 mixed_ref=1 me_range=16 chroma_me=1 trellis=1 8x8dct=1 cqm=0 deadzone=21,11 fast_pskip=1 chroma_qp_offset=-2 threads=24 lookahead_threads=4 sliced_threads=0 nr=0 decimate=1 interlaced=0 bluray_compat=0 constrained_intra=0 bframes=3 b_pyramid=2 b_adapt=1 b_bias=0 direct=1 weightb=1 open_gop=0 weightp=2 keyint=250 keyint_min=25 scenecut=40 intra_refresh=0 rc_lookahead=40 rc=crf mbtree=1 crf=23.0 qcomp=0.60 qpmin=0 qpmax=69 qpstep=4 ip_ratio=1.40 aq=1:1.00 + [libx264 @ 0x55ec6513e840] -qscale is ignored, -crf is recommended. 
+ [libx264 @ 0x55ec6513e840] using SAR=1/1 + [libx264 @ 0x55ec6513e840] using cpu capabilities: MMX2 SSE2Fast SSSE3 SSE4.2 AVX FMA3 BMI2 AVX2 AVX512 + [libx264 @ 0x55ec6513e840] profile High, level 3.1 + [libx264 @ 0x55ec6513e840] 264 - core 155 r2917 0a84d98 - H.264/MPEG-4 AVC codec - Copyleft 2003-2018 - http://www.videolan.org/x264.html - options: cabac=1 ref=3 deblock=1:0:0 analyse=0x3:0x113 me=hex subme=7 psy=1 psy_rd=1.00:0.00 mixed_ref=1 me_range=16 chroma_me=1 trellis=1 8x8dct=1 cqm=0 deadzone=21,11 fast_pskip=1 chroma_qp_offset=-2 threads=24 lookahead_threads=4 sliced_threads=0 nr=0 decimate=1 interlaced=0 bluray_compat=0 constrained_intra=0 bframes=3 b_pyramid=2 b_adapt=1 b_bias=0 direct=1 weightb=1 open_gop=0 weightp=2 keyint=250 keyint_min=25 scenecut=40 intra_refresh=0 rc_lookahead=40 rc=crf mbtree=1 crf=23.0 qcomp=0.60 qpmin=0 qpmax=69 qpstep=4 ip_ratio=1.40 aq=1:1.00 Output #0, mp4, to 'results/result_voice.mp4': Metadata: encoder : Lavf58.29.100 @@ -349,27 +349,27 @@ python API and converted OpenVINO models. 
Stream #0:1: Audio: aac (LC) (mp4a / 0x6134706D), 44100 Hz, mono, fltp, 69 kb/s Metadata: encoder : Lavc58.54.100 aac - frame= 123 fps=0.0 q=-1.0 Lsize= 621kB time=00:00:05.06 bitrate=1005.8kbits/s speed=9.73x + frame= 123 fps=0.0 q=-1.0 Lsize= 621kB time=00:00:05.06 bitrate=1005.8kbits/s speed=10.6x video:573kB audio:43kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: 0.827166% - [libx264 @ 0x556392e25840] frame I:1 Avg QP:22.24 size: 31028 - [libx264 @ 0x556392e25840] frame P:75 Avg QP:22.01 size: 6954 - [libx264 @ 0x556392e25840] frame B:47 Avg QP:25.58 size: 718 - [libx264 @ 0x556392e25840] consecutive B-frames: 38.2% 27.6% 14.6% 19.5% - [libx264 @ 0x556392e25840] mb I I16..4: 14.0% 83.9% 2.1% - [libx264 @ 0x556392e25840] mb P I16..4: 1.3% 3.3% 0.1% P16..4: 37.8% 8.2% 6.4% 0.0% 0.0% skip:43.0% - [libx264 @ 0x556392e25840] mb B I16..4: 0.2% 0.7% 0.0% B16..8: 27.9% 0.4% 0.1% direct: 0.2% skip:70.6% L0:43.9% L1:54.2% BI: 1.9% - [libx264 @ 0x556392e25840] 8x8 transform intra:73.3% inter:77.1% - [libx264 @ 0x556392e25840] coded y,uvDC,uvAC intra: 56.9% 72.4% 8.1% inter: 11.4% 13.0% 0.2% - [libx264 @ 0x556392e25840] i16 v,h,dc,p: 20% 23% 9% 48% - [libx264 @ 0x556392e25840] i8 v,h,dc,ddl,ddr,vr,hd,vl,hu: 25% 23% 36% 3% 3% 2% 2% 3% 3% - [libx264 @ 0x556392e25840] i4 v,h,dc,ddl,ddr,vr,hd,vl,hu: 39% 14% 14% 4% 6% 7% 4% 9% 3% - [libx264 @ 0x556392e25840] i8c dc,h,v,p: 42% 25% 29% 4% - [libx264 @ 0x556392e25840] Weighted P-Frames: Y:0.0% UV:0.0% - [libx264 @ 0x556392e25840] ref P L0: 74.2% 10.4% 11.1% 4.3% - [libx264 @ 0x556392e25840] ref B L0: 86.1% 11.2% 2.8% - [libx264 @ 0x556392e25840] ref B L1: 98.3% 1.7% - [libx264 @ 0x556392e25840] kb/s:953.36 - [aac @ 0x556392e27140] Qavg: 121.673 + [libx264 @ 0x55ec6513e840] frame I:1 Avg QP:22.24 size: 31028 + [libx264 @ 0x55ec6513e840] frame P:75 Avg QP:22.01 size: 6954 + [libx264 @ 0x55ec6513e840] frame B:47 Avg QP:25.58 size: 718 + [libx264 @ 0x55ec6513e840] consecutive B-frames: 38.2% 27.6% 14.6% 19.5% + 
[libx264 @ 0x55ec6513e840] mb I I16..4: 14.0% 83.9% 2.1% + [libx264 @ 0x55ec6513e840] mb P I16..4: 1.3% 3.3% 0.1% P16..4: 37.8% 8.2% 6.4% 0.0% 0.0% skip:43.0% + [libx264 @ 0x55ec6513e840] mb B I16..4: 0.2% 0.7% 0.0% B16..8: 27.9% 0.4% 0.1% direct: 0.2% skip:70.6% L0:43.9% L1:54.2% BI: 1.9% + [libx264 @ 0x55ec6513e840] 8x8 transform intra:73.3% inter:77.1% + [libx264 @ 0x55ec6513e840] coded y,uvDC,uvAC intra: 56.9% 72.4% 8.1% inter: 11.4% 13.0% 0.2% + [libx264 @ 0x55ec6513e840] i16 v,h,dc,p: 20% 23% 9% 48% + [libx264 @ 0x55ec6513e840] i8 v,h,dc,ddl,ddr,vr,hd,vl,hu: 25% 23% 36% 3% 3% 2% 2% 3% 3% + [libx264 @ 0x55ec6513e840] i4 v,h,dc,ddl,ddr,vr,hd,vl,hu: 39% 14% 14% 4% 6% 7% 4% 9% 3% + [libx264 @ 0x55ec6513e840] i8c dc,h,v,p: 42% 25% 29% 4% + [libx264 @ 0x55ec6513e840] Weighted P-Frames: Y:0.0% UV:0.0% + [libx264 @ 0x55ec6513e840] ref P L0: 74.2% 10.4% 11.1% 4.3% + [libx264 @ 0x55ec6513e840] ref B L0: 86.1% 11.2% 2.8% + [libx264 @ 0x55ec6513e840] ref B L1: 98.3% 1.7% + [libx264 @ 0x55ec6513e840] kb/s:953.36 + [aac @ 0x55ec65140140] Qavg: 121.673 diff --git a/docs/notebooks/whisper-subtitles-generation-with-output.rst b/docs/notebooks/whisper-subtitles-generation-with-output.rst index 5a3c677fc27daa..a2764b4622bf67 100644 --- a/docs/notebooks/whisper-subtitles-generation-with-output.rst +++ b/docs/notebooks/whisper-subtitles-generation-with-output.rst @@ -79,15 +79,27 @@ Install dependencies. .. 
code:: ipython3 import platform + import importlib.metadata + import importlib.util %pip install -q "nncf>=2.14.0" %pip install -q -U "openvino>=2024.5.0" "openvino-tokenizers>=2024.5.0" "openvino-genai>=2024.5.0" %pip install -q "python-ffmpeg<=1.0.16" "ffmpeg" "moviepy" "transformers>=4.45" "git+https://github.com/huggingface/optimum-intel.git" "torch>=2.1" --extra-index-url https://download.pytorch.org/whl/cpu - %pip install -q -U "yt_dlp>=2024.8.6" soundfile librosa jiwer + %pip install -q -U "yt_dlp>=2024.8.6" soundfile librosa jiwer packaging %pip install -q "gradio>=4.19" "typing_extensions>=4.9" if platform.system() == "Darwin": %pip install -q "numpy<2.0" + + + from packaging import version + + if ( + importlib.util.find_spec("tensorflow") is not None + and version.parse(importlib.metadata.version("tensorflow")) < version.parse("2.18.0") + and version.parse(importlib.metadata.version("numpy")) >= version.parse("2.0.0") + ): + %pip uninstall -q -y tensorflow .. code:: ipython3 @@ -312,7 +324,10 @@ Select the task for the model: .. code:: ipython3 - from moviepy.editor import VideoFileClip + try: + from moviepy import VideoFileClip + except ImportError: + from moviepy.editor import VideoFileClip from transformers.pipelines.audio_utils import ffmpeg_read diff --git a/docs/notebooks/yolov10-optimization-with-output.rst b/docs/notebooks/yolov10-optimization-with-output.rst index dd7106d7a6bb56..99674518b41948 100644 --- a/docs/notebooks/yolov10-optimization-with-output.rst +++ b/docs/notebooks/yolov10-optimization-with-output.rst @@ -107,17 +107,6 @@ Prerequisites %pip install -q "git+https://github.com/THU-MIG/yolov10.git" --extra-index-url https://download.pytorch.org/whl/cpu %pip install -q "torch>=2.1" "torchvision>=0.16" tqdm opencv-python "gradio>=4.19" --extra-index-url https://download.pytorch.org/whl/cpu - -.. parsed-literal:: - - WARNING: Skipping openvino as it is not installed. - WARNING: Skipping openvino-dev as it is not installed. 
- Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. - Note: you may need to restart the kernel to use updated packages. - - .. code:: ipython3 from pathlib import Path diff --git a/docs/notebooks/yolov11-instance-segmentation-with-output.rst b/docs/notebooks/yolov11-instance-segmentation-with-output.rst index 6c71d614e519db..0c346bf08cf3a6 100644 --- a/docs/notebooks/yolov11-instance-segmentation-with-output.rst +++ b/docs/notebooks/yolov11-instance-segmentation-with-output.rst @@ -128,18 +128,25 @@ Import required utility functions. The lower cell will download the # Download a test sample IMAGE_PATH = Path("./data/coco_bike.jpg") - if not IMAGE_PATH.exists(): - download_file( - url="https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/image/coco_bike.jpg", - filename=IMAGE_PATH.name, - directory=IMAGE_PATH.parent, - ) + download_file( + url="https://storage.openvinotoolkit.org/repositories/openvino_notebooks/data/data/image/coco_bike.jpg", + filename=IMAGE_PATH.name, + directory=IMAGE_PATH.parent, + ) + + + +.. parsed-literal:: + + coco_bike.jpg: 0%| | 0.00/182k [00:00=2023.3.0" "nncf>=2.8.1" "opencv-python" "matplotlib>=3.4" "seaborn" "pandas" "scikit-learn" "torch" "torchvision" "tqdm" --extra-index-url https://download.pytorch.org/whl/cpu @@ -97,7 +98,7 @@ Prerequisites .. parsed-literal:: - /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/823/archive/.workspace/scm/ov-notebook/notebooks/yolov9-optimization/yolov9 + /opt/home/k8sworker/ci-ai/cibuilds/jobs/ov-notebook/jobs/OVNotebookOps/builds/835/archive/.workspace/scm/ov-notebook/notebooks/yolov9-optimization/yolov9 Get PyTorch model @@ -128,14 +129,14 @@ applicable for other models from YOLO V9 family. .. 
parsed-literal:: - model/gelan-c.pt: 0%| | 0.00/49.1M [00:00 Date: Tue, 10 Dec 2024 15:43:18 +0200 Subject: [PATCH 36/43] [NPU] Create compiler adapter factory (#27962) ### Details: Clean-up code - create compiler adapter factory class to create the proper compiler adapter. ### Tickets: - *CVS-158848* Signed-off-by: Bogdan Pereanu --- .../src/backend/include/zero_backend.hpp | 4 +- .../src/backend/src/zero_backend.cpp | 8 +-- .../intel_npu/common/icompiler_adapter.hpp | 21 ++++++++ .../common/include/intel_npu/common/npu.hpp | 13 ++--- src/plugins/intel_npu/src/common/src/npu.cpp | 4 ++ .../include/compiler_adapter_factory.hpp | 41 ++++++++++++++ .../include/driver_compiler_adapter.hpp | 5 +- .../include/plugin_compiler_adapter.hpp | 2 +- .../src/driver_compiler_adapter.cpp | 3 -- .../src/plugin_compiler_adapter.cpp | 3 -- .../intel_npu/src/plugin/include/plugin.hpp | 2 - .../intel_npu/src/plugin/src/plugin.cpp | 54 ++++--------------- 12 files changed, 88 insertions(+), 72 deletions(-) create mode 100644 src/plugins/intel_npu/src/common/include/intel_npu/common/icompiler_adapter.hpp create mode 100644 src/plugins/intel_npu/src/compiler_adapter/include/compiler_adapter_factory.hpp diff --git a/src/plugins/intel_npu/src/backend/include/zero_backend.hpp b/src/plugins/intel_npu/src/backend/include/zero_backend.hpp index 038c7c1d2d9bf9..358bdc93ce7a71 100644 --- a/src/plugins/intel_npu/src/backend/include/zero_backend.hpp +++ b/src/plugins/intel_npu/src/backend/include/zero_backend.hpp @@ -29,12 +29,12 @@ class ZeroEngineBackend final : public IEngineBackend { bool isCommandQueueExtSupported() const override; bool isLUIDExtSupported() const override; - const std::shared_ptr& getInitStruct() const; - void* getContext() const override; void updateInfo(const Config& config) override; + const std::shared_ptr getInitStructs() const override; + private: std::shared_ptr _initStruct; diff --git a/src/plugins/intel_npu/src/backend/src/zero_backend.cpp 
b/src/plugins/intel_npu/src/backend/src/zero_backend.cpp index 55aaad102e8b8f..afcf77d55616f0 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_backend.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_backend.cpp @@ -72,10 +72,6 @@ void* ZeroEngineBackend::getContext() const { return _initStruct->getContext(); } -const std::shared_ptr& ZeroEngineBackend::getInitStruct() const { - return _initStruct; -} - void ZeroEngineBackend::updateInfo(const Config& config) { _logger.setLevel(config.get()); if (_devices.size() > 0) { @@ -85,4 +81,8 @@ void ZeroEngineBackend::updateInfo(const Config& config) { } } +const std::shared_ptr ZeroEngineBackend::getInitStructs() const { + return _initStruct; +} + } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/icompiler_adapter.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/icompiler_adapter.hpp new file mode 100644 index 00000000000000..6e585299d68a1d --- /dev/null +++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/icompiler_adapter.hpp @@ -0,0 +1,21 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "intel_npu/common/igraph.hpp" + +namespace intel_npu { + +class ICompilerAdapter { +public: + virtual std::shared_ptr compile(const std::shared_ptr& model, + const Config& config) const = 0; + virtual std::shared_ptr parse(std::vector network, const Config& config) const = 0; + virtual ov::SupportedOpsMap query(const std::shared_ptr& model, const Config& config) const = 0; + + virtual ~ICompilerAdapter() = default; +}; + +} // namespace intel_npu diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/npu.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/npu.hpp index b34f2deee6c61e..9e4c59852151ce 100644 --- a/src/plugins/intel_npu/src/common/include/intel_npu/common/npu.hpp +++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/npu.hpp @@ -10,6 
+10,7 @@ #include "intel_npu/common/igraph.hpp" #include "intel_npu/common/sync_infer_request.hpp" #include "intel_npu/config/config.hpp" +#include "intel_npu/utils/zero/zero_init.hpp" #include "openvino/runtime/intel_npu/remote_properties.hpp" #include "openvino/runtime/iremote_context.hpp" #include "openvino/runtime/properties.hpp" @@ -47,6 +48,8 @@ class IEngineBackend : public std::enable_shared_from_this { virtual void* getContext() const; /** @brief Update backend and device info */ virtual void updateInfo(const Config& config) = 0; + /** @brief Get LevelZero structures */ + virtual const std::shared_ptr getInitStructs() const; protected: virtual ~IEngineBackend() = default; @@ -54,16 +57,6 @@ class IEngineBackend : public std::enable_shared_from_this { //------------------------------------------------------------------------------ -class ICompilerAdapter { -public: - virtual std::shared_ptr compile(const std::shared_ptr& model, - const Config& config) const = 0; - virtual std::shared_ptr parse(std::vector network, const Config& config) const = 0; - virtual ov::SupportedOpsMap query(const std::shared_ptr& model, const Config& config) const = 0; - - virtual ~ICompilerAdapter() = default; -}; - //------------------------------------------------------------------------------ class IDevice : public std::enable_shared_from_this { diff --git a/src/plugins/intel_npu/src/common/src/npu.cpp b/src/plugins/intel_npu/src/common/src/npu.cpp index 0969b200ea09a5..afcfa5b0bba271 100644 --- a/src/plugins/intel_npu/src/common/src/npu.cpp +++ b/src/plugins/intel_npu/src/common/src/npu.cpp @@ -43,6 +43,10 @@ IDevice::Uuid IDevice::getUuid() const { OPENVINO_THROW("Get UUID not supported"); } +const std::shared_ptr IEngineBackend::getInitStructs() const { + return nullptr; +} + ov::device::LUID IDevice::getLUID() const { OPENVINO_THROW("Get LUID not supported"); } diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/compiler_adapter_factory.hpp 
b/src/plugins/intel_npu/src/compiler_adapter/include/compiler_adapter_factory.hpp new file mode 100644 index 00000000000000..31c6ca348a234f --- /dev/null +++ b/src/plugins/intel_npu/src/compiler_adapter/include/compiler_adapter_factory.hpp @@ -0,0 +1,41 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "driver_compiler_adapter.hpp" +#include "intel_npu/common/icompiler_adapter.hpp" +#include "intel_npu/config/compiler.hpp" +#include "intel_npu/config/config.hpp" +#include "plugin_compiler_adapter.hpp" + +namespace intel_npu { + +class CompilerAdapterFactory final { +public: + const std::unique_ptr getCompiler(const ov::SoPtr& engineBackend, + const Config& config) const { + auto compilerType = config.get(); + switch (compilerType) { + case ov::intel_npu::CompilerType::MLIR: { + if (engineBackend->getName() != "LEVEL0") { + return std::make_unique(nullptr); + } + + return std::make_unique(engineBackend->getInitStructs()); + } + case ov::intel_npu::CompilerType::DRIVER: { + if (engineBackend->getName() != "LEVEL0") { + OPENVINO_THROW("NPU Compiler Adapter must be used with LEVEL0 backend"); + } + + return std::make_unique(engineBackend->getInitStructs()); + } + default: + OPENVINO_THROW("Invalid NPU_COMPILER_TYPE"); + } + } +}; + +} // namespace intel_npu diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp index 82ababf21c147a..3fb0ea8937da60 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp @@ -6,13 +6,10 @@ #pragma once -#include -#include - #include #include -#include "intel_npu/common/npu.hpp" +#include "intel_npu/common/icompiler_adapter.hpp" #include "intel_npu/config/config.hpp" #include "intel_npu/utils/logger/logger.hpp" #include 
"intel_npu/utils/zero/zero_init.hpp" diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp index 8d2616884e7d5f..96c71d9f80c668 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp @@ -6,7 +6,7 @@ #pragma once -#include "intel_npu/common/npu.hpp" +#include "intel_npu/common/icompiler_adapter.hpp" #include "intel_npu/icompiler.hpp" #include "intel_npu/utils/logger/logger.hpp" #include "intel_npu/utils/zero/zero_init.hpp" diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp index 9d634656db109a..b17148c6411936 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp @@ -4,8 +4,6 @@ #include "driver_compiler_adapter.hpp" -#include - #include #include @@ -21,7 +19,6 @@ #include "intel_npu/utils/zero/zero_utils.hpp" #include "ir_serializer.hpp" #include "openvino/core/model.hpp" -#include "ze_graph_ext_wrappers.hpp" namespace { diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp index 06d71fd1126c17..6d67f544db2c17 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp @@ -4,8 +4,6 @@ #include "plugin_compiler_adapter.hpp" -#include - #include #include @@ -19,7 +17,6 @@ #include "openvino/util/file_util.hpp" #include "openvino/util/shared_object.hpp" #include "plugin_graph.hpp" -#include "ze_graph_ext_wrappers.hpp" namespace { std::shared_ptr loadLibrary(const std::string& libpath) { diff 
--git a/src/plugins/intel_npu/src/plugin/include/plugin.hpp b/src/plugins/intel_npu/src/plugin/include/plugin.hpp index c3c2daa525aaa1..6b1b46872788e3 100644 --- a/src/plugins/intel_npu/src/plugin/include/plugin.hpp +++ b/src/plugins/intel_npu/src/plugin/include/plugin.hpp @@ -52,8 +52,6 @@ class Plugin : public ov::IPlugin { const ov::AnyMap& properties) const override; private: - std::unique_ptr getCompiler(const Config& config) const; - std::shared_ptr _backends; std::map _config; diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index da425d5d01a5c3..fa641dfdcd9641 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -7,9 +7,9 @@ #include #include "compiled_model.hpp" -#include "npuw/compiled_model.hpp" -#include "driver_compiler_adapter.hpp" +#include "compiler_adapter_factory.hpp" #include "intel_npu/common/device_helpers.hpp" +#include "intel_npu/common/icompiler_adapter.hpp" #include "intel_npu/common/igraph.hpp" #include "intel_npu/common/itt.hpp" #include "intel_npu/config/common.hpp" @@ -17,13 +17,12 @@ #include "intel_npu/config/npuw.hpp" #include "intel_npu/config/runtime.hpp" #include "intel_npu/utils/zero/zero_init.hpp" +#include "npuw/compiled_model.hpp" #include "openvino/op/constant.hpp" #include "openvino/op/parameter.hpp" #include "openvino/runtime/intel_npu/properties.hpp" #include "openvino/runtime/properties.hpp" -#include "plugin_compiler_adapter.hpp" #include "remote_context.hpp" -#include "zero_backend.hpp" using namespace intel_npu; @@ -699,8 +698,9 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< } } - auto original_model = model->clone(); - auto compiler = getCompiler(localConfig); + auto originalModel = model->clone(); + CompilerAdapterFactory compilerAdapterFactory; + auto compiler = compilerAdapterFactory.getCompiler(_backends->getIEngineBackend(), localConfig); 
OV_ITT_TASK_NEXT(PLUGIN_COMPILE_MODEL, "compile"); std::shared_ptr graph; @@ -716,7 +716,7 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< std::shared_ptr compiledModel; try { - compiledModel = std::make_shared(original_model, shared_from_this(), device, graph, localConfig); + compiledModel = std::make_shared(originalModel, shared_from_this(), device, graph, localConfig); } catch (const std::exception& ex) { OPENVINO_THROW(ex.what()); } catch (...) { @@ -772,7 +772,8 @@ std::shared_ptr Plugin::import_model(std::istream& stream, c std::shared_ptr compiledModel; try { - auto compiler = getCompiler(localConfig); + CompilerAdapterFactory compilerAdapterFactory; + auto compiler = compilerAdapterFactory.getCompiler(_backends->getIEngineBackend(), localConfig); auto graphSize = getFileSize(stream); @@ -821,7 +822,8 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr& const auto platform = _backends->getCompilationPlatform(localConfig.get(), localConfig.get()); localConfig.update({{ov::intel_npu::platform.name(), platform}}); - auto compiler = getCompiler(localConfig); + CompilerAdapterFactory compilerAdapterFactory; + auto compiler = compilerAdapterFactory.getCompiler(_backends->getIEngineBackend(), localConfig); ov::SupportedOpsMap supportedOpsMap; try { supportedOpsMap = compiler->query(model, localConfig); @@ -834,40 +836,6 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr& return supportedOpsMap; } -std::unique_ptr Plugin::getCompiler(const Config& config) const { - auto compilerType = config.get(); - _logger.debug("performing createCompiler"); - - switch (compilerType) { - case ov::intel_npu::CompilerType::MLIR: { - if (_backends->getBackendName() != "LEVEL0") { - return std::make_unique(nullptr); - } - - auto zeroBackend = std::dynamic_pointer_cast(_backends->getIEngineBackend()._ptr); - if (zeroBackend == nullptr) { - return std::make_unique(nullptr); - } - - return std::make_unique(zeroBackend->getInitStruct()); - } - 
case ov::intel_npu::CompilerType::DRIVER: { - if (_backends->getBackendName() != "LEVEL0") { - OPENVINO_THROW("NPU Compiler Adapter must be used with LEVEL0 backend"); - } - - auto zeroBackend = std::dynamic_pointer_cast(_backends->getIEngineBackend()._ptr); - if (!zeroBackend) { - OPENVINO_THROW("Failed to cast zeroBackend, zeroBackend is a nullptr"); - } - - return std::make_unique(zeroBackend->getInitStruct()); - } - default: - OPENVINO_THROW("Invalid NPU_COMPILER_TYPE"); - } -} - std::atomic Plugin::_compiledModelLoadCounter{1}; static const ov::Version version = {CI_BUILD_NUMBER, NPU_PLUGIN_LIB_NAME}; From 1be5963bc99e408904369cba4611a10fe2572dc6 Mon Sep 17 00:00:00 2001 From: Alicja Miloszewska Date: Tue, 10 Dec 2024 15:48:14 +0100 Subject: [PATCH 37/43] [Py OV] Add op to openvino module (#27902) ### Details: - copy `openvino/runtime/op/` to `openvino`. `op/` is not initialized in `openvino/runtime/__init__.py`. Update inits in openvino/runtime/op/ - update rule `src/openvino/runtime/*/ops.py: VNE001,VNE003` in _setup.cfg_ - For opset 1 - 16: - move `runtime/opsetx/ops.py`. - update opsetx inits to import from new ops.py location. - add `runtime/opsetx/ops/__init__.py`. 
- initialize opsets in openvino init - add imports for opset14-16 to `openvino/runtime/__init__.py` - adds possibility to import opsets directly: ```python import openvino.opset13 as ops param = ops.parameter([10]) ``` or ```python import openvino as ov param = ov.opset13.parameter([10]) ``` ### Tickets: - CVS-129458 --------- Signed-off-by: Alicja Miloszewska --- src/bindings/python/setup.cfg | 2 +- src/bindings/python/src/openvino/__init__.py | 18 + .../python/src/openvino/op/__init__.py | 19 + .../python/src/openvino/op/util/__init__.py | 22 + .../python/src/openvino/opset1/__init__.py | 111 +++++ .../src/openvino/{runtime => }/opset1/ops.py | 2 +- .../python/src/openvino/opset10/__init__.py | 179 ++++++++ .../src/openvino/{runtime => }/opset10/ops.py | 0 .../python/src/openvino/opset11/__init__.py | 179 ++++++++ .../src/openvino/{runtime => }/opset11/ops.py | 0 .../python/src/openvino/opset12/__init__.py | 180 ++++++++ .../src/openvino/{runtime => }/opset12/ops.py | 0 .../python/src/openvino/opset13/__init__.py | 188 +++++++++ .../src/openvino/{runtime => }/opset13/ops.py | 4 +- .../python/src/openvino/opset14/__init__.py | 190 +++++++++ .../src/openvino/{runtime => }/opset14/ops.py | 0 .../python/src/openvino/opset15/__init__.py | 204 +++++++++ .../src/openvino/{runtime => }/opset15/ops.py | 4 +- .../python/src/openvino/opset16/__init__.py | 9 + .../src/openvino/{runtime => }/opset16/ops.py | 0 .../python/src/openvino/opset2/__init__.py | 117 ++++++ .../src/openvino/{runtime => }/opset2/ops.py | 2 +- .../python/src/openvino/opset3/__init__.py | 133 ++++++ .../src/openvino/{runtime => }/opset3/ops.py | 2 +- .../python/src/openvino/opset4/__init__.py | 144 +++++++ .../src/openvino/{runtime => }/opset4/ops.py | 2 +- .../python/src/openvino/opset5/__init__.py | 152 +++++++ .../src/openvino/{runtime => }/opset5/ops.py | 2 +- .../python/src/openvino/opset6/__init__.py | 154 +++++++ .../src/openvino/{runtime => }/opset6/ops.py | 6 +- 
.../python/src/openvino/opset7/__init__.py | 158 +++++++ .../src/openvino/{runtime => }/opset7/ops.py | 2 +- .../python/src/openvino/opset8/__init__.py | 169 ++++++++ .../src/openvino/{runtime => }/opset8/ops.py | 2 +- .../python/src/openvino/opset9/__init__.py | 175 ++++++++ .../src/openvino/{runtime => }/opset9/ops.py | 0 .../python/src/openvino/runtime/__init__.py | 3 + .../src/openvino/runtime/op/__init__.py | 18 +- .../src/openvino/runtime/op/util/__init__.py | 26 +- .../src/openvino/runtime/opset1/__init__.py | 214 +++++----- .../openvino/runtime/opset1/ops/__init__.py | 109 +++++ .../src/openvino/runtime/opset10/__init__.py | 350 ++++++++-------- .../openvino/runtime/opset10/ops/__init__.py | 9 + .../src/openvino/runtime/opset11/__init__.py | 350 ++++++++-------- .../openvino/runtime/opset11/ops/__init__.py | 6 + .../src/openvino/runtime/opset12/__init__.py | 352 ++++++++-------- .../openvino/runtime/opset12/ops/__init__.py | 7 + .../src/openvino/runtime/opset13/__init__.py | 368 ++++++++-------- .../openvino/runtime/opset13/ops/__init__.py | 15 + .../src/openvino/runtime/opset14/__init__.py | 372 ++++++++--------- .../openvino/runtime/opset14/ops/__init__.py | 8 + .../src/openvino/runtime/opset15/__init__.py | 394 +++++++++--------- .../openvino/runtime/opset15/ops/__init__.py | 17 + .../src/openvino/runtime/opset16/__init__.py | 2 +- .../openvino/runtime/opset16/ops/__init__.py | 5 + .../src/openvino/runtime/opset2/__init__.py | 226 +++++----- .../openvino/runtime/opset2/ops/__init__.py | 10 + .../src/openvino/runtime/opset3/__init__.py | 258 ++++++------ .../openvino/runtime/opset3/ops/__init__.py | 23 + .../src/openvino/runtime/opset4/__init__.py | 280 ++++++------- .../openvino/runtime/opset4/ops/__init__.py | 19 + .../src/openvino/runtime/opset5/__init__.py | 296 ++++++------- .../openvino/runtime/opset5/ops/__init__.py | 14 + .../src/openvino/runtime/opset6/__init__.py | 300 ++++++------- .../openvino/runtime/opset6/ops/__init__.py | 8 + 
.../src/openvino/runtime/opset7/__init__.py | 308 +++++++------- .../openvino/runtime/opset7/ops/__init__.py | 10 + .../src/openvino/runtime/opset8/__init__.py | 330 +++++++-------- .../openvino/runtime/opset8/ops/__init__.py | 22 + .../src/openvino/runtime/opset9/__init__.py | 342 +++++++-------- .../openvino/runtime/opset9/ops/__init__.py | 13 + tools/benchmark_tool/openvino/__init__.py | 18 + tools/ovc/openvino/__init__.py | 18 + 73 files changed, 5243 insertions(+), 2408 deletions(-) create mode 100644 src/bindings/python/src/openvino/op/__init__.py create mode 100644 src/bindings/python/src/openvino/op/util/__init__.py create mode 100644 src/bindings/python/src/openvino/opset1/__init__.py rename src/bindings/python/src/openvino/{runtime => }/opset1/ops.py (99%) create mode 100644 src/bindings/python/src/openvino/opset10/__init__.py rename src/bindings/python/src/openvino/{runtime => }/opset10/ops.py (100%) create mode 100644 src/bindings/python/src/openvino/opset11/__init__.py rename src/bindings/python/src/openvino/{runtime => }/opset11/ops.py (100%) create mode 100644 src/bindings/python/src/openvino/opset12/__init__.py rename src/bindings/python/src/openvino/{runtime => }/opset12/ops.py (100%) create mode 100644 src/bindings/python/src/openvino/opset13/__init__.py rename src/bindings/python/src/openvino/{runtime => }/opset13/ops.py (99%) create mode 100644 src/bindings/python/src/openvino/opset14/__init__.py rename src/bindings/python/src/openvino/{runtime => }/opset14/ops.py (100%) create mode 100644 src/bindings/python/src/openvino/opset15/__init__.py rename src/bindings/python/src/openvino/{runtime => }/opset15/ops.py (99%) create mode 100644 src/bindings/python/src/openvino/opset16/__init__.py rename src/bindings/python/src/openvino/{runtime => }/opset16/ops.py (100%) create mode 100644 src/bindings/python/src/openvino/opset2/__init__.py rename src/bindings/python/src/openvino/{runtime => }/opset2/ops.py (99%) create mode 100644 
src/bindings/python/src/openvino/opset3/__init__.py rename src/bindings/python/src/openvino/{runtime => }/opset3/ops.py (99%) create mode 100644 src/bindings/python/src/openvino/opset4/__init__.py rename src/bindings/python/src/openvino/{runtime => }/opset4/ops.py (99%) create mode 100644 src/bindings/python/src/openvino/opset5/__init__.py rename src/bindings/python/src/openvino/{runtime => }/opset5/ops.py (99%) create mode 100644 src/bindings/python/src/openvino/opset6/__init__.py rename src/bindings/python/src/openvino/{runtime => }/opset6/ops.py (97%) create mode 100644 src/bindings/python/src/openvino/opset7/__init__.py rename src/bindings/python/src/openvino/{runtime => }/opset7/ops.py (98%) create mode 100644 src/bindings/python/src/openvino/opset8/__init__.py rename src/bindings/python/src/openvino/{runtime => }/opset8/ops.py (99%) create mode 100644 src/bindings/python/src/openvino/opset9/__init__.py rename src/bindings/python/src/openvino/{runtime => }/opset9/ops.py (100%) create mode 100644 src/bindings/python/src/openvino/runtime/opset1/ops/__init__.py create mode 100644 src/bindings/python/src/openvino/runtime/opset10/ops/__init__.py create mode 100644 src/bindings/python/src/openvino/runtime/opset11/ops/__init__.py create mode 100644 src/bindings/python/src/openvino/runtime/opset12/ops/__init__.py create mode 100644 src/bindings/python/src/openvino/runtime/opset13/ops/__init__.py create mode 100644 src/bindings/python/src/openvino/runtime/opset14/ops/__init__.py create mode 100644 src/bindings/python/src/openvino/runtime/opset15/ops/__init__.py create mode 100644 src/bindings/python/src/openvino/runtime/opset16/ops/__init__.py create mode 100644 src/bindings/python/src/openvino/runtime/opset2/ops/__init__.py create mode 100644 src/bindings/python/src/openvino/runtime/opset3/ops/__init__.py create mode 100644 src/bindings/python/src/openvino/runtime/opset4/ops/__init__.py create mode 100644 src/bindings/python/src/openvino/runtime/opset5/ops/__init__.py 
create mode 100644 src/bindings/python/src/openvino/runtime/opset6/ops/__init__.py create mode 100644 src/bindings/python/src/openvino/runtime/opset7/ops/__init__.py create mode 100644 src/bindings/python/src/openvino/runtime/opset8/ops/__init__.py create mode 100644 src/bindings/python/src/openvino/runtime/opset9/ops/__init__.py diff --git a/src/bindings/python/setup.cfg b/src/bindings/python/setup.cfg index 7fc407ad56e7a1..89fabcb659c108 100644 --- a/src/bindings/python/setup.cfg +++ b/src/bindings/python/setup.cfg @@ -66,7 +66,7 @@ show_source = True docstring-convention = google enable-extensions = G per-file-ignores = - src/openvino/runtime/*/ops.py: VNE001,VNE003 + src/openvino/*/ops.py: VNE001,VNE003 src/openvino/preprocess/torchvision/*: N801, VNE001 *__init__.py: F401 diff --git a/src/bindings/python/src/openvino/__init__.py b/src/bindings/python/src/openvino/__init__.py index 57f03f00c2eebf..e4d1a247520332 100644 --- a/src/bindings/python/src/openvino/__init__.py +++ b/src/bindings/python/src/openvino/__init__.py @@ -56,6 +56,24 @@ from openvino._pyopenvino import RemoteTensor from openvino._pyopenvino import Op +# Import opsets +from openvino import opset1 +from openvino import opset2 +from openvino import opset3 +from openvino import opset4 +from openvino import opset5 +from openvino import opset6 +from openvino import opset7 +from openvino import opset8 +from openvino import opset9 +from openvino import opset10 +from openvino import opset11 +from openvino import opset12 +from openvino import opset13 +from openvino import opset14 +from openvino import opset15 +from openvino import opset16 + # libva related: from openvino._pyopenvino import VAContext from openvino._pyopenvino import VASurfaceTensor diff --git a/src/bindings/python/src/openvino/op/__init__.py b/src/bindings/python/src/openvino/op/__init__.py new file mode 100644 index 00000000000000..ee324dd76380f7 --- /dev/null +++ b/src/bindings/python/src/openvino/op/__init__.py @@ -0,0 +1,19 @@ +# 
Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +""" +Package: openvino.op +Low level wrappers for the c++ api in ov::op. +""" + +# flake8: noqa + +from openvino._pyopenvino.op import Constant +from openvino._pyopenvino.op import assign +from openvino._pyopenvino.op import _PagedAttentionExtension +from openvino._pyopenvino.op import Parameter +from openvino._pyopenvino.op import if_op +from openvino._pyopenvino.op import loop +from openvino._pyopenvino.op import tensor_iterator +from openvino._pyopenvino.op import read_value +from openvino._pyopenvino.op import Result diff --git a/src/bindings/python/src/openvino/op/util/__init__.py b/src/bindings/python/src/openvino/op/util/__init__.py new file mode 100644 index 00000000000000..0c946b115f451e --- /dev/null +++ b/src/bindings/python/src/openvino/op/util/__init__.py @@ -0,0 +1,22 @@ +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +""" +Package: openvino.op.util +Low level wrappers for the c++ api in ov::op::util. 
+""" +# flake8: noqa + +from openvino._pyopenvino.op.util import UnaryElementwiseArithmetic +from openvino._pyopenvino.op.util import BinaryElementwiseComparison +from openvino._pyopenvino.op.util import BinaryElementwiseArithmetic +from openvino._pyopenvino.op.util import BinaryElementwiseLogical +from openvino._pyopenvino.op.util import ArithmeticReduction +from openvino._pyopenvino.op.util import IndexReduction +from openvino._pyopenvino.op.util import VariableInfo +from openvino._pyopenvino.op.util import Variable +from openvino._pyopenvino.op.util import MergedInputDescription +from openvino._pyopenvino.op.util import InvariantInputDescription +from openvino._pyopenvino.op.util import SliceInputDescription +from openvino._pyopenvino.op.util import ConcatOutputDescription +from openvino._pyopenvino.op.util import BodyOutputDescription diff --git a/src/bindings/python/src/openvino/opset1/__init__.py b/src/bindings/python/src/openvino/opset1/__init__.py new file mode 100644 index 00000000000000..35ceec6b0d9892 --- /dev/null +++ b/src/bindings/python/src/openvino/opset1/__init__.py @@ -0,0 +1,111 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from openvino.opset1.ops import absolute +from openvino.opset1.ops import absolute as abs +from openvino.opset1.ops import acos +from openvino.opset1.ops import add +from openvino.opset1.ops import asin +from openvino.opset1.ops import atan +from openvino.opset1.ops import avg_pool +from openvino.opset1.ops import batch_norm_inference +from openvino.opset1.ops import binary_convolution +from openvino.opset1.ops import broadcast +from openvino.opset1.ops import ceiling +from openvino.opset1.ops import ceiling as ceil +from openvino.opset1.ops import clamp +from openvino.opset1.ops import concat +from openvino.opset1.ops import constant +from openvino.opset1.ops import convert +from openvino.opset1.ops import convert_like +from openvino.opset1.ops import 
convolution +from openvino.opset1.ops import convolution_backprop_data +from openvino.opset1.ops import cos +from openvino.opset1.ops import cosh +from openvino.opset1.ops import ctc_greedy_decoder +from openvino.opset1.ops import deformable_convolution +from openvino.opset1.ops import deformable_psroi_pooling +from openvino.opset1.ops import depth_to_space +from openvino.opset1.ops import detection_output +from openvino.opset1.ops import divide +from openvino.opset1.ops import elu +from openvino.opset1.ops import equal +from openvino.opset1.ops import erf +from openvino.opset1.ops import exp +from openvino.opset1.ops import fake_quantize +from openvino.opset1.ops import floor +from openvino.opset1.ops import floor_mod +from openvino.opset1.ops import gather +from openvino.opset1.ops import gather_tree +from openvino.opset1.ops import greater +from openvino.opset1.ops import greater_equal +from openvino.opset1.ops import grn +from openvino.opset1.ops import group_convolution +from openvino.opset1.ops import group_convolution_backprop_data +from openvino.opset1.ops import hard_sigmoid +from openvino.opset1.ops import interpolate +from openvino.opset1.ops import less +from openvino.opset1.ops import less_equal +from openvino.opset1.ops import log +from openvino.opset1.ops import logical_and +from openvino.opset1.ops import logical_not +from openvino.opset1.ops import logical_or +from openvino.opset1.ops import logical_xor +from openvino.opset1.ops import lrn +from openvino.opset1.ops import lstm_cell +from openvino.opset1.ops import matmul +from openvino.opset1.ops import max_pool +from openvino.opset1.ops import maximum +from openvino.opset1.ops import minimum +from openvino.opset1.ops import mod +from openvino.opset1.ops import multiply +from openvino.opset1.ops import negative +from openvino.opset1.ops import non_max_suppression +from openvino.opset1.ops import normalize_l2 +from openvino.opset1.ops import not_equal +from openvino.opset1.ops import one_hot +from 
openvino.opset1.ops import pad +from openvino.opset1.ops import parameter +from openvino.opset1.ops import power +from openvino.opset1.ops import prelu +from openvino.opset1.ops import prior_box +from openvino.opset1.ops import prior_box_clustered +from openvino.opset1.ops import psroi_pooling +from openvino.opset1.ops import proposal +from openvino.opset1.ops import range +from openvino.opset1.ops import reduce_logical_and +from openvino.opset1.ops import reduce_logical_or +from openvino.opset1.ops import reduce_max +from openvino.opset1.ops import reduce_mean +from openvino.opset1.ops import reduce_min +from openvino.opset1.ops import reduce_prod +from openvino.opset1.ops import reduce_sum +from openvino.opset1.ops import region_yolo +from openvino.opset1.ops import relu +from openvino.opset1.ops import reshape +from openvino.opset1.ops import result +from openvino.opset1.ops import reverse_sequence +from openvino.opset1.ops import select +from openvino.opset1.ops import selu +from openvino.opset1.ops import shape_of +from openvino.opset1.ops import sigmoid +from openvino.opset1.ops import sign +from openvino.opset1.ops import sin +from openvino.opset1.ops import sinh +from openvino.opset1.ops import softmax +from openvino.opset1.ops import space_to_depth +from openvino.opset1.ops import split +from openvino.opset1.ops import sqrt +from openvino.opset1.ops import squared_difference +from openvino.opset1.ops import squeeze +from openvino.opset1.ops import strided_slice +from openvino.opset1.ops import subtract +from openvino.opset1.ops import tan +from openvino.opset1.ops import tanh +from openvino.opset1.ops import tensor_iterator +from openvino.opset1.ops import tile +from openvino.opset1.ops import topk +from openvino.opset1.ops import transpose +from openvino.opset1.ops import unsqueeze +from openvino.opset1.ops import variadic_split diff --git a/src/bindings/python/src/openvino/runtime/opset1/ops.py b/src/bindings/python/src/openvino/opset1/ops.py similarity 
index 99% rename from src/bindings/python/src/openvino/runtime/opset1/ops.py rename to src/bindings/python/src/openvino/opset1/ops.py index 54f32d404336d4..edca6c62a0b246 100644 --- a/src/bindings/python/src/openvino/runtime/opset1/ops.py +++ b/src/bindings/python/src/openvino/opset1/ops.py @@ -9,7 +9,7 @@ from functools import partial from openvino.runtime import Node, PartialShape, Type -from openvino.runtime.op import Constant, Parameter, tensor_iterator +from openvino.op import Constant, Parameter, tensor_iterator from openvino.runtime.opset_utils import _get_node_factory from openvino.runtime.utils.decorators import binary_op, nameable_op, unary_op from openvino.runtime.utils.input_validation import ( diff --git a/src/bindings/python/src/openvino/opset10/__init__.py b/src/bindings/python/src/openvino/opset10/__init__.py new file mode 100644 index 00000000000000..659cb99bb8644d --- /dev/null +++ b/src/bindings/python/src/openvino/opset10/__init__.py @@ -0,0 +1,179 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from openvino.opset1.ops import absolute +from openvino.opset1.ops import absolute as abs +from openvino.opset1.ops import acos +from openvino.opset4.ops import acosh +from openvino.opset8.ops import adaptive_avg_pool +from openvino.opset8.ops import adaptive_max_pool +from openvino.opset1.ops import add +from openvino.opset1.ops import asin +from openvino.opset4.ops import asinh +from openvino.opset6.ops import assign +from openvino.opset1.ops import atan +from openvino.opset4.ops import atanh +from openvino.opset1.ops import avg_pool +from openvino.opset5.ops import batch_norm_inference +from openvino.opset2.ops import batch_to_space +from openvino.opset1.ops import binary_convolution +from openvino.opset3.ops import broadcast +from openvino.opset3.ops import bucketize +from openvino.opset1.ops import ceiling +from openvino.opset1.ops import ceiling as ceil +from openvino.opset1.ops 
import clamp +from openvino.opset1.ops import concat +from openvino.opset1.ops import constant +from openvino.opset1.ops import convert +from openvino.opset1.ops import convert_like +from openvino.opset1.ops import convolution +from openvino.opset1.ops import convolution_backprop_data +from openvino.opset1.ops import cos +from openvino.opset1.ops import cosh +from openvino.opset1.ops import ctc_greedy_decoder +from openvino.opset6.ops import ctc_greedy_decoder_seq_len +from openvino.opset4.ops import ctc_loss +from openvino.opset3.ops import cum_sum +from openvino.opset3.ops import cum_sum as cumsum +from openvino.opset8.ops import deformable_convolution +from openvino.opset1.ops import deformable_psroi_pooling +from openvino.opset1.ops import depth_to_space +from openvino.opset8.ops import detection_output +from openvino.opset7.ops import dft +from openvino.opset1.ops import divide +from openvino.opset7.ops import einsum +from openvino.opset1.ops import elu +from openvino.opset3.ops import embedding_bag_offsets_sum +from openvino.opset3.ops import embedding_bag_packed_sum +from openvino.opset3.ops import embedding_segments_sum +from openvino.opset3.ops import extract_image_patches +from openvino.opset1.ops import equal +from openvino.opset1.ops import erf +from openvino.opset1.ops import exp +from openvino.opset9.ops import eye +from openvino.opset1.ops import fake_quantize +from openvino.opset1.ops import floor +from openvino.opset1.ops import floor_mod +from openvino.opset8.ops import gather +from openvino.opset6.ops import gather_elements +from openvino.opset8.ops import gather_nd +from openvino.opset1.ops import gather_tree +from openvino.opset7.ops import gelu +from openvino.opset9.ops import generate_proposals +from openvino.opset1.ops import greater +from openvino.opset1.ops import greater_equal +from openvino.opset9.ops import grid_sample +from openvino.opset1.ops import grn +from openvino.opset1.ops import group_convolution +from openvino.opset1.ops 
import group_convolution_backprop_data +from openvino.opset3.ops import gru_cell +from openvino.opset5.ops import gru_sequence +from openvino.opset1.ops import hard_sigmoid +from openvino.opset5.ops import hsigmoid +from openvino.opset4.ops import hswish +from openvino.opset7.ops import idft +from openvino.opset8.ops import if_op +from openvino.opset10.ops import interpolate +from openvino.opset9.ops import irdft +from openvino.opset10.ops import is_finite +from openvino.opset10.ops import is_inf +from openvino.opset10.ops import is_nan +from openvino.opset8.ops import i420_to_bgr +from openvino.opset8.ops import i420_to_rgb +from openvino.opset1.ops import less +from openvino.opset1.ops import less_equal +from openvino.opset1.ops import log +from openvino.opset1.ops import logical_and +from openvino.opset1.ops import logical_not +from openvino.opset1.ops import logical_or +from openvino.opset1.ops import logical_xor +from openvino.opset5.ops import log_softmax +from openvino.opset5.ops import loop +from openvino.opset1.ops import lrn +from openvino.opset4.ops import lstm_cell +from openvino.opset5.ops import lstm_sequence +from openvino.opset1.ops import matmul +from openvino.opset8.ops import matrix_nms +from openvino.opset8.ops import max_pool +from openvino.opset1.ops import maximum +from openvino.opset1.ops import minimum +from openvino.opset4.ops import mish +from openvino.opset1.ops import mod +from openvino.opset9.ops import multiclass_nms +from openvino.opset1.ops import multiply +from openvino.opset6.ops import mvn +from openvino.opset1.ops import negative +from openvino.opset9.ops import non_max_suppression +from openvino.opset3.ops import non_zero +from openvino.opset1.ops import normalize_l2 +from openvino.opset1.ops import not_equal +from openvino.opset8.ops import nv12_to_bgr +from openvino.opset8.ops import nv12_to_rgb +from openvino.opset1.ops import one_hot +from openvino.opset1.ops import pad +from openvino.opset1.ops import parameter +from 
openvino.opset1.ops import power +from openvino.opset1.ops import prelu +from openvino.opset8.ops import prior_box +from openvino.opset1.ops import prior_box_clustered +from openvino.opset1.ops import psroi_pooling +from openvino.opset4.ops import proposal +from openvino.opset1.ops import range +from openvino.opset8.ops import random_uniform +from openvino.opset9.ops import rdft +from openvino.opset6.ops import read_value +from openvino.opset4.ops import reduce_l1 +from openvino.opset4.ops import reduce_l2 +from openvino.opset1.ops import reduce_logical_and +from openvino.opset1.ops import reduce_logical_or +from openvino.opset1.ops import reduce_max +from openvino.opset1.ops import reduce_mean +from openvino.opset1.ops import reduce_min +from openvino.opset1.ops import reduce_prod +from openvino.opset1.ops import reduce_sum +from openvino.opset1.ops import region_yolo +from openvino.opset2.ops import reorg_yolo +from openvino.opset1.ops import relu +from openvino.opset1.ops import reshape +from openvino.opset1.ops import result +from openvino.opset1.ops import reverse_sequence +from openvino.opset3.ops import rnn_cell +from openvino.opset5.ops import rnn_sequence +from openvino.opset9.ops import roi_align +from openvino.opset2.ops import roi_pooling +from openvino.opset7.ops import roll +from openvino.opset5.ops import round +from openvino.opset3.ops import scatter_elements_update +from openvino.opset4.ops import scatter_nd_update +from openvino.opset3.ops import scatter_update +from openvino.opset1.ops import select +from openvino.opset1.ops import selu +from openvino.opset3.ops import shape_of +from openvino.opset3.ops import shuffle_channels +from openvino.opset1.ops import sigmoid +from openvino.opset1.ops import sign +from openvino.opset1.ops import sin +from openvino.opset1.ops import sinh +from openvino.opset8.ops import slice +from openvino.opset8.ops import softmax +from openvino.opset4.ops import softplus +from openvino.opset9.ops import softsign +from 
openvino.opset2.ops import space_to_batch +from openvino.opset1.ops import space_to_depth +from openvino.opset1.ops import split +from openvino.opset1.ops import sqrt +from openvino.opset1.ops import squared_difference +from openvino.opset1.ops import squeeze +from openvino.opset1.ops import strided_slice +from openvino.opset1.ops import subtract +from openvino.opset4.ops import swish +from openvino.opset1.ops import tan +from openvino.opset1.ops import tanh +from openvino.opset1.ops import tensor_iterator +from openvino.opset1.ops import tile +from openvino.opset3.ops import topk +from openvino.opset1.ops import transpose +from openvino.opset10.ops import unique +from openvino.opset1.ops import unsqueeze +from openvino.opset1.ops import variadic_split diff --git a/src/bindings/python/src/openvino/runtime/opset10/ops.py b/src/bindings/python/src/openvino/opset10/ops.py similarity index 100% rename from src/bindings/python/src/openvino/runtime/opset10/ops.py rename to src/bindings/python/src/openvino/opset10/ops.py diff --git a/src/bindings/python/src/openvino/opset11/__init__.py b/src/bindings/python/src/openvino/opset11/__init__.py new file mode 100644 index 00000000000000..2a07b9fab9f2c4 --- /dev/null +++ b/src/bindings/python/src/openvino/opset11/__init__.py @@ -0,0 +1,179 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from openvino.opset1.ops import absolute +from openvino.opset1.ops import absolute as abs +from openvino.opset1.ops import acos +from openvino.opset4.ops import acosh +from openvino.opset8.ops import adaptive_avg_pool +from openvino.opset8.ops import adaptive_max_pool +from openvino.opset1.ops import add +from openvino.opset1.ops import asin +from openvino.opset4.ops import asinh +from openvino.opset6.ops import assign +from openvino.opset1.ops import atan +from openvino.opset4.ops import atanh +from openvino.opset1.ops import avg_pool +from openvino.opset5.ops import 
batch_norm_inference +from openvino.opset2.ops import batch_to_space +from openvino.opset1.ops import binary_convolution +from openvino.opset3.ops import broadcast +from openvino.opset3.ops import bucketize +from openvino.opset1.ops import ceiling +from openvino.opset1.ops import ceiling as ceil +from openvino.opset1.ops import clamp +from openvino.opset1.ops import concat +from openvino.opset1.ops import constant +from openvino.opset1.ops import convert +from openvino.opset1.ops import convert_like +from openvino.opset1.ops import convolution +from openvino.opset1.ops import convolution_backprop_data +from openvino.opset1.ops import cos +from openvino.opset1.ops import cosh +from openvino.opset1.ops import ctc_greedy_decoder +from openvino.opset6.ops import ctc_greedy_decoder_seq_len +from openvino.opset4.ops import ctc_loss +from openvino.opset3.ops import cum_sum +from openvino.opset3.ops import cum_sum as cumsum +from openvino.opset8.ops import deformable_convolution +from openvino.opset1.ops import deformable_psroi_pooling +from openvino.opset1.ops import depth_to_space +from openvino.opset8.ops import detection_output +from openvino.opset7.ops import dft +from openvino.opset1.ops import divide +from openvino.opset7.ops import einsum +from openvino.opset1.ops import elu +from openvino.opset3.ops import embedding_bag_offsets_sum +from openvino.opset3.ops import embedding_bag_packed_sum +from openvino.opset3.ops import embedding_segments_sum +from openvino.opset3.ops import extract_image_patches +from openvino.opset1.ops import equal +from openvino.opset1.ops import erf +from openvino.opset1.ops import exp +from openvino.opset9.ops import eye +from openvino.opset1.ops import fake_quantize +from openvino.opset1.ops import floor +from openvino.opset1.ops import floor_mod +from openvino.opset8.ops import gather +from openvino.opset6.ops import gather_elements +from openvino.opset8.ops import gather_nd +from openvino.opset1.ops import gather_tree +from 
openvino.opset7.ops import gelu +from openvino.opset9.ops import generate_proposals +from openvino.opset1.ops import greater +from openvino.opset1.ops import greater_equal +from openvino.opset9.ops import grid_sample +from openvino.opset1.ops import grn +from openvino.opset1.ops import group_convolution +from openvino.opset1.ops import group_convolution_backprop_data +from openvino.opset3.ops import gru_cell +from openvino.opset5.ops import gru_sequence +from openvino.opset1.ops import hard_sigmoid +from openvino.opset5.ops import hsigmoid +from openvino.opset4.ops import hswish +from openvino.opset7.ops import idft +from openvino.opset8.ops import if_op +from openvino.opset11.ops import interpolate +from openvino.opset9.ops import irdft +from openvino.opset10.ops import is_finite +from openvino.opset10.ops import is_inf +from openvino.opset10.ops import is_nan +from openvino.opset8.ops import i420_to_bgr +from openvino.opset8.ops import i420_to_rgb +from openvino.opset1.ops import less +from openvino.opset1.ops import less_equal +from openvino.opset1.ops import log +from openvino.opset1.ops import logical_and +from openvino.opset1.ops import logical_not +from openvino.opset1.ops import logical_or +from openvino.opset1.ops import logical_xor +from openvino.opset5.ops import log_softmax +from openvino.opset5.ops import loop +from openvino.opset1.ops import lrn +from openvino.opset4.ops import lstm_cell +from openvino.opset5.ops import lstm_sequence +from openvino.opset1.ops import matmul +from openvino.opset8.ops import matrix_nms +from openvino.opset8.ops import max_pool +from openvino.opset1.ops import maximum +from openvino.opset1.ops import minimum +from openvino.opset4.ops import mish +from openvino.opset1.ops import mod +from openvino.opset9.ops import multiclass_nms +from openvino.opset1.ops import multiply +from openvino.opset6.ops import mvn +from openvino.opset1.ops import negative +from openvino.opset9.ops import non_max_suppression +from 
openvino.opset3.ops import non_zero +from openvino.opset1.ops import normalize_l2 +from openvino.opset1.ops import not_equal +from openvino.opset8.ops import nv12_to_bgr +from openvino.opset8.ops import nv12_to_rgb +from openvino.opset1.ops import one_hot +from openvino.opset1.ops import pad +from openvino.opset1.ops import parameter +from openvino.opset1.ops import power +from openvino.opset1.ops import prelu +from openvino.opset8.ops import prior_box +from openvino.opset1.ops import prior_box_clustered +from openvino.opset1.ops import psroi_pooling +from openvino.opset4.ops import proposal +from openvino.opset1.ops import range +from openvino.opset8.ops import random_uniform +from openvino.opset9.ops import rdft +from openvino.opset6.ops import read_value +from openvino.opset4.ops import reduce_l1 +from openvino.opset4.ops import reduce_l2 +from openvino.opset1.ops import reduce_logical_and +from openvino.opset1.ops import reduce_logical_or +from openvino.opset1.ops import reduce_max +from openvino.opset1.ops import reduce_mean +from openvino.opset1.ops import reduce_min +from openvino.opset1.ops import reduce_prod +from openvino.opset1.ops import reduce_sum +from openvino.opset1.ops import region_yolo +from openvino.opset2.ops import reorg_yolo +from openvino.opset1.ops import relu +from openvino.opset1.ops import reshape +from openvino.opset1.ops import result +from openvino.opset1.ops import reverse_sequence +from openvino.opset3.ops import rnn_cell +from openvino.opset5.ops import rnn_sequence +from openvino.opset9.ops import roi_align +from openvino.opset2.ops import roi_pooling +from openvino.opset7.ops import roll +from openvino.opset5.ops import round +from openvino.opset3.ops import scatter_elements_update +from openvino.opset4.ops import scatter_nd_update +from openvino.opset3.ops import scatter_update +from openvino.opset1.ops import select +from openvino.opset1.ops import selu +from openvino.opset3.ops import shape_of +from openvino.opset3.ops import 
shuffle_channels +from openvino.opset1.ops import sigmoid +from openvino.opset1.ops import sign +from openvino.opset1.ops import sin +from openvino.opset1.ops import sinh +from openvino.opset8.ops import slice +from openvino.opset8.ops import softmax +from openvino.opset4.ops import softplus +from openvino.opset9.ops import softsign +from openvino.opset2.ops import space_to_batch +from openvino.opset1.ops import space_to_depth +from openvino.opset1.ops import split +from openvino.opset1.ops import sqrt +from openvino.opset1.ops import squared_difference +from openvino.opset1.ops import squeeze +from openvino.opset1.ops import strided_slice +from openvino.opset1.ops import subtract +from openvino.opset4.ops import swish +from openvino.opset1.ops import tan +from openvino.opset1.ops import tanh +from openvino.opset1.ops import tensor_iterator +from openvino.opset1.ops import tile +from openvino.opset11.ops import topk +from openvino.opset1.ops import transpose +from openvino.opset10.ops import unique +from openvino.opset1.ops import unsqueeze +from openvino.opset1.ops import variadic_split diff --git a/src/bindings/python/src/openvino/runtime/opset11/ops.py b/src/bindings/python/src/openvino/opset11/ops.py similarity index 100% rename from src/bindings/python/src/openvino/runtime/opset11/ops.py rename to src/bindings/python/src/openvino/opset11/ops.py diff --git a/src/bindings/python/src/openvino/opset12/__init__.py b/src/bindings/python/src/openvino/opset12/__init__.py new file mode 100644 index 00000000000000..1187f2c83e6a05 --- /dev/null +++ b/src/bindings/python/src/openvino/opset12/__init__.py @@ -0,0 +1,180 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from openvino.opset1.ops import absolute +from openvino.opset1.ops import absolute as abs +from openvino.opset1.ops import acos +from openvino.opset4.ops import acosh +from openvino.opset8.ops import adaptive_avg_pool +from openvino.opset8.ops 
import adaptive_max_pool +from openvino.opset1.ops import add +from openvino.opset1.ops import asin +from openvino.opset4.ops import asinh +from openvino.opset6.ops import assign +from openvino.opset1.ops import atan +from openvino.opset4.ops import atanh +from openvino.opset1.ops import avg_pool +from openvino.opset5.ops import batch_norm_inference +from openvino.opset2.ops import batch_to_space +from openvino.opset1.ops import binary_convolution +from openvino.opset3.ops import broadcast +from openvino.opset3.ops import bucketize +from openvino.opset1.ops import ceiling +from openvino.opset1.ops import ceiling as ceil +from openvino.opset1.ops import clamp +from openvino.opset1.ops import concat +from openvino.opset1.ops import constant +from openvino.opset1.ops import convert +from openvino.opset1.ops import convert_like +from openvino.opset1.ops import convolution +from openvino.opset1.ops import convolution_backprop_data +from openvino.opset1.ops import cos +from openvino.opset1.ops import cosh +from openvino.opset1.ops import ctc_greedy_decoder +from openvino.opset6.ops import ctc_greedy_decoder_seq_len +from openvino.opset4.ops import ctc_loss +from openvino.opset3.ops import cum_sum +from openvino.opset3.ops import cum_sum as cumsum +from openvino.opset8.ops import deformable_convolution +from openvino.opset1.ops import deformable_psroi_pooling +from openvino.opset1.ops import depth_to_space +from openvino.opset8.ops import detection_output +from openvino.opset7.ops import dft +from openvino.opset1.ops import divide +from openvino.opset7.ops import einsum +from openvino.opset1.ops import elu +from openvino.opset3.ops import embedding_bag_offsets_sum +from openvino.opset3.ops import embedding_bag_packed_sum +from openvino.opset3.ops import embedding_segments_sum +from openvino.opset3.ops import extract_image_patches +from openvino.opset1.ops import equal +from openvino.opset1.ops import erf +from openvino.opset1.ops import exp +from openvino.opset9.ops 
import eye +from openvino.opset1.ops import fake_quantize +from openvino.opset1.ops import floor +from openvino.opset1.ops import floor_mod +from openvino.opset8.ops import gather +from openvino.opset6.ops import gather_elements +from openvino.opset8.ops import gather_nd +from openvino.opset1.ops import gather_tree +from openvino.opset7.ops import gelu +from openvino.opset9.ops import generate_proposals +from openvino.opset1.ops import greater +from openvino.opset1.ops import greater_equal +from openvino.opset9.ops import grid_sample +from openvino.opset1.ops import grn +from openvino.opset1.ops import group_convolution +from openvino.opset1.ops import group_convolution_backprop_data +from openvino.opset12.ops import group_normalization +from openvino.opset3.ops import gru_cell +from openvino.opset5.ops import gru_sequence +from openvino.opset1.ops import hard_sigmoid +from openvino.opset5.ops import hsigmoid +from openvino.opset4.ops import hswish +from openvino.opset7.ops import idft +from openvino.opset8.ops import if_op +from openvino.opset11.ops import interpolate +from openvino.opset9.ops import irdft +from openvino.opset10.ops import is_finite +from openvino.opset10.ops import is_inf +from openvino.opset10.ops import is_nan +from openvino.opset8.ops import i420_to_bgr +from openvino.opset8.ops import i420_to_rgb +from openvino.opset1.ops import less +from openvino.opset1.ops import less_equal +from openvino.opset1.ops import log +from openvino.opset1.ops import logical_and +from openvino.opset1.ops import logical_not +from openvino.opset1.ops import logical_or +from openvino.opset1.ops import logical_xor +from openvino.opset5.ops import log_softmax +from openvino.opset5.ops import loop +from openvino.opset1.ops import lrn +from openvino.opset4.ops import lstm_cell +from openvino.opset5.ops import lstm_sequence +from openvino.opset1.ops import matmul +from openvino.opset8.ops import matrix_nms +from openvino.opset8.ops import max_pool +from 
openvino.opset1.ops import maximum +from openvino.opset1.ops import minimum +from openvino.opset4.ops import mish +from openvino.opset1.ops import mod +from openvino.opset9.ops import multiclass_nms +from openvino.opset1.ops import multiply +from openvino.opset6.ops import mvn +from openvino.opset1.ops import negative +from openvino.opset9.ops import non_max_suppression +from openvino.opset3.ops import non_zero +from openvino.opset1.ops import normalize_l2 +from openvino.opset1.ops import not_equal +from openvino.opset8.ops import nv12_to_bgr +from openvino.opset8.ops import nv12_to_rgb +from openvino.opset1.ops import one_hot +from openvino.opset12.ops import pad +from openvino.opset1.ops import parameter +from openvino.opset1.ops import power +from openvino.opset1.ops import prelu +from openvino.opset8.ops import prior_box +from openvino.opset1.ops import prior_box_clustered +from openvino.opset1.ops import psroi_pooling +from openvino.opset4.ops import proposal +from openvino.opset4.ops import range +from openvino.opset8.ops import random_uniform +from openvino.opset9.ops import rdft +from openvino.opset6.ops import read_value +from openvino.opset4.ops import reduce_l1 +from openvino.opset4.ops import reduce_l2 +from openvino.opset1.ops import reduce_logical_and +from openvino.opset1.ops import reduce_logical_or +from openvino.opset1.ops import reduce_max +from openvino.opset1.ops import reduce_mean +from openvino.opset1.ops import reduce_min +from openvino.opset1.ops import reduce_prod +from openvino.opset1.ops import reduce_sum +from openvino.opset1.ops import region_yolo +from openvino.opset2.ops import reorg_yolo +from openvino.opset1.ops import relu +from openvino.opset1.ops import reshape +from openvino.opset1.ops import result +from openvino.opset1.ops import reverse_sequence +from openvino.opset3.ops import rnn_cell +from openvino.opset5.ops import rnn_sequence +from openvino.opset9.ops import roi_align +from openvino.opset2.ops import roi_pooling +from 
openvino.opset7.ops import roll +from openvino.opset5.ops import round +from openvino.opset12.ops import scatter_elements_update +from openvino.opset4.ops import scatter_nd_update +from openvino.opset3.ops import scatter_update +from openvino.opset1.ops import select +from openvino.opset1.ops import selu +from openvino.opset3.ops import shape_of +from openvino.opset3.ops import shuffle_channels +from openvino.opset1.ops import sigmoid +from openvino.opset1.ops import sign +from openvino.opset1.ops import sin +from openvino.opset1.ops import sinh +from openvino.opset8.ops import slice +from openvino.opset8.ops import softmax +from openvino.opset4.ops import softplus +from openvino.opset9.ops import softsign +from openvino.opset2.ops import space_to_batch +from openvino.opset1.ops import space_to_depth +from openvino.opset1.ops import split +from openvino.opset1.ops import sqrt +from openvino.opset1.ops import squared_difference +from openvino.opset1.ops import squeeze +from openvino.opset1.ops import strided_slice +from openvino.opset1.ops import subtract +from openvino.opset4.ops import swish +from openvino.opset1.ops import tan +from openvino.opset1.ops import tanh +from openvino.opset1.ops import tensor_iterator +from openvino.opset1.ops import tile +from openvino.opset11.ops import topk +from openvino.opset1.ops import transpose +from openvino.opset10.ops import unique +from openvino.opset1.ops import unsqueeze +from openvino.opset1.ops import variadic_split diff --git a/src/bindings/python/src/openvino/runtime/opset12/ops.py b/src/bindings/python/src/openvino/opset12/ops.py similarity index 100% rename from src/bindings/python/src/openvino/runtime/opset12/ops.py rename to src/bindings/python/src/openvino/opset12/ops.py diff --git a/src/bindings/python/src/openvino/opset13/__init__.py b/src/bindings/python/src/openvino/opset13/__init__.py new file mode 100644 index 00000000000000..ab3f541e8f831c --- /dev/null +++ 
b/src/bindings/python/src/openvino/opset13/__init__.py @@ -0,0 +1,188 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from openvino.opset1.ops import absolute +from openvino.opset1.ops import absolute as abs +from openvino.opset1.ops import acos +from openvino.opset4.ops import acosh +from openvino.opset8.ops import adaptive_avg_pool +from openvino.opset8.ops import adaptive_max_pool +from openvino.opset1.ops import add +from openvino.opset1.ops import asin +from openvino.opset4.ops import asinh +from openvino.opset6.ops import assign +from openvino.opset1.ops import atan +from openvino.opset4.ops import atanh +from openvino.opset1.ops import avg_pool +from openvino.opset5.ops import batch_norm_inference +from openvino.opset2.ops import batch_to_space +from openvino.opset1.ops import binary_convolution +from openvino.opset13.ops import bitwise_and +from openvino.opset13.ops import bitwise_not +from openvino.opset13.ops import bitwise_or +from openvino.opset13.ops import bitwise_xor +from openvino.opset3.ops import broadcast +from openvino.opset3.ops import bucketize +from openvino.opset1.ops import ceiling +from openvino.opset1.ops import ceiling as ceil +from openvino.opset1.ops import clamp +from openvino.opset1.ops import concat +from openvino.opset13.ops import constant +from openvino.opset1.ops import convert +from openvino.opset1.ops import convert_like +from openvino.opset1.ops import convolution +from openvino.opset1.ops import convolution_backprop_data +from openvino.opset1.ops import cos +from openvino.opset1.ops import cosh +from openvino.opset1.ops import ctc_greedy_decoder +from openvino.opset6.ops import ctc_greedy_decoder_seq_len +from openvino.opset4.ops import ctc_loss +from openvino.opset3.ops import cum_sum +from openvino.opset3.ops import cum_sum as cumsum +from openvino.opset8.ops import deformable_convolution +from openvino.opset1.ops import deformable_psroi_pooling +from 
openvino.opset1.ops import depth_to_space +from openvino.opset8.ops import detection_output +from openvino.opset7.ops import dft +from openvino.opset1.ops import divide +from openvino.opset7.ops import einsum +from openvino.opset1.ops import elu +from openvino.opset3.ops import embedding_bag_offsets_sum +from openvino.opset3.ops import embedding_bag_packed_sum +from openvino.opset3.ops import embedding_segments_sum +from openvino.opset3.ops import extract_image_patches +from openvino.opset1.ops import equal +from openvino.opset1.ops import erf +from openvino.opset1.ops import exp +from openvino.opset9.ops import eye +from openvino.opset13.ops import fake_convert +from openvino.opset13.ops import fake_quantize +from openvino.opset1.ops import floor +from openvino.opset1.ops import floor_mod +from openvino.opset8.ops import gather +from openvino.opset6.ops import gather_elements +from openvino.opset8.ops import gather_nd +from openvino.opset1.ops import gather_tree +from openvino.opset7.ops import gelu +from openvino.opset9.ops import generate_proposals +from openvino.opset1.ops import greater +from openvino.opset1.ops import greater_equal +from openvino.opset9.ops import grid_sample +from openvino.opset1.ops import grn +from openvino.opset1.ops import group_convolution +from openvino.opset1.ops import group_convolution_backprop_data +from openvino.opset12.ops import group_normalization +from openvino.opset3.ops import gru_cell +from openvino.opset5.ops import gru_sequence +from openvino.opset1.ops import hard_sigmoid +from openvino.opset5.ops import hsigmoid +from openvino.opset4.ops import hswish +from openvino.opset7.ops import idft +from openvino.opset8.ops import if_op +from openvino.opset11.ops import interpolate +from openvino.opset9.ops import irdft +from openvino.opset10.ops import is_finite +from openvino.opset10.ops import is_inf +from openvino.opset10.ops import is_nan +from openvino.opset8.ops import i420_to_bgr +from openvino.opset8.ops import 
i420_to_rgb +from openvino.opset1.ops import less +from openvino.opset1.ops import less_equal +from openvino.opset1.ops import log +from openvino.opset1.ops import logical_and +from openvino.opset1.ops import logical_not +from openvino.opset1.ops import logical_or +from openvino.opset1.ops import logical_xor +from openvino.opset5.ops import log_softmax +from openvino.opset5.ops import loop +from openvino.opset1.ops import lrn +from openvino.opset4.ops import lstm_cell +from openvino.opset5.ops import lstm_sequence +from openvino.opset1.ops import matmul +from openvino.opset8.ops import matrix_nms +from openvino.opset8.ops import max_pool +from openvino.opset1.ops import maximum +from openvino.opset1.ops import minimum +from openvino.opset4.ops import mish +from openvino.opset1.ops import mod +from openvino.opset9.ops import multiclass_nms +from openvino.opset13.ops import multinomial +from openvino.opset1.ops import multiply +from openvino.opset6.ops import mvn +from openvino.opset1.ops import negative +from openvino.opset13.ops import nms_rotated +from openvino.opset9.ops import non_max_suppression +from openvino.opset3.ops import non_zero +from openvino.opset1.ops import normalize_l2 +from openvino.opset1.ops import not_equal +from openvino.opset8.ops import nv12_to_bgr +from openvino.opset8.ops import nv12_to_rgb +from openvino.opset1.ops import one_hot +from openvino.opset12.ops import pad +from openvino.opset1.ops import parameter +from openvino.opset1.ops import power +from openvino.opset1.ops import prelu +from openvino.opset8.ops import prior_box +from openvino.opset1.ops import prior_box_clustered +from openvino.opset1.ops import psroi_pooling +from openvino.opset4.ops import proposal +from openvino.opset4.ops import range +from openvino.opset8.ops import random_uniform +from openvino.opset9.ops import rdft +from openvino.opset6.ops import read_value +from openvino.opset4.ops import reduce_l1 +from openvino.opset4.ops import reduce_l2 +from 
openvino.opset1.ops import reduce_logical_and +from openvino.opset1.ops import reduce_logical_or +from openvino.opset1.ops import reduce_max +from openvino.opset1.ops import reduce_mean +from openvino.opset1.ops import reduce_min +from openvino.opset1.ops import reduce_prod +from openvino.opset1.ops import reduce_sum +from openvino.opset1.ops import region_yolo +from openvino.opset2.ops import reorg_yolo +from openvino.opset1.ops import relu +from openvino.opset1.ops import reshape +from openvino.opset13.ops import result +from openvino.opset1.ops import reverse_sequence +from openvino.opset3.ops import rnn_cell +from openvino.opset5.ops import rnn_sequence +from openvino.opset9.ops import roi_align +from openvino.opset2.ops import roi_pooling +from openvino.opset7.ops import roll +from openvino.opset5.ops import round +from openvino.opset13.ops import scaled_dot_product_attention +from openvino.opset12.ops import scatter_elements_update +from openvino.opset4.ops import scatter_nd_update +from openvino.opset3.ops import scatter_update +from openvino.opset1.ops import select +from openvino.opset1.ops import selu +from openvino.opset3.ops import shape_of +from openvino.opset3.ops import shuffle_channels +from openvino.opset1.ops import sigmoid +from openvino.opset1.ops import sign +from openvino.opset1.ops import sin +from openvino.opset1.ops import sinh +from openvino.opset8.ops import slice +from openvino.opset8.ops import softmax +from openvino.opset4.ops import softplus +from openvino.opset9.ops import softsign +from openvino.opset2.ops import space_to_batch +from openvino.opset1.ops import space_to_depth +from openvino.opset1.ops import split +from openvino.opset1.ops import sqrt +from openvino.opset1.ops import squared_difference +from openvino.opset1.ops import squeeze +from openvino.opset1.ops import strided_slice +from openvino.opset1.ops import subtract +from openvino.opset4.ops import swish +from openvino.opset1.ops import tan +from openvino.opset1.ops 
import tanh +from openvino.opset1.ops import tensor_iterator +from openvino.opset1.ops import tile +from openvino.opset11.ops import topk +from openvino.opset1.ops import transpose +from openvino.opset10.ops import unique +from openvino.opset1.ops import unsqueeze +from openvino.opset1.ops import variadic_split diff --git a/src/bindings/python/src/openvino/runtime/opset13/ops.py b/src/bindings/python/src/openvino/opset13/ops.py similarity index 99% rename from src/bindings/python/src/openvino/runtime/opset13/ops.py rename to src/bindings/python/src/openvino/opset13/ops.py index a624ffb4f79873..12f0d06b1a28e6 100644 --- a/src/bindings/python/src/openvino/runtime/opset13/ops.py +++ b/src/bindings/python/src/openvino/opset13/ops.py @@ -12,8 +12,8 @@ log = logging.getLogger(__name__) from openvino.runtime import Node, Shape, Type, Output, Tensor -from openvino.runtime.op import Constant, Result -from openvino.runtime.opset1 import convert_like +from openvino.op import Constant, Result +from openvino.opset1 import convert_like from openvino.runtime.opset_utils import _get_node_factory from openvino.runtime.utils.decorators import binary_op, nameable_op, unary_op, overloading from openvino.runtime.utils.types import ( diff --git a/src/bindings/python/src/openvino/opset14/__init__.py b/src/bindings/python/src/openvino/opset14/__init__.py new file mode 100644 index 00000000000000..8a503a333bd3e1 --- /dev/null +++ b/src/bindings/python/src/openvino/opset14/__init__.py @@ -0,0 +1,190 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from openvino.opset1.ops import absolute +from openvino.opset1.ops import absolute as abs +from openvino.opset1.ops import acos +from openvino.opset4.ops import acosh +from openvino.opset8.ops import adaptive_avg_pool +from openvino.opset8.ops import adaptive_max_pool +from openvino.opset1.ops import add +from openvino.opset1.ops import asin +from openvino.opset4.ops import asinh 
+from openvino.opset6.ops import assign +from openvino.opset1.ops import atan +from openvino.opset4.ops import atanh +from openvino.opset14.ops import avg_pool +from openvino.opset5.ops import batch_norm_inference +from openvino.opset2.ops import batch_to_space +from openvino.opset1.ops import binary_convolution +from openvino.opset13.ops import bitwise_and +from openvino.opset13.ops import bitwise_not +from openvino.opset13.ops import bitwise_or +from openvino.opset13.ops import bitwise_xor +from openvino.opset3.ops import broadcast +from openvino.opset3.ops import bucketize +from openvino.opset1.ops import ceiling +from openvino.opset1.ops import ceiling as ceil +from openvino.opset1.ops import clamp +from openvino.opset1.ops import concat +from openvino.opset13.ops import constant +from openvino.opset1.ops import convert +from openvino.opset1.ops import convert_like +from openvino.opset14.ops import convert_promote_types +from openvino.opset1.ops import convolution +from openvino.opset1.ops import convolution_backprop_data +from openvino.opset1.ops import cos +from openvino.opset1.ops import cosh +from openvino.opset1.ops import ctc_greedy_decoder +from openvino.opset6.ops import ctc_greedy_decoder_seq_len +from openvino.opset4.ops import ctc_loss +from openvino.opset3.ops import cum_sum +from openvino.opset3.ops import cum_sum as cumsum +from openvino.opset8.ops import deformable_convolution +from openvino.opset1.ops import deformable_psroi_pooling +from openvino.opset1.ops import depth_to_space +from openvino.opset8.ops import detection_output +from openvino.opset7.ops import dft +from openvino.opset1.ops import divide +from openvino.opset7.ops import einsum +from openvino.opset1.ops import elu +from openvino.opset3.ops import embedding_bag_offsets_sum +from openvino.opset3.ops import embedding_bag_packed_sum +from openvino.opset3.ops import embedding_segments_sum +from openvino.opset3.ops import extract_image_patches +from openvino.opset1.ops import equal 
+from openvino.opset1.ops import erf +from openvino.opset1.ops import exp +from openvino.opset9.ops import eye +from openvino.opset13.ops import fake_convert +from openvino.opset13.ops import fake_quantize +from openvino.opset1.ops import floor +from openvino.opset1.ops import floor_mod +from openvino.opset8.ops import gather +from openvino.opset6.ops import gather_elements +from openvino.opset8.ops import gather_nd +from openvino.opset1.ops import gather_tree +from openvino.opset7.ops import gelu +from openvino.opset9.ops import generate_proposals +from openvino.opset1.ops import greater +from openvino.opset1.ops import greater_equal +from openvino.opset9.ops import grid_sample +from openvino.opset1.ops import grn +from openvino.opset1.ops import group_convolution +from openvino.opset1.ops import group_convolution_backprop_data +from openvino.opset12.ops import group_normalization +from openvino.opset3.ops import gru_cell +from openvino.opset5.ops import gru_sequence +from openvino.opset1.ops import hard_sigmoid +from openvino.opset5.ops import hsigmoid +from openvino.opset4.ops import hswish +from openvino.opset7.ops import idft +from openvino.opset8.ops import if_op +from openvino.opset11.ops import interpolate +from openvino.opset14.ops import inverse +from openvino.opset9.ops import irdft +from openvino.opset10.ops import is_finite +from openvino.opset10.ops import is_inf +from openvino.opset10.ops import is_nan +from openvino.opset8.ops import i420_to_bgr +from openvino.opset8.ops import i420_to_rgb +from openvino.opset1.ops import less +from openvino.opset1.ops import less_equal +from openvino.opset1.ops import log +from openvino.opset1.ops import logical_and +from openvino.opset1.ops import logical_not +from openvino.opset1.ops import logical_or +from openvino.opset1.ops import logical_xor +from openvino.opset5.ops import log_softmax +from openvino.opset5.ops import loop +from openvino.opset1.ops import lrn +from openvino.opset4.ops import lstm_cell +from 
openvino.opset5.ops import lstm_sequence +from openvino.opset1.ops import matmul +from openvino.opset8.ops import matrix_nms +from openvino.opset14.ops import max_pool +from openvino.opset1.ops import maximum +from openvino.opset1.ops import minimum +from openvino.opset4.ops import mish +from openvino.opset1.ops import mod +from openvino.opset9.ops import multiclass_nms +from openvino.opset13.ops import multinomial +from openvino.opset1.ops import multiply +from openvino.opset6.ops import mvn +from openvino.opset1.ops import negative +from openvino.opset13.ops import nms_rotated +from openvino.opset9.ops import non_max_suppression +from openvino.opset3.ops import non_zero +from openvino.opset1.ops import normalize_l2 +from openvino.opset1.ops import not_equal +from openvino.opset8.ops import nv12_to_bgr +from openvino.opset8.ops import nv12_to_rgb +from openvino.opset1.ops import one_hot +from openvino.opset12.ops import pad +from openvino.opset1.ops import parameter +from openvino.opset1.ops import power +from openvino.opset1.ops import prelu +from openvino.opset8.ops import prior_box +from openvino.opset1.ops import prior_box_clustered +from openvino.opset1.ops import psroi_pooling +from openvino.opset4.ops import proposal +from openvino.opset4.ops import range +from openvino.opset8.ops import random_uniform +from openvino.opset9.ops import rdft +from openvino.opset6.ops import read_value +from openvino.opset4.ops import reduce_l1 +from openvino.opset4.ops import reduce_l2 +from openvino.opset1.ops import reduce_logical_and +from openvino.opset1.ops import reduce_logical_or +from openvino.opset1.ops import reduce_max +from openvino.opset1.ops import reduce_mean +from openvino.opset1.ops import reduce_min +from openvino.opset1.ops import reduce_prod +from openvino.opset1.ops import reduce_sum +from openvino.opset1.ops import region_yolo +from openvino.opset2.ops import reorg_yolo +from openvino.opset1.ops import relu +from openvino.opset1.ops import reshape +from 
openvino.opset13.ops import result +from openvino.opset1.ops import reverse_sequence +from openvino.opset3.ops import rnn_cell +from openvino.opset5.ops import rnn_sequence +from openvino.opset9.ops import roi_align +from openvino.opset2.ops import roi_pooling +from openvino.opset7.ops import roll +from openvino.opset5.ops import round +from openvino.opset13.ops import scaled_dot_product_attention +from openvino.opset12.ops import scatter_elements_update +from openvino.opset4.ops import scatter_nd_update +from openvino.opset3.ops import scatter_update +from openvino.opset1.ops import select +from openvino.opset1.ops import selu +from openvino.opset3.ops import shape_of +from openvino.opset3.ops import shuffle_channels +from openvino.opset1.ops import sigmoid +from openvino.opset1.ops import sign +from openvino.opset1.ops import sin +from openvino.opset1.ops import sinh +from openvino.opset8.ops import slice +from openvino.opset8.ops import softmax +from openvino.opset4.ops import softplus +from openvino.opset9.ops import softsign +from openvino.opset2.ops import space_to_batch +from openvino.opset1.ops import space_to_depth +from openvino.opset1.ops import split +from openvino.opset1.ops import sqrt +from openvino.opset1.ops import squared_difference +from openvino.opset1.ops import squeeze +from openvino.opset1.ops import strided_slice +from openvino.opset1.ops import subtract +from openvino.opset4.ops import swish +from openvino.opset1.ops import tan +from openvino.opset1.ops import tanh +from openvino.opset1.ops import tensor_iterator +from openvino.opset1.ops import tile +from openvino.opset11.ops import topk +from openvino.opset1.ops import transpose +from openvino.opset10.ops import unique +from openvino.opset1.ops import unsqueeze +from openvino.opset1.ops import variadic_split diff --git a/src/bindings/python/src/openvino/runtime/opset14/ops.py b/src/bindings/python/src/openvino/opset14/ops.py similarity index 100% rename from 
src/bindings/python/src/openvino/runtime/opset14/ops.py rename to src/bindings/python/src/openvino/opset14/ops.py diff --git a/src/bindings/python/src/openvino/opset15/__init__.py b/src/bindings/python/src/openvino/opset15/__init__.py new file mode 100644 index 00000000000000..d5b93924a69e6e --- /dev/null +++ b/src/bindings/python/src/openvino/opset15/__init__.py @@ -0,0 +1,204 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# New operations added in Opset15 +from openvino.opset15.ops import col2im +from openvino.opset15.ops import embedding_bag_offsets +from openvino.opset15.ops import embedding_bag_packed +from openvino.opset15.ops import scatter_nd_update +from openvino.opset15.ops import roi_align_rotated +from openvino.opset15.ops import string_tensor_pack +from openvino.opset15.ops import string_tensor_unpack +from openvino.opset15.ops import bitwise_left_shift +from openvino.opset15.ops import bitwise_right_shift +from openvino.opset15.ops import slice_scatter + +# Operators from previous opsets +from openvino.opset1.ops import absolute +from openvino.opset1.ops import absolute as abs +from openvino.opset1.ops import acos +from openvino.opset4.ops import acosh +from openvino.opset8.ops import adaptive_avg_pool +from openvino.opset8.ops import adaptive_max_pool +from openvino.opset1.ops import add +from openvino.opset1.ops import asin +from openvino.opset4.ops import asinh +from openvino.opset6.ops import assign +from openvino.opset1.ops import atan +from openvino.opset4.ops import atanh +from openvino.opset14.ops import avg_pool +from openvino.opset5.ops import batch_norm_inference +from openvino.opset2.ops import batch_to_space +from openvino.opset1.ops import binary_convolution +from openvino.opset13.ops import bitwise_and +from openvino.opset13.ops import bitwise_not +from openvino.opset13.ops import bitwise_or +from openvino.opset13.ops import bitwise_xor +from openvino.opset3.ops import 
broadcast +from openvino.opset3.ops import bucketize +from openvino.opset1.ops import ceiling +from openvino.opset1.ops import ceiling as ceil +from openvino.opset1.ops import clamp +from openvino.opset1.ops import concat +from openvino.opset13.ops import constant +from openvino.opset1.ops import convert +from openvino.opset1.ops import convert_like +from openvino.opset14.ops import convert_promote_types +from openvino.opset1.ops import convolution +from openvino.opset1.ops import convolution_backprop_data +from openvino.opset1.ops import cos +from openvino.opset1.ops import cosh +from openvino.opset1.ops import ctc_greedy_decoder +from openvino.opset6.ops import ctc_greedy_decoder_seq_len +from openvino.opset4.ops import ctc_loss +from openvino.opset3.ops import cum_sum +from openvino.opset3.ops import cum_sum as cumsum +from openvino.opset8.ops import deformable_convolution +from openvino.opset1.ops import deformable_psroi_pooling +from openvino.opset1.ops import depth_to_space +from openvino.opset8.ops import detection_output +from openvino.opset7.ops import dft +from openvino.opset1.ops import divide +from openvino.opset7.ops import einsum +from openvino.opset1.ops import elu +from openvino.opset3.ops import embedding_bag_offsets_sum +from openvino.opset3.ops import embedding_bag_packed_sum +from openvino.opset3.ops import embedding_segments_sum +from openvino.opset3.ops import extract_image_patches +from openvino.opset1.ops import equal +from openvino.opset1.ops import erf +from openvino.opset1.ops import exp +from openvino.opset9.ops import eye +from openvino.opset13.ops import fake_convert +from openvino.opset13.ops import fake_quantize +from openvino.opset1.ops import floor +from openvino.opset1.ops import floor_mod +from openvino.opset8.ops import gather +from openvino.opset6.ops import gather_elements +from openvino.opset8.ops import gather_nd +from openvino.opset1.ops import gather_tree +from openvino.opset7.ops import gelu +from openvino.opset9.ops 
import generate_proposals +from openvino.opset1.ops import greater +from openvino.opset1.ops import greater_equal +from openvino.opset9.ops import grid_sample +from openvino.opset1.ops import grn +from openvino.opset1.ops import group_convolution +from openvino.opset1.ops import group_convolution_backprop_data +from openvino.opset12.ops import group_normalization +from openvino.opset3.ops import gru_cell +from openvino.opset5.ops import gru_sequence +from openvino.opset1.ops import hard_sigmoid +from openvino.opset5.ops import hsigmoid +from openvino.opset4.ops import hswish +from openvino.opset7.ops import idft +from openvino.opset8.ops import if_op +from openvino.opset11.ops import interpolate +from openvino.opset14.ops import inverse +from openvino.opset9.ops import irdft +from openvino.opset10.ops import is_finite +from openvino.opset10.ops import is_inf +from openvino.opset10.ops import is_nan +from openvino.opset8.ops import i420_to_bgr +from openvino.opset8.ops import i420_to_rgb +from openvino.opset1.ops import less +from openvino.opset1.ops import less_equal +from openvino.opset1.ops import log +from openvino.opset1.ops import logical_and +from openvino.opset1.ops import logical_not +from openvino.opset1.ops import logical_or +from openvino.opset1.ops import logical_xor +from openvino.opset5.ops import log_softmax +from openvino.opset5.ops import loop +from openvino.opset1.ops import lrn +from openvino.opset4.ops import lstm_cell +from openvino.opset5.ops import lstm_sequence +from openvino.opset1.ops import matmul +from openvino.opset8.ops import matrix_nms +from openvino.opset14.ops import max_pool +from openvino.opset1.ops import maximum +from openvino.opset1.ops import minimum +from openvino.opset4.ops import mish +from openvino.opset1.ops import mod +from openvino.opset9.ops import multiclass_nms +from openvino.opset13.ops import multinomial +from openvino.opset1.ops import multiply +from openvino.opset6.ops import mvn +from openvino.opset1.ops import 
negative +from openvino.opset13.ops import nms_rotated +from openvino.opset9.ops import non_max_suppression +from openvino.opset3.ops import non_zero +from openvino.opset1.ops import normalize_l2 +from openvino.opset1.ops import not_equal +from openvino.opset8.ops import nv12_to_bgr +from openvino.opset8.ops import nv12_to_rgb +from openvino.opset1.ops import one_hot +from openvino.opset12.ops import pad +from openvino.opset1.ops import parameter +from openvino.opset1.ops import power +from openvino.opset1.ops import prelu +from openvino.opset8.ops import prior_box +from openvino.opset1.ops import prior_box_clustered +from openvino.opset1.ops import psroi_pooling +from openvino.opset4.ops import proposal +from openvino.opset4.ops import range +from openvino.opset8.ops import random_uniform +from openvino.opset9.ops import rdft +from openvino.opset6.ops import read_value +from openvino.opset4.ops import reduce_l1 +from openvino.opset4.ops import reduce_l2 +from openvino.opset1.ops import reduce_logical_and +from openvino.opset1.ops import reduce_logical_or +from openvino.opset1.ops import reduce_max +from openvino.opset1.ops import reduce_mean +from openvino.opset1.ops import reduce_min +from openvino.opset1.ops import reduce_prod +from openvino.opset1.ops import reduce_sum +from openvino.opset1.ops import region_yolo +from openvino.opset2.ops import reorg_yolo +from openvino.opset1.ops import relu +from openvino.opset1.ops import reshape +from openvino.opset13.ops import result +from openvino.opset1.ops import reverse_sequence +from openvino.opset3.ops import rnn_cell +from openvino.opset5.ops import rnn_sequence +from openvino.opset9.ops import roi_align +from openvino.opset2.ops import roi_pooling +from openvino.opset7.ops import roll +from openvino.opset5.ops import round +from openvino.opset13.ops import scaled_dot_product_attention +from openvino.opset12.ops import scatter_elements_update +from openvino.opset3.ops import scatter_update +from 
openvino.opset15.ops import search_sorted +from openvino.opset1.ops import select +from openvino.opset1.ops import selu +from openvino.opset3.ops import shape_of +from openvino.opset3.ops import shuffle_channels +from openvino.opset1.ops import sigmoid +from openvino.opset1.ops import sign +from openvino.opset1.ops import sin +from openvino.opset1.ops import sinh +from openvino.opset8.ops import slice +from openvino.opset8.ops import softmax +from openvino.opset4.ops import softplus +from openvino.opset9.ops import softsign +from openvino.opset2.ops import space_to_batch +from openvino.opset1.ops import space_to_depth +from openvino.opset1.ops import split +from openvino.opset1.ops import sqrt +from openvino.opset1.ops import squared_difference +from openvino.opset15.ops import squeeze +from openvino.opset15.ops import stft +from openvino.opset1.ops import strided_slice +from openvino.opset1.ops import subtract +from openvino.opset4.ops import swish +from openvino.opset1.ops import tan +from openvino.opset1.ops import tanh +from openvino.opset1.ops import tensor_iterator +from openvino.opset1.ops import tile +from openvino.opset11.ops import topk +from openvino.opset1.ops import transpose +from openvino.opset10.ops import unique +from openvino.opset1.ops import unsqueeze +from openvino.opset1.ops import variadic_split diff --git a/src/bindings/python/src/openvino/runtime/opset15/ops.py b/src/bindings/python/src/openvino/opset15/ops.py similarity index 99% rename from src/bindings/python/src/openvino/runtime/opset15/ops.py rename to src/bindings/python/src/openvino/opset15/ops.py index 93aacb29572340..8e6b8bd46d5f7c 100644 --- a/src/bindings/python/src/openvino/runtime/opset15/ops.py +++ b/src/bindings/python/src/openvino/opset15/ops.py @@ -8,8 +8,8 @@ import numpy as np from openvino.runtime import Node, Type -from openvino.runtime.opset1 import convert_like -from openvino.runtime.opset14 import constant +from openvino.opset1 import convert_like +from 
openvino.opset14 import constant from openvino.runtime.opset_utils import _get_node_factory from openvino.runtime.utils.decorators import binary_op, nameable_op from openvino.runtime.utils.types import NodeInput, as_nodes diff --git a/src/bindings/python/src/openvino/opset16/__init__.py b/src/bindings/python/src/openvino/opset16/__init__.py new file mode 100644 index 00000000000000..06fa9d2e7d0070 --- /dev/null +++ b/src/bindings/python/src/openvino/opset16/__init__.py @@ -0,0 +1,9 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# New operations added in Opset16 +from openvino.opset16.ops import identity + +# Operators from previous opsets +# TODO (ticket: 156877): Add previous opset operators at the end of opset16 development diff --git a/src/bindings/python/src/openvino/runtime/opset16/ops.py b/src/bindings/python/src/openvino/opset16/ops.py similarity index 100% rename from src/bindings/python/src/openvino/runtime/opset16/ops.py rename to src/bindings/python/src/openvino/opset16/ops.py diff --git a/src/bindings/python/src/openvino/opset2/__init__.py b/src/bindings/python/src/openvino/opset2/__init__.py new file mode 100644 index 00000000000000..1306c89b5241d8 --- /dev/null +++ b/src/bindings/python/src/openvino/opset2/__init__.py @@ -0,0 +1,117 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from openvino.opset1.ops import absolute +from openvino.opset1.ops import absolute as abs +from openvino.opset1.ops import acos +from openvino.opset1.ops import add +from openvino.opset1.ops import asin +from openvino.opset1.ops import atan +from openvino.opset1.ops import avg_pool +from openvino.opset1.ops import batch_norm_inference +from openvino.opset2.ops import batch_to_space +from openvino.opset1.ops import binary_convolution +from openvino.opset1.ops import broadcast +from openvino.opset1.ops import ceiling +from openvino.opset1.ops 
import ceiling as ceil +from openvino.opset1.ops import clamp +from openvino.opset1.ops import concat +from openvino.opset1.ops import constant +from openvino.opset1.ops import convert +from openvino.opset1.ops import convert_like +from openvino.opset1.ops import convolution +from openvino.opset1.ops import convolution_backprop_data +from openvino.opset1.ops import cos +from openvino.opset1.ops import cosh +from openvino.opset1.ops import ctc_greedy_decoder +from openvino.opset1.ops import deformable_convolution +from openvino.opset1.ops import deformable_psroi_pooling +from openvino.opset1.ops import depth_to_space +from openvino.opset1.ops import detection_output +from openvino.opset1.ops import divide +from openvino.opset1.ops import elu +from openvino.opset1.ops import equal +from openvino.opset1.ops import erf +from openvino.opset1.ops import exp +from openvino.opset1.ops import fake_quantize +from openvino.opset1.ops import floor +from openvino.opset1.ops import floor_mod +from openvino.opset1.ops import gather +from openvino.opset1.ops import gather_tree +from openvino.opset2.ops import gelu +from openvino.opset1.ops import greater +from openvino.opset1.ops import greater_equal +from openvino.opset1.ops import grn +from openvino.opset1.ops import group_convolution +from openvino.opset1.ops import group_convolution_backprop_data +from openvino.opset1.ops import hard_sigmoid +from openvino.opset1.ops import interpolate +from openvino.opset1.ops import less +from openvino.opset1.ops import less_equal +from openvino.opset1.ops import log +from openvino.opset1.ops import logical_and +from openvino.opset1.ops import logical_not +from openvino.opset1.ops import logical_or +from openvino.opset1.ops import logical_xor +from openvino.opset1.ops import lrn +from openvino.opset1.ops import lstm_cell +from openvino.opset1.ops import matmul +from openvino.opset1.ops import max_pool +from openvino.opset1.ops import maximum +from openvino.opset1.ops import minimum +from 
openvino.opset1.ops import mod +from openvino.opset1.ops import multiply +from openvino.opset2.ops import mvn +from openvino.opset1.ops import negative +from openvino.opset1.ops import non_max_suppression +from openvino.opset1.ops import normalize_l2 +from openvino.opset1.ops import not_equal +from openvino.opset1.ops import one_hot +from openvino.opset1.ops import pad +from openvino.opset1.ops import parameter +from openvino.opset1.ops import power +from openvino.opset1.ops import prelu +from openvino.opset1.ops import prior_box +from openvino.opset1.ops import prior_box_clustered +from openvino.opset1.ops import psroi_pooling +from openvino.opset1.ops import proposal +from openvino.opset1.ops import range +from openvino.opset1.ops import reduce_logical_and +from openvino.opset1.ops import reduce_logical_or +from openvino.opset1.ops import reduce_max +from openvino.opset1.ops import reduce_mean +from openvino.opset1.ops import reduce_min +from openvino.opset1.ops import reduce_prod +from openvino.opset1.ops import reduce_sum +from openvino.opset1.ops import region_yolo +from openvino.opset2.ops import reorg_yolo +from openvino.opset1.ops import relu +from openvino.opset1.ops import reshape +from openvino.opset1.ops import result +from openvino.opset1.ops import reverse_sequence +from openvino.opset2.ops import roi_pooling +from openvino.opset1.ops import select +from openvino.opset1.ops import selu +from openvino.opset1.ops import shape_of +from openvino.opset1.ops import sigmoid +from openvino.opset1.ops import sign +from openvino.opset1.ops import sin +from openvino.opset1.ops import sinh +from openvino.opset1.ops import softmax +from openvino.opset2.ops import space_to_batch +from openvino.opset1.ops import space_to_depth +from openvino.opset1.ops import split +from openvino.opset1.ops import sqrt +from openvino.opset1.ops import squared_difference +from openvino.opset1.ops import squeeze +from openvino.opset1.ops import strided_slice +from openvino.opset1.ops 
import subtract +from openvino.opset1.ops import tan +from openvino.opset1.ops import tanh +from openvino.opset1.ops import tensor_iterator +from openvino.opset1.ops import tile +from openvino.opset1.ops import topk +from openvino.opset1.ops import transpose +from openvino.opset1.ops import unsqueeze +from openvino.opset1.ops import variadic_split diff --git a/src/bindings/python/src/openvino/runtime/opset2/ops.py b/src/bindings/python/src/openvino/opset2/ops.py similarity index 99% rename from src/bindings/python/src/openvino/runtime/opset2/ops.py rename to src/bindings/python/src/openvino/opset2/ops.py index 287232d114c6bc..45b33f5bc0288b 100644 --- a/src/bindings/python/src/openvino/runtime/opset2/ops.py +++ b/src/bindings/python/src/openvino/opset2/ops.py @@ -10,7 +10,7 @@ import warnings from openvino.runtime import Node, Shape -from openvino.runtime.op import Constant, Parameter +from openvino.op import Constant, Parameter from openvino.runtime.opset_utils import _get_node_factory from openvino.runtime.utils.decorators import binary_op, nameable_op, unary_op from openvino.runtime.utils.input_validation import ( diff --git a/src/bindings/python/src/openvino/opset3/__init__.py b/src/bindings/python/src/openvino/opset3/__init__.py new file mode 100644 index 00000000000000..3a0baa675114f1 --- /dev/null +++ b/src/bindings/python/src/openvino/opset3/__init__.py @@ -0,0 +1,133 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from openvino.opset1.ops import absolute +from openvino.opset1.ops import absolute as abs +from openvino.opset1.ops import acos +from openvino.opset1.ops import add +from openvino.opset1.ops import asin +from openvino.opset3.ops import assign +from openvino.opset1.ops import atan +from openvino.opset1.ops import avg_pool +from openvino.opset1.ops import batch_norm_inference +from openvino.opset2.ops import batch_to_space +from openvino.opset1.ops import binary_convolution +from 
openvino.opset3.ops import broadcast +from openvino.opset3.ops import bucketize +from openvino.opset1.ops import ceiling +from openvino.opset1.ops import ceiling as ceil +from openvino.opset1.ops import clamp +from openvino.opset1.ops import concat +from openvino.opset1.ops import constant +from openvino.opset1.ops import convert +from openvino.opset1.ops import convert_like +from openvino.opset1.ops import convolution +from openvino.opset1.ops import convolution_backprop_data +from openvino.opset1.ops import cos +from openvino.opset1.ops import cosh +from openvino.opset1.ops import ctc_greedy_decoder +from openvino.opset3.ops import cum_sum +from openvino.opset3.ops import cum_sum as cumsum +from openvino.opset1.ops import deformable_convolution +from openvino.opset1.ops import deformable_psroi_pooling +from openvino.opset1.ops import depth_to_space +from openvino.opset1.ops import detection_output +from openvino.opset1.ops import divide +from openvino.opset1.ops import elu +from openvino.opset3.ops import embedding_bag_offsets_sum +from openvino.opset3.ops import embedding_bag_packed_sum +from openvino.opset3.ops import embedding_segments_sum +from openvino.opset3.ops import extract_image_patches +from openvino.opset1.ops import equal +from openvino.opset1.ops import erf +from openvino.opset1.ops import exp +from openvino.opset1.ops import fake_quantize +from openvino.opset1.ops import floor +from openvino.opset1.ops import floor_mod +from openvino.opset1.ops import gather +from openvino.opset1.ops import gather_tree +from openvino.opset2.ops import gelu +from openvino.opset1.ops import greater +from openvino.opset1.ops import greater_equal +from openvino.opset1.ops import grn +from openvino.opset1.ops import group_convolution +from openvino.opset1.ops import group_convolution_backprop_data +from openvino.opset3.ops import gru_cell +from openvino.opset1.ops import hard_sigmoid +from openvino.opset1.ops import interpolate +from openvino.opset1.ops import less 
+from openvino.opset1.ops import less_equal +from openvino.opset1.ops import log +from openvino.opset1.ops import logical_and +from openvino.opset1.ops import logical_not +from openvino.opset1.ops import logical_or +from openvino.opset1.ops import logical_xor +from openvino.opset1.ops import lrn +from openvino.opset1.ops import lstm_cell +from openvino.opset1.ops import matmul +from openvino.opset1.ops import max_pool +from openvino.opset1.ops import maximum +from openvino.opset1.ops import minimum +from openvino.opset1.ops import mod +from openvino.opset1.ops import multiply +from openvino.opset2.ops import mvn +from openvino.opset1.ops import negative +from openvino.opset3.ops import non_max_suppression +from openvino.opset3.ops import non_zero +from openvino.opset1.ops import normalize_l2 +from openvino.opset1.ops import not_equal +from openvino.opset1.ops import one_hot +from openvino.opset1.ops import pad +from openvino.opset1.ops import parameter +from openvino.opset1.ops import power +from openvino.opset1.ops import prelu +from openvino.opset1.ops import prior_box +from openvino.opset1.ops import prior_box_clustered +from openvino.opset1.ops import psroi_pooling +from openvino.opset1.ops import proposal +from openvino.opset1.ops import range +from openvino.opset3.ops import read_value +from openvino.opset1.ops import reduce_logical_and +from openvino.opset1.ops import reduce_logical_or +from openvino.opset1.ops import reduce_max +from openvino.opset1.ops import reduce_mean +from openvino.opset1.ops import reduce_min +from openvino.opset1.ops import reduce_prod +from openvino.opset1.ops import reduce_sum +from openvino.opset1.ops import region_yolo +from openvino.opset2.ops import reorg_yolo +from openvino.opset1.ops import relu +from openvino.opset1.ops import reshape +from openvino.opset1.ops import result +from openvino.opset1.ops import reverse_sequence +from openvino.opset3.ops import rnn_cell +from openvino.opset3.ops import roi_align +from 
openvino.opset2.ops import roi_pooling +from openvino.opset3.ops import scatter_elements_update +from openvino.opset3.ops import scatter_update +from openvino.opset1.ops import select +from openvino.opset1.ops import selu +from openvino.opset3.ops import shape_of +from openvino.opset3.ops import shuffle_channels +from openvino.opset1.ops import sigmoid +from openvino.opset1.ops import sign +from openvino.opset1.ops import sin +from openvino.opset1.ops import sinh +from openvino.opset1.ops import softmax +from openvino.opset2.ops import space_to_batch +from openvino.opset1.ops import space_to_depth +from openvino.opset1.ops import split +from openvino.opset1.ops import sqrt +from openvino.opset1.ops import squared_difference +from openvino.opset1.ops import squeeze +from openvino.opset1.ops import strided_slice +from openvino.opset1.ops import subtract +from openvino.opset1.ops import tan +from openvino.opset1.ops import tanh +from openvino.opset1.ops import tensor_iterator +from openvino.opset1.ops import tile +from openvino.opset3.ops import topk +from openvino.opset1.ops import transpose +from openvino.opset1.ops import unsqueeze +from openvino.opset1.ops import variadic_split diff --git a/src/bindings/python/src/openvino/runtime/opset3/ops.py b/src/bindings/python/src/openvino/opset3/ops.py similarity index 99% rename from src/bindings/python/src/openvino/runtime/opset3/ops.py rename to src/bindings/python/src/openvino/opset3/ops.py index d3403dbbbd2be8..989f5819acb685 100644 --- a/src/bindings/python/src/openvino/runtime/opset3/ops.py +++ b/src/bindings/python/src/openvino/opset3/ops.py @@ -9,7 +9,7 @@ from functools import partial from openvino.runtime import Node, Shape -from openvino.runtime.op import Constant, Parameter +from openvino.op import Constant, Parameter from openvino.runtime.opset_utils import _get_node_factory from openvino.runtime.utils.decorators import binary_op, nameable_op, unary_op from openvino.runtime.utils.input_validation import ( diff 
--git a/src/bindings/python/src/openvino/opset4/__init__.py b/src/bindings/python/src/openvino/opset4/__init__.py new file mode 100644 index 00000000000000..6096ec431c796e --- /dev/null +++ b/src/bindings/python/src/openvino/opset4/__init__.py @@ -0,0 +1,144 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from openvino.opset1.ops import absolute +from openvino.opset1.ops import absolute as abs +from openvino.opset1.ops import acos +from openvino.opset4.ops import acosh +from openvino.opset1.ops import add +from openvino.opset1.ops import asin +from openvino.opset4.ops import asinh +from openvino.opset3.ops import assign +from openvino.opset1.ops import atan +from openvino.opset4.ops import atanh +from openvino.opset1.ops import avg_pool +from openvino.opset1.ops import batch_norm_inference +from openvino.opset2.ops import batch_to_space +from openvino.opset1.ops import binary_convolution +from openvino.opset3.ops import broadcast +from openvino.opset3.ops import bucketize +from openvino.opset1.ops import ceiling +from openvino.opset1.ops import ceiling as ceil +from openvino.opset1.ops import clamp +from openvino.opset1.ops import concat +from openvino.opset1.ops import constant +from openvino.opset1.ops import convert +from openvino.opset1.ops import convert_like +from openvino.opset1.ops import convolution +from openvino.opset1.ops import convolution_backprop_data +from openvino.opset1.ops import cos +from openvino.opset1.ops import cosh +from openvino.opset1.ops import ctc_greedy_decoder +from openvino.opset4.ops import ctc_loss +from openvino.opset3.ops import cum_sum +from openvino.opset3.ops import cum_sum as cumsum +from openvino.opset1.ops import deformable_convolution +from openvino.opset1.ops import deformable_psroi_pooling +from openvino.opset1.ops import depth_to_space +from openvino.opset1.ops import detection_output +from openvino.opset1.ops import divide +from openvino.opset1.ops 
import elu +from openvino.opset3.ops import embedding_bag_offsets_sum +from openvino.opset3.ops import embedding_bag_packed_sum +from openvino.opset3.ops import embedding_segments_sum +from openvino.opset3.ops import extract_image_patches +from openvino.opset1.ops import equal +from openvino.opset1.ops import erf +from openvino.opset1.ops import exp +from openvino.opset1.ops import fake_quantize +from openvino.opset1.ops import floor +from openvino.opset1.ops import floor_mod +from openvino.opset1.ops import gather +from openvino.opset1.ops import gather_tree +from openvino.opset2.ops import gelu +from openvino.opset1.ops import greater +from openvino.opset1.ops import greater_equal +from openvino.opset1.ops import grn +from openvino.opset1.ops import group_convolution +from openvino.opset1.ops import group_convolution_backprop_data +from openvino.opset3.ops import gru_cell +from openvino.opset1.ops import hard_sigmoid +from openvino.opset4.ops import hswish +from openvino.opset1.ops import interpolate +from openvino.opset1.ops import less +from openvino.opset1.ops import less_equal +from openvino.opset1.ops import log +from openvino.opset1.ops import logical_and +from openvino.opset1.ops import logical_not +from openvino.opset1.ops import logical_or +from openvino.opset1.ops import logical_xor +from openvino.opset1.ops import lrn +from openvino.opset4.ops import lstm_cell +from openvino.opset1.ops import matmul +from openvino.opset1.ops import max_pool +from openvino.opset1.ops import maximum +from openvino.opset1.ops import minimum +from openvino.opset4.ops import mish +from openvino.opset1.ops import mod +from openvino.opset1.ops import multiply +from openvino.opset2.ops import mvn +from openvino.opset1.ops import negative +from openvino.opset4.ops import non_max_suppression +from openvino.opset3.ops import non_zero +from openvino.opset1.ops import normalize_l2 +from openvino.opset1.ops import not_equal +from openvino.opset1.ops import one_hot +from 
openvino.opset1.ops import pad +from openvino.opset1.ops import parameter +from openvino.opset1.ops import power +from openvino.opset1.ops import prelu +from openvino.opset1.ops import prior_box +from openvino.opset1.ops import prior_box_clustered +from openvino.opset1.ops import psroi_pooling +from openvino.opset4.ops import proposal +from openvino.opset1.ops import range +from openvino.opset3.ops import read_value +from openvino.opset4.ops import reduce_l1 +from openvino.opset4.ops import reduce_l2 +from openvino.opset1.ops import reduce_logical_and +from openvino.opset1.ops import reduce_logical_or +from openvino.opset1.ops import reduce_max +from openvino.opset1.ops import reduce_mean +from openvino.opset1.ops import reduce_min +from openvino.opset1.ops import reduce_prod +from openvino.opset1.ops import reduce_sum +from openvino.opset1.ops import region_yolo +from openvino.opset2.ops import reorg_yolo +from openvino.opset1.ops import relu +from openvino.opset1.ops import reshape +from openvino.opset1.ops import result +from openvino.opset1.ops import reverse_sequence +from openvino.opset3.ops import rnn_cell +from openvino.opset3.ops import roi_align +from openvino.opset2.ops import roi_pooling +from openvino.opset3.ops import scatter_elements_update +from openvino.opset4.ops import scatter_nd_update +from openvino.opset3.ops import scatter_update +from openvino.opset1.ops import select +from openvino.opset1.ops import selu +from openvino.opset3.ops import shape_of +from openvino.opset3.ops import shuffle_channels +from openvino.opset1.ops import sigmoid +from openvino.opset1.ops import sign +from openvino.opset1.ops import sin +from openvino.opset1.ops import sinh +from openvino.opset1.ops import softmax +from openvino.opset4.ops import softplus +from openvino.opset2.ops import space_to_batch +from openvino.opset1.ops import space_to_depth +from openvino.opset1.ops import split +from openvino.opset1.ops import sqrt +from openvino.opset1.ops import 
squared_difference +from openvino.opset1.ops import squeeze +from openvino.opset1.ops import strided_slice +from openvino.opset1.ops import subtract +from openvino.opset4.ops import swish +from openvino.opset1.ops import tan +from openvino.opset1.ops import tanh +from openvino.opset1.ops import tensor_iterator +from openvino.opset1.ops import tile +from openvino.opset3.ops import topk +from openvino.opset1.ops import transpose +from openvino.opset1.ops import unsqueeze +from openvino.opset1.ops import variadic_split diff --git a/src/bindings/python/src/openvino/runtime/opset4/ops.py b/src/bindings/python/src/openvino/opset4/ops.py similarity index 99% rename from src/bindings/python/src/openvino/runtime/opset4/ops.py rename to src/bindings/python/src/openvino/opset4/ops.py index dce4879bdb38f6..4f6ba016852b02 100644 --- a/src/bindings/python/src/openvino/runtime/opset4/ops.py +++ b/src/bindings/python/src/openvino/opset4/ops.py @@ -9,7 +9,7 @@ from functools import partial from openvino.runtime import Node, Shape -from openvino.runtime.op import Constant, Parameter +from openvino.op import Constant, Parameter from openvino.runtime.opset_utils import _get_node_factory from openvino.runtime.utils.decorators import binary_op, nameable_op, unary_op from openvino.runtime.utils.input_validation import ( diff --git a/src/bindings/python/src/openvino/opset5/__init__.py b/src/bindings/python/src/openvino/opset5/__init__.py new file mode 100644 index 00000000000000..202b8137093f57 --- /dev/null +++ b/src/bindings/python/src/openvino/opset5/__init__.py @@ -0,0 +1,152 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from openvino.opset1.ops import absolute +from openvino.opset1.ops import absolute as abs +from openvino.opset1.ops import acos +from openvino.opset4.ops import acosh +from openvino.opset1.ops import add +from openvino.opset1.ops import asin +from openvino.opset4.ops import asinh +from 
openvino.opset3.ops import assign +from openvino.opset1.ops import atan +from openvino.opset4.ops import atanh +from openvino.opset1.ops import avg_pool +from openvino.opset5.ops import batch_norm_inference +from openvino.opset2.ops import batch_to_space +from openvino.opset1.ops import binary_convolution +from openvino.opset3.ops import broadcast +from openvino.opset3.ops import bucketize +from openvino.opset1.ops import ceiling +from openvino.opset1.ops import ceiling as ceil +from openvino.opset1.ops import clamp +from openvino.opset1.ops import concat +from openvino.opset1.ops import constant +from openvino.opset1.ops import convert +from openvino.opset1.ops import convert_like +from openvino.opset1.ops import convolution +from openvino.opset1.ops import convolution_backprop_data +from openvino.opset1.ops import cos +from openvino.opset1.ops import cosh +from openvino.opset1.ops import ctc_greedy_decoder +from openvino.opset4.ops import ctc_loss +from openvino.opset3.ops import cum_sum +from openvino.opset3.ops import cum_sum as cumsum +from openvino.opset1.ops import deformable_convolution +from openvino.opset1.ops import deformable_psroi_pooling +from openvino.opset1.ops import depth_to_space +from openvino.opset1.ops import detection_output +from openvino.opset1.ops import divide +from openvino.opset1.ops import elu +from openvino.opset3.ops import embedding_bag_offsets_sum +from openvino.opset3.ops import embedding_bag_packed_sum +from openvino.opset3.ops import embedding_segments_sum +from openvino.opset3.ops import extract_image_patches +from openvino.opset1.ops import equal +from openvino.opset1.ops import erf +from openvino.opset1.ops import exp +from openvino.opset1.ops import fake_quantize +from openvino.opset1.ops import floor +from openvino.opset1.ops import floor_mod +from openvino.opset1.ops import gather +from openvino.opset5.ops import gather_nd +from openvino.opset1.ops import gather_tree +from openvino.opset2.ops import gelu +from 
openvino.opset1.ops import greater +from openvino.opset1.ops import greater_equal +from openvino.opset1.ops import grn +from openvino.opset1.ops import group_convolution +from openvino.opset1.ops import group_convolution_backprop_data +from openvino.opset3.ops import gru_cell +from openvino.opset5.ops import gru_sequence +from openvino.opset1.ops import hard_sigmoid +from openvino.opset5.ops import hsigmoid +from openvino.opset4.ops import hswish +from openvino.opset1.ops import interpolate +from openvino.opset1.ops import less +from openvino.opset1.ops import less_equal +from openvino.opset1.ops import log +from openvino.opset1.ops import logical_and +from openvino.opset1.ops import logical_not +from openvino.opset1.ops import logical_or +from openvino.opset1.ops import logical_xor +from openvino.opset5.ops import log_softmax +from openvino.opset5.ops import loop +from openvino.opset1.ops import lrn +from openvino.opset4.ops import lstm_cell +from openvino.opset5.ops import lstm_sequence +from openvino.opset1.ops import matmul +from openvino.opset1.ops import max_pool +from openvino.opset1.ops import maximum +from openvino.opset1.ops import minimum +from openvino.opset4.ops import mish +from openvino.opset1.ops import mod +from openvino.opset1.ops import multiply +from openvino.opset2.ops import mvn +from openvino.opset1.ops import negative +from openvino.opset5.ops import non_max_suppression +from openvino.opset3.ops import non_zero +from openvino.opset1.ops import normalize_l2 +from openvino.opset1.ops import not_equal +from openvino.opset1.ops import one_hot +from openvino.opset1.ops import pad +from openvino.opset1.ops import parameter +from openvino.opset1.ops import power +from openvino.opset1.ops import prelu +from openvino.opset1.ops import prior_box +from openvino.opset1.ops import prior_box_clustered +from openvino.opset1.ops import psroi_pooling +from openvino.opset4.ops import proposal +from openvino.opset1.ops import range +from openvino.opset3.ops 
import read_value +from openvino.opset4.ops import reduce_l1 +from openvino.opset4.ops import reduce_l2 +from openvino.opset1.ops import reduce_logical_and +from openvino.opset1.ops import reduce_logical_or +from openvino.opset1.ops import reduce_max +from openvino.opset1.ops import reduce_mean +from openvino.opset1.ops import reduce_min +from openvino.opset1.ops import reduce_prod +from openvino.opset1.ops import reduce_sum +from openvino.opset1.ops import region_yolo +from openvino.opset2.ops import reorg_yolo +from openvino.opset1.ops import relu +from openvino.opset1.ops import reshape +from openvino.opset1.ops import result +from openvino.opset1.ops import reverse_sequence +from openvino.opset3.ops import rnn_cell +from openvino.opset5.ops import rnn_sequence +from openvino.opset3.ops import roi_align +from openvino.opset2.ops import roi_pooling +from openvino.opset5.ops import round +from openvino.opset3.ops import scatter_elements_update +from openvino.opset4.ops import scatter_nd_update +from openvino.opset3.ops import scatter_update +from openvino.opset1.ops import select +from openvino.opset1.ops import selu +from openvino.opset3.ops import shape_of +from openvino.opset3.ops import shuffle_channels +from openvino.opset1.ops import sigmoid +from openvino.opset1.ops import sign +from openvino.opset1.ops import sin +from openvino.opset1.ops import sinh +from openvino.opset1.ops import softmax +from openvino.opset4.ops import softplus +from openvino.opset2.ops import space_to_batch +from openvino.opset1.ops import space_to_depth +from openvino.opset1.ops import split +from openvino.opset1.ops import sqrt +from openvino.opset1.ops import squared_difference +from openvino.opset1.ops import squeeze +from openvino.opset1.ops import strided_slice +from openvino.opset1.ops import subtract +from openvino.opset4.ops import swish +from openvino.opset1.ops import tan +from openvino.opset1.ops import tanh +from openvino.opset1.ops import tensor_iterator +from 
openvino.opset1.ops import tile +from openvino.opset3.ops import topk +from openvino.opset1.ops import transpose +from openvino.opset1.ops import unsqueeze +from openvino.opset1.ops import variadic_split diff --git a/src/bindings/python/src/openvino/runtime/opset5/ops.py b/src/bindings/python/src/openvino/opset5/ops.py similarity index 99% rename from src/bindings/python/src/openvino/runtime/opset5/ops.py rename to src/bindings/python/src/openvino/opset5/ops.py index 11e05aa1b7bcb4..20057b78c7c31d 100644 --- a/src/bindings/python/src/openvino/runtime/opset5/ops.py +++ b/src/bindings/python/src/openvino/opset5/ops.py @@ -9,7 +9,7 @@ from functools import partial from openvino.runtime import Node, Shape -from openvino.runtime.op import Constant, Parameter, loop +from openvino.op import Constant, Parameter, loop from openvino.runtime.opset_utils import _get_node_factory from openvino.runtime.utils.decorators import binary_op, nameable_op, unary_op from openvino.runtime.utils.input_validation import ( diff --git a/src/bindings/python/src/openvino/opset6/__init__.py b/src/bindings/python/src/openvino/opset6/__init__.py new file mode 100644 index 00000000000000..315d80bc025a38 --- /dev/null +++ b/src/bindings/python/src/openvino/opset6/__init__.py @@ -0,0 +1,154 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from openvino.opset1.ops import absolute +from openvino.opset1.ops import absolute as abs +from openvino.opset1.ops import acos +from openvino.opset4.ops import acosh +from openvino.opset1.ops import add +from openvino.opset1.ops import asin +from openvino.opset4.ops import asinh +from openvino.opset6.ops import assign +from openvino.opset1.ops import atan +from openvino.opset4.ops import atanh +from openvino.opset1.ops import avg_pool +from openvino.opset5.ops import batch_norm_inference +from openvino.opset2.ops import batch_to_space +from openvino.opset1.ops import binary_convolution +from 
openvino.opset3.ops import broadcast +from openvino.opset3.ops import bucketize +from openvino.opset1.ops import ceiling +from openvino.opset1.ops import ceiling as ceil +from openvino.opset1.ops import clamp +from openvino.opset1.ops import concat +from openvino.opset1.ops import constant +from openvino.opset1.ops import convert +from openvino.opset1.ops import convert_like +from openvino.opset1.ops import convolution +from openvino.opset1.ops import convolution_backprop_data +from openvino.opset1.ops import cos +from openvino.opset1.ops import cosh +from openvino.opset1.ops import ctc_greedy_decoder +from openvino.opset6.ops import ctc_greedy_decoder_seq_len +from openvino.opset4.ops import ctc_loss +from openvino.opset3.ops import cum_sum +from openvino.opset3.ops import cum_sum as cumsum +from openvino.opset1.ops import deformable_convolution +from openvino.opset1.ops import deformable_psroi_pooling +from openvino.opset1.ops import depth_to_space +from openvino.opset1.ops import detection_output +from openvino.opset1.ops import divide +from openvino.opset1.ops import elu +from openvino.opset3.ops import embedding_bag_offsets_sum +from openvino.opset3.ops import embedding_bag_packed_sum +from openvino.opset3.ops import embedding_segments_sum +from openvino.opset3.ops import extract_image_patches +from openvino.opset1.ops import equal +from openvino.opset1.ops import erf +from openvino.opset1.ops import exp +from openvino.opset1.ops import fake_quantize +from openvino.opset1.ops import floor +from openvino.opset1.ops import floor_mod +from openvino.opset1.ops import gather +from openvino.opset6.ops import gather_elements +from openvino.opset5.ops import gather_nd +from openvino.opset1.ops import gather_tree +from openvino.opset2.ops import gelu +from openvino.opset1.ops import greater +from openvino.opset1.ops import greater_equal +from openvino.opset1.ops import grn +from openvino.opset1.ops import group_convolution +from openvino.opset1.ops import 
group_convolution_backprop_data +from openvino.opset3.ops import gru_cell +from openvino.opset5.ops import gru_sequence +from openvino.opset1.ops import hard_sigmoid +from openvino.opset5.ops import hsigmoid +from openvino.opset4.ops import hswish +from openvino.opset1.ops import interpolate +from openvino.opset1.ops import less +from openvino.opset1.ops import less_equal +from openvino.opset1.ops import log +from openvino.opset1.ops import logical_and +from openvino.opset1.ops import logical_not +from openvino.opset1.ops import logical_or +from openvino.opset1.ops import logical_xor +from openvino.opset5.ops import log_softmax +from openvino.opset5.ops import loop +from openvino.opset1.ops import lrn +from openvino.opset4.ops import lstm_cell +from openvino.opset5.ops import lstm_sequence +from openvino.opset1.ops import matmul +from openvino.opset1.ops import max_pool +from openvino.opset1.ops import maximum +from openvino.opset1.ops import minimum +from openvino.opset4.ops import mish +from openvino.opset1.ops import mod +from openvino.opset1.ops import multiply +from openvino.opset6.ops import mvn +from openvino.opset1.ops import negative +from openvino.opset5.ops import non_max_suppression +from openvino.opset3.ops import non_zero +from openvino.opset1.ops import normalize_l2 +from openvino.opset1.ops import not_equal +from openvino.opset1.ops import one_hot +from openvino.opset1.ops import pad +from openvino.opset1.ops import parameter +from openvino.opset1.ops import power +from openvino.opset1.ops import prelu +from openvino.opset1.ops import prior_box +from openvino.opset1.ops import prior_box_clustered +from openvino.opset1.ops import psroi_pooling +from openvino.opset4.ops import proposal +from openvino.opset1.ops import range +from openvino.opset6.ops import read_value +from openvino.opset4.ops import reduce_l1 +from openvino.opset4.ops import reduce_l2 +from openvino.opset1.ops import reduce_logical_and +from openvino.opset1.ops import 
reduce_logical_or +from openvino.opset1.ops import reduce_max +from openvino.opset1.ops import reduce_mean +from openvino.opset1.ops import reduce_min +from openvino.opset1.ops import reduce_prod +from openvino.opset1.ops import reduce_sum +from openvino.opset1.ops import region_yolo +from openvino.opset2.ops import reorg_yolo +from openvino.opset1.ops import relu +from openvino.opset1.ops import reshape +from openvino.opset1.ops import result +from openvino.opset1.ops import reverse_sequence +from openvino.opset3.ops import rnn_cell +from openvino.opset5.ops import rnn_sequence +from openvino.opset3.ops import roi_align +from openvino.opset2.ops import roi_pooling +from openvino.opset5.ops import round +from openvino.opset3.ops import scatter_elements_update +from openvino.opset4.ops import scatter_nd_update +from openvino.opset3.ops import scatter_update +from openvino.opset1.ops import select +from openvino.opset1.ops import selu +from openvino.opset3.ops import shape_of +from openvino.opset3.ops import shuffle_channels +from openvino.opset1.ops import sigmoid +from openvino.opset1.ops import sign +from openvino.opset1.ops import sin +from openvino.opset1.ops import sinh +from openvino.opset1.ops import softmax +from openvino.opset4.ops import softplus +from openvino.opset2.ops import space_to_batch +from openvino.opset1.ops import space_to_depth +from openvino.opset1.ops import split +from openvino.opset1.ops import sqrt +from openvino.opset1.ops import squared_difference +from openvino.opset1.ops import squeeze +from openvino.opset1.ops import strided_slice +from openvino.opset1.ops import subtract +from openvino.opset4.ops import swish +from openvino.opset1.ops import tan +from openvino.opset1.ops import tanh +from openvino.opset1.ops import tensor_iterator +from openvino.opset1.ops import tile +from openvino.opset3.ops import topk +from openvino.opset1.ops import transpose +from openvino.opset1.ops import unsqueeze +from openvino.opset1.ops import 
variadic_split diff --git a/src/bindings/python/src/openvino/runtime/opset6/ops.py b/src/bindings/python/src/openvino/opset6/ops.py similarity index 97% rename from src/bindings/python/src/openvino/runtime/opset6/ops.py rename to src/bindings/python/src/openvino/opset6/ops.py index 3209d7a8a53c99..8020715f20dea3 100644 --- a/src/bindings/python/src/openvino/runtime/opset6/ops.py +++ b/src/bindings/python/src/openvino/opset6/ops.py @@ -10,9 +10,9 @@ from functools import partial, singledispatch from openvino.runtime import Node, Type, PartialShape, Output, Shape -from openvino.runtime.op import assign, Constant, Parameter -from openvino.runtime.op import read_value as _read_value -from openvino.runtime.op.util import VariableInfo, Variable +from openvino.op import assign, Constant, Parameter +from openvino.op import read_value as _read_value +from openvino.op.util import VariableInfo, Variable from openvino.runtime.opset_utils import _get_node_factory from openvino.runtime.utils.decorators import nameable_op, overloading from openvino.runtime.utils.types import ( diff --git a/src/bindings/python/src/openvino/opset7/__init__.py b/src/bindings/python/src/openvino/opset7/__init__.py new file mode 100644 index 00000000000000..16703072285487 --- /dev/null +++ b/src/bindings/python/src/openvino/opset7/__init__.py @@ -0,0 +1,158 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from openvino.opset1.ops import absolute +from openvino.opset1.ops import absolute as abs +from openvino.opset1.ops import acos +from openvino.opset4.ops import acosh +from openvino.opset1.ops import add +from openvino.opset1.ops import asin +from openvino.opset4.ops import asinh +from openvino.opset6.ops import assign +from openvino.opset1.ops import atan +from openvino.opset4.ops import atanh +from openvino.opset1.ops import avg_pool +from openvino.opset5.ops import batch_norm_inference +from openvino.opset2.ops import batch_to_space 
+from openvino.opset1.ops import binary_convolution +from openvino.opset3.ops import broadcast +from openvino.opset3.ops import bucketize +from openvino.opset1.ops import ceiling +from openvino.opset1.ops import ceiling as ceil +from openvino.opset1.ops import clamp +from openvino.opset1.ops import concat +from openvino.opset1.ops import constant +from openvino.opset1.ops import convert +from openvino.opset1.ops import convert_like +from openvino.opset1.ops import convolution +from openvino.opset1.ops import convolution_backprop_data +from openvino.opset1.ops import cos +from openvino.opset1.ops import cosh +from openvino.opset1.ops import ctc_greedy_decoder +from openvino.opset6.ops import ctc_greedy_decoder_seq_len +from openvino.opset4.ops import ctc_loss +from openvino.opset3.ops import cum_sum +from openvino.opset3.ops import cum_sum as cumsum +from openvino.opset1.ops import deformable_convolution +from openvino.opset1.ops import deformable_psroi_pooling +from openvino.opset1.ops import depth_to_space +from openvino.opset1.ops import detection_output +from openvino.opset7.ops import dft +from openvino.opset1.ops import divide +from openvino.opset7.ops import einsum +from openvino.opset1.ops import elu +from openvino.opset3.ops import embedding_bag_offsets_sum +from openvino.opset3.ops import embedding_bag_packed_sum +from openvino.opset3.ops import embedding_segments_sum +from openvino.opset3.ops import extract_image_patches +from openvino.opset1.ops import equal +from openvino.opset1.ops import erf +from openvino.opset1.ops import exp +from openvino.opset1.ops import fake_quantize +from openvino.opset1.ops import floor +from openvino.opset1.ops import floor_mod +from openvino.opset7.ops import gather +from openvino.opset6.ops import gather_elements +from openvino.opset5.ops import gather_nd +from openvino.opset1.ops import gather_tree +from openvino.opset7.ops import gelu +from openvino.opset1.ops import greater +from openvino.opset1.ops import greater_equal 
+from openvino.opset1.ops import grn +from openvino.opset1.ops import group_convolution +from openvino.opset1.ops import group_convolution_backprop_data +from openvino.opset3.ops import gru_cell +from openvino.opset5.ops import gru_sequence +from openvino.opset1.ops import hard_sigmoid +from openvino.opset5.ops import hsigmoid +from openvino.opset4.ops import hswish +from openvino.opset7.ops import idft +from openvino.opset1.ops import interpolate +from openvino.opset1.ops import less +from openvino.opset1.ops import less_equal +from openvino.opset1.ops import log +from openvino.opset1.ops import logical_and +from openvino.opset1.ops import logical_not +from openvino.opset1.ops import logical_or +from openvino.opset1.ops import logical_xor +from openvino.opset5.ops import log_softmax +from openvino.opset5.ops import loop +from openvino.opset1.ops import lrn +from openvino.opset4.ops import lstm_cell +from openvino.opset5.ops import lstm_sequence +from openvino.opset1.ops import matmul +from openvino.opset1.ops import max_pool +from openvino.opset1.ops import maximum +from openvino.opset1.ops import minimum +from openvino.opset4.ops import mish +from openvino.opset1.ops import mod +from openvino.opset1.ops import multiply +from openvino.opset6.ops import mvn +from openvino.opset1.ops import negative +from openvino.opset5.ops import non_max_suppression +from openvino.opset3.ops import non_zero +from openvino.opset1.ops import normalize_l2 +from openvino.opset1.ops import not_equal +from openvino.opset1.ops import one_hot +from openvino.opset1.ops import pad +from openvino.opset1.ops import parameter +from openvino.opset1.ops import power +from openvino.opset1.ops import prelu +from openvino.opset1.ops import prior_box +from openvino.opset1.ops import prior_box_clustered +from openvino.opset1.ops import psroi_pooling +from openvino.opset4.ops import proposal +from openvino.opset1.ops import range +from openvino.opset6.ops import read_value +from openvino.opset4.ops 
import reduce_l1 +from openvino.opset4.ops import reduce_l2 +from openvino.opset1.ops import reduce_logical_and +from openvino.opset1.ops import reduce_logical_or +from openvino.opset1.ops import reduce_max +from openvino.opset1.ops import reduce_mean +from openvino.opset1.ops import reduce_min +from openvino.opset1.ops import reduce_prod +from openvino.opset1.ops import reduce_sum +from openvino.opset1.ops import region_yolo +from openvino.opset2.ops import reorg_yolo +from openvino.opset1.ops import relu +from openvino.opset1.ops import reshape +from openvino.opset1.ops import result +from openvino.opset1.ops import reverse_sequence +from openvino.opset3.ops import rnn_cell +from openvino.opset5.ops import rnn_sequence +from openvino.opset3.ops import roi_align +from openvino.opset2.ops import roi_pooling +from openvino.opset7.ops import roll +from openvino.opset5.ops import round +from openvino.opset3.ops import scatter_elements_update +from openvino.opset4.ops import scatter_nd_update +from openvino.opset3.ops import scatter_update +from openvino.opset1.ops import select +from openvino.opset1.ops import selu +from openvino.opset3.ops import shape_of +from openvino.opset3.ops import shuffle_channels +from openvino.opset1.ops import sigmoid +from openvino.opset1.ops import sign +from openvino.opset1.ops import sin +from openvino.opset1.ops import sinh +from openvino.opset1.ops import softmax +from openvino.opset4.ops import softplus +from openvino.opset2.ops import space_to_batch +from openvino.opset1.ops import space_to_depth +from openvino.opset1.ops import split +from openvino.opset1.ops import sqrt +from openvino.opset1.ops import squared_difference +from openvino.opset1.ops import squeeze +from openvino.opset1.ops import strided_slice +from openvino.opset1.ops import subtract +from openvino.opset4.ops import swish +from openvino.opset1.ops import tan +from openvino.opset1.ops import tanh +from openvino.opset1.ops import tensor_iterator +from 
openvino.opset1.ops import tile +from openvino.opset3.ops import topk +from openvino.opset1.ops import transpose +from openvino.opset1.ops import unsqueeze +from openvino.opset1.ops import variadic_split diff --git a/src/bindings/python/src/openvino/runtime/opset7/ops.py b/src/bindings/python/src/openvino/opset7/ops.py similarity index 98% rename from src/bindings/python/src/openvino/runtime/opset7/ops.py rename to src/bindings/python/src/openvino/opset7/ops.py index 7cadaa42b80443..59e09b64888eb1 100644 --- a/src/bindings/python/src/openvino/runtime/opset7/ops.py +++ b/src/bindings/python/src/openvino/opset7/ops.py @@ -8,7 +8,7 @@ import numpy as np from openvino.runtime import Node, Shape -from openvino.runtime.op import Constant, Parameter +from openvino.op import Constant, Parameter from openvino.runtime.opset_utils import _get_node_factory from openvino.runtime.utils.decorators import binary_op, nameable_op, unary_op from openvino.runtime.utils.input_validation import ( diff --git a/src/bindings/python/src/openvino/opset8/__init__.py b/src/bindings/python/src/openvino/opset8/__init__.py new file mode 100644 index 00000000000000..e45c37863b193b --- /dev/null +++ b/src/bindings/python/src/openvino/opset8/__init__.py @@ -0,0 +1,169 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from openvino.opset1.ops import absolute +from openvino.opset1.ops import absolute as abs +from openvino.opset1.ops import acos +from openvino.opset4.ops import acosh +from openvino.opset8.ops import adaptive_avg_pool +from openvino.opset8.ops import adaptive_max_pool +from openvino.opset1.ops import add +from openvino.opset1.ops import asin +from openvino.opset4.ops import asinh +from openvino.opset6.ops import assign +from openvino.opset1.ops import atan +from openvino.opset4.ops import atanh +from openvino.opset1.ops import avg_pool +from openvino.opset5.ops import batch_norm_inference +from openvino.opset2.ops import 
batch_to_space +from openvino.opset1.ops import binary_convolution +from openvino.opset3.ops import broadcast +from openvino.opset3.ops import bucketize +from openvino.opset1.ops import ceiling +from openvino.opset1.ops import ceiling as ceil +from openvino.opset1.ops import clamp +from openvino.opset1.ops import concat +from openvino.opset1.ops import constant +from openvino.opset1.ops import convert +from openvino.opset1.ops import convert_like +from openvino.opset1.ops import convolution +from openvino.opset1.ops import convolution_backprop_data +from openvino.opset1.ops import cos +from openvino.opset1.ops import cosh +from openvino.opset1.ops import ctc_greedy_decoder +from openvino.opset6.ops import ctc_greedy_decoder_seq_len +from openvino.opset4.ops import ctc_loss +from openvino.opset3.ops import cum_sum +from openvino.opset3.ops import cum_sum as cumsum +from openvino.opset8.ops import deformable_convolution +from openvino.opset1.ops import deformable_psroi_pooling +from openvino.opset1.ops import depth_to_space +from openvino.opset8.ops import detection_output +from openvino.opset7.ops import dft +from openvino.opset1.ops import divide +from openvino.opset7.ops import einsum +from openvino.opset1.ops import elu +from openvino.opset3.ops import embedding_bag_offsets_sum +from openvino.opset3.ops import embedding_bag_packed_sum +from openvino.opset3.ops import embedding_segments_sum +from openvino.opset3.ops import extract_image_patches +from openvino.opset1.ops import equal +from openvino.opset1.ops import erf +from openvino.opset1.ops import exp +from openvino.opset1.ops import fake_quantize +from openvino.opset1.ops import floor +from openvino.opset1.ops import floor_mod +from openvino.opset8.ops import gather +from openvino.opset6.ops import gather_elements +from openvino.opset8.ops import gather_nd +from openvino.opset1.ops import gather_tree +from openvino.opset7.ops import gelu +from openvino.opset1.ops import greater +from openvino.opset1.ops 
import greater_equal +from openvino.opset1.ops import grn +from openvino.opset1.ops import group_convolution +from openvino.opset1.ops import group_convolution_backprop_data +from openvino.opset3.ops import gru_cell +from openvino.opset5.ops import gru_sequence +from openvino.opset1.ops import hard_sigmoid +from openvino.opset5.ops import hsigmoid +from openvino.opset4.ops import hswish +from openvino.opset7.ops import idft +from openvino.opset8.ops import if_op +from openvino.opset1.ops import interpolate +from openvino.opset8.ops import i420_to_bgr +from openvino.opset8.ops import i420_to_rgb +from openvino.opset1.ops import less +from openvino.opset1.ops import less_equal +from openvino.opset1.ops import log +from openvino.opset1.ops import logical_and +from openvino.opset1.ops import logical_not +from openvino.opset1.ops import logical_or +from openvino.opset1.ops import logical_xor +from openvino.opset5.ops import log_softmax +from openvino.opset5.ops import loop +from openvino.opset1.ops import lrn +from openvino.opset4.ops import lstm_cell +from openvino.opset5.ops import lstm_sequence +from openvino.opset1.ops import matmul +from openvino.opset8.ops import matrix_nms +from openvino.opset8.ops import max_pool +from openvino.opset1.ops import maximum +from openvino.opset1.ops import minimum +from openvino.opset4.ops import mish +from openvino.opset1.ops import mod +from openvino.opset8.ops import multiclass_nms +from openvino.opset1.ops import multiply +from openvino.opset6.ops import mvn +from openvino.opset1.ops import negative +from openvino.opset5.ops import non_max_suppression +from openvino.opset3.ops import non_zero +from openvino.opset1.ops import normalize_l2 +from openvino.opset1.ops import not_equal +from openvino.opset8.ops import nv12_to_bgr +from openvino.opset8.ops import nv12_to_rgb +from openvino.opset1.ops import one_hot +from openvino.opset1.ops import pad +from openvino.opset1.ops import parameter +from openvino.opset1.ops import power 
+from openvino.opset1.ops import prelu +from openvino.opset8.ops import prior_box +from openvino.opset1.ops import prior_box_clustered +from openvino.opset1.ops import psroi_pooling +from openvino.opset4.ops import proposal +from openvino.opset1.ops import range +from openvino.opset8.ops import random_uniform +from openvino.opset6.ops import read_value +from openvino.opset4.ops import reduce_l1 +from openvino.opset4.ops import reduce_l2 +from openvino.opset1.ops import reduce_logical_and +from openvino.opset1.ops import reduce_logical_or +from openvino.opset1.ops import reduce_max +from openvino.opset1.ops import reduce_mean +from openvino.opset1.ops import reduce_min +from openvino.opset1.ops import reduce_prod +from openvino.opset1.ops import reduce_sum +from openvino.opset1.ops import region_yolo +from openvino.opset2.ops import reorg_yolo +from openvino.opset1.ops import relu +from openvino.opset1.ops import reshape +from openvino.opset1.ops import result +from openvino.opset1.ops import reverse_sequence +from openvino.opset3.ops import rnn_cell +from openvino.opset5.ops import rnn_sequence +from openvino.opset3.ops import roi_align +from openvino.opset2.ops import roi_pooling +from openvino.opset7.ops import roll +from openvino.opset5.ops import round +from openvino.opset3.ops import scatter_elements_update +from openvino.opset4.ops import scatter_nd_update +from openvino.opset3.ops import scatter_update +from openvino.opset1.ops import select +from openvino.opset1.ops import selu +from openvino.opset3.ops import shape_of +from openvino.opset3.ops import shuffle_channels +from openvino.opset1.ops import sigmoid +from openvino.opset1.ops import sign +from openvino.opset1.ops import sin +from openvino.opset1.ops import sinh +from openvino.opset8.ops import slice +from openvino.opset8.ops import softmax +from openvino.opset4.ops import softplus +from openvino.opset2.ops import space_to_batch +from openvino.opset1.ops import space_to_depth +from 
openvino.opset1.ops import split +from openvino.opset1.ops import sqrt +from openvino.opset1.ops import squared_difference +from openvino.opset1.ops import squeeze +from openvino.opset1.ops import strided_slice +from openvino.opset1.ops import subtract +from openvino.opset4.ops import swish +from openvino.opset1.ops import tan +from openvino.opset1.ops import tanh +from openvino.opset1.ops import tensor_iterator +from openvino.opset1.ops import tile +from openvino.opset3.ops import topk +from openvino.opset1.ops import transpose +from openvino.opset1.ops import unsqueeze +from openvino.opset1.ops import variadic_split diff --git a/src/bindings/python/src/openvino/runtime/opset8/ops.py b/src/bindings/python/src/openvino/opset8/ops.py similarity index 99% rename from src/bindings/python/src/openvino/runtime/opset8/ops.py rename to src/bindings/python/src/openvino/opset8/ops.py index f9176ecd32be14..05b97390baa780 100644 --- a/src/bindings/python/src/openvino/runtime/opset8/ops.py +++ b/src/bindings/python/src/openvino/opset8/ops.py @@ -8,7 +8,7 @@ import numpy as np from openvino.runtime.exceptions import UserInputError -from openvino.runtime.op import Constant, Parameter, if_op +from openvino.op import Constant, Parameter, if_op from openvino.runtime import Node from openvino.runtime.opset_utils import _get_node_factory from openvino.runtime.utils.decorators import nameable_op diff --git a/src/bindings/python/src/openvino/opset9/__init__.py b/src/bindings/python/src/openvino/opset9/__init__.py new file mode 100644 index 00000000000000..03051d46b58759 --- /dev/null +++ b/src/bindings/python/src/openvino/opset9/__init__.py @@ -0,0 +1,175 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from openvino.opset1.ops import absolute +from openvino.opset1.ops import absolute as abs +from openvino.opset1.ops import acos +from openvino.opset4.ops import acosh +from openvino.opset8.ops import adaptive_avg_pool 
+from openvino.opset8.ops import adaptive_max_pool +from openvino.opset1.ops import add +from openvino.opset1.ops import asin +from openvino.opset4.ops import asinh +from openvino.opset6.ops import assign +from openvino.opset1.ops import atan +from openvino.opset4.ops import atanh +from openvino.opset1.ops import avg_pool +from openvino.opset5.ops import batch_norm_inference +from openvino.opset2.ops import batch_to_space +from openvino.opset1.ops import binary_convolution +from openvino.opset3.ops import broadcast +from openvino.opset3.ops import bucketize +from openvino.opset1.ops import ceiling +from openvino.opset1.ops import ceiling as ceil +from openvino.opset1.ops import clamp +from openvino.opset1.ops import concat +from openvino.opset1.ops import constant +from openvino.opset1.ops import convert +from openvino.opset1.ops import convert_like +from openvino.opset1.ops import convolution +from openvino.opset1.ops import convolution_backprop_data +from openvino.opset1.ops import cos +from openvino.opset1.ops import cosh +from openvino.opset1.ops import ctc_greedy_decoder +from openvino.opset6.ops import ctc_greedy_decoder_seq_len +from openvino.opset4.ops import ctc_loss +from openvino.opset3.ops import cum_sum +from openvino.opset3.ops import cum_sum as cumsum +from openvino.opset8.ops import deformable_convolution +from openvino.opset1.ops import deformable_psroi_pooling +from openvino.opset1.ops import depth_to_space +from openvino.opset8.ops import detection_output +from openvino.opset7.ops import dft +from openvino.opset1.ops import divide +from openvino.opset7.ops import einsum +from openvino.opset1.ops import elu +from openvino.opset3.ops import embedding_bag_offsets_sum +from openvino.opset3.ops import embedding_bag_packed_sum +from openvino.opset3.ops import embedding_segments_sum +from openvino.opset3.ops import extract_image_patches +from openvino.opset1.ops import equal +from openvino.opset1.ops import erf +from openvino.opset1.ops import exp +from 
openvino.opset9.ops import eye +from openvino.opset1.ops import fake_quantize +from openvino.opset1.ops import floor +from openvino.opset1.ops import floor_mod +from openvino.opset8.ops import gather +from openvino.opset6.ops import gather_elements +from openvino.opset8.ops import gather_nd +from openvino.opset1.ops import gather_tree +from openvino.opset7.ops import gelu +from openvino.opset9.ops import generate_proposals +from openvino.opset1.ops import greater +from openvino.opset1.ops import greater_equal +from openvino.opset9.ops import grid_sample +from openvino.opset1.ops import grn +from openvino.opset1.ops import group_convolution +from openvino.opset1.ops import group_convolution_backprop_data +from openvino.opset3.ops import gru_cell +from openvino.opset5.ops import gru_sequence +from openvino.opset1.ops import hard_sigmoid +from openvino.opset5.ops import hsigmoid +from openvino.opset4.ops import hswish +from openvino.opset7.ops import idft +from openvino.opset8.ops import if_op +from openvino.opset1.ops import interpolate +from openvino.opset9.ops import irdft +from openvino.opset8.ops import i420_to_bgr +from openvino.opset8.ops import i420_to_rgb +from openvino.opset1.ops import less +from openvino.opset1.ops import less_equal +from openvino.opset1.ops import log +from openvino.opset1.ops import logical_and +from openvino.opset1.ops import logical_not +from openvino.opset1.ops import logical_or +from openvino.opset1.ops import logical_xor +from openvino.opset5.ops import log_softmax +from openvino.opset5.ops import loop +from openvino.opset1.ops import lrn +from openvino.opset4.ops import lstm_cell +from openvino.opset5.ops import lstm_sequence +from openvino.opset1.ops import matmul +from openvino.opset8.ops import matrix_nms +from openvino.opset8.ops import max_pool +from openvino.opset1.ops import maximum +from openvino.opset1.ops import minimum +from openvino.opset4.ops import mish +from openvino.opset1.ops import mod +from openvino.opset9.ops 
import multiclass_nms +from openvino.opset1.ops import multiply +from openvino.opset6.ops import mvn +from openvino.opset1.ops import negative +from openvino.opset9.ops import non_max_suppression +from openvino.opset3.ops import non_zero +from openvino.opset1.ops import normalize_l2 +from openvino.opset1.ops import not_equal +from openvino.opset8.ops import nv12_to_bgr +from openvino.opset8.ops import nv12_to_rgb +from openvino.opset1.ops import one_hot +from openvino.opset1.ops import pad +from openvino.opset1.ops import parameter +from openvino.opset1.ops import power +from openvino.opset1.ops import prelu +from openvino.opset8.ops import prior_box +from openvino.opset1.ops import prior_box_clustered +from openvino.opset1.ops import psroi_pooling +from openvino.opset4.ops import proposal +from openvino.opset1.ops import range +from openvino.opset8.ops import random_uniform +from openvino.opset9.ops import rdft +from openvino.opset6.ops import read_value +from openvino.opset4.ops import reduce_l1 +from openvino.opset4.ops import reduce_l2 +from openvino.opset1.ops import reduce_logical_and +from openvino.opset1.ops import reduce_logical_or +from openvino.opset1.ops import reduce_max +from openvino.opset1.ops import reduce_mean +from openvino.opset1.ops import reduce_min +from openvino.opset1.ops import reduce_prod +from openvino.opset1.ops import reduce_sum +from openvino.opset1.ops import region_yolo +from openvino.opset2.ops import reorg_yolo +from openvino.opset1.ops import relu +from openvino.opset1.ops import reshape +from openvino.opset1.ops import result +from openvino.opset1.ops import reverse_sequence +from openvino.opset3.ops import rnn_cell +from openvino.opset5.ops import rnn_sequence +from openvino.opset9.ops import roi_align +from openvino.opset2.ops import roi_pooling +from openvino.opset7.ops import roll +from openvino.opset5.ops import round +from openvino.opset3.ops import scatter_elements_update +from openvino.opset4.ops import scatter_nd_update 
+from openvino.opset3.ops import scatter_update +from openvino.opset1.ops import select +from openvino.opset1.ops import selu +from openvino.opset3.ops import shape_of +from openvino.opset3.ops import shuffle_channels +from openvino.opset1.ops import sigmoid +from openvino.opset1.ops import sign +from openvino.opset1.ops import sin +from openvino.opset1.ops import sinh +from openvino.opset8.ops import slice +from openvino.opset8.ops import softmax +from openvino.opset4.ops import softplus +from openvino.opset9.ops import softsign +from openvino.opset2.ops import space_to_batch +from openvino.opset1.ops import space_to_depth +from openvino.opset1.ops import split +from openvino.opset1.ops import sqrt +from openvino.opset1.ops import squared_difference +from openvino.opset1.ops import squeeze +from openvino.opset1.ops import strided_slice +from openvino.opset1.ops import subtract +from openvino.opset4.ops import swish +from openvino.opset1.ops import tan +from openvino.opset1.ops import tanh +from openvino.opset1.ops import tensor_iterator +from openvino.opset1.ops import tile +from openvino.opset3.ops import topk +from openvino.opset1.ops import transpose +from openvino.opset1.ops import unsqueeze +from openvino.opset1.ops import variadic_split diff --git a/src/bindings/python/src/openvino/runtime/opset9/ops.py b/src/bindings/python/src/openvino/opset9/ops.py similarity index 100% rename from src/bindings/python/src/openvino/runtime/opset9/ops.py rename to src/bindings/python/src/openvino/opset9/ops.py diff --git a/src/bindings/python/src/openvino/runtime/__init__.py b/src/bindings/python/src/openvino/runtime/__init__.py index df2cab57bd0aba..e22e2be08cacea 100644 --- a/src/bindings/python/src/openvino/runtime/__init__.py +++ b/src/bindings/python/src/openvino/runtime/__init__.py @@ -58,6 +58,9 @@ from openvino.runtime import opset11 from openvino.runtime import opset12 from openvino.runtime import opset13 +from openvino.runtime import opset14 +from openvino.runtime 
import opset15 +from openvino.runtime import opset16 # Import properties API from openvino.runtime import properties diff --git a/src/bindings/python/src/openvino/runtime/op/__init__.py b/src/bindings/python/src/openvino/runtime/op/__init__.py index ee324dd76380f7..d7dd935d05f8d3 100644 --- a/src/bindings/python/src/openvino/runtime/op/__init__.py +++ b/src/bindings/python/src/openvino/runtime/op/__init__.py @@ -8,12 +8,12 @@ # flake8: noqa -from openvino._pyopenvino.op import Constant -from openvino._pyopenvino.op import assign -from openvino._pyopenvino.op import _PagedAttentionExtension -from openvino._pyopenvino.op import Parameter -from openvino._pyopenvino.op import if_op -from openvino._pyopenvino.op import loop -from openvino._pyopenvino.op import tensor_iterator -from openvino._pyopenvino.op import read_value -from openvino._pyopenvino.op import Result +from openvino.op import Constant +from openvino.op import assign +from openvino.op import _PagedAttentionExtension +from openvino.op import Parameter +from openvino.op import if_op +from openvino.op import loop +from openvino.op import tensor_iterator +from openvino.op import read_value +from openvino.op import Result diff --git a/src/bindings/python/src/openvino/runtime/op/util/__init__.py b/src/bindings/python/src/openvino/runtime/op/util/__init__.py index 0c946b115f451e..642434446c247d 100644 --- a/src/bindings/python/src/openvino/runtime/op/util/__init__.py +++ b/src/bindings/python/src/openvino/runtime/op/util/__init__.py @@ -7,16 +7,16 @@ """ # flake8: noqa -from openvino._pyopenvino.op.util import UnaryElementwiseArithmetic -from openvino._pyopenvino.op.util import BinaryElementwiseComparison -from openvino._pyopenvino.op.util import BinaryElementwiseArithmetic -from openvino._pyopenvino.op.util import BinaryElementwiseLogical -from openvino._pyopenvino.op.util import ArithmeticReduction -from openvino._pyopenvino.op.util import IndexReduction -from openvino._pyopenvino.op.util import VariableInfo 
-from openvino._pyopenvino.op.util import Variable -from openvino._pyopenvino.op.util import MergedInputDescription -from openvino._pyopenvino.op.util import InvariantInputDescription -from openvino._pyopenvino.op.util import SliceInputDescription -from openvino._pyopenvino.op.util import ConcatOutputDescription -from openvino._pyopenvino.op.util import BodyOutputDescription +from openvino.op.util import UnaryElementwiseArithmetic +from openvino.op.util import BinaryElementwiseComparison +from openvino.op.util import BinaryElementwiseArithmetic +from openvino.op.util import BinaryElementwiseLogical +from openvino.op.util import ArithmeticReduction +from openvino.op.util import IndexReduction +from openvino.op.util import VariableInfo +from openvino.op.util import Variable +from openvino.op.util import MergedInputDescription +from openvino.op.util import InvariantInputDescription +from openvino.op.util import SliceInputDescription +from openvino.op.util import ConcatOutputDescription +from openvino.op.util import BodyOutputDescription diff --git a/src/bindings/python/src/openvino/runtime/opset1/__init__.py b/src/bindings/python/src/openvino/runtime/opset1/__init__.py index ca7e1aef385b2a..35ceec6b0d9892 100644 --- a/src/bindings/python/src/openvino/runtime/opset1/__init__.py +++ b/src/bindings/python/src/openvino/runtime/opset1/__init__.py @@ -2,110 +2,110 @@ # Copyright (C) 2018-2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -from openvino.runtime.opset1.ops import absolute -from openvino.runtime.opset1.ops import absolute as abs -from openvino.runtime.opset1.ops import acos -from openvino.runtime.opset1.ops import add -from openvino.runtime.opset1.ops import asin -from openvino.runtime.opset1.ops import atan -from openvino.runtime.opset1.ops import avg_pool -from openvino.runtime.opset1.ops import batch_norm_inference -from openvino.runtime.opset1.ops import binary_convolution -from openvino.runtime.opset1.ops import broadcast -from 
openvino.runtime.opset1.ops import ceiling -from openvino.runtime.opset1.ops import ceiling as ceil -from openvino.runtime.opset1.ops import clamp -from openvino.runtime.opset1.ops import concat -from openvino.runtime.opset1.ops import constant -from openvino.runtime.opset1.ops import convert -from openvino.runtime.opset1.ops import convert_like -from openvino.runtime.opset1.ops import convolution -from openvino.runtime.opset1.ops import convolution_backprop_data -from openvino.runtime.opset1.ops import cos -from openvino.runtime.opset1.ops import cosh -from openvino.runtime.opset1.ops import ctc_greedy_decoder -from openvino.runtime.opset1.ops import deformable_convolution -from openvino.runtime.opset1.ops import deformable_psroi_pooling -from openvino.runtime.opset1.ops import depth_to_space -from openvino.runtime.opset1.ops import detection_output -from openvino.runtime.opset1.ops import divide -from openvino.runtime.opset1.ops import elu -from openvino.runtime.opset1.ops import equal -from openvino.runtime.opset1.ops import erf -from openvino.runtime.opset1.ops import exp -from openvino.runtime.opset1.ops import fake_quantize -from openvino.runtime.opset1.ops import floor -from openvino.runtime.opset1.ops import floor_mod -from openvino.runtime.opset1.ops import gather -from openvino.runtime.opset1.ops import gather_tree -from openvino.runtime.opset1.ops import greater -from openvino.runtime.opset1.ops import greater_equal -from openvino.runtime.opset1.ops import grn -from openvino.runtime.opset1.ops import group_convolution -from openvino.runtime.opset1.ops import group_convolution_backprop_data -from openvino.runtime.opset1.ops import hard_sigmoid -from openvino.runtime.opset1.ops import interpolate -from openvino.runtime.opset1.ops import less -from openvino.runtime.opset1.ops import less_equal -from openvino.runtime.opset1.ops import log -from openvino.runtime.opset1.ops import logical_and -from openvino.runtime.opset1.ops import logical_not -from 
openvino.runtime.opset1.ops import logical_or -from openvino.runtime.opset1.ops import logical_xor -from openvino.runtime.opset1.ops import lrn -from openvino.runtime.opset1.ops import lstm_cell -from openvino.runtime.opset1.ops import matmul -from openvino.runtime.opset1.ops import max_pool -from openvino.runtime.opset1.ops import maximum -from openvino.runtime.opset1.ops import minimum -from openvino.runtime.opset1.ops import mod -from openvino.runtime.opset1.ops import multiply -from openvino.runtime.opset1.ops import negative -from openvino.runtime.opset1.ops import non_max_suppression -from openvino.runtime.opset1.ops import normalize_l2 -from openvino.runtime.opset1.ops import not_equal -from openvino.runtime.opset1.ops import one_hot -from openvino.runtime.opset1.ops import pad -from openvino.runtime.opset1.ops import parameter -from openvino.runtime.opset1.ops import power -from openvino.runtime.opset1.ops import prelu -from openvino.runtime.opset1.ops import prior_box -from openvino.runtime.opset1.ops import prior_box_clustered -from openvino.runtime.opset1.ops import psroi_pooling -from openvino.runtime.opset1.ops import proposal -from openvino.runtime.opset1.ops import range -from openvino.runtime.opset1.ops import reduce_logical_and -from openvino.runtime.opset1.ops import reduce_logical_or -from openvino.runtime.opset1.ops import reduce_max -from openvino.runtime.opset1.ops import reduce_mean -from openvino.runtime.opset1.ops import reduce_min -from openvino.runtime.opset1.ops import reduce_prod -from openvino.runtime.opset1.ops import reduce_sum -from openvino.runtime.opset1.ops import region_yolo -from openvino.runtime.opset1.ops import relu -from openvino.runtime.opset1.ops import reshape -from openvino.runtime.opset1.ops import result -from openvino.runtime.opset1.ops import reverse_sequence -from openvino.runtime.opset1.ops import select -from openvino.runtime.opset1.ops import selu -from openvino.runtime.opset1.ops import shape_of -from 
openvino.runtime.opset1.ops import sigmoid -from openvino.runtime.opset1.ops import sign -from openvino.runtime.opset1.ops import sin -from openvino.runtime.opset1.ops import sinh -from openvino.runtime.opset1.ops import softmax -from openvino.runtime.opset1.ops import space_to_depth -from openvino.runtime.opset1.ops import split -from openvino.runtime.opset1.ops import sqrt -from openvino.runtime.opset1.ops import squared_difference -from openvino.runtime.opset1.ops import squeeze -from openvino.runtime.opset1.ops import strided_slice -from openvino.runtime.opset1.ops import subtract -from openvino.runtime.opset1.ops import tan -from openvino.runtime.opset1.ops import tanh -from openvino.runtime.opset1.ops import tensor_iterator -from openvino.runtime.opset1.ops import tile -from openvino.runtime.opset1.ops import topk -from openvino.runtime.opset1.ops import transpose -from openvino.runtime.opset1.ops import unsqueeze -from openvino.runtime.opset1.ops import variadic_split +from openvino.opset1.ops import absolute +from openvino.opset1.ops import absolute as abs +from openvino.opset1.ops import acos +from openvino.opset1.ops import add +from openvino.opset1.ops import asin +from openvino.opset1.ops import atan +from openvino.opset1.ops import avg_pool +from openvino.opset1.ops import batch_norm_inference +from openvino.opset1.ops import binary_convolution +from openvino.opset1.ops import broadcast +from openvino.opset1.ops import ceiling +from openvino.opset1.ops import ceiling as ceil +from openvino.opset1.ops import clamp +from openvino.opset1.ops import concat +from openvino.opset1.ops import constant +from openvino.opset1.ops import convert +from openvino.opset1.ops import convert_like +from openvino.opset1.ops import convolution +from openvino.opset1.ops import convolution_backprop_data +from openvino.opset1.ops import cos +from openvino.opset1.ops import cosh +from openvino.opset1.ops import ctc_greedy_decoder +from openvino.opset1.ops import 
deformable_convolution +from openvino.opset1.ops import deformable_psroi_pooling +from openvino.opset1.ops import depth_to_space +from openvino.opset1.ops import detection_output +from openvino.opset1.ops import divide +from openvino.opset1.ops import elu +from openvino.opset1.ops import equal +from openvino.opset1.ops import erf +from openvino.opset1.ops import exp +from openvino.opset1.ops import fake_quantize +from openvino.opset1.ops import floor +from openvino.opset1.ops import floor_mod +from openvino.opset1.ops import gather +from openvino.opset1.ops import gather_tree +from openvino.opset1.ops import greater +from openvino.opset1.ops import greater_equal +from openvino.opset1.ops import grn +from openvino.opset1.ops import group_convolution +from openvino.opset1.ops import group_convolution_backprop_data +from openvino.opset1.ops import hard_sigmoid +from openvino.opset1.ops import interpolate +from openvino.opset1.ops import less +from openvino.opset1.ops import less_equal +from openvino.opset1.ops import log +from openvino.opset1.ops import logical_and +from openvino.opset1.ops import logical_not +from openvino.opset1.ops import logical_or +from openvino.opset1.ops import logical_xor +from openvino.opset1.ops import lrn +from openvino.opset1.ops import lstm_cell +from openvino.opset1.ops import matmul +from openvino.opset1.ops import max_pool +from openvino.opset1.ops import maximum +from openvino.opset1.ops import minimum +from openvino.opset1.ops import mod +from openvino.opset1.ops import multiply +from openvino.opset1.ops import negative +from openvino.opset1.ops import non_max_suppression +from openvino.opset1.ops import normalize_l2 +from openvino.opset1.ops import not_equal +from openvino.opset1.ops import one_hot +from openvino.opset1.ops import pad +from openvino.opset1.ops import parameter +from openvino.opset1.ops import power +from openvino.opset1.ops import prelu +from openvino.opset1.ops import prior_box +from openvino.opset1.ops import 
prior_box_clustered +from openvino.opset1.ops import psroi_pooling +from openvino.opset1.ops import proposal +from openvino.opset1.ops import range +from openvino.opset1.ops import reduce_logical_and +from openvino.opset1.ops import reduce_logical_or +from openvino.opset1.ops import reduce_max +from openvino.opset1.ops import reduce_mean +from openvino.opset1.ops import reduce_min +from openvino.opset1.ops import reduce_prod +from openvino.opset1.ops import reduce_sum +from openvino.opset1.ops import region_yolo +from openvino.opset1.ops import relu +from openvino.opset1.ops import reshape +from openvino.opset1.ops import result +from openvino.opset1.ops import reverse_sequence +from openvino.opset1.ops import select +from openvino.opset1.ops import selu +from openvino.opset1.ops import shape_of +from openvino.opset1.ops import sigmoid +from openvino.opset1.ops import sign +from openvino.opset1.ops import sin +from openvino.opset1.ops import sinh +from openvino.opset1.ops import softmax +from openvino.opset1.ops import space_to_depth +from openvino.opset1.ops import split +from openvino.opset1.ops import sqrt +from openvino.opset1.ops import squared_difference +from openvino.opset1.ops import squeeze +from openvino.opset1.ops import strided_slice +from openvino.opset1.ops import subtract +from openvino.opset1.ops import tan +from openvino.opset1.ops import tanh +from openvino.opset1.ops import tensor_iterator +from openvino.opset1.ops import tile +from openvino.opset1.ops import topk +from openvino.opset1.ops import transpose +from openvino.opset1.ops import unsqueeze +from openvino.opset1.ops import variadic_split diff --git a/src/bindings/python/src/openvino/runtime/opset1/ops/__init__.py b/src/bindings/python/src/openvino/runtime/opset1/ops/__init__.py new file mode 100644 index 00000000000000..acdd7d8becf465 --- /dev/null +++ b/src/bindings/python/src/openvino/runtime/opset1/ops/__init__.py @@ -0,0 +1,109 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2018-2024 
Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from openvino.opset1.ops import absolute +from openvino.opset1.ops import acos +from openvino.opset1.ops import add +from openvino.opset1.ops import asin +from openvino.opset1.ops import atan +from openvino.opset1.ops import avg_pool +from openvino.opset1.ops import batch_norm_inference +from openvino.opset1.ops import binary_convolution +from openvino.opset1.ops import broadcast +from openvino.opset1.ops import ceiling +from openvino.opset1.ops import clamp +from openvino.opset1.ops import concat +from openvino.opset1.ops import constant +from openvino.opset1.ops import convert +from openvino.opset1.ops import convert_like +from openvino.opset1.ops import convolution +from openvino.opset1.ops import convolution_backprop_data +from openvino.opset1.ops import cos +from openvino.opset1.ops import cosh +from openvino.opset1.ops import ctc_greedy_decoder +from openvino.opset1.ops import deformable_convolution +from openvino.opset1.ops import deformable_psroi_pooling +from openvino.opset1.ops import depth_to_space +from openvino.opset1.ops import detection_output +from openvino.opset1.ops import divide +from openvino.opset1.ops import elu +from openvino.opset1.ops import equal +from openvino.opset1.ops import erf +from openvino.opset1.ops import exp +from openvino.opset1.ops import fake_quantize +from openvino.opset1.ops import floor +from openvino.opset1.ops import floor_mod +from openvino.opset1.ops import gather +from openvino.opset1.ops import gather_tree +from openvino.opset1.ops import greater +from openvino.opset1.ops import greater_equal +from openvino.opset1.ops import grn +from openvino.opset1.ops import group_convolution +from openvino.opset1.ops import group_convolution_backprop_data +from openvino.opset1.ops import hard_sigmoid +from openvino.opset1.ops import interpolate +from openvino.opset1.ops import less +from openvino.opset1.ops import less_equal +from openvino.opset1.ops import log +from 
openvino.opset1.ops import logical_and +from openvino.opset1.ops import logical_not +from openvino.opset1.ops import logical_or +from openvino.opset1.ops import logical_xor +from openvino.opset1.ops import lrn +from openvino.opset1.ops import lstm_cell +from openvino.opset1.ops import matmul +from openvino.opset1.ops import max_pool +from openvino.opset1.ops import maximum +from openvino.opset1.ops import minimum +from openvino.opset1.ops import mod +from openvino.opset1.ops import multiply +from openvino.opset1.ops import negative +from openvino.opset1.ops import non_max_suppression +from openvino.opset1.ops import normalize_l2 +from openvino.opset1.ops import not_equal +from openvino.opset1.ops import one_hot +from openvino.opset1.ops import pad +from openvino.opset1.ops import parameter +from openvino.opset1.ops import power +from openvino.opset1.ops import prelu +from openvino.opset1.ops import prior_box +from openvino.opset1.ops import prior_box_clustered +from openvino.opset1.ops import psroi_pooling +from openvino.opset1.ops import proposal +from openvino.opset1.ops import range +from openvino.opset1.ops import reduce_logical_and +from openvino.opset1.ops import reduce_logical_or +from openvino.opset1.ops import reduce_max +from openvino.opset1.ops import reduce_mean +from openvino.opset1.ops import reduce_min +from openvino.opset1.ops import reduce_prod +from openvino.opset1.ops import reduce_sum +from openvino.opset1.ops import region_yolo +from openvino.opset1.ops import relu +from openvino.opset1.ops import reshape +from openvino.opset1.ops import result +from openvino.opset1.ops import reverse_sequence +from openvino.opset1.ops import select +from openvino.opset1.ops import selu +from openvino.opset1.ops import shape_of +from openvino.opset1.ops import sigmoid +from openvino.opset1.ops import sign +from openvino.opset1.ops import sin +from openvino.opset1.ops import sinh +from openvino.opset1.ops import softmax +from openvino.opset1.ops import 
space_to_depth +from openvino.opset1.ops import split +from openvino.opset1.ops import sqrt +from openvino.opset1.ops import squared_difference +from openvino.opset1.ops import squeeze +from openvino.opset1.ops import strided_slice +from openvino.opset1.ops import subtract +from openvino.opset1.ops import tan +from openvino.opset1.ops import tanh +from openvino.opset1.ops import tensor_iterator +from openvino.opset1.ops import tile +from openvino.opset1.ops import topk +from openvino.opset1.ops import transpose +from openvino.opset1.ops import unsqueeze +from openvino.opset1.ops import variadic_split diff --git a/src/bindings/python/src/openvino/runtime/opset10/__init__.py b/src/bindings/python/src/openvino/runtime/opset10/__init__.py index 29ebcb27039abc..659cb99bb8644d 100644 --- a/src/bindings/python/src/openvino/runtime/opset10/__init__.py +++ b/src/bindings/python/src/openvino/runtime/opset10/__init__.py @@ -2,178 +2,178 @@ # Copyright (C) 2018-2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -from openvino.runtime.opset1.ops import absolute -from openvino.runtime.opset1.ops import absolute as abs -from openvino.runtime.opset1.ops import acos -from openvino.runtime.opset4.ops import acosh -from openvino.runtime.opset8.ops import adaptive_avg_pool -from openvino.runtime.opset8.ops import adaptive_max_pool -from openvino.runtime.opset1.ops import add -from openvino.runtime.opset1.ops import asin -from openvino.runtime.opset4.ops import asinh -from openvino.runtime.opset6.ops import assign -from openvino.runtime.opset1.ops import atan -from openvino.runtime.opset4.ops import atanh -from openvino.runtime.opset1.ops import avg_pool -from openvino.runtime.opset5.ops import batch_norm_inference -from openvino.runtime.opset2.ops import batch_to_space -from openvino.runtime.opset1.ops import binary_convolution -from openvino.runtime.opset3.ops import broadcast -from openvino.runtime.opset3.ops import bucketize -from openvino.runtime.opset1.ops import 
ceiling -from openvino.runtime.opset1.ops import ceiling as ceil -from openvino.runtime.opset1.ops import clamp -from openvino.runtime.opset1.ops import concat -from openvino.runtime.opset1.ops import constant -from openvino.runtime.opset1.ops import convert -from openvino.runtime.opset1.ops import convert_like -from openvino.runtime.opset1.ops import convolution -from openvino.runtime.opset1.ops import convolution_backprop_data -from openvino.runtime.opset1.ops import cos -from openvino.runtime.opset1.ops import cosh -from openvino.runtime.opset1.ops import ctc_greedy_decoder -from openvino.runtime.opset6.ops import ctc_greedy_decoder_seq_len -from openvino.runtime.opset4.ops import ctc_loss -from openvino.runtime.opset3.ops import cum_sum -from openvino.runtime.opset3.ops import cum_sum as cumsum -from openvino.runtime.opset8.ops import deformable_convolution -from openvino.runtime.opset1.ops import deformable_psroi_pooling -from openvino.runtime.opset1.ops import depth_to_space -from openvino.runtime.opset8.ops import detection_output -from openvino.runtime.opset7.ops import dft -from openvino.runtime.opset1.ops import divide -from openvino.runtime.opset7.ops import einsum -from openvino.runtime.opset1.ops import elu -from openvino.runtime.opset3.ops import embedding_bag_offsets_sum -from openvino.runtime.opset3.ops import embedding_bag_packed_sum -from openvino.runtime.opset3.ops import embedding_segments_sum -from openvino.runtime.opset3.ops import extract_image_patches -from openvino.runtime.opset1.ops import equal -from openvino.runtime.opset1.ops import erf -from openvino.runtime.opset1.ops import exp -from openvino.runtime.opset9.ops import eye -from openvino.runtime.opset1.ops import fake_quantize -from openvino.runtime.opset1.ops import floor -from openvino.runtime.opset1.ops import floor_mod -from openvino.runtime.opset8.ops import gather -from openvino.runtime.opset6.ops import gather_elements -from openvino.runtime.opset8.ops import gather_nd -from 
openvino.runtime.opset1.ops import gather_tree -from openvino.runtime.opset7.ops import gelu -from openvino.runtime.opset9.ops import generate_proposals -from openvino.runtime.opset1.ops import greater -from openvino.runtime.opset1.ops import greater_equal -from openvino.runtime.opset9.ops import grid_sample -from openvino.runtime.opset1.ops import grn -from openvino.runtime.opset1.ops import group_convolution -from openvino.runtime.opset1.ops import group_convolution_backprop_data -from openvino.runtime.opset3.ops import gru_cell -from openvino.runtime.opset5.ops import gru_sequence -from openvino.runtime.opset1.ops import hard_sigmoid -from openvino.runtime.opset5.ops import hsigmoid -from openvino.runtime.opset4.ops import hswish -from openvino.runtime.opset7.ops import idft -from openvino.runtime.opset8.ops import if_op -from openvino.runtime.opset10.ops import interpolate -from openvino.runtime.opset9.ops import irdft -from openvino.runtime.opset10.ops import is_finite -from openvino.runtime.opset10.ops import is_inf -from openvino.runtime.opset10.ops import is_nan -from openvino.runtime.opset8.ops import i420_to_bgr -from openvino.runtime.opset8.ops import i420_to_rgb -from openvino.runtime.opset1.ops import less -from openvino.runtime.opset1.ops import less_equal -from openvino.runtime.opset1.ops import log -from openvino.runtime.opset1.ops import logical_and -from openvino.runtime.opset1.ops import logical_not -from openvino.runtime.opset1.ops import logical_or -from openvino.runtime.opset1.ops import logical_xor -from openvino.runtime.opset5.ops import log_softmax -from openvino.runtime.opset5.ops import loop -from openvino.runtime.opset1.ops import lrn -from openvino.runtime.opset4.ops import lstm_cell -from openvino.runtime.opset5.ops import lstm_sequence -from openvino.runtime.opset1.ops import matmul -from openvino.runtime.opset8.ops import matrix_nms -from openvino.runtime.opset8.ops import max_pool -from openvino.runtime.opset1.ops import maximum 
-from openvino.runtime.opset1.ops import minimum -from openvino.runtime.opset4.ops import mish -from openvino.runtime.opset1.ops import mod -from openvino.runtime.opset9.ops import multiclass_nms -from openvino.runtime.opset1.ops import multiply -from openvino.runtime.opset6.ops import mvn -from openvino.runtime.opset1.ops import negative -from openvino.runtime.opset9.ops import non_max_suppression -from openvino.runtime.opset3.ops import non_zero -from openvino.runtime.opset1.ops import normalize_l2 -from openvino.runtime.opset1.ops import not_equal -from openvino.runtime.opset8.ops import nv12_to_bgr -from openvino.runtime.opset8.ops import nv12_to_rgb -from openvino.runtime.opset1.ops import one_hot -from openvino.runtime.opset1.ops import pad -from openvino.runtime.opset1.ops import parameter -from openvino.runtime.opset1.ops import power -from openvino.runtime.opset1.ops import prelu -from openvino.runtime.opset8.ops import prior_box -from openvino.runtime.opset1.ops import prior_box_clustered -from openvino.runtime.opset1.ops import psroi_pooling -from openvino.runtime.opset4.ops import proposal -from openvino.runtime.opset1.ops import range -from openvino.runtime.opset8.ops import random_uniform -from openvino.runtime.opset9.ops import rdft -from openvino.runtime.opset6.ops import read_value -from openvino.runtime.opset4.ops import reduce_l1 -from openvino.runtime.opset4.ops import reduce_l2 -from openvino.runtime.opset1.ops import reduce_logical_and -from openvino.runtime.opset1.ops import reduce_logical_or -from openvino.runtime.opset1.ops import reduce_max -from openvino.runtime.opset1.ops import reduce_mean -from openvino.runtime.opset1.ops import reduce_min -from openvino.runtime.opset1.ops import reduce_prod -from openvino.runtime.opset1.ops import reduce_sum -from openvino.runtime.opset1.ops import region_yolo -from openvino.runtime.opset2.ops import reorg_yolo -from openvino.runtime.opset1.ops import relu -from openvino.runtime.opset1.ops import 
reshape -from openvino.runtime.opset1.ops import result -from openvino.runtime.opset1.ops import reverse_sequence -from openvino.runtime.opset3.ops import rnn_cell -from openvino.runtime.opset5.ops import rnn_sequence -from openvino.runtime.opset9.ops import roi_align -from openvino.runtime.opset2.ops import roi_pooling -from openvino.runtime.opset7.ops import roll -from openvino.runtime.opset5.ops import round -from openvino.runtime.opset3.ops import scatter_elements_update -from openvino.runtime.opset4.ops import scatter_nd_update -from openvino.runtime.opset3.ops import scatter_update -from openvino.runtime.opset1.ops import select -from openvino.runtime.opset1.ops import selu -from openvino.runtime.opset3.ops import shape_of -from openvino.runtime.opset3.ops import shuffle_channels -from openvino.runtime.opset1.ops import sigmoid -from openvino.runtime.opset1.ops import sign -from openvino.runtime.opset1.ops import sin -from openvino.runtime.opset1.ops import sinh -from openvino.runtime.opset8.ops import slice -from openvino.runtime.opset8.ops import softmax -from openvino.runtime.opset4.ops import softplus -from openvino.runtime.opset9.ops import softsign -from openvino.runtime.opset2.ops import space_to_batch -from openvino.runtime.opset1.ops import space_to_depth -from openvino.runtime.opset1.ops import split -from openvino.runtime.opset1.ops import sqrt -from openvino.runtime.opset1.ops import squared_difference -from openvino.runtime.opset1.ops import squeeze -from openvino.runtime.opset1.ops import strided_slice -from openvino.runtime.opset1.ops import subtract -from openvino.runtime.opset4.ops import swish -from openvino.runtime.opset1.ops import tan -from openvino.runtime.opset1.ops import tanh -from openvino.runtime.opset1.ops import tensor_iterator -from openvino.runtime.opset1.ops import tile -from openvino.runtime.opset3.ops import topk -from openvino.runtime.opset1.ops import transpose -from openvino.runtime.opset10.ops import unique -from 
openvino.runtime.opset1.ops import unsqueeze -from openvino.runtime.opset1.ops import variadic_split +from openvino.opset1.ops import absolute +from openvino.opset1.ops import absolute as abs +from openvino.opset1.ops import acos +from openvino.opset4.ops import acosh +from openvino.opset8.ops import adaptive_avg_pool +from openvino.opset8.ops import adaptive_max_pool +from openvino.opset1.ops import add +from openvino.opset1.ops import asin +from openvino.opset4.ops import asinh +from openvino.opset6.ops import assign +from openvino.opset1.ops import atan +from openvino.opset4.ops import atanh +from openvino.opset1.ops import avg_pool +from openvino.opset5.ops import batch_norm_inference +from openvino.opset2.ops import batch_to_space +from openvino.opset1.ops import binary_convolution +from openvino.opset3.ops import broadcast +from openvino.opset3.ops import bucketize +from openvino.opset1.ops import ceiling +from openvino.opset1.ops import ceiling as ceil +from openvino.opset1.ops import clamp +from openvino.opset1.ops import concat +from openvino.opset1.ops import constant +from openvino.opset1.ops import convert +from openvino.opset1.ops import convert_like +from openvino.opset1.ops import convolution +from openvino.opset1.ops import convolution_backprop_data +from openvino.opset1.ops import cos +from openvino.opset1.ops import cosh +from openvino.opset1.ops import ctc_greedy_decoder +from openvino.opset6.ops import ctc_greedy_decoder_seq_len +from openvino.opset4.ops import ctc_loss +from openvino.opset3.ops import cum_sum +from openvino.opset3.ops import cum_sum as cumsum +from openvino.opset8.ops import deformable_convolution +from openvino.opset1.ops import deformable_psroi_pooling +from openvino.opset1.ops import depth_to_space +from openvino.opset8.ops import detection_output +from openvino.opset7.ops import dft +from openvino.opset1.ops import divide +from openvino.opset7.ops import einsum +from openvino.opset1.ops import elu +from openvino.opset3.ops 
import embedding_bag_offsets_sum +from openvino.opset3.ops import embedding_bag_packed_sum +from openvino.opset3.ops import embedding_segments_sum +from openvino.opset3.ops import extract_image_patches +from openvino.opset1.ops import equal +from openvino.opset1.ops import erf +from openvino.opset1.ops import exp +from openvino.opset9.ops import eye +from openvino.opset1.ops import fake_quantize +from openvino.opset1.ops import floor +from openvino.opset1.ops import floor_mod +from openvino.opset8.ops import gather +from openvino.opset6.ops import gather_elements +from openvino.opset8.ops import gather_nd +from openvino.opset1.ops import gather_tree +from openvino.opset7.ops import gelu +from openvino.opset9.ops import generate_proposals +from openvino.opset1.ops import greater +from openvino.opset1.ops import greater_equal +from openvino.opset9.ops import grid_sample +from openvino.opset1.ops import grn +from openvino.opset1.ops import group_convolution +from openvino.opset1.ops import group_convolution_backprop_data +from openvino.opset3.ops import gru_cell +from openvino.opset5.ops import gru_sequence +from openvino.opset1.ops import hard_sigmoid +from openvino.opset5.ops import hsigmoid +from openvino.opset4.ops import hswish +from openvino.opset7.ops import idft +from openvino.opset8.ops import if_op +from openvino.opset10.ops import interpolate +from openvino.opset9.ops import irdft +from openvino.opset10.ops import is_finite +from openvino.opset10.ops import is_inf +from openvino.opset10.ops import is_nan +from openvino.opset8.ops import i420_to_bgr +from openvino.opset8.ops import i420_to_rgb +from openvino.opset1.ops import less +from openvino.opset1.ops import less_equal +from openvino.opset1.ops import log +from openvino.opset1.ops import logical_and +from openvino.opset1.ops import logical_not +from openvino.opset1.ops import logical_or +from openvino.opset1.ops import logical_xor +from openvino.opset5.ops import log_softmax +from openvino.opset5.ops 
import loop +from openvino.opset1.ops import lrn +from openvino.opset4.ops import lstm_cell +from openvino.opset5.ops import lstm_sequence +from openvino.opset1.ops import matmul +from openvino.opset8.ops import matrix_nms +from openvino.opset8.ops import max_pool +from openvino.opset1.ops import maximum +from openvino.opset1.ops import minimum +from openvino.opset4.ops import mish +from openvino.opset1.ops import mod +from openvino.opset9.ops import multiclass_nms +from openvino.opset1.ops import multiply +from openvino.opset6.ops import mvn +from openvino.opset1.ops import negative +from openvino.opset9.ops import non_max_suppression +from openvino.opset3.ops import non_zero +from openvino.opset1.ops import normalize_l2 +from openvino.opset1.ops import not_equal +from openvino.opset8.ops import nv12_to_bgr +from openvino.opset8.ops import nv12_to_rgb +from openvino.opset1.ops import one_hot +from openvino.opset1.ops import pad +from openvino.opset1.ops import parameter +from openvino.opset1.ops import power +from openvino.opset1.ops import prelu +from openvino.opset8.ops import prior_box +from openvino.opset1.ops import prior_box_clustered +from openvino.opset1.ops import psroi_pooling +from openvino.opset4.ops import proposal +from openvino.opset1.ops import range +from openvino.opset8.ops import random_uniform +from openvino.opset9.ops import rdft +from openvino.opset6.ops import read_value +from openvino.opset4.ops import reduce_l1 +from openvino.opset4.ops import reduce_l2 +from openvino.opset1.ops import reduce_logical_and +from openvino.opset1.ops import reduce_logical_or +from openvino.opset1.ops import reduce_max +from openvino.opset1.ops import reduce_mean +from openvino.opset1.ops import reduce_min +from openvino.opset1.ops import reduce_prod +from openvino.opset1.ops import reduce_sum +from openvino.opset1.ops import region_yolo +from openvino.opset2.ops import reorg_yolo +from openvino.opset1.ops import relu +from openvino.opset1.ops import reshape 
+from openvino.opset1.ops import result +from openvino.opset1.ops import reverse_sequence +from openvino.opset3.ops import rnn_cell +from openvino.opset5.ops import rnn_sequence +from openvino.opset9.ops import roi_align +from openvino.opset2.ops import roi_pooling +from openvino.opset7.ops import roll +from openvino.opset5.ops import round +from openvino.opset3.ops import scatter_elements_update +from openvino.opset4.ops import scatter_nd_update +from openvino.opset3.ops import scatter_update +from openvino.opset1.ops import select +from openvino.opset1.ops import selu +from openvino.opset3.ops import shape_of +from openvino.opset3.ops import shuffle_channels +from openvino.opset1.ops import sigmoid +from openvino.opset1.ops import sign +from openvino.opset1.ops import sin +from openvino.opset1.ops import sinh +from openvino.opset8.ops import slice +from openvino.opset8.ops import softmax +from openvino.opset4.ops import softplus +from openvino.opset9.ops import softsign +from openvino.opset2.ops import space_to_batch +from openvino.opset1.ops import space_to_depth +from openvino.opset1.ops import split +from openvino.opset1.ops import sqrt +from openvino.opset1.ops import squared_difference +from openvino.opset1.ops import squeeze +from openvino.opset1.ops import strided_slice +from openvino.opset1.ops import subtract +from openvino.opset4.ops import swish +from openvino.opset1.ops import tan +from openvino.opset1.ops import tanh +from openvino.opset1.ops import tensor_iterator +from openvino.opset1.ops import tile +from openvino.opset3.ops import topk +from openvino.opset1.ops import transpose +from openvino.opset10.ops import unique +from openvino.opset1.ops import unsqueeze +from openvino.opset1.ops import variadic_split diff --git a/src/bindings/python/src/openvino/runtime/opset10/ops/__init__.py b/src/bindings/python/src/openvino/runtime/opset10/ops/__init__.py new file mode 100644 index 00000000000000..f6c134d9ab563e --- /dev/null +++ 
b/src/bindings/python/src/openvino/runtime/opset10/ops/__init__.py @@ -0,0 +1,9 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from openvino.opset10.ops import interpolate +from openvino.opset10.ops import is_finite +from openvino.opset10.ops import is_inf +from openvino.opset10.ops import is_nan +from openvino.opset10.ops import unique diff --git a/src/bindings/python/src/openvino/runtime/opset11/__init__.py b/src/bindings/python/src/openvino/runtime/opset11/__init__.py index b692741257d435..2a07b9fab9f2c4 100644 --- a/src/bindings/python/src/openvino/runtime/opset11/__init__.py +++ b/src/bindings/python/src/openvino/runtime/opset11/__init__.py @@ -2,178 +2,178 @@ # Copyright (C) 2018-2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -from openvino.runtime.opset1.ops import absolute -from openvino.runtime.opset1.ops import absolute as abs -from openvino.runtime.opset1.ops import acos -from openvino.runtime.opset4.ops import acosh -from openvino.runtime.opset8.ops import adaptive_avg_pool -from openvino.runtime.opset8.ops import adaptive_max_pool -from openvino.runtime.opset1.ops import add -from openvino.runtime.opset1.ops import asin -from openvino.runtime.opset4.ops import asinh -from openvino.runtime.opset6.ops import assign -from openvino.runtime.opset1.ops import atan -from openvino.runtime.opset4.ops import atanh -from openvino.runtime.opset1.ops import avg_pool -from openvino.runtime.opset5.ops import batch_norm_inference -from openvino.runtime.opset2.ops import batch_to_space -from openvino.runtime.opset1.ops import binary_convolution -from openvino.runtime.opset3.ops import broadcast -from openvino.runtime.opset3.ops import bucketize -from openvino.runtime.opset1.ops import ceiling -from openvino.runtime.opset1.ops import ceiling as ceil -from openvino.runtime.opset1.ops import clamp -from openvino.runtime.opset1.ops import concat -from openvino.runtime.opset1.ops import 
constant -from openvino.runtime.opset1.ops import convert -from openvino.runtime.opset1.ops import convert_like -from openvino.runtime.opset1.ops import convolution -from openvino.runtime.opset1.ops import convolution_backprop_data -from openvino.runtime.opset1.ops import cos -from openvino.runtime.opset1.ops import cosh -from openvino.runtime.opset1.ops import ctc_greedy_decoder -from openvino.runtime.opset6.ops import ctc_greedy_decoder_seq_len -from openvino.runtime.opset4.ops import ctc_loss -from openvino.runtime.opset3.ops import cum_sum -from openvino.runtime.opset3.ops import cum_sum as cumsum -from openvino.runtime.opset8.ops import deformable_convolution -from openvino.runtime.opset1.ops import deformable_psroi_pooling -from openvino.runtime.opset1.ops import depth_to_space -from openvino.runtime.opset8.ops import detection_output -from openvino.runtime.opset7.ops import dft -from openvino.runtime.opset1.ops import divide -from openvino.runtime.opset7.ops import einsum -from openvino.runtime.opset1.ops import elu -from openvino.runtime.opset3.ops import embedding_bag_offsets_sum -from openvino.runtime.opset3.ops import embedding_bag_packed_sum -from openvino.runtime.opset3.ops import embedding_segments_sum -from openvino.runtime.opset3.ops import extract_image_patches -from openvino.runtime.opset1.ops import equal -from openvino.runtime.opset1.ops import erf -from openvino.runtime.opset1.ops import exp -from openvino.runtime.opset9.ops import eye -from openvino.runtime.opset1.ops import fake_quantize -from openvino.runtime.opset1.ops import floor -from openvino.runtime.opset1.ops import floor_mod -from openvino.runtime.opset8.ops import gather -from openvino.runtime.opset6.ops import gather_elements -from openvino.runtime.opset8.ops import gather_nd -from openvino.runtime.opset1.ops import gather_tree -from openvino.runtime.opset7.ops import gelu -from openvino.runtime.opset9.ops import generate_proposals -from openvino.runtime.opset1.ops import greater 
-from openvino.runtime.opset1.ops import greater_equal -from openvino.runtime.opset9.ops import grid_sample -from openvino.runtime.opset1.ops import grn -from openvino.runtime.opset1.ops import group_convolution -from openvino.runtime.opset1.ops import group_convolution_backprop_data -from openvino.runtime.opset3.ops import gru_cell -from openvino.runtime.opset5.ops import gru_sequence -from openvino.runtime.opset1.ops import hard_sigmoid -from openvino.runtime.opset5.ops import hsigmoid -from openvino.runtime.opset4.ops import hswish -from openvino.runtime.opset7.ops import idft -from openvino.runtime.opset8.ops import if_op -from openvino.runtime.opset11.ops import interpolate -from openvino.runtime.opset9.ops import irdft -from openvino.runtime.opset10.ops import is_finite -from openvino.runtime.opset10.ops import is_inf -from openvino.runtime.opset10.ops import is_nan -from openvino.runtime.opset8.ops import i420_to_bgr -from openvino.runtime.opset8.ops import i420_to_rgb -from openvino.runtime.opset1.ops import less -from openvino.runtime.opset1.ops import less_equal -from openvino.runtime.opset1.ops import log -from openvino.runtime.opset1.ops import logical_and -from openvino.runtime.opset1.ops import logical_not -from openvino.runtime.opset1.ops import logical_or -from openvino.runtime.opset1.ops import logical_xor -from openvino.runtime.opset5.ops import log_softmax -from openvino.runtime.opset5.ops import loop -from openvino.runtime.opset1.ops import lrn -from openvino.runtime.opset4.ops import lstm_cell -from openvino.runtime.opset5.ops import lstm_sequence -from openvino.runtime.opset1.ops import matmul -from openvino.runtime.opset8.ops import matrix_nms -from openvino.runtime.opset8.ops import max_pool -from openvino.runtime.opset1.ops import maximum -from openvino.runtime.opset1.ops import minimum -from openvino.runtime.opset4.ops import mish -from openvino.runtime.opset1.ops import mod -from openvino.runtime.opset9.ops import multiclass_nms -from 
openvino.runtime.opset1.ops import multiply -from openvino.runtime.opset6.ops import mvn -from openvino.runtime.opset1.ops import negative -from openvino.runtime.opset9.ops import non_max_suppression -from openvino.runtime.opset3.ops import non_zero -from openvino.runtime.opset1.ops import normalize_l2 -from openvino.runtime.opset1.ops import not_equal -from openvino.runtime.opset8.ops import nv12_to_bgr -from openvino.runtime.opset8.ops import nv12_to_rgb -from openvino.runtime.opset1.ops import one_hot -from openvino.runtime.opset1.ops import pad -from openvino.runtime.opset1.ops import parameter -from openvino.runtime.opset1.ops import power -from openvino.runtime.opset1.ops import prelu -from openvino.runtime.opset8.ops import prior_box -from openvino.runtime.opset1.ops import prior_box_clustered -from openvino.runtime.opset1.ops import psroi_pooling -from openvino.runtime.opset4.ops import proposal -from openvino.runtime.opset1.ops import range -from openvino.runtime.opset8.ops import random_uniform -from openvino.runtime.opset9.ops import rdft -from openvino.runtime.opset6.ops import read_value -from openvino.runtime.opset4.ops import reduce_l1 -from openvino.runtime.opset4.ops import reduce_l2 -from openvino.runtime.opset1.ops import reduce_logical_and -from openvino.runtime.opset1.ops import reduce_logical_or -from openvino.runtime.opset1.ops import reduce_max -from openvino.runtime.opset1.ops import reduce_mean -from openvino.runtime.opset1.ops import reduce_min -from openvino.runtime.opset1.ops import reduce_prod -from openvino.runtime.opset1.ops import reduce_sum -from openvino.runtime.opset1.ops import region_yolo -from openvino.runtime.opset2.ops import reorg_yolo -from openvino.runtime.opset1.ops import relu -from openvino.runtime.opset1.ops import reshape -from openvino.runtime.opset1.ops import result -from openvino.runtime.opset1.ops import reverse_sequence -from openvino.runtime.opset3.ops import rnn_cell -from openvino.runtime.opset5.ops import 
rnn_sequence -from openvino.runtime.opset9.ops import roi_align -from openvino.runtime.opset2.ops import roi_pooling -from openvino.runtime.opset7.ops import roll -from openvino.runtime.opset5.ops import round -from openvino.runtime.opset3.ops import scatter_elements_update -from openvino.runtime.opset4.ops import scatter_nd_update -from openvino.runtime.opset3.ops import scatter_update -from openvino.runtime.opset1.ops import select -from openvino.runtime.opset1.ops import selu -from openvino.runtime.opset3.ops import shape_of -from openvino.runtime.opset3.ops import shuffle_channels -from openvino.runtime.opset1.ops import sigmoid -from openvino.runtime.opset1.ops import sign -from openvino.runtime.opset1.ops import sin -from openvino.runtime.opset1.ops import sinh -from openvino.runtime.opset8.ops import slice -from openvino.runtime.opset8.ops import softmax -from openvino.runtime.opset4.ops import softplus -from openvino.runtime.opset9.ops import softsign -from openvino.runtime.opset2.ops import space_to_batch -from openvino.runtime.opset1.ops import space_to_depth -from openvino.runtime.opset1.ops import split -from openvino.runtime.opset1.ops import sqrt -from openvino.runtime.opset1.ops import squared_difference -from openvino.runtime.opset1.ops import squeeze -from openvino.runtime.opset1.ops import strided_slice -from openvino.runtime.opset1.ops import subtract -from openvino.runtime.opset4.ops import swish -from openvino.runtime.opset1.ops import tan -from openvino.runtime.opset1.ops import tanh -from openvino.runtime.opset1.ops import tensor_iterator -from openvino.runtime.opset1.ops import tile -from openvino.runtime.opset11.ops import topk -from openvino.runtime.opset1.ops import transpose -from openvino.runtime.opset10.ops import unique -from openvino.runtime.opset1.ops import unsqueeze -from openvino.runtime.opset1.ops import variadic_split +from openvino.opset1.ops import absolute +from openvino.opset1.ops import absolute as abs +from 
openvino.opset1.ops import acos +from openvino.opset4.ops import acosh +from openvino.opset8.ops import adaptive_avg_pool +from openvino.opset8.ops import adaptive_max_pool +from openvino.opset1.ops import add +from openvino.opset1.ops import asin +from openvino.opset4.ops import asinh +from openvino.opset6.ops import assign +from openvino.opset1.ops import atan +from openvino.opset4.ops import atanh +from openvino.opset1.ops import avg_pool +from openvino.opset5.ops import batch_norm_inference +from openvino.opset2.ops import batch_to_space +from openvino.opset1.ops import binary_convolution +from openvino.opset3.ops import broadcast +from openvino.opset3.ops import bucketize +from openvino.opset1.ops import ceiling +from openvino.opset1.ops import ceiling as ceil +from openvino.opset1.ops import clamp +from openvino.opset1.ops import concat +from openvino.opset1.ops import constant +from openvino.opset1.ops import convert +from openvino.opset1.ops import convert_like +from openvino.opset1.ops import convolution +from openvino.opset1.ops import convolution_backprop_data +from openvino.opset1.ops import cos +from openvino.opset1.ops import cosh +from openvino.opset1.ops import ctc_greedy_decoder +from openvino.opset6.ops import ctc_greedy_decoder_seq_len +from openvino.opset4.ops import ctc_loss +from openvino.opset3.ops import cum_sum +from openvino.opset3.ops import cum_sum as cumsum +from openvino.opset8.ops import deformable_convolution +from openvino.opset1.ops import deformable_psroi_pooling +from openvino.opset1.ops import depth_to_space +from openvino.opset8.ops import detection_output +from openvino.opset7.ops import dft +from openvino.opset1.ops import divide +from openvino.opset7.ops import einsum +from openvino.opset1.ops import elu +from openvino.opset3.ops import embedding_bag_offsets_sum +from openvino.opset3.ops import embedding_bag_packed_sum +from openvino.opset3.ops import embedding_segments_sum +from openvino.opset3.ops import 
extract_image_patches +from openvino.opset1.ops import equal +from openvino.opset1.ops import erf +from openvino.opset1.ops import exp +from openvino.opset9.ops import eye +from openvino.opset1.ops import fake_quantize +from openvino.opset1.ops import floor +from openvino.opset1.ops import floor_mod +from openvino.opset8.ops import gather +from openvino.opset6.ops import gather_elements +from openvino.opset8.ops import gather_nd +from openvino.opset1.ops import gather_tree +from openvino.opset7.ops import gelu +from openvino.opset9.ops import generate_proposals +from openvino.opset1.ops import greater +from openvino.opset1.ops import greater_equal +from openvino.opset9.ops import grid_sample +from openvino.opset1.ops import grn +from openvino.opset1.ops import group_convolution +from openvino.opset1.ops import group_convolution_backprop_data +from openvino.opset3.ops import gru_cell +from openvino.opset5.ops import gru_sequence +from openvino.opset1.ops import hard_sigmoid +from openvino.opset5.ops import hsigmoid +from openvino.opset4.ops import hswish +from openvino.opset7.ops import idft +from openvino.opset8.ops import if_op +from openvino.opset11.ops import interpolate +from openvino.opset9.ops import irdft +from openvino.opset10.ops import is_finite +from openvino.opset10.ops import is_inf +from openvino.opset10.ops import is_nan +from openvino.opset8.ops import i420_to_bgr +from openvino.opset8.ops import i420_to_rgb +from openvino.opset1.ops import less +from openvino.opset1.ops import less_equal +from openvino.opset1.ops import log +from openvino.opset1.ops import logical_and +from openvino.opset1.ops import logical_not +from openvino.opset1.ops import logical_or +from openvino.opset1.ops import logical_xor +from openvino.opset5.ops import log_softmax +from openvino.opset5.ops import loop +from openvino.opset1.ops import lrn +from openvino.opset4.ops import lstm_cell +from openvino.opset5.ops import lstm_sequence +from openvino.opset1.ops import matmul 
+from openvino.opset8.ops import matrix_nms +from openvino.opset8.ops import max_pool +from openvino.opset1.ops import maximum +from openvino.opset1.ops import minimum +from openvino.opset4.ops import mish +from openvino.opset1.ops import mod +from openvino.opset9.ops import multiclass_nms +from openvino.opset1.ops import multiply +from openvino.opset6.ops import mvn +from openvino.opset1.ops import negative +from openvino.opset9.ops import non_max_suppression +from openvino.opset3.ops import non_zero +from openvino.opset1.ops import normalize_l2 +from openvino.opset1.ops import not_equal +from openvino.opset8.ops import nv12_to_bgr +from openvino.opset8.ops import nv12_to_rgb +from openvino.opset1.ops import one_hot +from openvino.opset1.ops import pad +from openvino.opset1.ops import parameter +from openvino.opset1.ops import power +from openvino.opset1.ops import prelu +from openvino.opset8.ops import prior_box +from openvino.opset1.ops import prior_box_clustered +from openvino.opset1.ops import psroi_pooling +from openvino.opset4.ops import proposal +from openvino.opset1.ops import range +from openvino.opset8.ops import random_uniform +from openvino.opset9.ops import rdft +from openvino.opset6.ops import read_value +from openvino.opset4.ops import reduce_l1 +from openvino.opset4.ops import reduce_l2 +from openvino.opset1.ops import reduce_logical_and +from openvino.opset1.ops import reduce_logical_or +from openvino.opset1.ops import reduce_max +from openvino.opset1.ops import reduce_mean +from openvino.opset1.ops import reduce_min +from openvino.opset1.ops import reduce_prod +from openvino.opset1.ops import reduce_sum +from openvino.opset1.ops import region_yolo +from openvino.opset2.ops import reorg_yolo +from openvino.opset1.ops import relu +from openvino.opset1.ops import reshape +from openvino.opset1.ops import result +from openvino.opset1.ops import reverse_sequence +from openvino.opset3.ops import rnn_cell +from openvino.opset5.ops import rnn_sequence 
+from openvino.opset9.ops import roi_align +from openvino.opset2.ops import roi_pooling +from openvino.opset7.ops import roll +from openvino.opset5.ops import round +from openvino.opset3.ops import scatter_elements_update +from openvino.opset4.ops import scatter_nd_update +from openvino.opset3.ops import scatter_update +from openvino.opset1.ops import select +from openvino.opset1.ops import selu +from openvino.opset3.ops import shape_of +from openvino.opset3.ops import shuffle_channels +from openvino.opset1.ops import sigmoid +from openvino.opset1.ops import sign +from openvino.opset1.ops import sin +from openvino.opset1.ops import sinh +from openvino.opset8.ops import slice +from openvino.opset8.ops import softmax +from openvino.opset4.ops import softplus +from openvino.opset9.ops import softsign +from openvino.opset2.ops import space_to_batch +from openvino.opset1.ops import space_to_depth +from openvino.opset1.ops import split +from openvino.opset1.ops import sqrt +from openvino.opset1.ops import squared_difference +from openvino.opset1.ops import squeeze +from openvino.opset1.ops import strided_slice +from openvino.opset1.ops import subtract +from openvino.opset4.ops import swish +from openvino.opset1.ops import tan +from openvino.opset1.ops import tanh +from openvino.opset1.ops import tensor_iterator +from openvino.opset1.ops import tile +from openvino.opset11.ops import topk +from openvino.opset1.ops import transpose +from openvino.opset10.ops import unique +from openvino.opset1.ops import unsqueeze +from openvino.opset1.ops import variadic_split diff --git a/src/bindings/python/src/openvino/runtime/opset11/ops/__init__.py b/src/bindings/python/src/openvino/runtime/opset11/ops/__init__.py new file mode 100644 index 00000000000000..2ef74d4a6b58db --- /dev/null +++ b/src/bindings/python/src/openvino/runtime/opset11/ops/__init__.py @@ -0,0 +1,6 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from 
openvino.opset11.ops import interpolate +from openvino.opset11.ops import topk diff --git a/src/bindings/python/src/openvino/runtime/opset12/__init__.py b/src/bindings/python/src/openvino/runtime/opset12/__init__.py index 381d82ce8c9be1..1187f2c83e6a05 100644 --- a/src/bindings/python/src/openvino/runtime/opset12/__init__.py +++ b/src/bindings/python/src/openvino/runtime/opset12/__init__.py @@ -2,179 +2,179 @@ # Copyright (C) 2018-2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -from openvino.runtime.opset1.ops import absolute -from openvino.runtime.opset1.ops import absolute as abs -from openvino.runtime.opset1.ops import acos -from openvino.runtime.opset4.ops import acosh -from openvino.runtime.opset8.ops import adaptive_avg_pool -from openvino.runtime.opset8.ops import adaptive_max_pool -from openvino.runtime.opset1.ops import add -from openvino.runtime.opset1.ops import asin -from openvino.runtime.opset4.ops import asinh -from openvino.runtime.opset6.ops import assign -from openvino.runtime.opset1.ops import atan -from openvino.runtime.opset4.ops import atanh -from openvino.runtime.opset1.ops import avg_pool -from openvino.runtime.opset5.ops import batch_norm_inference -from openvino.runtime.opset2.ops import batch_to_space -from openvino.runtime.opset1.ops import binary_convolution -from openvino.runtime.opset3.ops import broadcast -from openvino.runtime.opset3.ops import bucketize -from openvino.runtime.opset1.ops import ceiling -from openvino.runtime.opset1.ops import ceiling as ceil -from openvino.runtime.opset1.ops import clamp -from openvino.runtime.opset1.ops import concat -from openvino.runtime.opset1.ops import constant -from openvino.runtime.opset1.ops import convert -from openvino.runtime.opset1.ops import convert_like -from openvino.runtime.opset1.ops import convolution -from openvino.runtime.opset1.ops import convolution_backprop_data -from openvino.runtime.opset1.ops import cos -from openvino.runtime.opset1.ops import cosh -from 
openvino.runtime.opset1.ops import ctc_greedy_decoder -from openvino.runtime.opset6.ops import ctc_greedy_decoder_seq_len -from openvino.runtime.opset4.ops import ctc_loss -from openvino.runtime.opset3.ops import cum_sum -from openvino.runtime.opset3.ops import cum_sum as cumsum -from openvino.runtime.opset8.ops import deformable_convolution -from openvino.runtime.opset1.ops import deformable_psroi_pooling -from openvino.runtime.opset1.ops import depth_to_space -from openvino.runtime.opset8.ops import detection_output -from openvino.runtime.opset7.ops import dft -from openvino.runtime.opset1.ops import divide -from openvino.runtime.opset7.ops import einsum -from openvino.runtime.opset1.ops import elu -from openvino.runtime.opset3.ops import embedding_bag_offsets_sum -from openvino.runtime.opset3.ops import embedding_bag_packed_sum -from openvino.runtime.opset3.ops import embedding_segments_sum -from openvino.runtime.opset3.ops import extract_image_patches -from openvino.runtime.opset1.ops import equal -from openvino.runtime.opset1.ops import erf -from openvino.runtime.opset1.ops import exp -from openvino.runtime.opset9.ops import eye -from openvino.runtime.opset1.ops import fake_quantize -from openvino.runtime.opset1.ops import floor -from openvino.runtime.opset1.ops import floor_mod -from openvino.runtime.opset8.ops import gather -from openvino.runtime.opset6.ops import gather_elements -from openvino.runtime.opset8.ops import gather_nd -from openvino.runtime.opset1.ops import gather_tree -from openvino.runtime.opset7.ops import gelu -from openvino.runtime.opset9.ops import generate_proposals -from openvino.runtime.opset1.ops import greater -from openvino.runtime.opset1.ops import greater_equal -from openvino.runtime.opset9.ops import grid_sample -from openvino.runtime.opset1.ops import grn -from openvino.runtime.opset1.ops import group_convolution -from openvino.runtime.opset1.ops import group_convolution_backprop_data -from openvino.runtime.opset12.ops import 
group_normalization -from openvino.runtime.opset3.ops import gru_cell -from openvino.runtime.opset5.ops import gru_sequence -from openvino.runtime.opset1.ops import hard_sigmoid -from openvino.runtime.opset5.ops import hsigmoid -from openvino.runtime.opset4.ops import hswish -from openvino.runtime.opset7.ops import idft -from openvino.runtime.opset8.ops import if_op -from openvino.runtime.opset11.ops import interpolate -from openvino.runtime.opset9.ops import irdft -from openvino.runtime.opset10.ops import is_finite -from openvino.runtime.opset10.ops import is_inf -from openvino.runtime.opset10.ops import is_nan -from openvino.runtime.opset8.ops import i420_to_bgr -from openvino.runtime.opset8.ops import i420_to_rgb -from openvino.runtime.opset1.ops import less -from openvino.runtime.opset1.ops import less_equal -from openvino.runtime.opset1.ops import log -from openvino.runtime.opset1.ops import logical_and -from openvino.runtime.opset1.ops import logical_not -from openvino.runtime.opset1.ops import logical_or -from openvino.runtime.opset1.ops import logical_xor -from openvino.runtime.opset5.ops import log_softmax -from openvino.runtime.opset5.ops import loop -from openvino.runtime.opset1.ops import lrn -from openvino.runtime.opset4.ops import lstm_cell -from openvino.runtime.opset5.ops import lstm_sequence -from openvino.runtime.opset1.ops import matmul -from openvino.runtime.opset8.ops import matrix_nms -from openvino.runtime.opset8.ops import max_pool -from openvino.runtime.opset1.ops import maximum -from openvino.runtime.opset1.ops import minimum -from openvino.runtime.opset4.ops import mish -from openvino.runtime.opset1.ops import mod -from openvino.runtime.opset9.ops import multiclass_nms -from openvino.runtime.opset1.ops import multiply -from openvino.runtime.opset6.ops import mvn -from openvino.runtime.opset1.ops import negative -from openvino.runtime.opset9.ops import non_max_suppression -from openvino.runtime.opset3.ops import non_zero -from 
openvino.runtime.opset1.ops import normalize_l2 -from openvino.runtime.opset1.ops import not_equal -from openvino.runtime.opset8.ops import nv12_to_bgr -from openvino.runtime.opset8.ops import nv12_to_rgb -from openvino.runtime.opset1.ops import one_hot -from openvino.runtime.opset12.ops import pad -from openvino.runtime.opset1.ops import parameter -from openvino.runtime.opset1.ops import power -from openvino.runtime.opset1.ops import prelu -from openvino.runtime.opset8.ops import prior_box -from openvino.runtime.opset1.ops import prior_box_clustered -from openvino.runtime.opset1.ops import psroi_pooling -from openvino.runtime.opset4.ops import proposal -from openvino.runtime.opset4.ops import range -from openvino.runtime.opset8.ops import random_uniform -from openvino.runtime.opset9.ops import rdft -from openvino.runtime.opset6.ops import read_value -from openvino.runtime.opset4.ops import reduce_l1 -from openvino.runtime.opset4.ops import reduce_l2 -from openvino.runtime.opset1.ops import reduce_logical_and -from openvino.runtime.opset1.ops import reduce_logical_or -from openvino.runtime.opset1.ops import reduce_max -from openvino.runtime.opset1.ops import reduce_mean -from openvino.runtime.opset1.ops import reduce_min -from openvino.runtime.opset1.ops import reduce_prod -from openvino.runtime.opset1.ops import reduce_sum -from openvino.runtime.opset1.ops import region_yolo -from openvino.runtime.opset2.ops import reorg_yolo -from openvino.runtime.opset1.ops import relu -from openvino.runtime.opset1.ops import reshape -from openvino.runtime.opset1.ops import result -from openvino.runtime.opset1.ops import reverse_sequence -from openvino.runtime.opset3.ops import rnn_cell -from openvino.runtime.opset5.ops import rnn_sequence -from openvino.runtime.opset9.ops import roi_align -from openvino.runtime.opset2.ops import roi_pooling -from openvino.runtime.opset7.ops import roll -from openvino.runtime.opset5.ops import round -from openvino.runtime.opset12.ops import 
scatter_elements_update -from openvino.runtime.opset4.ops import scatter_nd_update -from openvino.runtime.opset3.ops import scatter_update -from openvino.runtime.opset1.ops import select -from openvino.runtime.opset1.ops import selu -from openvino.runtime.opset3.ops import shape_of -from openvino.runtime.opset3.ops import shuffle_channels -from openvino.runtime.opset1.ops import sigmoid -from openvino.runtime.opset1.ops import sign -from openvino.runtime.opset1.ops import sin -from openvino.runtime.opset1.ops import sinh -from openvino.runtime.opset8.ops import slice -from openvino.runtime.opset8.ops import softmax -from openvino.runtime.opset4.ops import softplus -from openvino.runtime.opset9.ops import softsign -from openvino.runtime.opset2.ops import space_to_batch -from openvino.runtime.opset1.ops import space_to_depth -from openvino.runtime.opset1.ops import split -from openvino.runtime.opset1.ops import sqrt -from openvino.runtime.opset1.ops import squared_difference -from openvino.runtime.opset1.ops import squeeze -from openvino.runtime.opset1.ops import strided_slice -from openvino.runtime.opset1.ops import subtract -from openvino.runtime.opset4.ops import swish -from openvino.runtime.opset1.ops import tan -from openvino.runtime.opset1.ops import tanh -from openvino.runtime.opset1.ops import tensor_iterator -from openvino.runtime.opset1.ops import tile -from openvino.runtime.opset11.ops import topk -from openvino.runtime.opset1.ops import transpose -from openvino.runtime.opset10.ops import unique -from openvino.runtime.opset1.ops import unsqueeze -from openvino.runtime.opset1.ops import variadic_split +from openvino.opset1.ops import absolute +from openvino.opset1.ops import absolute as abs +from openvino.opset1.ops import acos +from openvino.opset4.ops import acosh +from openvino.opset8.ops import adaptive_avg_pool +from openvino.opset8.ops import adaptive_max_pool +from openvino.opset1.ops import add +from openvino.opset1.ops import asin +from 
openvino.opset4.ops import asinh +from openvino.opset6.ops import assign +from openvino.opset1.ops import atan +from openvino.opset4.ops import atanh +from openvino.opset1.ops import avg_pool +from openvino.opset5.ops import batch_norm_inference +from openvino.opset2.ops import batch_to_space +from openvino.opset1.ops import binary_convolution +from openvino.opset3.ops import broadcast +from openvino.opset3.ops import bucketize +from openvino.opset1.ops import ceiling +from openvino.opset1.ops import ceiling as ceil +from openvino.opset1.ops import clamp +from openvino.opset1.ops import concat +from openvino.opset1.ops import constant +from openvino.opset1.ops import convert +from openvino.opset1.ops import convert_like +from openvino.opset1.ops import convolution +from openvino.opset1.ops import convolution_backprop_data +from openvino.opset1.ops import cos +from openvino.opset1.ops import cosh +from openvino.opset1.ops import ctc_greedy_decoder +from openvino.opset6.ops import ctc_greedy_decoder_seq_len +from openvino.opset4.ops import ctc_loss +from openvino.opset3.ops import cum_sum +from openvino.opset3.ops import cum_sum as cumsum +from openvino.opset8.ops import deformable_convolution +from openvino.opset1.ops import deformable_psroi_pooling +from openvino.opset1.ops import depth_to_space +from openvino.opset8.ops import detection_output +from openvino.opset7.ops import dft +from openvino.opset1.ops import divide +from openvino.opset7.ops import einsum +from openvino.opset1.ops import elu +from openvino.opset3.ops import embedding_bag_offsets_sum +from openvino.opset3.ops import embedding_bag_packed_sum +from openvino.opset3.ops import embedding_segments_sum +from openvino.opset3.ops import extract_image_patches +from openvino.opset1.ops import equal +from openvino.opset1.ops import erf +from openvino.opset1.ops import exp +from openvino.opset9.ops import eye +from openvino.opset1.ops import fake_quantize +from openvino.opset1.ops import floor +from 
openvino.opset1.ops import floor_mod +from openvino.opset8.ops import gather +from openvino.opset6.ops import gather_elements +from openvino.opset8.ops import gather_nd +from openvino.opset1.ops import gather_tree +from openvino.opset7.ops import gelu +from openvino.opset9.ops import generate_proposals +from openvino.opset1.ops import greater +from openvino.opset1.ops import greater_equal +from openvino.opset9.ops import grid_sample +from openvino.opset1.ops import grn +from openvino.opset1.ops import group_convolution +from openvino.opset1.ops import group_convolution_backprop_data +from openvino.opset12.ops import group_normalization +from openvino.opset3.ops import gru_cell +from openvino.opset5.ops import gru_sequence +from openvino.opset1.ops import hard_sigmoid +from openvino.opset5.ops import hsigmoid +from openvino.opset4.ops import hswish +from openvino.opset7.ops import idft +from openvino.opset8.ops import if_op +from openvino.opset11.ops import interpolate +from openvino.opset9.ops import irdft +from openvino.opset10.ops import is_finite +from openvino.opset10.ops import is_inf +from openvino.opset10.ops import is_nan +from openvino.opset8.ops import i420_to_bgr +from openvino.opset8.ops import i420_to_rgb +from openvino.opset1.ops import less +from openvino.opset1.ops import less_equal +from openvino.opset1.ops import log +from openvino.opset1.ops import logical_and +from openvino.opset1.ops import logical_not +from openvino.opset1.ops import logical_or +from openvino.opset1.ops import logical_xor +from openvino.opset5.ops import log_softmax +from openvino.opset5.ops import loop +from openvino.opset1.ops import lrn +from openvino.opset4.ops import lstm_cell +from openvino.opset5.ops import lstm_sequence +from openvino.opset1.ops import matmul +from openvino.opset8.ops import matrix_nms +from openvino.opset8.ops import max_pool +from openvino.opset1.ops import maximum +from openvino.opset1.ops import minimum +from openvino.opset4.ops import mish +from 
openvino.opset1.ops import mod +from openvino.opset9.ops import multiclass_nms +from openvino.opset1.ops import multiply +from openvino.opset6.ops import mvn +from openvino.opset1.ops import negative +from openvino.opset9.ops import non_max_suppression +from openvino.opset3.ops import non_zero +from openvino.opset1.ops import normalize_l2 +from openvino.opset1.ops import not_equal +from openvino.opset8.ops import nv12_to_bgr +from openvino.opset8.ops import nv12_to_rgb +from openvino.opset1.ops import one_hot +from openvino.opset12.ops import pad +from openvino.opset1.ops import parameter +from openvino.opset1.ops import power +from openvino.opset1.ops import prelu +from openvino.opset8.ops import prior_box +from openvino.opset1.ops import prior_box_clustered +from openvino.opset1.ops import psroi_pooling +from openvino.opset4.ops import proposal +from openvino.opset4.ops import range +from openvino.opset8.ops import random_uniform +from openvino.opset9.ops import rdft +from openvino.opset6.ops import read_value +from openvino.opset4.ops import reduce_l1 +from openvino.opset4.ops import reduce_l2 +from openvino.opset1.ops import reduce_logical_and +from openvino.opset1.ops import reduce_logical_or +from openvino.opset1.ops import reduce_max +from openvino.opset1.ops import reduce_mean +from openvino.opset1.ops import reduce_min +from openvino.opset1.ops import reduce_prod +from openvino.opset1.ops import reduce_sum +from openvino.opset1.ops import region_yolo +from openvino.opset2.ops import reorg_yolo +from openvino.opset1.ops import relu +from openvino.opset1.ops import reshape +from openvino.opset1.ops import result +from openvino.opset1.ops import reverse_sequence +from openvino.opset3.ops import rnn_cell +from openvino.opset5.ops import rnn_sequence +from openvino.opset9.ops import roi_align +from openvino.opset2.ops import roi_pooling +from openvino.opset7.ops import roll +from openvino.opset5.ops import round +from openvino.opset12.ops import 
scatter_elements_update +from openvino.opset4.ops import scatter_nd_update +from openvino.opset3.ops import scatter_update +from openvino.opset1.ops import select +from openvino.opset1.ops import selu +from openvino.opset3.ops import shape_of +from openvino.opset3.ops import shuffle_channels +from openvino.opset1.ops import sigmoid +from openvino.opset1.ops import sign +from openvino.opset1.ops import sin +from openvino.opset1.ops import sinh +from openvino.opset8.ops import slice +from openvino.opset8.ops import softmax +from openvino.opset4.ops import softplus +from openvino.opset9.ops import softsign +from openvino.opset2.ops import space_to_batch +from openvino.opset1.ops import space_to_depth +from openvino.opset1.ops import split +from openvino.opset1.ops import sqrt +from openvino.opset1.ops import squared_difference +from openvino.opset1.ops import squeeze +from openvino.opset1.ops import strided_slice +from openvino.opset1.ops import subtract +from openvino.opset4.ops import swish +from openvino.opset1.ops import tan +from openvino.opset1.ops import tanh +from openvino.opset1.ops import tensor_iterator +from openvino.opset1.ops import tile +from openvino.opset11.ops import topk +from openvino.opset1.ops import transpose +from openvino.opset10.ops import unique +from openvino.opset1.ops import unsqueeze +from openvino.opset1.ops import variadic_split diff --git a/src/bindings/python/src/openvino/runtime/opset12/ops/__init__.py b/src/bindings/python/src/openvino/runtime/opset12/ops/__init__.py new file mode 100644 index 00000000000000..b6ba8fe643b381 --- /dev/null +++ b/src/bindings/python/src/openvino/runtime/opset12/ops/__init__.py @@ -0,0 +1,7 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from openvino.opset12.ops import group_normalization +from openvino.opset12.ops import pad +from openvino.opset12.ops import scatter_elements_update diff --git 
a/src/bindings/python/src/openvino/runtime/opset13/__init__.py b/src/bindings/python/src/openvino/runtime/opset13/__init__.py index 7f330fbc87766d..ab3f541e8f831c 100644 --- a/src/bindings/python/src/openvino/runtime/opset13/__init__.py +++ b/src/bindings/python/src/openvino/runtime/opset13/__init__.py @@ -2,187 +2,187 @@ # Copyright (C) 2018-2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -from openvino.runtime.opset1.ops import absolute -from openvino.runtime.opset1.ops import absolute as abs -from openvino.runtime.opset1.ops import acos -from openvino.runtime.opset4.ops import acosh -from openvino.runtime.opset8.ops import adaptive_avg_pool -from openvino.runtime.opset8.ops import adaptive_max_pool -from openvino.runtime.opset1.ops import add -from openvino.runtime.opset1.ops import asin -from openvino.runtime.opset4.ops import asinh -from openvino.runtime.opset6.ops import assign -from openvino.runtime.opset1.ops import atan -from openvino.runtime.opset4.ops import atanh -from openvino.runtime.opset1.ops import avg_pool -from openvino.runtime.opset5.ops import batch_norm_inference -from openvino.runtime.opset2.ops import batch_to_space -from openvino.runtime.opset1.ops import binary_convolution -from openvino.runtime.opset13.ops import bitwise_and -from openvino.runtime.opset13.ops import bitwise_not -from openvino.runtime.opset13.ops import bitwise_or -from openvino.runtime.opset13.ops import bitwise_xor -from openvino.runtime.opset3.ops import broadcast -from openvino.runtime.opset3.ops import bucketize -from openvino.runtime.opset1.ops import ceiling -from openvino.runtime.opset1.ops import ceiling as ceil -from openvino.runtime.opset1.ops import clamp -from openvino.runtime.opset1.ops import concat -from openvino.runtime.opset13.ops import constant -from openvino.runtime.opset1.ops import convert -from openvino.runtime.opset1.ops import convert_like -from openvino.runtime.opset1.ops import convolution -from openvino.runtime.opset1.ops import 
convolution_backprop_data -from openvino.runtime.opset1.ops import cos -from openvino.runtime.opset1.ops import cosh -from openvino.runtime.opset1.ops import ctc_greedy_decoder -from openvino.runtime.opset6.ops import ctc_greedy_decoder_seq_len -from openvino.runtime.opset4.ops import ctc_loss -from openvino.runtime.opset3.ops import cum_sum -from openvino.runtime.opset3.ops import cum_sum as cumsum -from openvino.runtime.opset8.ops import deformable_convolution -from openvino.runtime.opset1.ops import deformable_psroi_pooling -from openvino.runtime.opset1.ops import depth_to_space -from openvino.runtime.opset8.ops import detection_output -from openvino.runtime.opset7.ops import dft -from openvino.runtime.opset1.ops import divide -from openvino.runtime.opset7.ops import einsum -from openvino.runtime.opset1.ops import elu -from openvino.runtime.opset3.ops import embedding_bag_offsets_sum -from openvino.runtime.opset3.ops import embedding_bag_packed_sum -from openvino.runtime.opset3.ops import embedding_segments_sum -from openvino.runtime.opset3.ops import extract_image_patches -from openvino.runtime.opset1.ops import equal -from openvino.runtime.opset1.ops import erf -from openvino.runtime.opset1.ops import exp -from openvino.runtime.opset9.ops import eye -from openvino.runtime.opset13.ops import fake_convert -from openvino.runtime.opset13.ops import fake_quantize -from openvino.runtime.opset1.ops import floor -from openvino.runtime.opset1.ops import floor_mod -from openvino.runtime.opset8.ops import gather -from openvino.runtime.opset6.ops import gather_elements -from openvino.runtime.opset8.ops import gather_nd -from openvino.runtime.opset1.ops import gather_tree -from openvino.runtime.opset7.ops import gelu -from openvino.runtime.opset9.ops import generate_proposals -from openvino.runtime.opset1.ops import greater -from openvino.runtime.opset1.ops import greater_equal -from openvino.runtime.opset9.ops import grid_sample -from openvino.runtime.opset1.ops import 
grn -from openvino.runtime.opset1.ops import group_convolution -from openvino.runtime.opset1.ops import group_convolution_backprop_data -from openvino.runtime.opset12.ops import group_normalization -from openvino.runtime.opset3.ops import gru_cell -from openvino.runtime.opset5.ops import gru_sequence -from openvino.runtime.opset1.ops import hard_sigmoid -from openvino.runtime.opset5.ops import hsigmoid -from openvino.runtime.opset4.ops import hswish -from openvino.runtime.opset7.ops import idft -from openvino.runtime.opset8.ops import if_op -from openvino.runtime.opset11.ops import interpolate -from openvino.runtime.opset9.ops import irdft -from openvino.runtime.opset10.ops import is_finite -from openvino.runtime.opset10.ops import is_inf -from openvino.runtime.opset10.ops import is_nan -from openvino.runtime.opset8.ops import i420_to_bgr -from openvino.runtime.opset8.ops import i420_to_rgb -from openvino.runtime.opset1.ops import less -from openvino.runtime.opset1.ops import less_equal -from openvino.runtime.opset1.ops import log -from openvino.runtime.opset1.ops import logical_and -from openvino.runtime.opset1.ops import logical_not -from openvino.runtime.opset1.ops import logical_or -from openvino.runtime.opset1.ops import logical_xor -from openvino.runtime.opset5.ops import log_softmax -from openvino.runtime.opset5.ops import loop -from openvino.runtime.opset1.ops import lrn -from openvino.runtime.opset4.ops import lstm_cell -from openvino.runtime.opset5.ops import lstm_sequence -from openvino.runtime.opset1.ops import matmul -from openvino.runtime.opset8.ops import matrix_nms -from openvino.runtime.opset8.ops import max_pool -from openvino.runtime.opset1.ops import maximum -from openvino.runtime.opset1.ops import minimum -from openvino.runtime.opset4.ops import mish -from openvino.runtime.opset1.ops import mod -from openvino.runtime.opset9.ops import multiclass_nms -from openvino.runtime.opset13.ops import multinomial -from openvino.runtime.opset1.ops import 
multiply -from openvino.runtime.opset6.ops import mvn -from openvino.runtime.opset1.ops import negative -from openvino.runtime.opset13.ops import nms_rotated -from openvino.runtime.opset9.ops import non_max_suppression -from openvino.runtime.opset3.ops import non_zero -from openvino.runtime.opset1.ops import normalize_l2 -from openvino.runtime.opset1.ops import not_equal -from openvino.runtime.opset8.ops import nv12_to_bgr -from openvino.runtime.opset8.ops import nv12_to_rgb -from openvino.runtime.opset1.ops import one_hot -from openvino.runtime.opset12.ops import pad -from openvino.runtime.opset1.ops import parameter -from openvino.runtime.opset1.ops import power -from openvino.runtime.opset1.ops import prelu -from openvino.runtime.opset8.ops import prior_box -from openvino.runtime.opset1.ops import prior_box_clustered -from openvino.runtime.opset1.ops import psroi_pooling -from openvino.runtime.opset4.ops import proposal -from openvino.runtime.opset4.ops import range -from openvino.runtime.opset8.ops import random_uniform -from openvino.runtime.opset9.ops import rdft -from openvino.runtime.opset6.ops import read_value -from openvino.runtime.opset4.ops import reduce_l1 -from openvino.runtime.opset4.ops import reduce_l2 -from openvino.runtime.opset1.ops import reduce_logical_and -from openvino.runtime.opset1.ops import reduce_logical_or -from openvino.runtime.opset1.ops import reduce_max -from openvino.runtime.opset1.ops import reduce_mean -from openvino.runtime.opset1.ops import reduce_min -from openvino.runtime.opset1.ops import reduce_prod -from openvino.runtime.opset1.ops import reduce_sum -from openvino.runtime.opset1.ops import region_yolo -from openvino.runtime.opset2.ops import reorg_yolo -from openvino.runtime.opset1.ops import relu -from openvino.runtime.opset1.ops import reshape -from openvino.runtime.opset13.ops import result -from openvino.runtime.opset1.ops import reverse_sequence -from openvino.runtime.opset3.ops import rnn_cell -from 
openvino.runtime.opset5.ops import rnn_sequence -from openvino.runtime.opset9.ops import roi_align -from openvino.runtime.opset2.ops import roi_pooling -from openvino.runtime.opset7.ops import roll -from openvino.runtime.opset5.ops import round -from openvino.runtime.opset13.ops import scaled_dot_product_attention -from openvino.runtime.opset12.ops import scatter_elements_update -from openvino.runtime.opset4.ops import scatter_nd_update -from openvino.runtime.opset3.ops import scatter_update -from openvino.runtime.opset1.ops import select -from openvino.runtime.opset1.ops import selu -from openvino.runtime.opset3.ops import shape_of -from openvino.runtime.opset3.ops import shuffle_channels -from openvino.runtime.opset1.ops import sigmoid -from openvino.runtime.opset1.ops import sign -from openvino.runtime.opset1.ops import sin -from openvino.runtime.opset1.ops import sinh -from openvino.runtime.opset8.ops import slice -from openvino.runtime.opset8.ops import softmax -from openvino.runtime.opset4.ops import softplus -from openvino.runtime.opset9.ops import softsign -from openvino.runtime.opset2.ops import space_to_batch -from openvino.runtime.opset1.ops import space_to_depth -from openvino.runtime.opset1.ops import split -from openvino.runtime.opset1.ops import sqrt -from openvino.runtime.opset1.ops import squared_difference -from openvino.runtime.opset1.ops import squeeze -from openvino.runtime.opset1.ops import strided_slice -from openvino.runtime.opset1.ops import subtract -from openvino.runtime.opset4.ops import swish -from openvino.runtime.opset1.ops import tan -from openvino.runtime.opset1.ops import tanh -from openvino.runtime.opset1.ops import tensor_iterator -from openvino.runtime.opset1.ops import tile -from openvino.runtime.opset11.ops import topk -from openvino.runtime.opset1.ops import transpose -from openvino.runtime.opset10.ops import unique -from openvino.runtime.opset1.ops import unsqueeze -from openvino.runtime.opset1.ops import variadic_split 
+from openvino.opset1.ops import absolute +from openvino.opset1.ops import absolute as abs +from openvino.opset1.ops import acos +from openvino.opset4.ops import acosh +from openvino.opset8.ops import adaptive_avg_pool +from openvino.opset8.ops import adaptive_max_pool +from openvino.opset1.ops import add +from openvino.opset1.ops import asin +from openvino.opset4.ops import asinh +from openvino.opset6.ops import assign +from openvino.opset1.ops import atan +from openvino.opset4.ops import atanh +from openvino.opset1.ops import avg_pool +from openvino.opset5.ops import batch_norm_inference +from openvino.opset2.ops import batch_to_space +from openvino.opset1.ops import binary_convolution +from openvino.opset13.ops import bitwise_and +from openvino.opset13.ops import bitwise_not +from openvino.opset13.ops import bitwise_or +from openvino.opset13.ops import bitwise_xor +from openvino.opset3.ops import broadcast +from openvino.opset3.ops import bucketize +from openvino.opset1.ops import ceiling +from openvino.opset1.ops import ceiling as ceil +from openvino.opset1.ops import clamp +from openvino.opset1.ops import concat +from openvino.opset13.ops import constant +from openvino.opset1.ops import convert +from openvino.opset1.ops import convert_like +from openvino.opset1.ops import convolution +from openvino.opset1.ops import convolution_backprop_data +from openvino.opset1.ops import cos +from openvino.opset1.ops import cosh +from openvino.opset1.ops import ctc_greedy_decoder +from openvino.opset6.ops import ctc_greedy_decoder_seq_len +from openvino.opset4.ops import ctc_loss +from openvino.opset3.ops import cum_sum +from openvino.opset3.ops import cum_sum as cumsum +from openvino.opset8.ops import deformable_convolution +from openvino.opset1.ops import deformable_psroi_pooling +from openvino.opset1.ops import depth_to_space +from openvino.opset8.ops import detection_output +from openvino.opset7.ops import dft +from openvino.opset1.ops import divide +from 
openvino.opset7.ops import einsum +from openvino.opset1.ops import elu +from openvino.opset3.ops import embedding_bag_offsets_sum +from openvino.opset3.ops import embedding_bag_packed_sum +from openvino.opset3.ops import embedding_segments_sum +from openvino.opset3.ops import extract_image_patches +from openvino.opset1.ops import equal +from openvino.opset1.ops import erf +from openvino.opset1.ops import exp +from openvino.opset9.ops import eye +from openvino.opset13.ops import fake_convert +from openvino.opset13.ops import fake_quantize +from openvino.opset1.ops import floor +from openvino.opset1.ops import floor_mod +from openvino.opset8.ops import gather +from openvino.opset6.ops import gather_elements +from openvino.opset8.ops import gather_nd +from openvino.opset1.ops import gather_tree +from openvino.opset7.ops import gelu +from openvino.opset9.ops import generate_proposals +from openvino.opset1.ops import greater +from openvino.opset1.ops import greater_equal +from openvino.opset9.ops import grid_sample +from openvino.opset1.ops import grn +from openvino.opset1.ops import group_convolution +from openvino.opset1.ops import group_convolution_backprop_data +from openvino.opset12.ops import group_normalization +from openvino.opset3.ops import gru_cell +from openvino.opset5.ops import gru_sequence +from openvino.opset1.ops import hard_sigmoid +from openvino.opset5.ops import hsigmoid +from openvino.opset4.ops import hswish +from openvino.opset7.ops import idft +from openvino.opset8.ops import if_op +from openvino.opset11.ops import interpolate +from openvino.opset9.ops import irdft +from openvino.opset10.ops import is_finite +from openvino.opset10.ops import is_inf +from openvino.opset10.ops import is_nan +from openvino.opset8.ops import i420_to_bgr +from openvino.opset8.ops import i420_to_rgb +from openvino.opset1.ops import less +from openvino.opset1.ops import less_equal +from openvino.opset1.ops import log +from openvino.opset1.ops import logical_and +from 
openvino.opset1.ops import logical_not +from openvino.opset1.ops import logical_or +from openvino.opset1.ops import logical_xor +from openvino.opset5.ops import log_softmax +from openvino.opset5.ops import loop +from openvino.opset1.ops import lrn +from openvino.opset4.ops import lstm_cell +from openvino.opset5.ops import lstm_sequence +from openvino.opset1.ops import matmul +from openvino.opset8.ops import matrix_nms +from openvino.opset8.ops import max_pool +from openvino.opset1.ops import maximum +from openvino.opset1.ops import minimum +from openvino.opset4.ops import mish +from openvino.opset1.ops import mod +from openvino.opset9.ops import multiclass_nms +from openvino.opset13.ops import multinomial +from openvino.opset1.ops import multiply +from openvino.opset6.ops import mvn +from openvino.opset1.ops import negative +from openvino.opset13.ops import nms_rotated +from openvino.opset9.ops import non_max_suppression +from openvino.opset3.ops import non_zero +from openvino.opset1.ops import normalize_l2 +from openvino.opset1.ops import not_equal +from openvino.opset8.ops import nv12_to_bgr +from openvino.opset8.ops import nv12_to_rgb +from openvino.opset1.ops import one_hot +from openvino.opset12.ops import pad +from openvino.opset1.ops import parameter +from openvino.opset1.ops import power +from openvino.opset1.ops import prelu +from openvino.opset8.ops import prior_box +from openvino.opset1.ops import prior_box_clustered +from openvino.opset1.ops import psroi_pooling +from openvino.opset4.ops import proposal +from openvino.opset4.ops import range +from openvino.opset8.ops import random_uniform +from openvino.opset9.ops import rdft +from openvino.opset6.ops import read_value +from openvino.opset4.ops import reduce_l1 +from openvino.opset4.ops import reduce_l2 +from openvino.opset1.ops import reduce_logical_and +from openvino.opset1.ops import reduce_logical_or +from openvino.opset1.ops import reduce_max +from openvino.opset1.ops import reduce_mean +from 
openvino.opset1.ops import reduce_min +from openvino.opset1.ops import reduce_prod +from openvino.opset1.ops import reduce_sum +from openvino.opset1.ops import region_yolo +from openvino.opset2.ops import reorg_yolo +from openvino.opset1.ops import relu +from openvino.opset1.ops import reshape +from openvino.opset13.ops import result +from openvino.opset1.ops import reverse_sequence +from openvino.opset3.ops import rnn_cell +from openvino.opset5.ops import rnn_sequence +from openvino.opset9.ops import roi_align +from openvino.opset2.ops import roi_pooling +from openvino.opset7.ops import roll +from openvino.opset5.ops import round +from openvino.opset13.ops import scaled_dot_product_attention +from openvino.opset12.ops import scatter_elements_update +from openvino.opset4.ops import scatter_nd_update +from openvino.opset3.ops import scatter_update +from openvino.opset1.ops import select +from openvino.opset1.ops import selu +from openvino.opset3.ops import shape_of +from openvino.opset3.ops import shuffle_channels +from openvino.opset1.ops import sigmoid +from openvino.opset1.ops import sign +from openvino.opset1.ops import sin +from openvino.opset1.ops import sinh +from openvino.opset8.ops import slice +from openvino.opset8.ops import softmax +from openvino.opset4.ops import softplus +from openvino.opset9.ops import softsign +from openvino.opset2.ops import space_to_batch +from openvino.opset1.ops import space_to_depth +from openvino.opset1.ops import split +from openvino.opset1.ops import sqrt +from openvino.opset1.ops import squared_difference +from openvino.opset1.ops import squeeze +from openvino.opset1.ops import strided_slice +from openvino.opset1.ops import subtract +from openvino.opset4.ops import swish +from openvino.opset1.ops import tan +from openvino.opset1.ops import tanh +from openvino.opset1.ops import tensor_iterator +from openvino.opset1.ops import tile +from openvino.opset11.ops import topk +from openvino.opset1.ops import transpose +from 
openvino.opset10.ops import unique +from openvino.opset1.ops import unsqueeze +from openvino.opset1.ops import variadic_split diff --git a/src/bindings/python/src/openvino/runtime/opset13/ops/__init__.py b/src/bindings/python/src/openvino/runtime/opset13/ops/__init__.py new file mode 100644 index 00000000000000..edc66e873e4779 --- /dev/null +++ b/src/bindings/python/src/openvino/runtime/opset13/ops/__init__.py @@ -0,0 +1,15 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from openvino.opset13.ops import bitwise_and +from openvino.opset13.ops import bitwise_not +from openvino.opset13.ops import bitwise_or +from openvino.opset13.ops import bitwise_xor +from openvino.opset13.ops import constant +from openvino.opset13.ops import fake_convert +from openvino.opset13.ops import fake_quantize +from openvino.opset13.ops import multinomial +from openvino.opset13.ops import nms_rotated +from openvino.opset13.ops import result +from openvino.opset13.ops import scaled_dot_product_attention diff --git a/src/bindings/python/src/openvino/runtime/opset14/__init__.py b/src/bindings/python/src/openvino/runtime/opset14/__init__.py index 52ac785bd723e5..8a503a333bd3e1 100644 --- a/src/bindings/python/src/openvino/runtime/opset14/__init__.py +++ b/src/bindings/python/src/openvino/runtime/opset14/__init__.py @@ -2,189 +2,189 @@ # Copyright (C) 2018-2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -from openvino.runtime.opset1.ops import absolute -from openvino.runtime.opset1.ops import absolute as abs -from openvino.runtime.opset1.ops import acos -from openvino.runtime.opset4.ops import acosh -from openvino.runtime.opset8.ops import adaptive_avg_pool -from openvino.runtime.opset8.ops import adaptive_max_pool -from openvino.runtime.opset1.ops import add -from openvino.runtime.opset1.ops import asin -from openvino.runtime.opset4.ops import asinh -from openvino.runtime.opset6.ops import assign -from 
openvino.runtime.opset1.ops import atan -from openvino.runtime.opset4.ops import atanh -from openvino.runtime.opset14.ops import avg_pool -from openvino.runtime.opset5.ops import batch_norm_inference -from openvino.runtime.opset2.ops import batch_to_space -from openvino.runtime.opset1.ops import binary_convolution -from openvino.runtime.opset13.ops import bitwise_and -from openvino.runtime.opset13.ops import bitwise_not -from openvino.runtime.opset13.ops import bitwise_or -from openvino.runtime.opset13.ops import bitwise_xor -from openvino.runtime.opset3.ops import broadcast -from openvino.runtime.opset3.ops import bucketize -from openvino.runtime.opset1.ops import ceiling -from openvino.runtime.opset1.ops import ceiling as ceil -from openvino.runtime.opset1.ops import clamp -from openvino.runtime.opset1.ops import concat -from openvino.runtime.opset13.ops import constant -from openvino.runtime.opset1.ops import convert -from openvino.runtime.opset1.ops import convert_like -from openvino.runtime.opset14.ops import convert_promote_types -from openvino.runtime.opset1.ops import convolution -from openvino.runtime.opset1.ops import convolution_backprop_data -from openvino.runtime.opset1.ops import cos -from openvino.runtime.opset1.ops import cosh -from openvino.runtime.opset1.ops import ctc_greedy_decoder -from openvino.runtime.opset6.ops import ctc_greedy_decoder_seq_len -from openvino.runtime.opset4.ops import ctc_loss -from openvino.runtime.opset3.ops import cum_sum -from openvino.runtime.opset3.ops import cum_sum as cumsum -from openvino.runtime.opset8.ops import deformable_convolution -from openvino.runtime.opset1.ops import deformable_psroi_pooling -from openvino.runtime.opset1.ops import depth_to_space -from openvino.runtime.opset8.ops import detection_output -from openvino.runtime.opset7.ops import dft -from openvino.runtime.opset1.ops import divide -from openvino.runtime.opset7.ops import einsum -from openvino.runtime.opset1.ops import elu -from 
openvino.runtime.opset3.ops import embedding_bag_offsets_sum -from openvino.runtime.opset3.ops import embedding_bag_packed_sum -from openvino.runtime.opset3.ops import embedding_segments_sum -from openvino.runtime.opset3.ops import extract_image_patches -from openvino.runtime.opset1.ops import equal -from openvino.runtime.opset1.ops import erf -from openvino.runtime.opset1.ops import exp -from openvino.runtime.opset9.ops import eye -from openvino.runtime.opset13.ops import fake_convert -from openvino.runtime.opset13.ops import fake_quantize -from openvino.runtime.opset1.ops import floor -from openvino.runtime.opset1.ops import floor_mod -from openvino.runtime.opset8.ops import gather -from openvino.runtime.opset6.ops import gather_elements -from openvino.runtime.opset8.ops import gather_nd -from openvino.runtime.opset1.ops import gather_tree -from openvino.runtime.opset7.ops import gelu -from openvino.runtime.opset9.ops import generate_proposals -from openvino.runtime.opset1.ops import greater -from openvino.runtime.opset1.ops import greater_equal -from openvino.runtime.opset9.ops import grid_sample -from openvino.runtime.opset1.ops import grn -from openvino.runtime.opset1.ops import group_convolution -from openvino.runtime.opset1.ops import group_convolution_backprop_data -from openvino.runtime.opset12.ops import group_normalization -from openvino.runtime.opset3.ops import gru_cell -from openvino.runtime.opset5.ops import gru_sequence -from openvino.runtime.opset1.ops import hard_sigmoid -from openvino.runtime.opset5.ops import hsigmoid -from openvino.runtime.opset4.ops import hswish -from openvino.runtime.opset7.ops import idft -from openvino.runtime.opset8.ops import if_op -from openvino.runtime.opset11.ops import interpolate -from openvino.runtime.opset14.ops import inverse -from openvino.runtime.opset9.ops import irdft -from openvino.runtime.opset10.ops import is_finite -from openvino.runtime.opset10.ops import is_inf -from openvino.runtime.opset10.ops import 
is_nan -from openvino.runtime.opset8.ops import i420_to_bgr -from openvino.runtime.opset8.ops import i420_to_rgb -from openvino.runtime.opset1.ops import less -from openvino.runtime.opset1.ops import less_equal -from openvino.runtime.opset1.ops import log -from openvino.runtime.opset1.ops import logical_and -from openvino.runtime.opset1.ops import logical_not -from openvino.runtime.opset1.ops import logical_or -from openvino.runtime.opset1.ops import logical_xor -from openvino.runtime.opset5.ops import log_softmax -from openvino.runtime.opset5.ops import loop -from openvino.runtime.opset1.ops import lrn -from openvino.runtime.opset4.ops import lstm_cell -from openvino.runtime.opset5.ops import lstm_sequence -from openvino.runtime.opset1.ops import matmul -from openvino.runtime.opset8.ops import matrix_nms -from openvino.runtime.opset14.ops import max_pool -from openvino.runtime.opset1.ops import maximum -from openvino.runtime.opset1.ops import minimum -from openvino.runtime.opset4.ops import mish -from openvino.runtime.opset1.ops import mod -from openvino.runtime.opset9.ops import multiclass_nms -from openvino.runtime.opset13.ops import multinomial -from openvino.runtime.opset1.ops import multiply -from openvino.runtime.opset6.ops import mvn -from openvino.runtime.opset1.ops import negative -from openvino.runtime.opset13.ops import nms_rotated -from openvino.runtime.opset9.ops import non_max_suppression -from openvino.runtime.opset3.ops import non_zero -from openvino.runtime.opset1.ops import normalize_l2 -from openvino.runtime.opset1.ops import not_equal -from openvino.runtime.opset8.ops import nv12_to_bgr -from openvino.runtime.opset8.ops import nv12_to_rgb -from openvino.runtime.opset1.ops import one_hot -from openvino.runtime.opset12.ops import pad -from openvino.runtime.opset1.ops import parameter -from openvino.runtime.opset1.ops import power -from openvino.runtime.opset1.ops import prelu -from openvino.runtime.opset8.ops import prior_box -from 
openvino.runtime.opset1.ops import prior_box_clustered -from openvino.runtime.opset1.ops import psroi_pooling -from openvino.runtime.opset4.ops import proposal -from openvino.runtime.opset4.ops import range -from openvino.runtime.opset8.ops import random_uniform -from openvino.runtime.opset9.ops import rdft -from openvino.runtime.opset6.ops import read_value -from openvino.runtime.opset4.ops import reduce_l1 -from openvino.runtime.opset4.ops import reduce_l2 -from openvino.runtime.opset1.ops import reduce_logical_and -from openvino.runtime.opset1.ops import reduce_logical_or -from openvino.runtime.opset1.ops import reduce_max -from openvino.runtime.opset1.ops import reduce_mean -from openvino.runtime.opset1.ops import reduce_min -from openvino.runtime.opset1.ops import reduce_prod -from openvino.runtime.opset1.ops import reduce_sum -from openvino.runtime.opset1.ops import region_yolo -from openvino.runtime.opset2.ops import reorg_yolo -from openvino.runtime.opset1.ops import relu -from openvino.runtime.opset1.ops import reshape -from openvino.runtime.opset13.ops import result -from openvino.runtime.opset1.ops import reverse_sequence -from openvino.runtime.opset3.ops import rnn_cell -from openvino.runtime.opset5.ops import rnn_sequence -from openvino.runtime.opset9.ops import roi_align -from openvino.runtime.opset2.ops import roi_pooling -from openvino.runtime.opset7.ops import roll -from openvino.runtime.opset5.ops import round -from openvino.runtime.opset13.ops import scaled_dot_product_attention -from openvino.runtime.opset12.ops import scatter_elements_update -from openvino.runtime.opset4.ops import scatter_nd_update -from openvino.runtime.opset3.ops import scatter_update -from openvino.runtime.opset1.ops import select -from openvino.runtime.opset1.ops import selu -from openvino.runtime.opset3.ops import shape_of -from openvino.runtime.opset3.ops import shuffle_channels -from openvino.runtime.opset1.ops import sigmoid -from openvino.runtime.opset1.ops import 
sign -from openvino.runtime.opset1.ops import sin -from openvino.runtime.opset1.ops import sinh -from openvino.runtime.opset8.ops import slice -from openvino.runtime.opset8.ops import softmax -from openvino.runtime.opset4.ops import softplus -from openvino.runtime.opset9.ops import softsign -from openvino.runtime.opset2.ops import space_to_batch -from openvino.runtime.opset1.ops import space_to_depth -from openvino.runtime.opset1.ops import split -from openvino.runtime.opset1.ops import sqrt -from openvino.runtime.opset1.ops import squared_difference -from openvino.runtime.opset1.ops import squeeze -from openvino.runtime.opset1.ops import strided_slice -from openvino.runtime.opset1.ops import subtract -from openvino.runtime.opset4.ops import swish -from openvino.runtime.opset1.ops import tan -from openvino.runtime.opset1.ops import tanh -from openvino.runtime.opset1.ops import tensor_iterator -from openvino.runtime.opset1.ops import tile -from openvino.runtime.opset11.ops import topk -from openvino.runtime.opset1.ops import transpose -from openvino.runtime.opset10.ops import unique -from openvino.runtime.opset1.ops import unsqueeze -from openvino.runtime.opset1.ops import variadic_split +from openvino.opset1.ops import absolute +from openvino.opset1.ops import absolute as abs +from openvino.opset1.ops import acos +from openvino.opset4.ops import acosh +from openvino.opset8.ops import adaptive_avg_pool +from openvino.opset8.ops import adaptive_max_pool +from openvino.opset1.ops import add +from openvino.opset1.ops import asin +from openvino.opset4.ops import asinh +from openvino.opset6.ops import assign +from openvino.opset1.ops import atan +from openvino.opset4.ops import atanh +from openvino.opset14.ops import avg_pool +from openvino.opset5.ops import batch_norm_inference +from openvino.opset2.ops import batch_to_space +from openvino.opset1.ops import binary_convolution +from openvino.opset13.ops import bitwise_and +from openvino.opset13.ops import bitwise_not 
+from openvino.opset13.ops import bitwise_or +from openvino.opset13.ops import bitwise_xor +from openvino.opset3.ops import broadcast +from openvino.opset3.ops import bucketize +from openvino.opset1.ops import ceiling +from openvino.opset1.ops import ceiling as ceil +from openvino.opset1.ops import clamp +from openvino.opset1.ops import concat +from openvino.opset13.ops import constant +from openvino.opset1.ops import convert +from openvino.opset1.ops import convert_like +from openvino.opset14.ops import convert_promote_types +from openvino.opset1.ops import convolution +from openvino.opset1.ops import convolution_backprop_data +from openvino.opset1.ops import cos +from openvino.opset1.ops import cosh +from openvino.opset1.ops import ctc_greedy_decoder +from openvino.opset6.ops import ctc_greedy_decoder_seq_len +from openvino.opset4.ops import ctc_loss +from openvino.opset3.ops import cum_sum +from openvino.opset3.ops import cum_sum as cumsum +from openvino.opset8.ops import deformable_convolution +from openvino.opset1.ops import deformable_psroi_pooling +from openvino.opset1.ops import depth_to_space +from openvino.opset8.ops import detection_output +from openvino.opset7.ops import dft +from openvino.opset1.ops import divide +from openvino.opset7.ops import einsum +from openvino.opset1.ops import elu +from openvino.opset3.ops import embedding_bag_offsets_sum +from openvino.opset3.ops import embedding_bag_packed_sum +from openvino.opset3.ops import embedding_segments_sum +from openvino.opset3.ops import extract_image_patches +from openvino.opset1.ops import equal +from openvino.opset1.ops import erf +from openvino.opset1.ops import exp +from openvino.opset9.ops import eye +from openvino.opset13.ops import fake_convert +from openvino.opset13.ops import fake_quantize +from openvino.opset1.ops import floor +from openvino.opset1.ops import floor_mod +from openvino.opset8.ops import gather +from openvino.opset6.ops import gather_elements +from openvino.opset8.ops import 
gather_nd +from openvino.opset1.ops import gather_tree +from openvino.opset7.ops import gelu +from openvino.opset9.ops import generate_proposals +from openvino.opset1.ops import greater +from openvino.opset1.ops import greater_equal +from openvino.opset9.ops import grid_sample +from openvino.opset1.ops import grn +from openvino.opset1.ops import group_convolution +from openvino.opset1.ops import group_convolution_backprop_data +from openvino.opset12.ops import group_normalization +from openvino.opset3.ops import gru_cell +from openvino.opset5.ops import gru_sequence +from openvino.opset1.ops import hard_sigmoid +from openvino.opset5.ops import hsigmoid +from openvino.opset4.ops import hswish +from openvino.opset7.ops import idft +from openvino.opset8.ops import if_op +from openvino.opset11.ops import interpolate +from openvino.opset14.ops import inverse +from openvino.opset9.ops import irdft +from openvino.opset10.ops import is_finite +from openvino.opset10.ops import is_inf +from openvino.opset10.ops import is_nan +from openvino.opset8.ops import i420_to_bgr +from openvino.opset8.ops import i420_to_rgb +from openvino.opset1.ops import less +from openvino.opset1.ops import less_equal +from openvino.opset1.ops import log +from openvino.opset1.ops import logical_and +from openvino.opset1.ops import logical_not +from openvino.opset1.ops import logical_or +from openvino.opset1.ops import logical_xor +from openvino.opset5.ops import log_softmax +from openvino.opset5.ops import loop +from openvino.opset1.ops import lrn +from openvino.opset4.ops import lstm_cell +from openvino.opset5.ops import lstm_sequence +from openvino.opset1.ops import matmul +from openvino.opset8.ops import matrix_nms +from openvino.opset14.ops import max_pool +from openvino.opset1.ops import maximum +from openvino.opset1.ops import minimum +from openvino.opset4.ops import mish +from openvino.opset1.ops import mod +from openvino.opset9.ops import multiclass_nms +from openvino.opset13.ops import 
multinomial +from openvino.opset1.ops import multiply +from openvino.opset6.ops import mvn +from openvino.opset1.ops import negative +from openvino.opset13.ops import nms_rotated +from openvino.opset9.ops import non_max_suppression +from openvino.opset3.ops import non_zero +from openvino.opset1.ops import normalize_l2 +from openvino.opset1.ops import not_equal +from openvino.opset8.ops import nv12_to_bgr +from openvino.opset8.ops import nv12_to_rgb +from openvino.opset1.ops import one_hot +from openvino.opset12.ops import pad +from openvino.opset1.ops import parameter +from openvino.opset1.ops import power +from openvino.opset1.ops import prelu +from openvino.opset8.ops import prior_box +from openvino.opset1.ops import prior_box_clustered +from openvino.opset1.ops import psroi_pooling +from openvino.opset4.ops import proposal +from openvino.opset4.ops import range +from openvino.opset8.ops import random_uniform +from openvino.opset9.ops import rdft +from openvino.opset6.ops import read_value +from openvino.opset4.ops import reduce_l1 +from openvino.opset4.ops import reduce_l2 +from openvino.opset1.ops import reduce_logical_and +from openvino.opset1.ops import reduce_logical_or +from openvino.opset1.ops import reduce_max +from openvino.opset1.ops import reduce_mean +from openvino.opset1.ops import reduce_min +from openvino.opset1.ops import reduce_prod +from openvino.opset1.ops import reduce_sum +from openvino.opset1.ops import region_yolo +from openvino.opset2.ops import reorg_yolo +from openvino.opset1.ops import relu +from openvino.opset1.ops import reshape +from openvino.opset13.ops import result +from openvino.opset1.ops import reverse_sequence +from openvino.opset3.ops import rnn_cell +from openvino.opset5.ops import rnn_sequence +from openvino.opset9.ops import roi_align +from openvino.opset2.ops import roi_pooling +from openvino.opset7.ops import roll +from openvino.opset5.ops import round +from openvino.opset13.ops import scaled_dot_product_attention +from 
openvino.opset12.ops import scatter_elements_update +from openvino.opset4.ops import scatter_nd_update +from openvino.opset3.ops import scatter_update +from openvino.opset1.ops import select +from openvino.opset1.ops import selu +from openvino.opset3.ops import shape_of +from openvino.opset3.ops import shuffle_channels +from openvino.opset1.ops import sigmoid +from openvino.opset1.ops import sign +from openvino.opset1.ops import sin +from openvino.opset1.ops import sinh +from openvino.opset8.ops import slice +from openvino.opset8.ops import softmax +from openvino.opset4.ops import softplus +from openvino.opset9.ops import softsign +from openvino.opset2.ops import space_to_batch +from openvino.opset1.ops import space_to_depth +from openvino.opset1.ops import split +from openvino.opset1.ops import sqrt +from openvino.opset1.ops import squared_difference +from openvino.opset1.ops import squeeze +from openvino.opset1.ops import strided_slice +from openvino.opset1.ops import subtract +from openvino.opset4.ops import swish +from openvino.opset1.ops import tan +from openvino.opset1.ops import tanh +from openvino.opset1.ops import tensor_iterator +from openvino.opset1.ops import tile +from openvino.opset11.ops import topk +from openvino.opset1.ops import transpose +from openvino.opset10.ops import unique +from openvino.opset1.ops import unsqueeze +from openvino.opset1.ops import variadic_split diff --git a/src/bindings/python/src/openvino/runtime/opset14/ops/__init__.py b/src/bindings/python/src/openvino/runtime/opset14/ops/__init__.py new file mode 100644 index 00000000000000..8f5d09b5d0733e --- /dev/null +++ b/src/bindings/python/src/openvino/runtime/opset14/ops/__init__.py @@ -0,0 +1,8 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from openvino.opset14.ops import avg_pool +from openvino.opset14.ops import convert_promote_types +from openvino.opset14.ops import inverse +from openvino.opset14.ops import 
max_pool diff --git a/src/bindings/python/src/openvino/runtime/opset15/__init__.py b/src/bindings/python/src/openvino/runtime/opset15/__init__.py index c4dd48d9087ae1..d5b93924a69e6e 100644 --- a/src/bindings/python/src/openvino/runtime/opset15/__init__.py +++ b/src/bindings/python/src/openvino/runtime/opset15/__init__.py @@ -3,202 +3,202 @@ # SPDX-License-Identifier: Apache-2.0 # New operations added in Opset15 -from openvino.runtime.opset15.ops import col2im -from openvino.runtime.opset15.ops import embedding_bag_offsets -from openvino.runtime.opset15.ops import embedding_bag_packed -from openvino.runtime.opset15.ops import scatter_nd_update -from openvino.runtime.opset15.ops import roi_align_rotated -from openvino.runtime.opset15.ops import string_tensor_pack -from openvino.runtime.opset15.ops import string_tensor_unpack -from openvino.runtime.opset15.ops import bitwise_left_shift -from openvino.runtime.opset15.ops import bitwise_right_shift -from openvino.runtime.opset15.ops import slice_scatter +from openvino.opset15.ops import col2im +from openvino.opset15.ops import embedding_bag_offsets +from openvino.opset15.ops import embedding_bag_packed +from openvino.opset15.ops import scatter_nd_update +from openvino.opset15.ops import roi_align_rotated +from openvino.opset15.ops import string_tensor_pack +from openvino.opset15.ops import string_tensor_unpack +from openvino.opset15.ops import bitwise_left_shift +from openvino.opset15.ops import bitwise_right_shift +from openvino.opset15.ops import slice_scatter # Operators from previous opsets -from openvino.runtime.opset1.ops import absolute -from openvino.runtime.opset1.ops import absolute as abs -from openvino.runtime.opset1.ops import acos -from openvino.runtime.opset4.ops import acosh -from openvino.runtime.opset8.ops import adaptive_avg_pool -from openvino.runtime.opset8.ops import adaptive_max_pool -from openvino.runtime.opset1.ops import add -from openvino.runtime.opset1.ops import asin -from 
openvino.runtime.opset4.ops import asinh -from openvino.runtime.opset6.ops import assign -from openvino.runtime.opset1.ops import atan -from openvino.runtime.opset4.ops import atanh -from openvino.runtime.opset14.ops import avg_pool -from openvino.runtime.opset5.ops import batch_norm_inference -from openvino.runtime.opset2.ops import batch_to_space -from openvino.runtime.opset1.ops import binary_convolution -from openvino.runtime.opset13.ops import bitwise_and -from openvino.runtime.opset13.ops import bitwise_not -from openvino.runtime.opset13.ops import bitwise_or -from openvino.runtime.opset13.ops import bitwise_xor -from openvino.runtime.opset3.ops import broadcast -from openvino.runtime.opset3.ops import bucketize -from openvino.runtime.opset1.ops import ceiling -from openvino.runtime.opset1.ops import ceiling as ceil -from openvino.runtime.opset1.ops import clamp -from openvino.runtime.opset1.ops import concat -from openvino.runtime.opset13.ops import constant -from openvino.runtime.opset1.ops import convert -from openvino.runtime.opset1.ops import convert_like -from openvino.runtime.opset14.ops import convert_promote_types -from openvino.runtime.opset1.ops import convolution -from openvino.runtime.opset1.ops import convolution_backprop_data -from openvino.runtime.opset1.ops import cos -from openvino.runtime.opset1.ops import cosh -from openvino.runtime.opset1.ops import ctc_greedy_decoder -from openvino.runtime.opset6.ops import ctc_greedy_decoder_seq_len -from openvino.runtime.opset4.ops import ctc_loss -from openvino.runtime.opset3.ops import cum_sum -from openvino.runtime.opset3.ops import cum_sum as cumsum -from openvino.runtime.opset8.ops import deformable_convolution -from openvino.runtime.opset1.ops import deformable_psroi_pooling -from openvino.runtime.opset1.ops import depth_to_space -from openvino.runtime.opset8.ops import detection_output -from openvino.runtime.opset7.ops import dft -from openvino.runtime.opset1.ops import divide -from 
openvino.runtime.opset7.ops import einsum -from openvino.runtime.opset1.ops import elu -from openvino.runtime.opset3.ops import embedding_bag_offsets_sum -from openvino.runtime.opset3.ops import embedding_bag_packed_sum -from openvino.runtime.opset3.ops import embedding_segments_sum -from openvino.runtime.opset3.ops import extract_image_patches -from openvino.runtime.opset1.ops import equal -from openvino.runtime.opset1.ops import erf -from openvino.runtime.opset1.ops import exp -from openvino.runtime.opset9.ops import eye -from openvino.runtime.opset13.ops import fake_convert -from openvino.runtime.opset13.ops import fake_quantize -from openvino.runtime.opset1.ops import floor -from openvino.runtime.opset1.ops import floor_mod -from openvino.runtime.opset8.ops import gather -from openvino.runtime.opset6.ops import gather_elements -from openvino.runtime.opset8.ops import gather_nd -from openvino.runtime.opset1.ops import gather_tree -from openvino.runtime.opset7.ops import gelu -from openvino.runtime.opset9.ops import generate_proposals -from openvino.runtime.opset1.ops import greater -from openvino.runtime.opset1.ops import greater_equal -from openvino.runtime.opset9.ops import grid_sample -from openvino.runtime.opset1.ops import grn -from openvino.runtime.opset1.ops import group_convolution -from openvino.runtime.opset1.ops import group_convolution_backprop_data -from openvino.runtime.opset12.ops import group_normalization -from openvino.runtime.opset3.ops import gru_cell -from openvino.runtime.opset5.ops import gru_sequence -from openvino.runtime.opset1.ops import hard_sigmoid -from openvino.runtime.opset5.ops import hsigmoid -from openvino.runtime.opset4.ops import hswish -from openvino.runtime.opset7.ops import idft -from openvino.runtime.opset8.ops import if_op -from openvino.runtime.opset11.ops import interpolate -from openvino.runtime.opset14.ops import inverse -from openvino.runtime.opset9.ops import irdft -from openvino.runtime.opset10.ops import 
is_finite -from openvino.runtime.opset10.ops import is_inf -from openvino.runtime.opset10.ops import is_nan -from openvino.runtime.opset8.ops import i420_to_bgr -from openvino.runtime.opset8.ops import i420_to_rgb -from openvino.runtime.opset1.ops import less -from openvino.runtime.opset1.ops import less_equal -from openvino.runtime.opset1.ops import log -from openvino.runtime.opset1.ops import logical_and -from openvino.runtime.opset1.ops import logical_not -from openvino.runtime.opset1.ops import logical_or -from openvino.runtime.opset1.ops import logical_xor -from openvino.runtime.opset5.ops import log_softmax -from openvino.runtime.opset5.ops import loop -from openvino.runtime.opset1.ops import lrn -from openvino.runtime.opset4.ops import lstm_cell -from openvino.runtime.opset5.ops import lstm_sequence -from openvino.runtime.opset1.ops import matmul -from openvino.runtime.opset8.ops import matrix_nms -from openvino.runtime.opset14.ops import max_pool -from openvino.runtime.opset1.ops import maximum -from openvino.runtime.opset1.ops import minimum -from openvino.runtime.opset4.ops import mish -from openvino.runtime.opset1.ops import mod -from openvino.runtime.opset9.ops import multiclass_nms -from openvino.runtime.opset13.ops import multinomial -from openvino.runtime.opset1.ops import multiply -from openvino.runtime.opset6.ops import mvn -from openvino.runtime.opset1.ops import negative -from openvino.runtime.opset13.ops import nms_rotated -from openvino.runtime.opset9.ops import non_max_suppression -from openvino.runtime.opset3.ops import non_zero -from openvino.runtime.opset1.ops import normalize_l2 -from openvino.runtime.opset1.ops import not_equal -from openvino.runtime.opset8.ops import nv12_to_bgr -from openvino.runtime.opset8.ops import nv12_to_rgb -from openvino.runtime.opset1.ops import one_hot -from openvino.runtime.opset12.ops import pad -from openvino.runtime.opset1.ops import parameter -from openvino.runtime.opset1.ops import power -from 
openvino.runtime.opset1.ops import prelu -from openvino.runtime.opset8.ops import prior_box -from openvino.runtime.opset1.ops import prior_box_clustered -from openvino.runtime.opset1.ops import psroi_pooling -from openvino.runtime.opset4.ops import proposal -from openvino.runtime.opset4.ops import range -from openvino.runtime.opset8.ops import random_uniform -from openvino.runtime.opset9.ops import rdft -from openvino.runtime.opset6.ops import read_value -from openvino.runtime.opset4.ops import reduce_l1 -from openvino.runtime.opset4.ops import reduce_l2 -from openvino.runtime.opset1.ops import reduce_logical_and -from openvino.runtime.opset1.ops import reduce_logical_or -from openvino.runtime.opset1.ops import reduce_max -from openvino.runtime.opset1.ops import reduce_mean -from openvino.runtime.opset1.ops import reduce_min -from openvino.runtime.opset1.ops import reduce_prod -from openvino.runtime.opset1.ops import reduce_sum -from openvino.runtime.opset1.ops import region_yolo -from openvino.runtime.opset2.ops import reorg_yolo -from openvino.runtime.opset1.ops import relu -from openvino.runtime.opset1.ops import reshape -from openvino.runtime.opset13.ops import result -from openvino.runtime.opset1.ops import reverse_sequence -from openvino.runtime.opset3.ops import rnn_cell -from openvino.runtime.opset5.ops import rnn_sequence -from openvino.runtime.opset9.ops import roi_align -from openvino.runtime.opset2.ops import roi_pooling -from openvino.runtime.opset7.ops import roll -from openvino.runtime.opset5.ops import round -from openvino.runtime.opset13.ops import scaled_dot_product_attention -from openvino.runtime.opset12.ops import scatter_elements_update -from openvino.runtime.opset3.ops import scatter_update -from openvino.runtime.opset15.ops import search_sorted -from openvino.runtime.opset1.ops import select -from openvino.runtime.opset1.ops import selu -from openvino.runtime.opset3.ops import shape_of -from openvino.runtime.opset3.ops import 
shuffle_channels -from openvino.runtime.opset1.ops import sigmoid -from openvino.runtime.opset1.ops import sign -from openvino.runtime.opset1.ops import sin -from openvino.runtime.opset1.ops import sinh -from openvino.runtime.opset8.ops import slice -from openvino.runtime.opset8.ops import softmax -from openvino.runtime.opset4.ops import softplus -from openvino.runtime.opset9.ops import softsign -from openvino.runtime.opset2.ops import space_to_batch -from openvino.runtime.opset1.ops import space_to_depth -from openvino.runtime.opset1.ops import split -from openvino.runtime.opset1.ops import sqrt -from openvino.runtime.opset1.ops import squared_difference -from openvino.runtime.opset15.ops import squeeze -from openvino.runtime.opset15.ops import stft -from openvino.runtime.opset1.ops import strided_slice -from openvino.runtime.opset1.ops import subtract -from openvino.runtime.opset4.ops import swish -from openvino.runtime.opset1.ops import tan -from openvino.runtime.opset1.ops import tanh -from openvino.runtime.opset1.ops import tensor_iterator -from openvino.runtime.opset1.ops import tile -from openvino.runtime.opset11.ops import topk -from openvino.runtime.opset1.ops import transpose -from openvino.runtime.opset10.ops import unique -from openvino.runtime.opset1.ops import unsqueeze -from openvino.runtime.opset1.ops import variadic_split +from openvino.opset1.ops import absolute +from openvino.opset1.ops import absolute as abs +from openvino.opset1.ops import acos +from openvino.opset4.ops import acosh +from openvino.opset8.ops import adaptive_avg_pool +from openvino.opset8.ops import adaptive_max_pool +from openvino.opset1.ops import add +from openvino.opset1.ops import asin +from openvino.opset4.ops import asinh +from openvino.opset6.ops import assign +from openvino.opset1.ops import atan +from openvino.opset4.ops import atanh +from openvino.opset14.ops import avg_pool +from openvino.opset5.ops import batch_norm_inference +from openvino.opset2.ops import 
batch_to_space +from openvino.opset1.ops import binary_convolution +from openvino.opset13.ops import bitwise_and +from openvino.opset13.ops import bitwise_not +from openvino.opset13.ops import bitwise_or +from openvino.opset13.ops import bitwise_xor +from openvino.opset3.ops import broadcast +from openvino.opset3.ops import bucketize +from openvino.opset1.ops import ceiling +from openvino.opset1.ops import ceiling as ceil +from openvino.opset1.ops import clamp +from openvino.opset1.ops import concat +from openvino.opset13.ops import constant +from openvino.opset1.ops import convert +from openvino.opset1.ops import convert_like +from openvino.opset14.ops import convert_promote_types +from openvino.opset1.ops import convolution +from openvino.opset1.ops import convolution_backprop_data +from openvino.opset1.ops import cos +from openvino.opset1.ops import cosh +from openvino.opset1.ops import ctc_greedy_decoder +from openvino.opset6.ops import ctc_greedy_decoder_seq_len +from openvino.opset4.ops import ctc_loss +from openvino.opset3.ops import cum_sum +from openvino.opset3.ops import cum_sum as cumsum +from openvino.opset8.ops import deformable_convolution +from openvino.opset1.ops import deformable_psroi_pooling +from openvino.opset1.ops import depth_to_space +from openvino.opset8.ops import detection_output +from openvino.opset7.ops import dft +from openvino.opset1.ops import divide +from openvino.opset7.ops import einsum +from openvino.opset1.ops import elu +from openvino.opset3.ops import embedding_bag_offsets_sum +from openvino.opset3.ops import embedding_bag_packed_sum +from openvino.opset3.ops import embedding_segments_sum +from openvino.opset3.ops import extract_image_patches +from openvino.opset1.ops import equal +from openvino.opset1.ops import erf +from openvino.opset1.ops import exp +from openvino.opset9.ops import eye +from openvino.opset13.ops import fake_convert +from openvino.opset13.ops import fake_quantize +from openvino.opset1.ops import floor +from 
openvino.opset1.ops import floor_mod +from openvino.opset8.ops import gather +from openvino.opset6.ops import gather_elements +from openvino.opset8.ops import gather_nd +from openvino.opset1.ops import gather_tree +from openvino.opset7.ops import gelu +from openvino.opset9.ops import generate_proposals +from openvino.opset1.ops import greater +from openvino.opset1.ops import greater_equal +from openvino.opset9.ops import grid_sample +from openvino.opset1.ops import grn +from openvino.opset1.ops import group_convolution +from openvino.opset1.ops import group_convolution_backprop_data +from openvino.opset12.ops import group_normalization +from openvino.opset3.ops import gru_cell +from openvino.opset5.ops import gru_sequence +from openvino.opset1.ops import hard_sigmoid +from openvino.opset5.ops import hsigmoid +from openvino.opset4.ops import hswish +from openvino.opset7.ops import idft +from openvino.opset8.ops import if_op +from openvino.opset11.ops import interpolate +from openvino.opset14.ops import inverse +from openvino.opset9.ops import irdft +from openvino.opset10.ops import is_finite +from openvino.opset10.ops import is_inf +from openvino.opset10.ops import is_nan +from openvino.opset8.ops import i420_to_bgr +from openvino.opset8.ops import i420_to_rgb +from openvino.opset1.ops import less +from openvino.opset1.ops import less_equal +from openvino.opset1.ops import log +from openvino.opset1.ops import logical_and +from openvino.opset1.ops import logical_not +from openvino.opset1.ops import logical_or +from openvino.opset1.ops import logical_xor +from openvino.opset5.ops import log_softmax +from openvino.opset5.ops import loop +from openvino.opset1.ops import lrn +from openvino.opset4.ops import lstm_cell +from openvino.opset5.ops import lstm_sequence +from openvino.opset1.ops import matmul +from openvino.opset8.ops import matrix_nms +from openvino.opset14.ops import max_pool +from openvino.opset1.ops import maximum +from openvino.opset1.ops import minimum 
+from openvino.opset4.ops import mish +from openvino.opset1.ops import mod +from openvino.opset9.ops import multiclass_nms +from openvino.opset13.ops import multinomial +from openvino.opset1.ops import multiply +from openvino.opset6.ops import mvn +from openvino.opset1.ops import negative +from openvino.opset13.ops import nms_rotated +from openvino.opset9.ops import non_max_suppression +from openvino.opset3.ops import non_zero +from openvino.opset1.ops import normalize_l2 +from openvino.opset1.ops import not_equal +from openvino.opset8.ops import nv12_to_bgr +from openvino.opset8.ops import nv12_to_rgb +from openvino.opset1.ops import one_hot +from openvino.opset12.ops import pad +from openvino.opset1.ops import parameter +from openvino.opset1.ops import power +from openvino.opset1.ops import prelu +from openvino.opset8.ops import prior_box +from openvino.opset1.ops import prior_box_clustered +from openvino.opset1.ops import psroi_pooling +from openvino.opset4.ops import proposal +from openvino.opset4.ops import range +from openvino.opset8.ops import random_uniform +from openvino.opset9.ops import rdft +from openvino.opset6.ops import read_value +from openvino.opset4.ops import reduce_l1 +from openvino.opset4.ops import reduce_l2 +from openvino.opset1.ops import reduce_logical_and +from openvino.opset1.ops import reduce_logical_or +from openvino.opset1.ops import reduce_max +from openvino.opset1.ops import reduce_mean +from openvino.opset1.ops import reduce_min +from openvino.opset1.ops import reduce_prod +from openvino.opset1.ops import reduce_sum +from openvino.opset1.ops import region_yolo +from openvino.opset2.ops import reorg_yolo +from openvino.opset1.ops import relu +from openvino.opset1.ops import reshape +from openvino.opset13.ops import result +from openvino.opset1.ops import reverse_sequence +from openvino.opset3.ops import rnn_cell +from openvino.opset5.ops import rnn_sequence +from openvino.opset9.ops import roi_align +from openvino.opset2.ops import 
roi_pooling +from openvino.opset7.ops import roll +from openvino.opset5.ops import round +from openvino.opset13.ops import scaled_dot_product_attention +from openvino.opset12.ops import scatter_elements_update +from openvino.opset3.ops import scatter_update +from openvino.opset15.ops import search_sorted +from openvino.opset1.ops import select +from openvino.opset1.ops import selu +from openvino.opset3.ops import shape_of +from openvino.opset3.ops import shuffle_channels +from openvino.opset1.ops import sigmoid +from openvino.opset1.ops import sign +from openvino.opset1.ops import sin +from openvino.opset1.ops import sinh +from openvino.opset8.ops import slice +from openvino.opset8.ops import softmax +from openvino.opset4.ops import softplus +from openvino.opset9.ops import softsign +from openvino.opset2.ops import space_to_batch +from openvino.opset1.ops import space_to_depth +from openvino.opset1.ops import split +from openvino.opset1.ops import sqrt +from openvino.opset1.ops import squared_difference +from openvino.opset15.ops import squeeze +from openvino.opset15.ops import stft +from openvino.opset1.ops import strided_slice +from openvino.opset1.ops import subtract +from openvino.opset4.ops import swish +from openvino.opset1.ops import tan +from openvino.opset1.ops import tanh +from openvino.opset1.ops import tensor_iterator +from openvino.opset1.ops import tile +from openvino.opset11.ops import topk +from openvino.opset1.ops import transpose +from openvino.opset10.ops import unique +from openvino.opset1.ops import unsqueeze +from openvino.opset1.ops import variadic_split diff --git a/src/bindings/python/src/openvino/runtime/opset15/ops/__init__.py b/src/bindings/python/src/openvino/runtime/opset15/ops/__init__.py new file mode 100644 index 00000000000000..57543286d31543 --- /dev/null +++ b/src/bindings/python/src/openvino/runtime/opset15/ops/__init__.py @@ -0,0 +1,17 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2018-2024 Intel Corporation +# 
SPDX-License-Identifier: Apache-2.0 + +from openvino.opset15.ops import col2im +from openvino.opset15.ops import embedding_bag_offsets +from openvino.opset15.ops import embedding_bag_packed +from openvino.opset15.ops import scatter_nd_update +from openvino.opset15.ops import roi_align_rotated +from openvino.opset15.ops import string_tensor_pack +from openvino.opset15.ops import string_tensor_unpack +from openvino.opset15.ops import bitwise_left_shift +from openvino.opset15.ops import bitwise_right_shift +from openvino.opset15.ops import slice_scatter +from openvino.opset15.ops import search_sorted +from openvino.opset15.ops import squeeze +from openvino.opset15.ops import stft diff --git a/src/bindings/python/src/openvino/runtime/opset16/__init__.py b/src/bindings/python/src/openvino/runtime/opset16/__init__.py index ce52690e919fc3..06fa9d2e7d0070 100644 --- a/src/bindings/python/src/openvino/runtime/opset16/__init__.py +++ b/src/bindings/python/src/openvino/runtime/opset16/__init__.py @@ -3,7 +3,7 @@ # SPDX-License-Identifier: Apache-2.0 # New operations added in Opset16 -from openvino.runtime.opset16.ops import identity +from openvino.opset16.ops import identity # Operators from previous opsets # TODO (ticket: 156877): Add previous opset operators at the end of opset16 development diff --git a/src/bindings/python/src/openvino/runtime/opset16/ops/__init__.py b/src/bindings/python/src/openvino/runtime/opset16/ops/__init__.py new file mode 100644 index 00000000000000..d28ce61bd00d54 --- /dev/null +++ b/src/bindings/python/src/openvino/runtime/opset16/ops/__init__.py @@ -0,0 +1,5 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from openvino.opset16.ops import identity diff --git a/src/bindings/python/src/openvino/runtime/opset2/__init__.py b/src/bindings/python/src/openvino/runtime/opset2/__init__.py index 6624149e157e9e..1306c89b5241d8 100644 --- 
a/src/bindings/python/src/openvino/runtime/opset2/__init__.py +++ b/src/bindings/python/src/openvino/runtime/opset2/__init__.py @@ -2,116 +2,116 @@ # Copyright (C) 2018-2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -from openvino.runtime.opset1.ops import absolute -from openvino.runtime.opset1.ops import absolute as abs -from openvino.runtime.opset1.ops import acos -from openvino.runtime.opset1.ops import add -from openvino.runtime.opset1.ops import asin -from openvino.runtime.opset1.ops import atan -from openvino.runtime.opset1.ops import avg_pool -from openvino.runtime.opset1.ops import batch_norm_inference -from openvino.runtime.opset2.ops import batch_to_space -from openvino.runtime.opset1.ops import binary_convolution -from openvino.runtime.opset1.ops import broadcast -from openvino.runtime.opset1.ops import ceiling -from openvino.runtime.opset1.ops import ceiling as ceil -from openvino.runtime.opset1.ops import clamp -from openvino.runtime.opset1.ops import concat -from openvino.runtime.opset1.ops import constant -from openvino.runtime.opset1.ops import convert -from openvino.runtime.opset1.ops import convert_like -from openvino.runtime.opset1.ops import convolution -from openvino.runtime.opset1.ops import convolution_backprop_data -from openvino.runtime.opset1.ops import cos -from openvino.runtime.opset1.ops import cosh -from openvino.runtime.opset1.ops import ctc_greedy_decoder -from openvino.runtime.opset1.ops import deformable_convolution -from openvino.runtime.opset1.ops import deformable_psroi_pooling -from openvino.runtime.opset1.ops import depth_to_space -from openvino.runtime.opset1.ops import detection_output -from openvino.runtime.opset1.ops import divide -from openvino.runtime.opset1.ops import elu -from openvino.runtime.opset1.ops import equal -from openvino.runtime.opset1.ops import erf -from openvino.runtime.opset1.ops import exp -from openvino.runtime.opset1.ops import fake_quantize -from openvino.runtime.opset1.ops import floor 
-from openvino.runtime.opset1.ops import floor_mod -from openvino.runtime.opset1.ops import gather -from openvino.runtime.opset1.ops import gather_tree -from openvino.runtime.opset2.ops import gelu -from openvino.runtime.opset1.ops import greater -from openvino.runtime.opset1.ops import greater_equal -from openvino.runtime.opset1.ops import grn -from openvino.runtime.opset1.ops import group_convolution -from openvino.runtime.opset1.ops import group_convolution_backprop_data -from openvino.runtime.opset1.ops import hard_sigmoid -from openvino.runtime.opset1.ops import interpolate -from openvino.runtime.opset1.ops import less -from openvino.runtime.opset1.ops import less_equal -from openvino.runtime.opset1.ops import log -from openvino.runtime.opset1.ops import logical_and -from openvino.runtime.opset1.ops import logical_not -from openvino.runtime.opset1.ops import logical_or -from openvino.runtime.opset1.ops import logical_xor -from openvino.runtime.opset1.ops import lrn -from openvino.runtime.opset1.ops import lstm_cell -from openvino.runtime.opset1.ops import matmul -from openvino.runtime.opset1.ops import max_pool -from openvino.runtime.opset1.ops import maximum -from openvino.runtime.opset1.ops import minimum -from openvino.runtime.opset1.ops import mod -from openvino.runtime.opset1.ops import multiply -from openvino.runtime.opset2.ops import mvn -from openvino.runtime.opset1.ops import negative -from openvino.runtime.opset1.ops import non_max_suppression -from openvino.runtime.opset1.ops import normalize_l2 -from openvino.runtime.opset1.ops import not_equal -from openvino.runtime.opset1.ops import one_hot -from openvino.runtime.opset1.ops import pad -from openvino.runtime.opset1.ops import parameter -from openvino.runtime.opset1.ops import power -from openvino.runtime.opset1.ops import prelu -from openvino.runtime.opset1.ops import prior_box -from openvino.runtime.opset1.ops import prior_box_clustered -from openvino.runtime.opset1.ops import psroi_pooling -from 
openvino.runtime.opset1.ops import proposal -from openvino.runtime.opset1.ops import range -from openvino.runtime.opset1.ops import reduce_logical_and -from openvino.runtime.opset1.ops import reduce_logical_or -from openvino.runtime.opset1.ops import reduce_max -from openvino.runtime.opset1.ops import reduce_mean -from openvino.runtime.opset1.ops import reduce_min -from openvino.runtime.opset1.ops import reduce_prod -from openvino.runtime.opset1.ops import reduce_sum -from openvino.runtime.opset1.ops import region_yolo -from openvino.runtime.opset2.ops import reorg_yolo -from openvino.runtime.opset1.ops import relu -from openvino.runtime.opset1.ops import reshape -from openvino.runtime.opset1.ops import result -from openvino.runtime.opset1.ops import reverse_sequence -from openvino.runtime.opset2.ops import roi_pooling -from openvino.runtime.opset1.ops import select -from openvino.runtime.opset1.ops import selu -from openvino.runtime.opset1.ops import shape_of -from openvino.runtime.opset1.ops import sigmoid -from openvino.runtime.opset1.ops import sign -from openvino.runtime.opset1.ops import sin -from openvino.runtime.opset1.ops import sinh -from openvino.runtime.opset1.ops import softmax -from openvino.runtime.opset2.ops import space_to_batch -from openvino.runtime.opset1.ops import space_to_depth -from openvino.runtime.opset1.ops import split -from openvino.runtime.opset1.ops import sqrt -from openvino.runtime.opset1.ops import squared_difference -from openvino.runtime.opset1.ops import squeeze -from openvino.runtime.opset1.ops import strided_slice -from openvino.runtime.opset1.ops import subtract -from openvino.runtime.opset1.ops import tan -from openvino.runtime.opset1.ops import tanh -from openvino.runtime.opset1.ops import tensor_iterator -from openvino.runtime.opset1.ops import tile -from openvino.runtime.opset1.ops import topk -from openvino.runtime.opset1.ops import transpose -from openvino.runtime.opset1.ops import unsqueeze -from 
openvino.runtime.opset1.ops import variadic_split +from openvino.opset1.ops import absolute +from openvino.opset1.ops import absolute as abs +from openvino.opset1.ops import acos +from openvino.opset1.ops import add +from openvino.opset1.ops import asin +from openvino.opset1.ops import atan +from openvino.opset1.ops import avg_pool +from openvino.opset1.ops import batch_norm_inference +from openvino.opset2.ops import batch_to_space +from openvino.opset1.ops import binary_convolution +from openvino.opset1.ops import broadcast +from openvino.opset1.ops import ceiling +from openvino.opset1.ops import ceiling as ceil +from openvino.opset1.ops import clamp +from openvino.opset1.ops import concat +from openvino.opset1.ops import constant +from openvino.opset1.ops import convert +from openvino.opset1.ops import convert_like +from openvino.opset1.ops import convolution +from openvino.opset1.ops import convolution_backprop_data +from openvino.opset1.ops import cos +from openvino.opset1.ops import cosh +from openvino.opset1.ops import ctc_greedy_decoder +from openvino.opset1.ops import deformable_convolution +from openvino.opset1.ops import deformable_psroi_pooling +from openvino.opset1.ops import depth_to_space +from openvino.opset1.ops import detection_output +from openvino.opset1.ops import divide +from openvino.opset1.ops import elu +from openvino.opset1.ops import equal +from openvino.opset1.ops import erf +from openvino.opset1.ops import exp +from openvino.opset1.ops import fake_quantize +from openvino.opset1.ops import floor +from openvino.opset1.ops import floor_mod +from openvino.opset1.ops import gather +from openvino.opset1.ops import gather_tree +from openvino.opset2.ops import gelu +from openvino.opset1.ops import greater +from openvino.opset1.ops import greater_equal +from openvino.opset1.ops import grn +from openvino.opset1.ops import group_convolution +from openvino.opset1.ops import group_convolution_backprop_data +from openvino.opset1.ops import 
hard_sigmoid +from openvino.opset1.ops import interpolate +from openvino.opset1.ops import less +from openvino.opset1.ops import less_equal +from openvino.opset1.ops import log +from openvino.opset1.ops import logical_and +from openvino.opset1.ops import logical_not +from openvino.opset1.ops import logical_or +from openvino.opset1.ops import logical_xor +from openvino.opset1.ops import lrn +from openvino.opset1.ops import lstm_cell +from openvino.opset1.ops import matmul +from openvino.opset1.ops import max_pool +from openvino.opset1.ops import maximum +from openvino.opset1.ops import minimum +from openvino.opset1.ops import mod +from openvino.opset1.ops import multiply +from openvino.opset2.ops import mvn +from openvino.opset1.ops import negative +from openvino.opset1.ops import non_max_suppression +from openvino.opset1.ops import normalize_l2 +from openvino.opset1.ops import not_equal +from openvino.opset1.ops import one_hot +from openvino.opset1.ops import pad +from openvino.opset1.ops import parameter +from openvino.opset1.ops import power +from openvino.opset1.ops import prelu +from openvino.opset1.ops import prior_box +from openvino.opset1.ops import prior_box_clustered +from openvino.opset1.ops import psroi_pooling +from openvino.opset1.ops import proposal +from openvino.opset1.ops import range +from openvino.opset1.ops import reduce_logical_and +from openvino.opset1.ops import reduce_logical_or +from openvino.opset1.ops import reduce_max +from openvino.opset1.ops import reduce_mean +from openvino.opset1.ops import reduce_min +from openvino.opset1.ops import reduce_prod +from openvino.opset1.ops import reduce_sum +from openvino.opset1.ops import region_yolo +from openvino.opset2.ops import reorg_yolo +from openvino.opset1.ops import relu +from openvino.opset1.ops import reshape +from openvino.opset1.ops import result +from openvino.opset1.ops import reverse_sequence +from openvino.opset2.ops import roi_pooling +from openvino.opset1.ops import select +from 
openvino.opset1.ops import selu +from openvino.opset1.ops import shape_of +from openvino.opset1.ops import sigmoid +from openvino.opset1.ops import sign +from openvino.opset1.ops import sin +from openvino.opset1.ops import sinh +from openvino.opset1.ops import softmax +from openvino.opset2.ops import space_to_batch +from openvino.opset1.ops import space_to_depth +from openvino.opset1.ops import split +from openvino.opset1.ops import sqrt +from openvino.opset1.ops import squared_difference +from openvino.opset1.ops import squeeze +from openvino.opset1.ops import strided_slice +from openvino.opset1.ops import subtract +from openvino.opset1.ops import tan +from openvino.opset1.ops import tanh +from openvino.opset1.ops import tensor_iterator +from openvino.opset1.ops import tile +from openvino.opset1.ops import topk +from openvino.opset1.ops import transpose +from openvino.opset1.ops import unsqueeze +from openvino.opset1.ops import variadic_split diff --git a/src/bindings/python/src/openvino/runtime/opset2/ops/__init__.py b/src/bindings/python/src/openvino/runtime/opset2/ops/__init__.py new file mode 100644 index 00000000000000..89d30e8521e6d8 --- /dev/null +++ b/src/bindings/python/src/openvino/runtime/opset2/ops/__init__.py @@ -0,0 +1,10 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from openvino.opset2.ops import batch_to_space +from openvino.opset2.ops import gelu +from openvino.opset2.ops import mvn +from openvino.opset2.ops import reorg_yolo +from openvino.opset2.ops import roi_pooling +from openvino.opset2.ops import space_to_batch diff --git a/src/bindings/python/src/openvino/runtime/opset3/__init__.py b/src/bindings/python/src/openvino/runtime/opset3/__init__.py index 5bd68912cae807..3a0baa675114f1 100644 --- a/src/bindings/python/src/openvino/runtime/opset3/__init__.py +++ b/src/bindings/python/src/openvino/runtime/opset3/__init__.py @@ -2,132 +2,132 @@ # Copyright (C) 2018-2024 Intel 
Corporation # SPDX-License-Identifier: Apache-2.0 -from openvino.runtime.opset1.ops import absolute -from openvino.runtime.opset1.ops import absolute as abs -from openvino.runtime.opset1.ops import acos -from openvino.runtime.opset1.ops import add -from openvino.runtime.opset1.ops import asin -from openvino.runtime.opset3.ops import assign -from openvino.runtime.opset1.ops import atan -from openvino.runtime.opset1.ops import avg_pool -from openvino.runtime.opset1.ops import batch_norm_inference -from openvino.runtime.opset2.ops import batch_to_space -from openvino.runtime.opset1.ops import binary_convolution -from openvino.runtime.opset3.ops import broadcast -from openvino.runtime.opset3.ops import bucketize -from openvino.runtime.opset1.ops import ceiling -from openvino.runtime.opset1.ops import ceiling as ceil -from openvino.runtime.opset1.ops import clamp -from openvino.runtime.opset1.ops import concat -from openvino.runtime.opset1.ops import constant -from openvino.runtime.opset1.ops import convert -from openvino.runtime.opset1.ops import convert_like -from openvino.runtime.opset1.ops import convolution -from openvino.runtime.opset1.ops import convolution_backprop_data -from openvino.runtime.opset1.ops import cos -from openvino.runtime.opset1.ops import cosh -from openvino.runtime.opset1.ops import ctc_greedy_decoder -from openvino.runtime.opset3.ops import cum_sum -from openvino.runtime.opset3.ops import cum_sum as cumsum -from openvino.runtime.opset1.ops import deformable_convolution -from openvino.runtime.opset1.ops import deformable_psroi_pooling -from openvino.runtime.opset1.ops import depth_to_space -from openvino.runtime.opset1.ops import detection_output -from openvino.runtime.opset1.ops import divide -from openvino.runtime.opset1.ops import elu -from openvino.runtime.opset3.ops import embedding_bag_offsets_sum -from openvino.runtime.opset3.ops import embedding_bag_packed_sum -from openvino.runtime.opset3.ops import embedding_segments_sum -from 
openvino.runtime.opset3.ops import extract_image_patches -from openvino.runtime.opset1.ops import equal -from openvino.runtime.opset1.ops import erf -from openvino.runtime.opset1.ops import exp -from openvino.runtime.opset1.ops import fake_quantize -from openvino.runtime.opset1.ops import floor -from openvino.runtime.opset1.ops import floor_mod -from openvino.runtime.opset1.ops import gather -from openvino.runtime.opset1.ops import gather_tree -from openvino.runtime.opset2.ops import gelu -from openvino.runtime.opset1.ops import greater -from openvino.runtime.opset1.ops import greater_equal -from openvino.runtime.opset1.ops import grn -from openvino.runtime.opset1.ops import group_convolution -from openvino.runtime.opset1.ops import group_convolution_backprop_data -from openvino.runtime.opset3.ops import gru_cell -from openvino.runtime.opset1.ops import hard_sigmoid -from openvino.runtime.opset1.ops import interpolate -from openvino.runtime.opset1.ops import less -from openvino.runtime.opset1.ops import less_equal -from openvino.runtime.opset1.ops import log -from openvino.runtime.opset1.ops import logical_and -from openvino.runtime.opset1.ops import logical_not -from openvino.runtime.opset1.ops import logical_or -from openvino.runtime.opset1.ops import logical_xor -from openvino.runtime.opset1.ops import lrn -from openvino.runtime.opset1.ops import lstm_cell -from openvino.runtime.opset1.ops import matmul -from openvino.runtime.opset1.ops import max_pool -from openvino.runtime.opset1.ops import maximum -from openvino.runtime.opset1.ops import minimum -from openvino.runtime.opset1.ops import mod -from openvino.runtime.opset1.ops import multiply -from openvino.runtime.opset2.ops import mvn -from openvino.runtime.opset1.ops import negative -from openvino.runtime.opset3.ops import non_max_suppression -from openvino.runtime.opset3.ops import non_zero -from openvino.runtime.opset1.ops import normalize_l2 -from openvino.runtime.opset1.ops import not_equal -from 
openvino.runtime.opset1.ops import one_hot -from openvino.runtime.opset1.ops import pad -from openvino.runtime.opset1.ops import parameter -from openvino.runtime.opset1.ops import power -from openvino.runtime.opset1.ops import prelu -from openvino.runtime.opset1.ops import prior_box -from openvino.runtime.opset1.ops import prior_box_clustered -from openvino.runtime.opset1.ops import psroi_pooling -from openvino.runtime.opset1.ops import proposal -from openvino.runtime.opset1.ops import range -from openvino.runtime.opset3.ops import read_value -from openvino.runtime.opset1.ops import reduce_logical_and -from openvino.runtime.opset1.ops import reduce_logical_or -from openvino.runtime.opset1.ops import reduce_max -from openvino.runtime.opset1.ops import reduce_mean -from openvino.runtime.opset1.ops import reduce_min -from openvino.runtime.opset1.ops import reduce_prod -from openvino.runtime.opset1.ops import reduce_sum -from openvino.runtime.opset1.ops import region_yolo -from openvino.runtime.opset2.ops import reorg_yolo -from openvino.runtime.opset1.ops import relu -from openvino.runtime.opset1.ops import reshape -from openvino.runtime.opset1.ops import result -from openvino.runtime.opset1.ops import reverse_sequence -from openvino.runtime.opset3.ops import rnn_cell -from openvino.runtime.opset3.ops import roi_align -from openvino.runtime.opset2.ops import roi_pooling -from openvino.runtime.opset3.ops import scatter_elements_update -from openvino.runtime.opset3.ops import scatter_update -from openvino.runtime.opset1.ops import select -from openvino.runtime.opset1.ops import selu -from openvino.runtime.opset3.ops import shape_of -from openvino.runtime.opset3.ops import shuffle_channels -from openvino.runtime.opset1.ops import sigmoid -from openvino.runtime.opset1.ops import sign -from openvino.runtime.opset1.ops import sin -from openvino.runtime.opset1.ops import sinh -from openvino.runtime.opset1.ops import softmax -from openvino.runtime.opset2.ops import 
space_to_batch -from openvino.runtime.opset1.ops import space_to_depth -from openvino.runtime.opset1.ops import split -from openvino.runtime.opset1.ops import sqrt -from openvino.runtime.opset1.ops import squared_difference -from openvino.runtime.opset1.ops import squeeze -from openvino.runtime.opset1.ops import strided_slice -from openvino.runtime.opset1.ops import subtract -from openvino.runtime.opset1.ops import tan -from openvino.runtime.opset1.ops import tanh -from openvino.runtime.opset1.ops import tensor_iterator -from openvino.runtime.opset1.ops import tile -from openvino.runtime.opset3.ops import topk -from openvino.runtime.opset1.ops import transpose -from openvino.runtime.opset1.ops import unsqueeze -from openvino.runtime.opset1.ops import variadic_split +from openvino.opset1.ops import absolute +from openvino.opset1.ops import absolute as abs +from openvino.opset1.ops import acos +from openvino.opset1.ops import add +from openvino.opset1.ops import asin +from openvino.opset3.ops import assign +from openvino.opset1.ops import atan +from openvino.opset1.ops import avg_pool +from openvino.opset1.ops import batch_norm_inference +from openvino.opset2.ops import batch_to_space +from openvino.opset1.ops import binary_convolution +from openvino.opset3.ops import broadcast +from openvino.opset3.ops import bucketize +from openvino.opset1.ops import ceiling +from openvino.opset1.ops import ceiling as ceil +from openvino.opset1.ops import clamp +from openvino.opset1.ops import concat +from openvino.opset1.ops import constant +from openvino.opset1.ops import convert +from openvino.opset1.ops import convert_like +from openvino.opset1.ops import convolution +from openvino.opset1.ops import convolution_backprop_data +from openvino.opset1.ops import cos +from openvino.opset1.ops import cosh +from openvino.opset1.ops import ctc_greedy_decoder +from openvino.opset3.ops import cum_sum +from openvino.opset3.ops import cum_sum as cumsum +from openvino.opset1.ops import 
deformable_convolution +from openvino.opset1.ops import deformable_psroi_pooling +from openvino.opset1.ops import depth_to_space +from openvino.opset1.ops import detection_output +from openvino.opset1.ops import divide +from openvino.opset1.ops import elu +from openvino.opset3.ops import embedding_bag_offsets_sum +from openvino.opset3.ops import embedding_bag_packed_sum +from openvino.opset3.ops import embedding_segments_sum +from openvino.opset3.ops import extract_image_patches +from openvino.opset1.ops import equal +from openvino.opset1.ops import erf +from openvino.opset1.ops import exp +from openvino.opset1.ops import fake_quantize +from openvino.opset1.ops import floor +from openvino.opset1.ops import floor_mod +from openvino.opset1.ops import gather +from openvino.opset1.ops import gather_tree +from openvino.opset2.ops import gelu +from openvino.opset1.ops import greater +from openvino.opset1.ops import greater_equal +from openvino.opset1.ops import grn +from openvino.opset1.ops import group_convolution +from openvino.opset1.ops import group_convolution_backprop_data +from openvino.opset3.ops import gru_cell +from openvino.opset1.ops import hard_sigmoid +from openvino.opset1.ops import interpolate +from openvino.opset1.ops import less +from openvino.opset1.ops import less_equal +from openvino.opset1.ops import log +from openvino.opset1.ops import logical_and +from openvino.opset1.ops import logical_not +from openvino.opset1.ops import logical_or +from openvino.opset1.ops import logical_xor +from openvino.opset1.ops import lrn +from openvino.opset1.ops import lstm_cell +from openvino.opset1.ops import matmul +from openvino.opset1.ops import max_pool +from openvino.opset1.ops import maximum +from openvino.opset1.ops import minimum +from openvino.opset1.ops import mod +from openvino.opset1.ops import multiply +from openvino.opset2.ops import mvn +from openvino.opset1.ops import negative +from openvino.opset3.ops import non_max_suppression +from 
openvino.opset3.ops import non_zero +from openvino.opset1.ops import normalize_l2 +from openvino.opset1.ops import not_equal +from openvino.opset1.ops import one_hot +from openvino.opset1.ops import pad +from openvino.opset1.ops import parameter +from openvino.opset1.ops import power +from openvino.opset1.ops import prelu +from openvino.opset1.ops import prior_box +from openvino.opset1.ops import prior_box_clustered +from openvino.opset1.ops import psroi_pooling +from openvino.opset1.ops import proposal +from openvino.opset1.ops import range +from openvino.opset3.ops import read_value +from openvino.opset1.ops import reduce_logical_and +from openvino.opset1.ops import reduce_logical_or +from openvino.opset1.ops import reduce_max +from openvino.opset1.ops import reduce_mean +from openvino.opset1.ops import reduce_min +from openvino.opset1.ops import reduce_prod +from openvino.opset1.ops import reduce_sum +from openvino.opset1.ops import region_yolo +from openvino.opset2.ops import reorg_yolo +from openvino.opset1.ops import relu +from openvino.opset1.ops import reshape +from openvino.opset1.ops import result +from openvino.opset1.ops import reverse_sequence +from openvino.opset3.ops import rnn_cell +from openvino.opset3.ops import roi_align +from openvino.opset2.ops import roi_pooling +from openvino.opset3.ops import scatter_elements_update +from openvino.opset3.ops import scatter_update +from openvino.opset1.ops import select +from openvino.opset1.ops import selu +from openvino.opset3.ops import shape_of +from openvino.opset3.ops import shuffle_channels +from openvino.opset1.ops import sigmoid +from openvino.opset1.ops import sign +from openvino.opset1.ops import sin +from openvino.opset1.ops import sinh +from openvino.opset1.ops import softmax +from openvino.opset2.ops import space_to_batch +from openvino.opset1.ops import space_to_depth +from openvino.opset1.ops import split +from openvino.opset1.ops import sqrt +from openvino.opset1.ops import squared_difference 
+from openvino.opset1.ops import squeeze +from openvino.opset1.ops import strided_slice +from openvino.opset1.ops import subtract +from openvino.opset1.ops import tan +from openvino.opset1.ops import tanh +from openvino.opset1.ops import tensor_iterator +from openvino.opset1.ops import tile +from openvino.opset3.ops import topk +from openvino.opset1.ops import transpose +from openvino.opset1.ops import unsqueeze +from openvino.opset1.ops import variadic_split diff --git a/src/bindings/python/src/openvino/runtime/opset3/ops/__init__.py b/src/bindings/python/src/openvino/runtime/opset3/ops/__init__.py new file mode 100644 index 00000000000000..b8af66f9b514c8 --- /dev/null +++ b/src/bindings/python/src/openvino/runtime/opset3/ops/__init__.py @@ -0,0 +1,23 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from openvino.opset3.ops import assign +from openvino.opset3.ops import broadcast +from openvino.opset3.ops import bucketize +from openvino.opset3.ops import cum_sum +from openvino.opset3.ops import embedding_bag_offsets_sum +from openvino.opset3.ops import embedding_bag_packed_sum +from openvino.opset3.ops import embedding_segments_sum +from openvino.opset3.ops import extract_image_patches +from openvino.opset3.ops import gru_cell +from openvino.opset3.ops import non_max_suppression +from openvino.opset3.ops import non_zero +from openvino.opset3.ops import read_value +from openvino.opset3.ops import rnn_cell +from openvino.opset3.ops import roi_align +from openvino.opset3.ops import scatter_elements_update +from openvino.opset3.ops import scatter_update +from openvino.opset3.ops import shape_of +from openvino.opset3.ops import shuffle_channels +from openvino.opset3.ops import topk diff --git a/src/bindings/python/src/openvino/runtime/opset4/__init__.py b/src/bindings/python/src/openvino/runtime/opset4/__init__.py index e7cef8eb216d17..6096ec431c796e 100644 --- 
a/src/bindings/python/src/openvino/runtime/opset4/__init__.py +++ b/src/bindings/python/src/openvino/runtime/opset4/__init__.py @@ -2,143 +2,143 @@ # Copyright (C) 2018-2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -from openvino.runtime.opset1.ops import absolute -from openvino.runtime.opset1.ops import absolute as abs -from openvino.runtime.opset1.ops import acos -from openvino.runtime.opset4.ops import acosh -from openvino.runtime.opset1.ops import add -from openvino.runtime.opset1.ops import asin -from openvino.runtime.opset4.ops import asinh -from openvino.runtime.opset3.ops import assign -from openvino.runtime.opset1.ops import atan -from openvino.runtime.opset4.ops import atanh -from openvino.runtime.opset1.ops import avg_pool -from openvino.runtime.opset1.ops import batch_norm_inference -from openvino.runtime.opset2.ops import batch_to_space -from openvino.runtime.opset1.ops import binary_convolution -from openvino.runtime.opset3.ops import broadcast -from openvino.runtime.opset3.ops import bucketize -from openvino.runtime.opset1.ops import ceiling -from openvino.runtime.opset1.ops import ceiling as ceil -from openvino.runtime.opset1.ops import clamp -from openvino.runtime.opset1.ops import concat -from openvino.runtime.opset1.ops import constant -from openvino.runtime.opset1.ops import convert -from openvino.runtime.opset1.ops import convert_like -from openvino.runtime.opset1.ops import convolution -from openvino.runtime.opset1.ops import convolution_backprop_data -from openvino.runtime.opset1.ops import cos -from openvino.runtime.opset1.ops import cosh -from openvino.runtime.opset1.ops import ctc_greedy_decoder -from openvino.runtime.opset4.ops import ctc_loss -from openvino.runtime.opset3.ops import cum_sum -from openvino.runtime.opset3.ops import cum_sum as cumsum -from openvino.runtime.opset1.ops import deformable_convolution -from openvino.runtime.opset1.ops import deformable_psroi_pooling -from openvino.runtime.opset1.ops import 
depth_to_space -from openvino.runtime.opset1.ops import detection_output -from openvino.runtime.opset1.ops import divide -from openvino.runtime.opset1.ops import elu -from openvino.runtime.opset3.ops import embedding_bag_offsets_sum -from openvino.runtime.opset3.ops import embedding_bag_packed_sum -from openvino.runtime.opset3.ops import embedding_segments_sum -from openvino.runtime.opset3.ops import extract_image_patches -from openvino.runtime.opset1.ops import equal -from openvino.runtime.opset1.ops import erf -from openvino.runtime.opset1.ops import exp -from openvino.runtime.opset1.ops import fake_quantize -from openvino.runtime.opset1.ops import floor -from openvino.runtime.opset1.ops import floor_mod -from openvino.runtime.opset1.ops import gather -from openvino.runtime.opset1.ops import gather_tree -from openvino.runtime.opset2.ops import gelu -from openvino.runtime.opset1.ops import greater -from openvino.runtime.opset1.ops import greater_equal -from openvino.runtime.opset1.ops import grn -from openvino.runtime.opset1.ops import group_convolution -from openvino.runtime.opset1.ops import group_convolution_backprop_data -from openvino.runtime.opset3.ops import gru_cell -from openvino.runtime.opset1.ops import hard_sigmoid -from openvino.runtime.opset4.ops import hswish -from openvino.runtime.opset1.ops import interpolate -from openvino.runtime.opset1.ops import less -from openvino.runtime.opset1.ops import less_equal -from openvino.runtime.opset1.ops import log -from openvino.runtime.opset1.ops import logical_and -from openvino.runtime.opset1.ops import logical_not -from openvino.runtime.opset1.ops import logical_or -from openvino.runtime.opset1.ops import logical_xor -from openvino.runtime.opset1.ops import lrn -from openvino.runtime.opset4.ops import lstm_cell -from openvino.runtime.opset1.ops import matmul -from openvino.runtime.opset1.ops import max_pool -from openvino.runtime.opset1.ops import maximum -from openvino.runtime.opset1.ops import minimum 
-from openvino.runtime.opset4.ops import mish -from openvino.runtime.opset1.ops import mod -from openvino.runtime.opset1.ops import multiply -from openvino.runtime.opset2.ops import mvn -from openvino.runtime.opset1.ops import negative -from openvino.runtime.opset4.ops import non_max_suppression -from openvino.runtime.opset3.ops import non_zero -from openvino.runtime.opset1.ops import normalize_l2 -from openvino.runtime.opset1.ops import not_equal -from openvino.runtime.opset1.ops import one_hot -from openvino.runtime.opset1.ops import pad -from openvino.runtime.opset1.ops import parameter -from openvino.runtime.opset1.ops import power -from openvino.runtime.opset1.ops import prelu -from openvino.runtime.opset1.ops import prior_box -from openvino.runtime.opset1.ops import prior_box_clustered -from openvino.runtime.opset1.ops import psroi_pooling -from openvino.runtime.opset4.ops import proposal -from openvino.runtime.opset1.ops import range -from openvino.runtime.opset3.ops import read_value -from openvino.runtime.opset4.ops import reduce_l1 -from openvino.runtime.opset4.ops import reduce_l2 -from openvino.runtime.opset1.ops import reduce_logical_and -from openvino.runtime.opset1.ops import reduce_logical_or -from openvino.runtime.opset1.ops import reduce_max -from openvino.runtime.opset1.ops import reduce_mean -from openvino.runtime.opset1.ops import reduce_min -from openvino.runtime.opset1.ops import reduce_prod -from openvino.runtime.opset1.ops import reduce_sum -from openvino.runtime.opset1.ops import region_yolo -from openvino.runtime.opset2.ops import reorg_yolo -from openvino.runtime.opset1.ops import relu -from openvino.runtime.opset1.ops import reshape -from openvino.runtime.opset1.ops import result -from openvino.runtime.opset1.ops import reverse_sequence -from openvino.runtime.opset3.ops import rnn_cell -from openvino.runtime.opset3.ops import roi_align -from openvino.runtime.opset2.ops import roi_pooling -from openvino.runtime.opset3.ops import 
scatter_elements_update -from openvino.runtime.opset4.ops import scatter_nd_update -from openvino.runtime.opset3.ops import scatter_update -from openvino.runtime.opset1.ops import select -from openvino.runtime.opset1.ops import selu -from openvino.runtime.opset3.ops import shape_of -from openvino.runtime.opset3.ops import shuffle_channels -from openvino.runtime.opset1.ops import sigmoid -from openvino.runtime.opset1.ops import sign -from openvino.runtime.opset1.ops import sin -from openvino.runtime.opset1.ops import sinh -from openvino.runtime.opset1.ops import softmax -from openvino.runtime.opset4.ops import softplus -from openvino.runtime.opset2.ops import space_to_batch -from openvino.runtime.opset1.ops import space_to_depth -from openvino.runtime.opset1.ops import split -from openvino.runtime.opset1.ops import sqrt -from openvino.runtime.opset1.ops import squared_difference -from openvino.runtime.opset1.ops import squeeze -from openvino.runtime.opset1.ops import strided_slice -from openvino.runtime.opset1.ops import subtract -from openvino.runtime.opset4.ops import swish -from openvino.runtime.opset1.ops import tan -from openvino.runtime.opset1.ops import tanh -from openvino.runtime.opset1.ops import tensor_iterator -from openvino.runtime.opset1.ops import tile -from openvino.runtime.opset3.ops import topk -from openvino.runtime.opset1.ops import transpose -from openvino.runtime.opset1.ops import unsqueeze -from openvino.runtime.opset1.ops import variadic_split +from openvino.opset1.ops import absolute +from openvino.opset1.ops import absolute as abs +from openvino.opset1.ops import acos +from openvino.opset4.ops import acosh +from openvino.opset1.ops import add +from openvino.opset1.ops import asin +from openvino.opset4.ops import asinh +from openvino.opset3.ops import assign +from openvino.opset1.ops import atan +from openvino.opset4.ops import atanh +from openvino.opset1.ops import avg_pool +from openvino.opset1.ops import batch_norm_inference +from 
openvino.opset2.ops import batch_to_space +from openvino.opset1.ops import binary_convolution +from openvino.opset3.ops import broadcast +from openvino.opset3.ops import bucketize +from openvino.opset1.ops import ceiling +from openvino.opset1.ops import ceiling as ceil +from openvino.opset1.ops import clamp +from openvino.opset1.ops import concat +from openvino.opset1.ops import constant +from openvino.opset1.ops import convert +from openvino.opset1.ops import convert_like +from openvino.opset1.ops import convolution +from openvino.opset1.ops import convolution_backprop_data +from openvino.opset1.ops import cos +from openvino.opset1.ops import cosh +from openvino.opset1.ops import ctc_greedy_decoder +from openvino.opset4.ops import ctc_loss +from openvino.opset3.ops import cum_sum +from openvino.opset3.ops import cum_sum as cumsum +from openvino.opset1.ops import deformable_convolution +from openvino.opset1.ops import deformable_psroi_pooling +from openvino.opset1.ops import depth_to_space +from openvino.opset1.ops import detection_output +from openvino.opset1.ops import divide +from openvino.opset1.ops import elu +from openvino.opset3.ops import embedding_bag_offsets_sum +from openvino.opset3.ops import embedding_bag_packed_sum +from openvino.opset3.ops import embedding_segments_sum +from openvino.opset3.ops import extract_image_patches +from openvino.opset1.ops import equal +from openvino.opset1.ops import erf +from openvino.opset1.ops import exp +from openvino.opset1.ops import fake_quantize +from openvino.opset1.ops import floor +from openvino.opset1.ops import floor_mod +from openvino.opset1.ops import gather +from openvino.opset1.ops import gather_tree +from openvino.opset2.ops import gelu +from openvino.opset1.ops import greater +from openvino.opset1.ops import greater_equal +from openvino.opset1.ops import grn +from openvino.opset1.ops import group_convolution +from openvino.opset1.ops import group_convolution_backprop_data +from openvino.opset3.ops import 
gru_cell +from openvino.opset1.ops import hard_sigmoid +from openvino.opset4.ops import hswish +from openvino.opset1.ops import interpolate +from openvino.opset1.ops import less +from openvino.opset1.ops import less_equal +from openvino.opset1.ops import log +from openvino.opset1.ops import logical_and +from openvino.opset1.ops import logical_not +from openvino.opset1.ops import logical_or +from openvino.opset1.ops import logical_xor +from openvino.opset1.ops import lrn +from openvino.opset4.ops import lstm_cell +from openvino.opset1.ops import matmul +from openvino.opset1.ops import max_pool +from openvino.opset1.ops import maximum +from openvino.opset1.ops import minimum +from openvino.opset4.ops import mish +from openvino.opset1.ops import mod +from openvino.opset1.ops import multiply +from openvino.opset2.ops import mvn +from openvino.opset1.ops import negative +from openvino.opset4.ops import non_max_suppression +from openvino.opset3.ops import non_zero +from openvino.opset1.ops import normalize_l2 +from openvino.opset1.ops import not_equal +from openvino.opset1.ops import one_hot +from openvino.opset1.ops import pad +from openvino.opset1.ops import parameter +from openvino.opset1.ops import power +from openvino.opset1.ops import prelu +from openvino.opset1.ops import prior_box +from openvino.opset1.ops import prior_box_clustered +from openvino.opset1.ops import psroi_pooling +from openvino.opset4.ops import proposal +from openvino.opset1.ops import range +from openvino.opset3.ops import read_value +from openvino.opset4.ops import reduce_l1 +from openvino.opset4.ops import reduce_l2 +from openvino.opset1.ops import reduce_logical_and +from openvino.opset1.ops import reduce_logical_or +from openvino.opset1.ops import reduce_max +from openvino.opset1.ops import reduce_mean +from openvino.opset1.ops import reduce_min +from openvino.opset1.ops import reduce_prod +from openvino.opset1.ops import reduce_sum +from openvino.opset1.ops import region_yolo +from 
openvino.opset2.ops import reorg_yolo +from openvino.opset1.ops import relu +from openvino.opset1.ops import reshape +from openvino.opset1.ops import result +from openvino.opset1.ops import reverse_sequence +from openvino.opset3.ops import rnn_cell +from openvino.opset3.ops import roi_align +from openvino.opset2.ops import roi_pooling +from openvino.opset3.ops import scatter_elements_update +from openvino.opset4.ops import scatter_nd_update +from openvino.opset3.ops import scatter_update +from openvino.opset1.ops import select +from openvino.opset1.ops import selu +from openvino.opset3.ops import shape_of +from openvino.opset3.ops import shuffle_channels +from openvino.opset1.ops import sigmoid +from openvino.opset1.ops import sign +from openvino.opset1.ops import sin +from openvino.opset1.ops import sinh +from openvino.opset1.ops import softmax +from openvino.opset4.ops import softplus +from openvino.opset2.ops import space_to_batch +from openvino.opset1.ops import space_to_depth +from openvino.opset1.ops import split +from openvino.opset1.ops import sqrt +from openvino.opset1.ops import squared_difference +from openvino.opset1.ops import squeeze +from openvino.opset1.ops import strided_slice +from openvino.opset1.ops import subtract +from openvino.opset4.ops import swish +from openvino.opset1.ops import tan +from openvino.opset1.ops import tanh +from openvino.opset1.ops import tensor_iterator +from openvino.opset1.ops import tile +from openvino.opset3.ops import topk +from openvino.opset1.ops import transpose +from openvino.opset1.ops import unsqueeze +from openvino.opset1.ops import variadic_split diff --git a/src/bindings/python/src/openvino/runtime/opset4/ops/__init__.py b/src/bindings/python/src/openvino/runtime/opset4/ops/__init__.py new file mode 100644 index 00000000000000..d70649a6b1db17 --- /dev/null +++ b/src/bindings/python/src/openvino/runtime/opset4/ops/__init__.py @@ -0,0 +1,19 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2018-2024 Intel Corporation 
+# SPDX-License-Identifier: Apache-2.0 + +from openvino.opset4.ops import acosh +from openvino.opset4.ops import asinh +from openvino.opset4.ops import atanh +from openvino.opset4.ops import ctc_loss +from openvino.opset4.ops import hswish +from openvino.opset4.ops import lstm_cell +from openvino.opset4.ops import mish +from openvino.opset4.ops import non_max_suppression +from openvino.opset4.ops import proposal +from openvino.opset4.ops import range +from openvino.opset4.ops import reduce_l1 +from openvino.opset4.ops import reduce_l2 +from openvino.opset4.ops import scatter_nd_update +from openvino.opset4.ops import softplus +from openvino.opset4.ops import swish diff --git a/src/bindings/python/src/openvino/runtime/opset5/__init__.py b/src/bindings/python/src/openvino/runtime/opset5/__init__.py index 6d68b3e8d9f9cb..202b8137093f57 100644 --- a/src/bindings/python/src/openvino/runtime/opset5/__init__.py +++ b/src/bindings/python/src/openvino/runtime/opset5/__init__.py @@ -2,151 +2,151 @@ # Copyright (C) 2018-2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -from openvino.runtime.opset1.ops import absolute -from openvino.runtime.opset1.ops import absolute as abs -from openvino.runtime.opset1.ops import acos -from openvino.runtime.opset4.ops import acosh -from openvino.runtime.opset1.ops import add -from openvino.runtime.opset1.ops import asin -from openvino.runtime.opset4.ops import asinh -from openvino.runtime.opset3.ops import assign -from openvino.runtime.opset1.ops import atan -from openvino.runtime.opset4.ops import atanh -from openvino.runtime.opset1.ops import avg_pool -from openvino.runtime.opset5.ops import batch_norm_inference -from openvino.runtime.opset2.ops import batch_to_space -from openvino.runtime.opset1.ops import binary_convolution -from openvino.runtime.opset3.ops import broadcast -from openvino.runtime.opset3.ops import bucketize -from openvino.runtime.opset1.ops import ceiling -from openvino.runtime.opset1.ops import ceiling as 
ceil -from openvino.runtime.opset1.ops import clamp -from openvino.runtime.opset1.ops import concat -from openvino.runtime.opset1.ops import constant -from openvino.runtime.opset1.ops import convert -from openvino.runtime.opset1.ops import convert_like -from openvino.runtime.opset1.ops import convolution -from openvino.runtime.opset1.ops import convolution_backprop_data -from openvino.runtime.opset1.ops import cos -from openvino.runtime.opset1.ops import cosh -from openvino.runtime.opset1.ops import ctc_greedy_decoder -from openvino.runtime.opset4.ops import ctc_loss -from openvino.runtime.opset3.ops import cum_sum -from openvino.runtime.opset3.ops import cum_sum as cumsum -from openvino.runtime.opset1.ops import deformable_convolution -from openvino.runtime.opset1.ops import deformable_psroi_pooling -from openvino.runtime.opset1.ops import depth_to_space -from openvino.runtime.opset1.ops import detection_output -from openvino.runtime.opset1.ops import divide -from openvino.runtime.opset1.ops import elu -from openvino.runtime.opset3.ops import embedding_bag_offsets_sum -from openvino.runtime.opset3.ops import embedding_bag_packed_sum -from openvino.runtime.opset3.ops import embedding_segments_sum -from openvino.runtime.opset3.ops import extract_image_patches -from openvino.runtime.opset1.ops import equal -from openvino.runtime.opset1.ops import erf -from openvino.runtime.opset1.ops import exp -from openvino.runtime.opset1.ops import fake_quantize -from openvino.runtime.opset1.ops import floor -from openvino.runtime.opset1.ops import floor_mod -from openvino.runtime.opset1.ops import gather -from openvino.runtime.opset5.ops import gather_nd -from openvino.runtime.opset1.ops import gather_tree -from openvino.runtime.opset2.ops import gelu -from openvino.runtime.opset1.ops import greater -from openvino.runtime.opset1.ops import greater_equal -from openvino.runtime.opset1.ops import grn -from openvino.runtime.opset1.ops import group_convolution -from 
openvino.runtime.opset1.ops import group_convolution_backprop_data -from openvino.runtime.opset3.ops import gru_cell -from openvino.runtime.opset5.ops import gru_sequence -from openvino.runtime.opset1.ops import hard_sigmoid -from openvino.runtime.opset5.ops import hsigmoid -from openvino.runtime.opset4.ops import hswish -from openvino.runtime.opset1.ops import interpolate -from openvino.runtime.opset1.ops import less -from openvino.runtime.opset1.ops import less_equal -from openvino.runtime.opset1.ops import log -from openvino.runtime.opset1.ops import logical_and -from openvino.runtime.opset1.ops import logical_not -from openvino.runtime.opset1.ops import logical_or -from openvino.runtime.opset1.ops import logical_xor -from openvino.runtime.opset5.ops import log_softmax -from openvino.runtime.opset5.ops import loop -from openvino.runtime.opset1.ops import lrn -from openvino.runtime.opset4.ops import lstm_cell -from openvino.runtime.opset5.ops import lstm_sequence -from openvino.runtime.opset1.ops import matmul -from openvino.runtime.opset1.ops import max_pool -from openvino.runtime.opset1.ops import maximum -from openvino.runtime.opset1.ops import minimum -from openvino.runtime.opset4.ops import mish -from openvino.runtime.opset1.ops import mod -from openvino.runtime.opset1.ops import multiply -from openvino.runtime.opset2.ops import mvn -from openvino.runtime.opset1.ops import negative -from openvino.runtime.opset5.ops import non_max_suppression -from openvino.runtime.opset3.ops import non_zero -from openvino.runtime.opset1.ops import normalize_l2 -from openvino.runtime.opset1.ops import not_equal -from openvino.runtime.opset1.ops import one_hot -from openvino.runtime.opset1.ops import pad -from openvino.runtime.opset1.ops import parameter -from openvino.runtime.opset1.ops import power -from openvino.runtime.opset1.ops import prelu -from openvino.runtime.opset1.ops import prior_box -from openvino.runtime.opset1.ops import prior_box_clustered -from 
openvino.runtime.opset1.ops import psroi_pooling -from openvino.runtime.opset4.ops import proposal -from openvino.runtime.opset1.ops import range -from openvino.runtime.opset3.ops import read_value -from openvino.runtime.opset4.ops import reduce_l1 -from openvino.runtime.opset4.ops import reduce_l2 -from openvino.runtime.opset1.ops import reduce_logical_and -from openvino.runtime.opset1.ops import reduce_logical_or -from openvino.runtime.opset1.ops import reduce_max -from openvino.runtime.opset1.ops import reduce_mean -from openvino.runtime.opset1.ops import reduce_min -from openvino.runtime.opset1.ops import reduce_prod -from openvino.runtime.opset1.ops import reduce_sum -from openvino.runtime.opset1.ops import region_yolo -from openvino.runtime.opset2.ops import reorg_yolo -from openvino.runtime.opset1.ops import relu -from openvino.runtime.opset1.ops import reshape -from openvino.runtime.opset1.ops import result -from openvino.runtime.opset1.ops import reverse_sequence -from openvino.runtime.opset3.ops import rnn_cell -from openvino.runtime.opset5.ops import rnn_sequence -from openvino.runtime.opset3.ops import roi_align -from openvino.runtime.opset2.ops import roi_pooling -from openvino.runtime.opset5.ops import round -from openvino.runtime.opset3.ops import scatter_elements_update -from openvino.runtime.opset4.ops import scatter_nd_update -from openvino.runtime.opset3.ops import scatter_update -from openvino.runtime.opset1.ops import select -from openvino.runtime.opset1.ops import selu -from openvino.runtime.opset3.ops import shape_of -from openvino.runtime.opset3.ops import shuffle_channels -from openvino.runtime.opset1.ops import sigmoid -from openvino.runtime.opset1.ops import sign -from openvino.runtime.opset1.ops import sin -from openvino.runtime.opset1.ops import sinh -from openvino.runtime.opset1.ops import softmax -from openvino.runtime.opset4.ops import softplus -from openvino.runtime.opset2.ops import space_to_batch -from openvino.runtime.opset1.ops 
import space_to_depth -from openvino.runtime.opset1.ops import split -from openvino.runtime.opset1.ops import sqrt -from openvino.runtime.opset1.ops import squared_difference -from openvino.runtime.opset1.ops import squeeze -from openvino.runtime.opset1.ops import strided_slice -from openvino.runtime.opset1.ops import subtract -from openvino.runtime.opset4.ops import swish -from openvino.runtime.opset1.ops import tan -from openvino.runtime.opset1.ops import tanh -from openvino.runtime.opset1.ops import tensor_iterator -from openvino.runtime.opset1.ops import tile -from openvino.runtime.opset3.ops import topk -from openvino.runtime.opset1.ops import transpose -from openvino.runtime.opset1.ops import unsqueeze -from openvino.runtime.opset1.ops import variadic_split +from openvino.opset1.ops import absolute +from openvino.opset1.ops import absolute as abs +from openvino.opset1.ops import acos +from openvino.opset4.ops import acosh +from openvino.opset1.ops import add +from openvino.opset1.ops import asin +from openvino.opset4.ops import asinh +from openvino.opset3.ops import assign +from openvino.opset1.ops import atan +from openvino.opset4.ops import atanh +from openvino.opset1.ops import avg_pool +from openvino.opset5.ops import batch_norm_inference +from openvino.opset2.ops import batch_to_space +from openvino.opset1.ops import binary_convolution +from openvino.opset3.ops import broadcast +from openvino.opset3.ops import bucketize +from openvino.opset1.ops import ceiling +from openvino.opset1.ops import ceiling as ceil +from openvino.opset1.ops import clamp +from openvino.opset1.ops import concat +from openvino.opset1.ops import constant +from openvino.opset1.ops import convert +from openvino.opset1.ops import convert_like +from openvino.opset1.ops import convolution +from openvino.opset1.ops import convolution_backprop_data +from openvino.opset1.ops import cos +from openvino.opset1.ops import cosh +from openvino.opset1.ops import ctc_greedy_decoder +from 
openvino.opset4.ops import ctc_loss +from openvino.opset3.ops import cum_sum +from openvino.opset3.ops import cum_sum as cumsum +from openvino.opset1.ops import deformable_convolution +from openvino.opset1.ops import deformable_psroi_pooling +from openvino.opset1.ops import depth_to_space +from openvino.opset1.ops import detection_output +from openvino.opset1.ops import divide +from openvino.opset1.ops import elu +from openvino.opset3.ops import embedding_bag_offsets_sum +from openvino.opset3.ops import embedding_bag_packed_sum +from openvino.opset3.ops import embedding_segments_sum +from openvino.opset3.ops import extract_image_patches +from openvino.opset1.ops import equal +from openvino.opset1.ops import erf +from openvino.opset1.ops import exp +from openvino.opset1.ops import fake_quantize +from openvino.opset1.ops import floor +from openvino.opset1.ops import floor_mod +from openvino.opset1.ops import gather +from openvino.opset5.ops import gather_nd +from openvino.opset1.ops import gather_tree +from openvino.opset2.ops import gelu +from openvino.opset1.ops import greater +from openvino.opset1.ops import greater_equal +from openvino.opset1.ops import grn +from openvino.opset1.ops import group_convolution +from openvino.opset1.ops import group_convolution_backprop_data +from openvino.opset3.ops import gru_cell +from openvino.opset5.ops import gru_sequence +from openvino.opset1.ops import hard_sigmoid +from openvino.opset5.ops import hsigmoid +from openvino.opset4.ops import hswish +from openvino.opset1.ops import interpolate +from openvino.opset1.ops import less +from openvino.opset1.ops import less_equal +from openvino.opset1.ops import log +from openvino.opset1.ops import logical_and +from openvino.opset1.ops import logical_not +from openvino.opset1.ops import logical_or +from openvino.opset1.ops import logical_xor +from openvino.opset5.ops import log_softmax +from openvino.opset5.ops import loop +from openvino.opset1.ops import lrn +from openvino.opset4.ops 
import lstm_cell +from openvino.opset5.ops import lstm_sequence +from openvino.opset1.ops import matmul +from openvino.opset1.ops import max_pool +from openvino.opset1.ops import maximum +from openvino.opset1.ops import minimum +from openvino.opset4.ops import mish +from openvino.opset1.ops import mod +from openvino.opset1.ops import multiply +from openvino.opset2.ops import mvn +from openvino.opset1.ops import negative +from openvino.opset5.ops import non_max_suppression +from openvino.opset3.ops import non_zero +from openvino.opset1.ops import normalize_l2 +from openvino.opset1.ops import not_equal +from openvino.opset1.ops import one_hot +from openvino.opset1.ops import pad +from openvino.opset1.ops import parameter +from openvino.opset1.ops import power +from openvino.opset1.ops import prelu +from openvino.opset1.ops import prior_box +from openvino.opset1.ops import prior_box_clustered +from openvino.opset1.ops import psroi_pooling +from openvino.opset4.ops import proposal +from openvino.opset1.ops import range +from openvino.opset3.ops import read_value +from openvino.opset4.ops import reduce_l1 +from openvino.opset4.ops import reduce_l2 +from openvino.opset1.ops import reduce_logical_and +from openvino.opset1.ops import reduce_logical_or +from openvino.opset1.ops import reduce_max +from openvino.opset1.ops import reduce_mean +from openvino.opset1.ops import reduce_min +from openvino.opset1.ops import reduce_prod +from openvino.opset1.ops import reduce_sum +from openvino.opset1.ops import region_yolo +from openvino.opset2.ops import reorg_yolo +from openvino.opset1.ops import relu +from openvino.opset1.ops import reshape +from openvino.opset1.ops import result +from openvino.opset1.ops import reverse_sequence +from openvino.opset3.ops import rnn_cell +from openvino.opset5.ops import rnn_sequence +from openvino.opset3.ops import roi_align +from openvino.opset2.ops import roi_pooling +from openvino.opset5.ops import round +from openvino.opset3.ops import 
scatter_elements_update +from openvino.opset4.ops import scatter_nd_update +from openvino.opset3.ops import scatter_update +from openvino.opset1.ops import select +from openvino.opset1.ops import selu +from openvino.opset3.ops import shape_of +from openvino.opset3.ops import shuffle_channels +from openvino.opset1.ops import sigmoid +from openvino.opset1.ops import sign +from openvino.opset1.ops import sin +from openvino.opset1.ops import sinh +from openvino.opset1.ops import softmax +from openvino.opset4.ops import softplus +from openvino.opset2.ops import space_to_batch +from openvino.opset1.ops import space_to_depth +from openvino.opset1.ops import split +from openvino.opset1.ops import sqrt +from openvino.opset1.ops import squared_difference +from openvino.opset1.ops import squeeze +from openvino.opset1.ops import strided_slice +from openvino.opset1.ops import subtract +from openvino.opset4.ops import swish +from openvino.opset1.ops import tan +from openvino.opset1.ops import tanh +from openvino.opset1.ops import tensor_iterator +from openvino.opset1.ops import tile +from openvino.opset3.ops import topk +from openvino.opset1.ops import transpose +from openvino.opset1.ops import unsqueeze +from openvino.opset1.ops import variadic_split diff --git a/src/bindings/python/src/openvino/runtime/opset5/ops/__init__.py b/src/bindings/python/src/openvino/runtime/opset5/ops/__init__.py new file mode 100644 index 00000000000000..610622780c6f01 --- /dev/null +++ b/src/bindings/python/src/openvino/runtime/opset5/ops/__init__.py @@ -0,0 +1,14 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from openvino.opset5.ops import batch_norm_inference +from openvino.opset5.ops import gather_nd +from openvino.opset5.ops import gru_sequence +from openvino.opset5.ops import hsigmoid +from openvino.opset5.ops import log_softmax +from openvino.opset5.ops import loop +from openvino.opset5.ops import lstm_sequence +from 
openvino.opset5.ops import non_max_suppression +from openvino.opset5.ops import rnn_sequence +from openvino.opset5.ops import round diff --git a/src/bindings/python/src/openvino/runtime/opset6/__init__.py b/src/bindings/python/src/openvino/runtime/opset6/__init__.py index 2b2babb00c5021..315d80bc025a38 100644 --- a/src/bindings/python/src/openvino/runtime/opset6/__init__.py +++ b/src/bindings/python/src/openvino/runtime/opset6/__init__.py @@ -2,153 +2,153 @@ # Copyright (C) 2018-2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -from openvino.runtime.opset1.ops import absolute -from openvino.runtime.opset1.ops import absolute as abs -from openvino.runtime.opset1.ops import acos -from openvino.runtime.opset4.ops import acosh -from openvino.runtime.opset1.ops import add -from openvino.runtime.opset1.ops import asin -from openvino.runtime.opset4.ops import asinh -from openvino.runtime.opset6.ops import assign -from openvino.runtime.opset1.ops import atan -from openvino.runtime.opset4.ops import atanh -from openvino.runtime.opset1.ops import avg_pool -from openvino.runtime.opset5.ops import batch_norm_inference -from openvino.runtime.opset2.ops import batch_to_space -from openvino.runtime.opset1.ops import binary_convolution -from openvino.runtime.opset3.ops import broadcast -from openvino.runtime.opset3.ops import bucketize -from openvino.runtime.opset1.ops import ceiling -from openvino.runtime.opset1.ops import ceiling as ceil -from openvino.runtime.opset1.ops import clamp -from openvino.runtime.opset1.ops import concat -from openvino.runtime.opset1.ops import constant -from openvino.runtime.opset1.ops import convert -from openvino.runtime.opset1.ops import convert_like -from openvino.runtime.opset1.ops import convolution -from openvino.runtime.opset1.ops import convolution_backprop_data -from openvino.runtime.opset1.ops import cos -from openvino.runtime.opset1.ops import cosh -from openvino.runtime.opset1.ops import ctc_greedy_decoder -from 
openvino.runtime.opset6.ops import ctc_greedy_decoder_seq_len -from openvino.runtime.opset4.ops import ctc_loss -from openvino.runtime.opset3.ops import cum_sum -from openvino.runtime.opset3.ops import cum_sum as cumsum -from openvino.runtime.opset1.ops import deformable_convolution -from openvino.runtime.opset1.ops import deformable_psroi_pooling -from openvino.runtime.opset1.ops import depth_to_space -from openvino.runtime.opset1.ops import detection_output -from openvino.runtime.opset1.ops import divide -from openvino.runtime.opset1.ops import elu -from openvino.runtime.opset3.ops import embedding_bag_offsets_sum -from openvino.runtime.opset3.ops import embedding_bag_packed_sum -from openvino.runtime.opset3.ops import embedding_segments_sum -from openvino.runtime.opset3.ops import extract_image_patches -from openvino.runtime.opset1.ops import equal -from openvino.runtime.opset1.ops import erf -from openvino.runtime.opset1.ops import exp -from openvino.runtime.opset1.ops import fake_quantize -from openvino.runtime.opset1.ops import floor -from openvino.runtime.opset1.ops import floor_mod -from openvino.runtime.opset1.ops import gather -from openvino.runtime.opset6.ops import gather_elements -from openvino.runtime.opset5.ops import gather_nd -from openvino.runtime.opset1.ops import gather_tree -from openvino.runtime.opset2.ops import gelu -from openvino.runtime.opset1.ops import greater -from openvino.runtime.opset1.ops import greater_equal -from openvino.runtime.opset1.ops import grn -from openvino.runtime.opset1.ops import group_convolution -from openvino.runtime.opset1.ops import group_convolution_backprop_data -from openvino.runtime.opset3.ops import gru_cell -from openvino.runtime.opset5.ops import gru_sequence -from openvino.runtime.opset1.ops import hard_sigmoid -from openvino.runtime.opset5.ops import hsigmoid -from openvino.runtime.opset4.ops import hswish -from openvino.runtime.opset1.ops import interpolate -from openvino.runtime.opset1.ops import less 
-from openvino.runtime.opset1.ops import less_equal -from openvino.runtime.opset1.ops import log -from openvino.runtime.opset1.ops import logical_and -from openvino.runtime.opset1.ops import logical_not -from openvino.runtime.opset1.ops import logical_or -from openvino.runtime.opset1.ops import logical_xor -from openvino.runtime.opset5.ops import log_softmax -from openvino.runtime.opset5.ops import loop -from openvino.runtime.opset1.ops import lrn -from openvino.runtime.opset4.ops import lstm_cell -from openvino.runtime.opset5.ops import lstm_sequence -from openvino.runtime.opset1.ops import matmul -from openvino.runtime.opset1.ops import max_pool -from openvino.runtime.opset1.ops import maximum -from openvino.runtime.opset1.ops import minimum -from openvino.runtime.opset4.ops import mish -from openvino.runtime.opset1.ops import mod -from openvino.runtime.opset1.ops import multiply -from openvino.runtime.opset6.ops import mvn -from openvino.runtime.opset1.ops import negative -from openvino.runtime.opset5.ops import non_max_suppression -from openvino.runtime.opset3.ops import non_zero -from openvino.runtime.opset1.ops import normalize_l2 -from openvino.runtime.opset1.ops import not_equal -from openvino.runtime.opset1.ops import one_hot -from openvino.runtime.opset1.ops import pad -from openvino.runtime.opset1.ops import parameter -from openvino.runtime.opset1.ops import power -from openvino.runtime.opset1.ops import prelu -from openvino.runtime.opset1.ops import prior_box -from openvino.runtime.opset1.ops import prior_box_clustered -from openvino.runtime.opset1.ops import psroi_pooling -from openvino.runtime.opset4.ops import proposal -from openvino.runtime.opset1.ops import range -from openvino.runtime.opset6.ops import read_value -from openvino.runtime.opset4.ops import reduce_l1 -from openvino.runtime.opset4.ops import reduce_l2 -from openvino.runtime.opset1.ops import reduce_logical_and -from openvino.runtime.opset1.ops import reduce_logical_or -from 
openvino.runtime.opset1.ops import reduce_max -from openvino.runtime.opset1.ops import reduce_mean -from openvino.runtime.opset1.ops import reduce_min -from openvino.runtime.opset1.ops import reduce_prod -from openvino.runtime.opset1.ops import reduce_sum -from openvino.runtime.opset1.ops import region_yolo -from openvino.runtime.opset2.ops import reorg_yolo -from openvino.runtime.opset1.ops import relu -from openvino.runtime.opset1.ops import reshape -from openvino.runtime.opset1.ops import result -from openvino.runtime.opset1.ops import reverse_sequence -from openvino.runtime.opset3.ops import rnn_cell -from openvino.runtime.opset5.ops import rnn_sequence -from openvino.runtime.opset3.ops import roi_align -from openvino.runtime.opset2.ops import roi_pooling -from openvino.runtime.opset5.ops import round -from openvino.runtime.opset3.ops import scatter_elements_update -from openvino.runtime.opset4.ops import scatter_nd_update -from openvino.runtime.opset3.ops import scatter_update -from openvino.runtime.opset1.ops import select -from openvino.runtime.opset1.ops import selu -from openvino.runtime.opset3.ops import shape_of -from openvino.runtime.opset3.ops import shuffle_channels -from openvino.runtime.opset1.ops import sigmoid -from openvino.runtime.opset1.ops import sign -from openvino.runtime.opset1.ops import sin -from openvino.runtime.opset1.ops import sinh -from openvino.runtime.opset1.ops import softmax -from openvino.runtime.opset4.ops import softplus -from openvino.runtime.opset2.ops import space_to_batch -from openvino.runtime.opset1.ops import space_to_depth -from openvino.runtime.opset1.ops import split -from openvino.runtime.opset1.ops import sqrt -from openvino.runtime.opset1.ops import squared_difference -from openvino.runtime.opset1.ops import squeeze -from openvino.runtime.opset1.ops import strided_slice -from openvino.runtime.opset1.ops import subtract -from openvino.runtime.opset4.ops import swish -from openvino.runtime.opset1.ops import tan 
-from openvino.runtime.opset1.ops import tanh -from openvino.runtime.opset1.ops import tensor_iterator -from openvino.runtime.opset1.ops import tile -from openvino.runtime.opset3.ops import topk -from openvino.runtime.opset1.ops import transpose -from openvino.runtime.opset1.ops import unsqueeze -from openvino.runtime.opset1.ops import variadic_split +from openvino.opset1.ops import absolute +from openvino.opset1.ops import absolute as abs +from openvino.opset1.ops import acos +from openvino.opset4.ops import acosh +from openvino.opset1.ops import add +from openvino.opset1.ops import asin +from openvino.opset4.ops import asinh +from openvino.opset6.ops import assign +from openvino.opset1.ops import atan +from openvino.opset4.ops import atanh +from openvino.opset1.ops import avg_pool +from openvino.opset5.ops import batch_norm_inference +from openvino.opset2.ops import batch_to_space +from openvino.opset1.ops import binary_convolution +from openvino.opset3.ops import broadcast +from openvino.opset3.ops import bucketize +from openvino.opset1.ops import ceiling +from openvino.opset1.ops import ceiling as ceil +from openvino.opset1.ops import clamp +from openvino.opset1.ops import concat +from openvino.opset1.ops import constant +from openvino.opset1.ops import convert +from openvino.opset1.ops import convert_like +from openvino.opset1.ops import convolution +from openvino.opset1.ops import convolution_backprop_data +from openvino.opset1.ops import cos +from openvino.opset1.ops import cosh +from openvino.opset1.ops import ctc_greedy_decoder +from openvino.opset6.ops import ctc_greedy_decoder_seq_len +from openvino.opset4.ops import ctc_loss +from openvino.opset3.ops import cum_sum +from openvino.opset3.ops import cum_sum as cumsum +from openvino.opset1.ops import deformable_convolution +from openvino.opset1.ops import deformable_psroi_pooling +from openvino.opset1.ops import depth_to_space +from openvino.opset1.ops import detection_output +from openvino.opset1.ops 
import divide +from openvino.opset1.ops import elu +from openvino.opset3.ops import embedding_bag_offsets_sum +from openvino.opset3.ops import embedding_bag_packed_sum +from openvino.opset3.ops import embedding_segments_sum +from openvino.opset3.ops import extract_image_patches +from openvino.opset1.ops import equal +from openvino.opset1.ops import erf +from openvino.opset1.ops import exp +from openvino.opset1.ops import fake_quantize +from openvino.opset1.ops import floor +from openvino.opset1.ops import floor_mod +from openvino.opset1.ops import gather +from openvino.opset6.ops import gather_elements +from openvino.opset5.ops import gather_nd +from openvino.opset1.ops import gather_tree +from openvino.opset2.ops import gelu +from openvino.opset1.ops import greater +from openvino.opset1.ops import greater_equal +from openvino.opset1.ops import grn +from openvino.opset1.ops import group_convolution +from openvino.opset1.ops import group_convolution_backprop_data +from openvino.opset3.ops import gru_cell +from openvino.opset5.ops import gru_sequence +from openvino.opset1.ops import hard_sigmoid +from openvino.opset5.ops import hsigmoid +from openvino.opset4.ops import hswish +from openvino.opset1.ops import interpolate +from openvino.opset1.ops import less +from openvino.opset1.ops import less_equal +from openvino.opset1.ops import log +from openvino.opset1.ops import logical_and +from openvino.opset1.ops import logical_not +from openvino.opset1.ops import logical_or +from openvino.opset1.ops import logical_xor +from openvino.opset5.ops import log_softmax +from openvino.opset5.ops import loop +from openvino.opset1.ops import lrn +from openvino.opset4.ops import lstm_cell +from openvino.opset5.ops import lstm_sequence +from openvino.opset1.ops import matmul +from openvino.opset1.ops import max_pool +from openvino.opset1.ops import maximum +from openvino.opset1.ops import minimum +from openvino.opset4.ops import mish +from openvino.opset1.ops import mod +from 
openvino.opset1.ops import multiply +from openvino.opset6.ops import mvn +from openvino.opset1.ops import negative +from openvino.opset5.ops import non_max_suppression +from openvino.opset3.ops import non_zero +from openvino.opset1.ops import normalize_l2 +from openvino.opset1.ops import not_equal +from openvino.opset1.ops import one_hot +from openvino.opset1.ops import pad +from openvino.opset1.ops import parameter +from openvino.opset1.ops import power +from openvino.opset1.ops import prelu +from openvino.opset1.ops import prior_box +from openvino.opset1.ops import prior_box_clustered +from openvino.opset1.ops import psroi_pooling +from openvino.opset4.ops import proposal +from openvino.opset1.ops import range +from openvino.opset6.ops import read_value +from openvino.opset4.ops import reduce_l1 +from openvino.opset4.ops import reduce_l2 +from openvino.opset1.ops import reduce_logical_and +from openvino.opset1.ops import reduce_logical_or +from openvino.opset1.ops import reduce_max +from openvino.opset1.ops import reduce_mean +from openvino.opset1.ops import reduce_min +from openvino.opset1.ops import reduce_prod +from openvino.opset1.ops import reduce_sum +from openvino.opset1.ops import region_yolo +from openvino.opset2.ops import reorg_yolo +from openvino.opset1.ops import relu +from openvino.opset1.ops import reshape +from openvino.opset1.ops import result +from openvino.opset1.ops import reverse_sequence +from openvino.opset3.ops import rnn_cell +from openvino.opset5.ops import rnn_sequence +from openvino.opset3.ops import roi_align +from openvino.opset2.ops import roi_pooling +from openvino.opset5.ops import round +from openvino.opset3.ops import scatter_elements_update +from openvino.opset4.ops import scatter_nd_update +from openvino.opset3.ops import scatter_update +from openvino.opset1.ops import select +from openvino.opset1.ops import selu +from openvino.opset3.ops import shape_of +from openvino.opset3.ops import shuffle_channels +from 
openvino.opset1.ops import sigmoid +from openvino.opset1.ops import sign +from openvino.opset1.ops import sin +from openvino.opset1.ops import sinh +from openvino.opset1.ops import softmax +from openvino.opset4.ops import softplus +from openvino.opset2.ops import space_to_batch +from openvino.opset1.ops import space_to_depth +from openvino.opset1.ops import split +from openvino.opset1.ops import sqrt +from openvino.opset1.ops import squared_difference +from openvino.opset1.ops import squeeze +from openvino.opset1.ops import strided_slice +from openvino.opset1.ops import subtract +from openvino.opset4.ops import swish +from openvino.opset1.ops import tan +from openvino.opset1.ops import tanh +from openvino.opset1.ops import tensor_iterator +from openvino.opset1.ops import tile +from openvino.opset3.ops import topk +from openvino.opset1.ops import transpose +from openvino.opset1.ops import unsqueeze +from openvino.opset1.ops import variadic_split diff --git a/src/bindings/python/src/openvino/runtime/opset6/ops/__init__.py b/src/bindings/python/src/openvino/runtime/opset6/ops/__init__.py new file mode 100644 index 00000000000000..fb23fe96e5e51f --- /dev/null +++ b/src/bindings/python/src/openvino/runtime/opset6/ops/__init__.py @@ -0,0 +1,9 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from openvino.opset6.ops import assign +from openvino.opset6.ops import ctc_greedy_decoder_seq_len +from openvino.opset6.ops import gather_elements +from openvino.opset6.ops import mvn +from openvino.opset6.ops import read_value diff --git a/src/bindings/python/src/openvino/runtime/opset7/__init__.py b/src/bindings/python/src/openvino/runtime/opset7/__init__.py index 9ee692cea75f59..16703072285487 100644 --- a/src/bindings/python/src/openvino/runtime/opset7/__init__.py +++ b/src/bindings/python/src/openvino/runtime/opset7/__init__.py @@ -2,157 +2,157 @@ # Copyright (C) 2018-2024 Intel Corporation #
SPDX-License-Identifier: Apache-2.0 -from openvino.runtime.opset1.ops import absolute -from openvino.runtime.opset1.ops import absolute as abs -from openvino.runtime.opset1.ops import acos -from openvino.runtime.opset4.ops import acosh -from openvino.runtime.opset1.ops import add -from openvino.runtime.opset1.ops import asin -from openvino.runtime.opset4.ops import asinh -from openvino.runtime.opset6.ops import assign -from openvino.runtime.opset1.ops import atan -from openvino.runtime.opset4.ops import atanh -from openvino.runtime.opset1.ops import avg_pool -from openvino.runtime.opset5.ops import batch_norm_inference -from openvino.runtime.opset2.ops import batch_to_space -from openvino.runtime.opset1.ops import binary_convolution -from openvino.runtime.opset3.ops import broadcast -from openvino.runtime.opset3.ops import bucketize -from openvino.runtime.opset1.ops import ceiling -from openvino.runtime.opset1.ops import ceiling as ceil -from openvino.runtime.opset1.ops import clamp -from openvino.runtime.opset1.ops import concat -from openvino.runtime.opset1.ops import constant -from openvino.runtime.opset1.ops import convert -from openvino.runtime.opset1.ops import convert_like -from openvino.runtime.opset1.ops import convolution -from openvino.runtime.opset1.ops import convolution_backprop_data -from openvino.runtime.opset1.ops import cos -from openvino.runtime.opset1.ops import cosh -from openvino.runtime.opset1.ops import ctc_greedy_decoder -from openvino.runtime.opset6.ops import ctc_greedy_decoder_seq_len -from openvino.runtime.opset4.ops import ctc_loss -from openvino.runtime.opset3.ops import cum_sum -from openvino.runtime.opset3.ops import cum_sum as cumsum -from openvino.runtime.opset1.ops import deformable_convolution -from openvino.runtime.opset1.ops import deformable_psroi_pooling -from openvino.runtime.opset1.ops import depth_to_space -from openvino.runtime.opset1.ops import detection_output -from openvino.runtime.opset7.ops import dft -from 
openvino.runtime.opset1.ops import divide -from openvino.runtime.opset7.ops import einsum -from openvino.runtime.opset1.ops import elu -from openvino.runtime.opset3.ops import embedding_bag_offsets_sum -from openvino.runtime.opset3.ops import embedding_bag_packed_sum -from openvino.runtime.opset3.ops import embedding_segments_sum -from openvino.runtime.opset3.ops import extract_image_patches -from openvino.runtime.opset1.ops import equal -from openvino.runtime.opset1.ops import erf -from openvino.runtime.opset1.ops import exp -from openvino.runtime.opset1.ops import fake_quantize -from openvino.runtime.opset1.ops import floor -from openvino.runtime.opset1.ops import floor_mod -from openvino.runtime.opset7.ops import gather -from openvino.runtime.opset6.ops import gather_elements -from openvino.runtime.opset5.ops import gather_nd -from openvino.runtime.opset1.ops import gather_tree -from openvino.runtime.opset7.ops import gelu -from openvino.runtime.opset1.ops import greater -from openvino.runtime.opset1.ops import greater_equal -from openvino.runtime.opset1.ops import grn -from openvino.runtime.opset1.ops import group_convolution -from openvino.runtime.opset1.ops import group_convolution_backprop_data -from openvino.runtime.opset3.ops import gru_cell -from openvino.runtime.opset5.ops import gru_sequence -from openvino.runtime.opset1.ops import hard_sigmoid -from openvino.runtime.opset5.ops import hsigmoid -from openvino.runtime.opset4.ops import hswish -from openvino.runtime.opset7.ops import idft -from openvino.runtime.opset1.ops import interpolate -from openvino.runtime.opset1.ops import less -from openvino.runtime.opset1.ops import less_equal -from openvino.runtime.opset1.ops import log -from openvino.runtime.opset1.ops import logical_and -from openvino.runtime.opset1.ops import logical_not -from openvino.runtime.opset1.ops import logical_or -from openvino.runtime.opset1.ops import logical_xor -from openvino.runtime.opset5.ops import log_softmax -from 
openvino.runtime.opset5.ops import loop -from openvino.runtime.opset1.ops import lrn -from openvino.runtime.opset4.ops import lstm_cell -from openvino.runtime.opset5.ops import lstm_sequence -from openvino.runtime.opset1.ops import matmul -from openvino.runtime.opset1.ops import max_pool -from openvino.runtime.opset1.ops import maximum -from openvino.runtime.opset1.ops import minimum -from openvino.runtime.opset4.ops import mish -from openvino.runtime.opset1.ops import mod -from openvino.runtime.opset1.ops import multiply -from openvino.runtime.opset6.ops import mvn -from openvino.runtime.opset1.ops import negative -from openvino.runtime.opset5.ops import non_max_suppression -from openvino.runtime.opset3.ops import non_zero -from openvino.runtime.opset1.ops import normalize_l2 -from openvino.runtime.opset1.ops import not_equal -from openvino.runtime.opset1.ops import one_hot -from openvino.runtime.opset1.ops import pad -from openvino.runtime.opset1.ops import parameter -from openvino.runtime.opset1.ops import power -from openvino.runtime.opset1.ops import prelu -from openvino.runtime.opset1.ops import prior_box -from openvino.runtime.opset1.ops import prior_box_clustered -from openvino.runtime.opset1.ops import psroi_pooling -from openvino.runtime.opset4.ops import proposal -from openvino.runtime.opset1.ops import range -from openvino.runtime.opset6.ops import read_value -from openvino.runtime.opset4.ops import reduce_l1 -from openvino.runtime.opset4.ops import reduce_l2 -from openvino.runtime.opset1.ops import reduce_logical_and -from openvino.runtime.opset1.ops import reduce_logical_or -from openvino.runtime.opset1.ops import reduce_max -from openvino.runtime.opset1.ops import reduce_mean -from openvino.runtime.opset1.ops import reduce_min -from openvino.runtime.opset1.ops import reduce_prod -from openvino.runtime.opset1.ops import reduce_sum -from openvino.runtime.opset1.ops import region_yolo -from openvino.runtime.opset2.ops import reorg_yolo -from 
openvino.runtime.opset1.ops import relu -from openvino.runtime.opset1.ops import reshape -from openvino.runtime.opset1.ops import result -from openvino.runtime.opset1.ops import reverse_sequence -from openvino.runtime.opset3.ops import rnn_cell -from openvino.runtime.opset5.ops import rnn_sequence -from openvino.runtime.opset3.ops import roi_align -from openvino.runtime.opset2.ops import roi_pooling -from openvino.runtime.opset7.ops import roll -from openvino.runtime.opset5.ops import round -from openvino.runtime.opset3.ops import scatter_elements_update -from openvino.runtime.opset4.ops import scatter_nd_update -from openvino.runtime.opset3.ops import scatter_update -from openvino.runtime.opset1.ops import select -from openvino.runtime.opset1.ops import selu -from openvino.runtime.opset3.ops import shape_of -from openvino.runtime.opset3.ops import shuffle_channels -from openvino.runtime.opset1.ops import sigmoid -from openvino.runtime.opset1.ops import sign -from openvino.runtime.opset1.ops import sin -from openvino.runtime.opset1.ops import sinh -from openvino.runtime.opset1.ops import softmax -from openvino.runtime.opset4.ops import softplus -from openvino.runtime.opset2.ops import space_to_batch -from openvino.runtime.opset1.ops import space_to_depth -from openvino.runtime.opset1.ops import split -from openvino.runtime.opset1.ops import sqrt -from openvino.runtime.opset1.ops import squared_difference -from openvino.runtime.opset1.ops import squeeze -from openvino.runtime.opset1.ops import strided_slice -from openvino.runtime.opset1.ops import subtract -from openvino.runtime.opset4.ops import swish -from openvino.runtime.opset1.ops import tan -from openvino.runtime.opset1.ops import tanh -from openvino.runtime.opset1.ops import tensor_iterator -from openvino.runtime.opset1.ops import tile -from openvino.runtime.opset3.ops import topk -from openvino.runtime.opset1.ops import transpose -from openvino.runtime.opset1.ops import unsqueeze -from 
openvino.runtime.opset1.ops import variadic_split +from openvino.opset1.ops import absolute +from openvino.opset1.ops import absolute as abs +from openvino.opset1.ops import acos +from openvino.opset4.ops import acosh +from openvino.opset1.ops import add +from openvino.opset1.ops import asin +from openvino.opset4.ops import asinh +from openvino.opset6.ops import assign +from openvino.opset1.ops import atan +from openvino.opset4.ops import atanh +from openvino.opset1.ops import avg_pool +from openvino.opset5.ops import batch_norm_inference +from openvino.opset2.ops import batch_to_space +from openvino.opset1.ops import binary_convolution +from openvino.opset3.ops import broadcast +from openvino.opset3.ops import bucketize +from openvino.opset1.ops import ceiling +from openvino.opset1.ops import ceiling as ceil +from openvino.opset1.ops import clamp +from openvino.opset1.ops import concat +from openvino.opset1.ops import constant +from openvino.opset1.ops import convert +from openvino.opset1.ops import convert_like +from openvino.opset1.ops import convolution +from openvino.opset1.ops import convolution_backprop_data +from openvino.opset1.ops import cos +from openvino.opset1.ops import cosh +from openvino.opset1.ops import ctc_greedy_decoder +from openvino.opset6.ops import ctc_greedy_decoder_seq_len +from openvino.opset4.ops import ctc_loss +from openvino.opset3.ops import cum_sum +from openvino.opset3.ops import cum_sum as cumsum +from openvino.opset1.ops import deformable_convolution +from openvino.opset1.ops import deformable_psroi_pooling +from openvino.opset1.ops import depth_to_space +from openvino.opset1.ops import detection_output +from openvino.opset7.ops import dft +from openvino.opset1.ops import divide +from openvino.opset7.ops import einsum +from openvino.opset1.ops import elu +from openvino.opset3.ops import embedding_bag_offsets_sum +from openvino.opset3.ops import embedding_bag_packed_sum +from openvino.opset3.ops import embedding_segments_sum +from 
openvino.opset3.ops import extract_image_patches +from openvino.opset1.ops import equal +from openvino.opset1.ops import erf +from openvino.opset1.ops import exp +from openvino.opset1.ops import fake_quantize +from openvino.opset1.ops import floor +from openvino.opset1.ops import floor_mod +from openvino.opset7.ops import gather +from openvino.opset6.ops import gather_elements +from openvino.opset5.ops import gather_nd +from openvino.opset1.ops import gather_tree +from openvino.opset7.ops import gelu +from openvino.opset1.ops import greater +from openvino.opset1.ops import greater_equal +from openvino.opset1.ops import grn +from openvino.opset1.ops import group_convolution +from openvino.opset1.ops import group_convolution_backprop_data +from openvino.opset3.ops import gru_cell +from openvino.opset5.ops import gru_sequence +from openvino.opset1.ops import hard_sigmoid +from openvino.opset5.ops import hsigmoid +from openvino.opset4.ops import hswish +from openvino.opset7.ops import idft +from openvino.opset1.ops import interpolate +from openvino.opset1.ops import less +from openvino.opset1.ops import less_equal +from openvino.opset1.ops import log +from openvino.opset1.ops import logical_and +from openvino.opset1.ops import logical_not +from openvino.opset1.ops import logical_or +from openvino.opset1.ops import logical_xor +from openvino.opset5.ops import log_softmax +from openvino.opset5.ops import loop +from openvino.opset1.ops import lrn +from openvino.opset4.ops import lstm_cell +from openvino.opset5.ops import lstm_sequence +from openvino.opset1.ops import matmul +from openvino.opset1.ops import max_pool +from openvino.opset1.ops import maximum +from openvino.opset1.ops import minimum +from openvino.opset4.ops import mish +from openvino.opset1.ops import mod +from openvino.opset1.ops import multiply +from openvino.opset6.ops import mvn +from openvino.opset1.ops import negative +from openvino.opset5.ops import non_max_suppression +from openvino.opset3.ops import 
non_zero +from openvino.opset1.ops import normalize_l2 +from openvino.opset1.ops import not_equal +from openvino.opset1.ops import one_hot +from openvino.opset1.ops import pad +from openvino.opset1.ops import parameter +from openvino.opset1.ops import power +from openvino.opset1.ops import prelu +from openvino.opset1.ops import prior_box +from openvino.opset1.ops import prior_box_clustered +from openvino.opset1.ops import psroi_pooling +from openvino.opset4.ops import proposal +from openvino.opset1.ops import range +from openvino.opset6.ops import read_value +from openvino.opset4.ops import reduce_l1 +from openvino.opset4.ops import reduce_l2 +from openvino.opset1.ops import reduce_logical_and +from openvino.opset1.ops import reduce_logical_or +from openvino.opset1.ops import reduce_max +from openvino.opset1.ops import reduce_mean +from openvino.opset1.ops import reduce_min +from openvino.opset1.ops import reduce_prod +from openvino.opset1.ops import reduce_sum +from openvino.opset1.ops import region_yolo +from openvino.opset2.ops import reorg_yolo +from openvino.opset1.ops import relu +from openvino.opset1.ops import reshape +from openvino.opset1.ops import result +from openvino.opset1.ops import reverse_sequence +from openvino.opset3.ops import rnn_cell +from openvino.opset5.ops import rnn_sequence +from openvino.opset3.ops import roi_align +from openvino.opset2.ops import roi_pooling +from openvino.opset7.ops import roll +from openvino.opset5.ops import round +from openvino.opset3.ops import scatter_elements_update +from openvino.opset4.ops import scatter_nd_update +from openvino.opset3.ops import scatter_update +from openvino.opset1.ops import select +from openvino.opset1.ops import selu +from openvino.opset3.ops import shape_of +from openvino.opset3.ops import shuffle_channels +from openvino.opset1.ops import sigmoid +from openvino.opset1.ops import sign +from openvino.opset1.ops import sin +from openvino.opset1.ops import sinh +from openvino.opset1.ops import 
softmax +from openvino.opset4.ops import softplus +from openvino.opset2.ops import space_to_batch +from openvino.opset1.ops import space_to_depth +from openvino.opset1.ops import split +from openvino.opset1.ops import sqrt +from openvino.opset1.ops import squared_difference +from openvino.opset1.ops import squeeze +from openvino.opset1.ops import strided_slice +from openvino.opset1.ops import subtract +from openvino.opset4.ops import swish +from openvino.opset1.ops import tan +from openvino.opset1.ops import tanh +from openvino.opset1.ops import tensor_iterator +from openvino.opset1.ops import tile +from openvino.opset3.ops import topk +from openvino.opset1.ops import transpose +from openvino.opset1.ops import unsqueeze +from openvino.opset1.ops import variadic_split diff --git a/src/bindings/python/src/openvino/runtime/opset7/ops/__init__.py b/src/bindings/python/src/openvino/runtime/opset7/ops/__init__.py new file mode 100644 index 00000000000000..9517e795d355d3 --- /dev/null +++ b/src/bindings/python/src/openvino/runtime/opset7/ops/__init__.py @@ -0,0 +1,10 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from openvino.opset7.ops import dft +from openvino.opset7.ops import einsum +from openvino.opset7.ops import gather +from openvino.opset7.ops import gelu +from openvino.opset7.ops import idft +from openvino.opset7.ops import roll diff --git a/src/bindings/python/src/openvino/runtime/opset8/__init__.py b/src/bindings/python/src/openvino/runtime/opset8/__init__.py index bf6be68ca0cbc6..e45c37863b193b 100644 --- a/src/bindings/python/src/openvino/runtime/opset8/__init__.py +++ b/src/bindings/python/src/openvino/runtime/opset8/__init__.py @@ -2,168 +2,168 @@ # Copyright (C) 2018-2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -from openvino.runtime.opset1.ops import absolute -from openvino.runtime.opset1.ops import absolute as abs -from openvino.runtime.opset1.ops import acos -from 
openvino.runtime.opset4.ops import acosh -from openvino.runtime.opset8.ops import adaptive_avg_pool -from openvino.runtime.opset8.ops import adaptive_max_pool -from openvino.runtime.opset1.ops import add -from openvino.runtime.opset1.ops import asin -from openvino.runtime.opset4.ops import asinh -from openvino.runtime.opset6.ops import assign -from openvino.runtime.opset1.ops import atan -from openvino.runtime.opset4.ops import atanh -from openvino.runtime.opset1.ops import avg_pool -from openvino.runtime.opset5.ops import batch_norm_inference -from openvino.runtime.opset2.ops import batch_to_space -from openvino.runtime.opset1.ops import binary_convolution -from openvino.runtime.opset3.ops import broadcast -from openvino.runtime.opset3.ops import bucketize -from openvino.runtime.opset1.ops import ceiling -from openvino.runtime.opset1.ops import ceiling as ceil -from openvino.runtime.opset1.ops import clamp -from openvino.runtime.opset1.ops import concat -from openvino.runtime.opset1.ops import constant -from openvino.runtime.opset1.ops import convert -from openvino.runtime.opset1.ops import convert_like -from openvino.runtime.opset1.ops import convolution -from openvino.runtime.opset1.ops import convolution_backprop_data -from openvino.runtime.opset1.ops import cos -from openvino.runtime.opset1.ops import cosh -from openvino.runtime.opset1.ops import ctc_greedy_decoder -from openvino.runtime.opset6.ops import ctc_greedy_decoder_seq_len -from openvino.runtime.opset4.ops import ctc_loss -from openvino.runtime.opset3.ops import cum_sum -from openvino.runtime.opset3.ops import cum_sum as cumsum -from openvino.runtime.opset8.ops import deformable_convolution -from openvino.runtime.opset1.ops import deformable_psroi_pooling -from openvino.runtime.opset1.ops import depth_to_space -from openvino.runtime.opset8.ops import detection_output -from openvino.runtime.opset7.ops import dft -from openvino.runtime.opset1.ops import divide -from openvino.runtime.opset7.ops import 
einsum -from openvino.runtime.opset1.ops import elu -from openvino.runtime.opset3.ops import embedding_bag_offsets_sum -from openvino.runtime.opset3.ops import embedding_bag_packed_sum -from openvino.runtime.opset3.ops import embedding_segments_sum -from openvino.runtime.opset3.ops import extract_image_patches -from openvino.runtime.opset1.ops import equal -from openvino.runtime.opset1.ops import erf -from openvino.runtime.opset1.ops import exp -from openvino.runtime.opset1.ops import fake_quantize -from openvino.runtime.opset1.ops import floor -from openvino.runtime.opset1.ops import floor_mod -from openvino.runtime.opset8.ops import gather -from openvino.runtime.opset6.ops import gather_elements -from openvino.runtime.opset8.ops import gather_nd -from openvino.runtime.opset1.ops import gather_tree -from openvino.runtime.opset7.ops import gelu -from openvino.runtime.opset1.ops import greater -from openvino.runtime.opset1.ops import greater_equal -from openvino.runtime.opset1.ops import grn -from openvino.runtime.opset1.ops import group_convolution -from openvino.runtime.opset1.ops import group_convolution_backprop_data -from openvino.runtime.opset3.ops import gru_cell -from openvino.runtime.opset5.ops import gru_sequence -from openvino.runtime.opset1.ops import hard_sigmoid -from openvino.runtime.opset5.ops import hsigmoid -from openvino.runtime.opset4.ops import hswish -from openvino.runtime.opset7.ops import idft -from openvino.runtime.opset8.ops import if_op -from openvino.runtime.opset1.ops import interpolate -from openvino.runtime.opset8.ops import i420_to_bgr -from openvino.runtime.opset8.ops import i420_to_rgb -from openvino.runtime.opset1.ops import less -from openvino.runtime.opset1.ops import less_equal -from openvino.runtime.opset1.ops import log -from openvino.runtime.opset1.ops import logical_and -from openvino.runtime.opset1.ops import logical_not -from openvino.runtime.opset1.ops import logical_or -from openvino.runtime.opset1.ops import logical_xor 
-from openvino.runtime.opset5.ops import log_softmax -from openvino.runtime.opset5.ops import loop -from openvino.runtime.opset1.ops import lrn -from openvino.runtime.opset4.ops import lstm_cell -from openvino.runtime.opset5.ops import lstm_sequence -from openvino.runtime.opset1.ops import matmul -from openvino.runtime.opset8.ops import matrix_nms -from openvino.runtime.opset8.ops import max_pool -from openvino.runtime.opset1.ops import maximum -from openvino.runtime.opset1.ops import minimum -from openvino.runtime.opset4.ops import mish -from openvino.runtime.opset1.ops import mod -from openvino.runtime.opset8.ops import multiclass_nms -from openvino.runtime.opset1.ops import multiply -from openvino.runtime.opset6.ops import mvn -from openvino.runtime.opset1.ops import negative -from openvino.runtime.opset5.ops import non_max_suppression -from openvino.runtime.opset3.ops import non_zero -from openvino.runtime.opset1.ops import normalize_l2 -from openvino.runtime.opset1.ops import not_equal -from openvino.runtime.opset8.ops import nv12_to_bgr -from openvino.runtime.opset8.ops import nv12_to_rgb -from openvino.runtime.opset1.ops import one_hot -from openvino.runtime.opset1.ops import pad -from openvino.runtime.opset1.ops import parameter -from openvino.runtime.opset1.ops import power -from openvino.runtime.opset1.ops import prelu -from openvino.runtime.opset8.ops import prior_box -from openvino.runtime.opset1.ops import prior_box_clustered -from openvino.runtime.opset1.ops import psroi_pooling -from openvino.runtime.opset4.ops import proposal -from openvino.runtime.opset1.ops import range -from openvino.runtime.opset8.ops import random_uniform -from openvino.runtime.opset6.ops import read_value -from openvino.runtime.opset4.ops import reduce_l1 -from openvino.runtime.opset4.ops import reduce_l2 -from openvino.runtime.opset1.ops import reduce_logical_and -from openvino.runtime.opset1.ops import reduce_logical_or -from openvino.runtime.opset1.ops import reduce_max 
-from openvino.runtime.opset1.ops import reduce_mean -from openvino.runtime.opset1.ops import reduce_min -from openvino.runtime.opset1.ops import reduce_prod -from openvino.runtime.opset1.ops import reduce_sum -from openvino.runtime.opset1.ops import region_yolo -from openvino.runtime.opset2.ops import reorg_yolo -from openvino.runtime.opset1.ops import relu -from openvino.runtime.opset1.ops import reshape -from openvino.runtime.opset1.ops import result -from openvino.runtime.opset1.ops import reverse_sequence -from openvino.runtime.opset3.ops import rnn_cell -from openvino.runtime.opset5.ops import rnn_sequence -from openvino.runtime.opset3.ops import roi_align -from openvino.runtime.opset2.ops import roi_pooling -from openvino.runtime.opset7.ops import roll -from openvino.runtime.opset5.ops import round -from openvino.runtime.opset3.ops import scatter_elements_update -from openvino.runtime.opset4.ops import scatter_nd_update -from openvino.runtime.opset3.ops import scatter_update -from openvino.runtime.opset1.ops import select -from openvino.runtime.opset1.ops import selu -from openvino.runtime.opset3.ops import shape_of -from openvino.runtime.opset3.ops import shuffle_channels -from openvino.runtime.opset1.ops import sigmoid -from openvino.runtime.opset1.ops import sign -from openvino.runtime.opset1.ops import sin -from openvino.runtime.opset1.ops import sinh -from openvino.runtime.opset8.ops import slice -from openvino.runtime.opset8.ops import softmax -from openvino.runtime.opset4.ops import softplus -from openvino.runtime.opset2.ops import space_to_batch -from openvino.runtime.opset1.ops import space_to_depth -from openvino.runtime.opset1.ops import split -from openvino.runtime.opset1.ops import sqrt -from openvino.runtime.opset1.ops import squared_difference -from openvino.runtime.opset1.ops import squeeze -from openvino.runtime.opset1.ops import strided_slice -from openvino.runtime.opset1.ops import subtract -from openvino.runtime.opset4.ops import swish 
-from openvino.runtime.opset1.ops import tan -from openvino.runtime.opset1.ops import tanh -from openvino.runtime.opset1.ops import tensor_iterator -from openvino.runtime.opset1.ops import tile -from openvino.runtime.opset3.ops import topk -from openvino.runtime.opset1.ops import transpose -from openvino.runtime.opset1.ops import unsqueeze -from openvino.runtime.opset1.ops import variadic_split +from openvino.opset1.ops import absolute +from openvino.opset1.ops import absolute as abs +from openvino.opset1.ops import acos +from openvino.opset4.ops import acosh +from openvino.opset8.ops import adaptive_avg_pool +from openvino.opset8.ops import adaptive_max_pool +from openvino.opset1.ops import add +from openvino.opset1.ops import asin +from openvino.opset4.ops import asinh +from openvino.opset6.ops import assign +from openvino.opset1.ops import atan +from openvino.opset4.ops import atanh +from openvino.opset1.ops import avg_pool +from openvino.opset5.ops import batch_norm_inference +from openvino.opset2.ops import batch_to_space +from openvino.opset1.ops import binary_convolution +from openvino.opset3.ops import broadcast +from openvino.opset3.ops import bucketize +from openvino.opset1.ops import ceiling +from openvino.opset1.ops import ceiling as ceil +from openvino.opset1.ops import clamp +from openvino.opset1.ops import concat +from openvino.opset1.ops import constant +from openvino.opset1.ops import convert +from openvino.opset1.ops import convert_like +from openvino.opset1.ops import convolution +from openvino.opset1.ops import convolution_backprop_data +from openvino.opset1.ops import cos +from openvino.opset1.ops import cosh +from openvino.opset1.ops import ctc_greedy_decoder +from openvino.opset6.ops import ctc_greedy_decoder_seq_len +from openvino.opset4.ops import ctc_loss +from openvino.opset3.ops import cum_sum +from openvino.opset3.ops import cum_sum as cumsum +from openvino.opset8.ops import deformable_convolution +from openvino.opset1.ops import 
deformable_psroi_pooling +from openvino.opset1.ops import depth_to_space +from openvino.opset8.ops import detection_output +from openvino.opset7.ops import dft +from openvino.opset1.ops import divide +from openvino.opset7.ops import einsum +from openvino.opset1.ops import elu +from openvino.opset3.ops import embedding_bag_offsets_sum +from openvino.opset3.ops import embedding_bag_packed_sum +from openvino.opset3.ops import embedding_segments_sum +from openvino.opset3.ops import extract_image_patches +from openvino.opset1.ops import equal +from openvino.opset1.ops import erf +from openvino.opset1.ops import exp +from openvino.opset1.ops import fake_quantize +from openvino.opset1.ops import floor +from openvino.opset1.ops import floor_mod +from openvino.opset8.ops import gather +from openvino.opset6.ops import gather_elements +from openvino.opset8.ops import gather_nd +from openvino.opset1.ops import gather_tree +from openvino.opset7.ops import gelu +from openvino.opset1.ops import greater +from openvino.opset1.ops import greater_equal +from openvino.opset1.ops import grn +from openvino.opset1.ops import group_convolution +from openvino.opset1.ops import group_convolution_backprop_data +from openvino.opset3.ops import gru_cell +from openvino.opset5.ops import gru_sequence +from openvino.opset1.ops import hard_sigmoid +from openvino.opset5.ops import hsigmoid +from openvino.opset4.ops import hswish +from openvino.opset7.ops import idft +from openvino.opset8.ops import if_op +from openvino.opset1.ops import interpolate +from openvino.opset8.ops import i420_to_bgr +from openvino.opset8.ops import i420_to_rgb +from openvino.opset1.ops import less +from openvino.opset1.ops import less_equal +from openvino.opset1.ops import log +from openvino.opset1.ops import logical_and +from openvino.opset1.ops import logical_not +from openvino.opset1.ops import logical_or +from openvino.opset1.ops import logical_xor +from openvino.opset5.ops import log_softmax +from openvino.opset5.ops 
import loop +from openvino.opset1.ops import lrn +from openvino.opset4.ops import lstm_cell +from openvino.opset5.ops import lstm_sequence +from openvino.opset1.ops import matmul +from openvino.opset8.ops import matrix_nms +from openvino.opset8.ops import max_pool +from openvino.opset1.ops import maximum +from openvino.opset1.ops import minimum +from openvino.opset4.ops import mish +from openvino.opset1.ops import mod +from openvino.opset8.ops import multiclass_nms +from openvino.opset1.ops import multiply +from openvino.opset6.ops import mvn +from openvino.opset1.ops import negative +from openvino.opset5.ops import non_max_suppression +from openvino.opset3.ops import non_zero +from openvino.opset1.ops import normalize_l2 +from openvino.opset1.ops import not_equal +from openvino.opset8.ops import nv12_to_bgr +from openvino.opset8.ops import nv12_to_rgb +from openvino.opset1.ops import one_hot +from openvino.opset1.ops import pad +from openvino.opset1.ops import parameter +from openvino.opset1.ops import power +from openvino.opset1.ops import prelu +from openvino.opset8.ops import prior_box +from openvino.opset1.ops import prior_box_clustered +from openvino.opset1.ops import psroi_pooling +from openvino.opset4.ops import proposal +from openvino.opset1.ops import range +from openvino.opset8.ops import random_uniform +from openvino.opset6.ops import read_value +from openvino.opset4.ops import reduce_l1 +from openvino.opset4.ops import reduce_l2 +from openvino.opset1.ops import reduce_logical_and +from openvino.opset1.ops import reduce_logical_or +from openvino.opset1.ops import reduce_max +from openvino.opset1.ops import reduce_mean +from openvino.opset1.ops import reduce_min +from openvino.opset1.ops import reduce_prod +from openvino.opset1.ops import reduce_sum +from openvino.opset1.ops import region_yolo +from openvino.opset2.ops import reorg_yolo +from openvino.opset1.ops import relu +from openvino.opset1.ops import reshape +from openvino.opset1.ops import result 
+from openvino.opset1.ops import reverse_sequence +from openvino.opset3.ops import rnn_cell +from openvino.opset5.ops import rnn_sequence +from openvino.opset3.ops import roi_align +from openvino.opset2.ops import roi_pooling +from openvino.opset7.ops import roll +from openvino.opset5.ops import round +from openvino.opset3.ops import scatter_elements_update +from openvino.opset4.ops import scatter_nd_update +from openvino.opset3.ops import scatter_update +from openvino.opset1.ops import select +from openvino.opset1.ops import selu +from openvino.opset3.ops import shape_of +from openvino.opset3.ops import shuffle_channels +from openvino.opset1.ops import sigmoid +from openvino.opset1.ops import sign +from openvino.opset1.ops import sin +from openvino.opset1.ops import sinh +from openvino.opset8.ops import slice +from openvino.opset8.ops import softmax +from openvino.opset4.ops import softplus +from openvino.opset2.ops import space_to_batch +from openvino.opset1.ops import space_to_depth +from openvino.opset1.ops import split +from openvino.opset1.ops import sqrt +from openvino.opset1.ops import squared_difference +from openvino.opset1.ops import squeeze +from openvino.opset1.ops import strided_slice +from openvino.opset1.ops import subtract +from openvino.opset4.ops import swish +from openvino.opset1.ops import tan +from openvino.opset1.ops import tanh +from openvino.opset1.ops import tensor_iterator +from openvino.opset1.ops import tile +from openvino.opset3.ops import topk +from openvino.opset1.ops import transpose +from openvino.opset1.ops import unsqueeze +from openvino.opset1.ops import variadic_split diff --git a/src/bindings/python/src/openvino/runtime/opset8/ops/__init__.py b/src/bindings/python/src/openvino/runtime/opset8/ops/__init__.py new file mode 100644 index 00000000000000..053708521a6dae --- /dev/null +++ b/src/bindings/python/src/openvino/runtime/opset8/ops/__init__.py @@ -0,0 +1,22 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2018-2024 Intel 
Corporation +# SPDX-License-Identifier: Apache-2.0 + +from openvino.opset8.ops import adaptive_avg_pool +from openvino.opset8.ops import adaptive_max_pool +from openvino.opset8.ops import deformable_convolution +from openvino.opset8.ops import detection_output +from openvino.opset8.ops import gather +from openvino.opset8.ops import gather_nd +from openvino.opset8.ops import if_op +from openvino.opset8.ops import i420_to_bgr +from openvino.opset8.ops import i420_to_rgb +from openvino.opset8.ops import matrix_nms +from openvino.opset8.ops import max_pool +from openvino.opset8.ops import multiclass_nms +from openvino.opset8.ops import nv12_to_bgr +from openvino.opset8.ops import nv12_to_rgb +from openvino.opset8.ops import prior_box +from openvino.opset8.ops import random_uniform +from openvino.opset8.ops import slice +from openvino.opset8.ops import softmax diff --git a/src/bindings/python/src/openvino/runtime/opset9/__init__.py b/src/bindings/python/src/openvino/runtime/opset9/__init__.py index 138ecdbdcd5d33..03051d46b58759 100644 --- a/src/bindings/python/src/openvino/runtime/opset9/__init__.py +++ b/src/bindings/python/src/openvino/runtime/opset9/__init__.py @@ -2,174 +2,174 @@ # Copyright (C) 2018-2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -from openvino.runtime.opset1.ops import absolute -from openvino.runtime.opset1.ops import absolute as abs -from openvino.runtime.opset1.ops import acos -from openvino.runtime.opset4.ops import acosh -from openvino.runtime.opset8.ops import adaptive_avg_pool -from openvino.runtime.opset8.ops import adaptive_max_pool -from openvino.runtime.opset1.ops import add -from openvino.runtime.opset1.ops import asin -from openvino.runtime.opset4.ops import asinh -from openvino.runtime.opset6.ops import assign -from openvino.runtime.opset1.ops import atan -from openvino.runtime.opset4.ops import atanh -from openvino.runtime.opset1.ops import avg_pool -from openvino.runtime.opset5.ops import batch_norm_inference -from 
openvino.runtime.opset2.ops import batch_to_space -from openvino.runtime.opset1.ops import binary_convolution -from openvino.runtime.opset3.ops import broadcast -from openvino.runtime.opset3.ops import bucketize -from openvino.runtime.opset1.ops import ceiling -from openvino.runtime.opset1.ops import ceiling as ceil -from openvino.runtime.opset1.ops import clamp -from openvino.runtime.opset1.ops import concat -from openvino.runtime.opset1.ops import constant -from openvino.runtime.opset1.ops import convert -from openvino.runtime.opset1.ops import convert_like -from openvino.runtime.opset1.ops import convolution -from openvino.runtime.opset1.ops import convolution_backprop_data -from openvino.runtime.opset1.ops import cos -from openvino.runtime.opset1.ops import cosh -from openvino.runtime.opset1.ops import ctc_greedy_decoder -from openvino.runtime.opset6.ops import ctc_greedy_decoder_seq_len -from openvino.runtime.opset4.ops import ctc_loss -from openvino.runtime.opset3.ops import cum_sum -from openvino.runtime.opset3.ops import cum_sum as cumsum -from openvino.runtime.opset8.ops import deformable_convolution -from openvino.runtime.opset1.ops import deformable_psroi_pooling -from openvino.runtime.opset1.ops import depth_to_space -from openvino.runtime.opset8.ops import detection_output -from openvino.runtime.opset7.ops import dft -from openvino.runtime.opset1.ops import divide -from openvino.runtime.opset7.ops import einsum -from openvino.runtime.opset1.ops import elu -from openvino.runtime.opset3.ops import embedding_bag_offsets_sum -from openvino.runtime.opset3.ops import embedding_bag_packed_sum -from openvino.runtime.opset3.ops import embedding_segments_sum -from openvino.runtime.opset3.ops import extract_image_patches -from openvino.runtime.opset1.ops import equal -from openvino.runtime.opset1.ops import erf -from openvino.runtime.opset1.ops import exp -from openvino.runtime.opset9.ops import eye -from openvino.runtime.opset1.ops import fake_quantize -from 
openvino.runtime.opset1.ops import floor -from openvino.runtime.opset1.ops import floor_mod -from openvino.runtime.opset8.ops import gather -from openvino.runtime.opset6.ops import gather_elements -from openvino.runtime.opset8.ops import gather_nd -from openvino.runtime.opset1.ops import gather_tree -from openvino.runtime.opset7.ops import gelu -from openvino.runtime.opset9.ops import generate_proposals -from openvino.runtime.opset1.ops import greater -from openvino.runtime.opset1.ops import greater_equal -from openvino.runtime.opset9.ops import grid_sample -from openvino.runtime.opset1.ops import grn -from openvino.runtime.opset1.ops import group_convolution -from openvino.runtime.opset1.ops import group_convolution_backprop_data -from openvino.runtime.opset3.ops import gru_cell -from openvino.runtime.opset5.ops import gru_sequence -from openvino.runtime.opset1.ops import hard_sigmoid -from openvino.runtime.opset5.ops import hsigmoid -from openvino.runtime.opset4.ops import hswish -from openvino.runtime.opset7.ops import idft -from openvino.runtime.opset8.ops import if_op -from openvino.runtime.opset1.ops import interpolate -from openvino.runtime.opset9.ops import irdft -from openvino.runtime.opset8.ops import i420_to_bgr -from openvino.runtime.opset8.ops import i420_to_rgb -from openvino.runtime.opset1.ops import less -from openvino.runtime.opset1.ops import less_equal -from openvino.runtime.opset1.ops import log -from openvino.runtime.opset1.ops import logical_and -from openvino.runtime.opset1.ops import logical_not -from openvino.runtime.opset1.ops import logical_or -from openvino.runtime.opset1.ops import logical_xor -from openvino.runtime.opset5.ops import log_softmax -from openvino.runtime.opset5.ops import loop -from openvino.runtime.opset1.ops import lrn -from openvino.runtime.opset4.ops import lstm_cell -from openvino.runtime.opset5.ops import lstm_sequence -from openvino.runtime.opset1.ops import matmul -from openvino.runtime.opset8.ops import matrix_nms 
-from openvino.runtime.opset8.ops import max_pool -from openvino.runtime.opset1.ops import maximum -from openvino.runtime.opset1.ops import minimum -from openvino.runtime.opset4.ops import mish -from openvino.runtime.opset1.ops import mod -from openvino.runtime.opset9.ops import multiclass_nms -from openvino.runtime.opset1.ops import multiply -from openvino.runtime.opset6.ops import mvn -from openvino.runtime.opset1.ops import negative -from openvino.runtime.opset9.ops import non_max_suppression -from openvino.runtime.opset3.ops import non_zero -from openvino.runtime.opset1.ops import normalize_l2 -from openvino.runtime.opset1.ops import not_equal -from openvino.runtime.opset8.ops import nv12_to_bgr -from openvino.runtime.opset8.ops import nv12_to_rgb -from openvino.runtime.opset1.ops import one_hot -from openvino.runtime.opset1.ops import pad -from openvino.runtime.opset1.ops import parameter -from openvino.runtime.opset1.ops import power -from openvino.runtime.opset1.ops import prelu -from openvino.runtime.opset8.ops import prior_box -from openvino.runtime.opset1.ops import prior_box_clustered -from openvino.runtime.opset1.ops import psroi_pooling -from openvino.runtime.opset4.ops import proposal -from openvino.runtime.opset1.ops import range -from openvino.runtime.opset8.ops import random_uniform -from openvino.runtime.opset9.ops import rdft -from openvino.runtime.opset6.ops import read_value -from openvino.runtime.opset4.ops import reduce_l1 -from openvino.runtime.opset4.ops import reduce_l2 -from openvino.runtime.opset1.ops import reduce_logical_and -from openvino.runtime.opset1.ops import reduce_logical_or -from openvino.runtime.opset1.ops import reduce_max -from openvino.runtime.opset1.ops import reduce_mean -from openvino.runtime.opset1.ops import reduce_min -from openvino.runtime.opset1.ops import reduce_prod -from openvino.runtime.opset1.ops import reduce_sum -from openvino.runtime.opset1.ops import region_yolo -from openvino.runtime.opset2.ops import 
reorg_yolo -from openvino.runtime.opset1.ops import relu -from openvino.runtime.opset1.ops import reshape -from openvino.runtime.opset1.ops import result -from openvino.runtime.opset1.ops import reverse_sequence -from openvino.runtime.opset3.ops import rnn_cell -from openvino.runtime.opset5.ops import rnn_sequence -from openvino.runtime.opset9.ops import roi_align -from openvino.runtime.opset2.ops import roi_pooling -from openvino.runtime.opset7.ops import roll -from openvino.runtime.opset5.ops import round -from openvino.runtime.opset3.ops import scatter_elements_update -from openvino.runtime.opset4.ops import scatter_nd_update -from openvino.runtime.opset3.ops import scatter_update -from openvino.runtime.opset1.ops import select -from openvino.runtime.opset1.ops import selu -from openvino.runtime.opset3.ops import shape_of -from openvino.runtime.opset3.ops import shuffle_channels -from openvino.runtime.opset1.ops import sigmoid -from openvino.runtime.opset1.ops import sign -from openvino.runtime.opset1.ops import sin -from openvino.runtime.opset1.ops import sinh -from openvino.runtime.opset8.ops import slice -from openvino.runtime.opset8.ops import softmax -from openvino.runtime.opset4.ops import softplus -from openvino.runtime.opset9.ops import softsign -from openvino.runtime.opset2.ops import space_to_batch -from openvino.runtime.opset1.ops import space_to_depth -from openvino.runtime.opset1.ops import split -from openvino.runtime.opset1.ops import sqrt -from openvino.runtime.opset1.ops import squared_difference -from openvino.runtime.opset1.ops import squeeze -from openvino.runtime.opset1.ops import strided_slice -from openvino.runtime.opset1.ops import subtract -from openvino.runtime.opset4.ops import swish -from openvino.runtime.opset1.ops import tan -from openvino.runtime.opset1.ops import tanh -from openvino.runtime.opset1.ops import tensor_iterator -from openvino.runtime.opset1.ops import tile -from openvino.runtime.opset3.ops import topk -from 
openvino.runtime.opset1.ops import transpose -from openvino.runtime.opset1.ops import unsqueeze -from openvino.runtime.opset1.ops import variadic_split +from openvino.opset1.ops import absolute +from openvino.opset1.ops import absolute as abs +from openvino.opset1.ops import acos +from openvino.opset4.ops import acosh +from openvino.opset8.ops import adaptive_avg_pool +from openvino.opset8.ops import adaptive_max_pool +from openvino.opset1.ops import add +from openvino.opset1.ops import asin +from openvino.opset4.ops import asinh +from openvino.opset6.ops import assign +from openvino.opset1.ops import atan +from openvino.opset4.ops import atanh +from openvino.opset1.ops import avg_pool +from openvino.opset5.ops import batch_norm_inference +from openvino.opset2.ops import batch_to_space +from openvino.opset1.ops import binary_convolution +from openvino.opset3.ops import broadcast +from openvino.opset3.ops import bucketize +from openvino.opset1.ops import ceiling +from openvino.opset1.ops import ceiling as ceil +from openvino.opset1.ops import clamp +from openvino.opset1.ops import concat +from openvino.opset1.ops import constant +from openvino.opset1.ops import convert +from openvino.opset1.ops import convert_like +from openvino.opset1.ops import convolution +from openvino.opset1.ops import convolution_backprop_data +from openvino.opset1.ops import cos +from openvino.opset1.ops import cosh +from openvino.opset1.ops import ctc_greedy_decoder +from openvino.opset6.ops import ctc_greedy_decoder_seq_len +from openvino.opset4.ops import ctc_loss +from openvino.opset3.ops import cum_sum +from openvino.opset3.ops import cum_sum as cumsum +from openvino.opset8.ops import deformable_convolution +from openvino.opset1.ops import deformable_psroi_pooling +from openvino.opset1.ops import depth_to_space +from openvino.opset8.ops import detection_output +from openvino.opset7.ops import dft +from openvino.opset1.ops import divide +from openvino.opset7.ops import einsum +from 
openvino.opset1.ops import elu +from openvino.opset3.ops import embedding_bag_offsets_sum +from openvino.opset3.ops import embedding_bag_packed_sum +from openvino.opset3.ops import embedding_segments_sum +from openvino.opset3.ops import extract_image_patches +from openvino.opset1.ops import equal +from openvino.opset1.ops import erf +from openvino.opset1.ops import exp +from openvino.opset9.ops import eye +from openvino.opset1.ops import fake_quantize +from openvino.opset1.ops import floor +from openvino.opset1.ops import floor_mod +from openvino.opset8.ops import gather +from openvino.opset6.ops import gather_elements +from openvino.opset8.ops import gather_nd +from openvino.opset1.ops import gather_tree +from openvino.opset7.ops import gelu +from openvino.opset9.ops import generate_proposals +from openvino.opset1.ops import greater +from openvino.opset1.ops import greater_equal +from openvino.opset9.ops import grid_sample +from openvino.opset1.ops import grn +from openvino.opset1.ops import group_convolution +from openvino.opset1.ops import group_convolution_backprop_data +from openvino.opset3.ops import gru_cell +from openvino.opset5.ops import gru_sequence +from openvino.opset1.ops import hard_sigmoid +from openvino.opset5.ops import hsigmoid +from openvino.opset4.ops import hswish +from openvino.opset7.ops import idft +from openvino.opset8.ops import if_op +from openvino.opset1.ops import interpolate +from openvino.opset9.ops import irdft +from openvino.opset8.ops import i420_to_bgr +from openvino.opset8.ops import i420_to_rgb +from openvino.opset1.ops import less +from openvino.opset1.ops import less_equal +from openvino.opset1.ops import log +from openvino.opset1.ops import logical_and +from openvino.opset1.ops import logical_not +from openvino.opset1.ops import logical_or +from openvino.opset1.ops import logical_xor +from openvino.opset5.ops import log_softmax +from openvino.opset5.ops import loop +from openvino.opset1.ops import lrn +from 
openvino.opset4.ops import lstm_cell +from openvino.opset5.ops import lstm_sequence +from openvino.opset1.ops import matmul +from openvino.opset8.ops import matrix_nms +from openvino.opset8.ops import max_pool +from openvino.opset1.ops import maximum +from openvino.opset1.ops import minimum +from openvino.opset4.ops import mish +from openvino.opset1.ops import mod +from openvino.opset9.ops import multiclass_nms +from openvino.opset1.ops import multiply +from openvino.opset6.ops import mvn +from openvino.opset1.ops import negative +from openvino.opset9.ops import non_max_suppression +from openvino.opset3.ops import non_zero +from openvino.opset1.ops import normalize_l2 +from openvino.opset1.ops import not_equal +from openvino.opset8.ops import nv12_to_bgr +from openvino.opset8.ops import nv12_to_rgb +from openvino.opset1.ops import one_hot +from openvino.opset1.ops import pad +from openvino.opset1.ops import parameter +from openvino.opset1.ops import power +from openvino.opset1.ops import prelu +from openvino.opset8.ops import prior_box +from openvino.opset1.ops import prior_box_clustered +from openvino.opset1.ops import psroi_pooling +from openvino.opset4.ops import proposal +from openvino.opset1.ops import range +from openvino.opset8.ops import random_uniform +from openvino.opset9.ops import rdft +from openvino.opset6.ops import read_value +from openvino.opset4.ops import reduce_l1 +from openvino.opset4.ops import reduce_l2 +from openvino.opset1.ops import reduce_logical_and +from openvino.opset1.ops import reduce_logical_or +from openvino.opset1.ops import reduce_max +from openvino.opset1.ops import reduce_mean +from openvino.opset1.ops import reduce_min +from openvino.opset1.ops import reduce_prod +from openvino.opset1.ops import reduce_sum +from openvino.opset1.ops import region_yolo +from openvino.opset2.ops import reorg_yolo +from openvino.opset1.ops import relu +from openvino.opset1.ops import reshape +from openvino.opset1.ops import result +from 
openvino.opset1.ops import reverse_sequence +from openvino.opset3.ops import rnn_cell +from openvino.opset5.ops import rnn_sequence +from openvino.opset9.ops import roi_align +from openvino.opset2.ops import roi_pooling +from openvino.opset7.ops import roll +from openvino.opset5.ops import round +from openvino.opset3.ops import scatter_elements_update +from openvino.opset4.ops import scatter_nd_update +from openvino.opset3.ops import scatter_update +from openvino.opset1.ops import select +from openvino.opset1.ops import selu +from openvino.opset3.ops import shape_of +from openvino.opset3.ops import shuffle_channels +from openvino.opset1.ops import sigmoid +from openvino.opset1.ops import sign +from openvino.opset1.ops import sin +from openvino.opset1.ops import sinh +from openvino.opset8.ops import slice +from openvino.opset8.ops import softmax +from openvino.opset4.ops import softplus +from openvino.opset9.ops import softsign +from openvino.opset2.ops import space_to_batch +from openvino.opset1.ops import space_to_depth +from openvino.opset1.ops import split +from openvino.opset1.ops import sqrt +from openvino.opset1.ops import squared_difference +from openvino.opset1.ops import squeeze +from openvino.opset1.ops import strided_slice +from openvino.opset1.ops import subtract +from openvino.opset4.ops import swish +from openvino.opset1.ops import tan +from openvino.opset1.ops import tanh +from openvino.opset1.ops import tensor_iterator +from openvino.opset1.ops import tile +from openvino.opset3.ops import topk +from openvino.opset1.ops import transpose +from openvino.opset1.ops import unsqueeze +from openvino.opset1.ops import variadic_split diff --git a/src/bindings/python/src/openvino/runtime/opset9/ops/__init__.py b/src/bindings/python/src/openvino/runtime/opset9/ops/__init__.py new file mode 100644 index 00000000000000..a82d349508c058 --- /dev/null +++ b/src/bindings/python/src/openvino/runtime/opset9/ops/__init__.py @@ -0,0 +1,13 @@ +# -*- coding: utf-8 -*- +# 
Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from openvino.opset9.ops import eye +from openvino.opset9.ops import generate_proposals +from openvino.opset9.ops import grid_sample +from openvino.opset9.ops import irdft +from openvino.opset9.ops import multiclass_nms +from openvino.opset9.ops import non_max_suppression +from openvino.opset9.ops import rdft +from openvino.opset9.ops import roi_align +from openvino.opset9.ops import softsign diff --git a/tools/benchmark_tool/openvino/__init__.py b/tools/benchmark_tool/openvino/__init__.py index 57f03f00c2eebf..e4d1a247520332 100644 --- a/tools/benchmark_tool/openvino/__init__.py +++ b/tools/benchmark_tool/openvino/__init__.py @@ -56,6 +56,24 @@ from openvino._pyopenvino import RemoteTensor from openvino._pyopenvino import Op +# Import opsets +from openvino import opset1 +from openvino import opset2 +from openvino import opset3 +from openvino import opset4 +from openvino import opset5 +from openvino import opset6 +from openvino import opset7 +from openvino import opset8 +from openvino import opset9 +from openvino import opset10 +from openvino import opset11 +from openvino import opset12 +from openvino import opset13 +from openvino import opset14 +from openvino import opset15 +from openvino import opset16 + # libva related: from openvino._pyopenvino import VAContext from openvino._pyopenvino import VASurfaceTensor diff --git a/tools/ovc/openvino/__init__.py b/tools/ovc/openvino/__init__.py index 57f03f00c2eebf..e4d1a247520332 100644 --- a/tools/ovc/openvino/__init__.py +++ b/tools/ovc/openvino/__init__.py @@ -56,6 +56,24 @@ from openvino._pyopenvino import RemoteTensor from openvino._pyopenvino import Op +# Import opsets +from openvino import opset1 +from openvino import opset2 +from openvino import opset3 +from openvino import opset4 +from openvino import opset5 +from openvino import opset6 +from openvino import opset7 +from openvino import opset8 +from openvino import opset9 +from 
openvino import opset10 +from openvino import opset11 +from openvino import opset12 +from openvino import opset13 +from openvino import opset14 +from openvino import opset15 +from openvino import opset16 + # libva related: from openvino._pyopenvino import VAContext from openvino._pyopenvino import VASurfaceTensor From 2d78f2a75cf97e15c6e813ec0a62b9acf4601388 Mon Sep 17 00:00:00 2001 From: Kelvin Choi Date: Wed, 11 Dec 2024 01:54:01 +0900 Subject: [PATCH 38/43] [GPU] Fix gws of Resample onnx kernel (#27990) ### Details: - *Fix gws value of resample onnx kernel with fs_b_yx_fsv32 format* ### Tickets: - *158837* --- .../kernels/resample/resample_kernel_onnx.cpp | 21 +++++++++++-------- .../unit/test_cases/resample_gpu_test.cpp | 1 + 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/resample/resample_kernel_onnx.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/resample/resample_kernel_onnx.cpp index 3b4cd32dc6b5c1..d6cdae39bda33a 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/resample/resample_kernel_onnx.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/resample/resample_kernel_onnx.cpp @@ -84,6 +84,14 @@ DeviceFeaturesKey ResampleKernelOnnx::get_required_device_features_key(const Par return get_common_subgroups_device_features_key(params); } +static size_t get_vec_size(const resample_params &params) { + if (params.inputs[0].GetLayout() == DataLayout::fs_b_yx_fsv32) { + return 2; + } else { + return 1; + } +} + ResampleKernelBase::DispatchData ResampleKernelOnnx::SetDefault(const kernel_selector::resample_params& arg) const { DispatchData dispatchData; std::vector> dims_by_gws; @@ -96,7 +104,7 @@ ResampleKernelBase::DispatchData ResampleKernelOnnx::SetDefault(const kernel_sel } dispatchData.gws[0] = CeilDiv(out.X().v, opt_x_block_size) * out.Y().v * out.Z().v; - dispatchData.gws[1] = Align(out.Feature().v, sub_group_size); + dispatchData.gws[1] = Align(CeilDiv(out.Feature().v, 
get_vec_size(arg)), sub_group_size); dispatchData.gws[2] = arg.outputs[0].Batch().v; dispatchData.lws[0] = 1; @@ -151,14 +159,9 @@ JitConstants ResampleKernelOnnx::GetJitConstants(const resample_params& params) jit.AddConstant(MakeJitConstant("X_BLOCKS", CeilDiv(params.outputs[0].X().v, opt_x_block_size))); jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", sub_group_size)); - size_t vec_size = 0; - if (params.inputs[0].GetLayout() == DataLayout::fs_b_yx_fsv32) { - vec_size = 2; - jit.AddConstant(MakeJitConstant("FEATURE_SLICE_SIZE", 32)); - } else { - vec_size = 1; - jit.AddConstant(MakeJitConstant("FEATURE_SLICE_SIZE", 16)); - } + size_t vec_size = get_vec_size(params); + jit.AddConstant(MakeJitConstant("FEATURE_SLICE_SIZE", 16 * vec_size)); + if (IsThreeSpatialResample(params)) jit.AddConstant(MakeJitConstant("THREE_SPATIAL_RESAMPLE", "")); diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/resample_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/resample_gpu_test.cpp index 99d6803e8f2dd0..13c77d2ac49bfd 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/resample_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/resample_gpu_test.cpp @@ -2371,6 +2371,7 @@ INSTANTIATE_TEST_SUITE_P(resample_opt_smoke_linear_onnx_4d_simple, { data_types::f16, {1, 128, 13, 13}, {1, 128, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::bs_fs_yx_bsv32_fsv32, format::bs_fs_yx_bsv32_fsv32, {}, {}}, { data_types::f16, {1, 128, 13, 13}, {1, 128, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::bs_fs_yx_bsv16_fsv16, format::bs_fs_yx_bsv16_fsv16, {}, {}}, { data_types::f16, {1, 128, 13, 13}, {1, 128, 26, 26}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::b_fs_yx_fsv16, format::b_fs_yx_fsv32, {}, {}}, + { data_types::f16, {2, 32, 14, 14}, {2, 32, 28, 28}, 1, resample::InterpolateOp::InterpolateMode::LINEAR_ONNX, 1, format::fs_b_yx_fsv32, format::fs_b_yx_fsv32, {}, {}}, } )); 
From f7a69336eab21b11a4129d4a1905ade8dd58757a Mon Sep 17 00:00:00 2001 From: Andrey Babushkin Date: Wed, 11 Dec 2024 06:09:22 +0000 Subject: [PATCH 39/43] Add more workflows to the metrics collection workflow (#28010) Remove the small ones we probably don't need to track this way --- .../workflows/send_workflows_to_opentelemetry.yml | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/.github/workflows/send_workflows_to_opentelemetry.yml b/.github/workflows/send_workflows_to_opentelemetry.yml index ba38d6a9f90fed..31fd7adcd89ac6 100644 --- a/.github/workflows/send_workflows_to_opentelemetry.yml +++ b/.github/workflows/send_workflows_to_opentelemetry.yml @@ -5,11 +5,7 @@ on: workflows: - Android ARM64 with vcpkg - Android x64 - - Documentation - - Cleanup PIP caches - - Code snippets - - Code Style - - Code coverage + - Cleanup caches - Coverity (Ubuntu 20.04, Python 3.11) - Debian 10 ARM - Fedora 29 (RHEL 8.4), Python 3.9 @@ -19,10 +15,12 @@ on: - Linux ARM64 (Ubuntu 20.04, Python 3.11) - Linux Static CC (Ubuntu 22.04, Python 3.11, Clang) - Linux RISC-V with Conan (Ubuntu 22.04, Python 3.10) + - Linux (Ubuntu 22.04, Python 3.11, Intel DPC++ Compiler) + - Linux CPU Plugin Snippets with LIBXSMM (Ubuntu 20.04) + - Linux Sanitizers (Ubuntu 20.04, Python 3.9) - macOS (Python 3.11) - macOS ARM64 (Python 3.11) - - MO - - Python API Checks + - Manylinux 2014 - Webassembly - Windows (VS 2019, Python 3.11, Release) - Windows (VS 2019, Python 3.11, Debug) From 8659cd27792a5bb907c8d4ab0bf1b050f8a6a7c5 Mon Sep 17 00:00:00 2001 From: Taylor Yeonbok Lee Date: Wed, 11 Dec 2024 15:37:45 +0900 Subject: [PATCH 40/43] [GPU] Not to apply swiglu fusion for onednn (#28011) ### Details: - Disable swiglu fusion which was removed from the previous PR https://github.com/openvinotoolkit/openvino/pull/27831 by accident.. 
### Tickets: - *ticket-id* --- .../src/graph/graph_optimizer/prepare_primitive_fusing.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp index 93f0905b3a1ef7..05f907dcd81f0a 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp @@ -171,6 +171,10 @@ void prepare_primitive_fusing::fuse_swiglu(program &p) { // Apply only for high performant GPU if (disable_fc_swiglu_fusion || p.get_engine().get_device_info().execution_units_count < 128) return; + + if (p.get_engine().get_device_info().supports_immad) + return; + // TODO: to support other glu types && other weight data types auto itr = p.get_processing_order().begin(); std::map>> fusing_history; From 9da62a48b534d8e3e21037b07e12b8a6adc6245d Mon Sep 17 00:00:00 2001 From: Mohammad Haghighipanah Date: Tue, 10 Dec 2024 22:53:07 -0800 Subject: [PATCH 41/43] [Coverity] GPU Plugin Medium issues (#27761) ### Details: - This PR fixes: 1562334, 1559879, 1559863, 1559854, 1559677, 1559879, 1559863 - 1559461 and 1559460 are related to ov::pass::pattern::matcher in `decompose_reduce_scalar_output.cpp` - 1559886 and 1559881 are related to ov::pass::pattern::matcher in `transpose_fusion.cpp` - 1559862 is related to ov::pass::pattern::matcher in `transpose_fusion.cpp` - 1559698 and 1559691 are related to ov::pass::pattern::matcher in `bcast_and_pad_zp_buffers.cpp` - all member variables are initialized in the constructors' initializer lists of `ov::pass::pattern::matcher` to address 1559461, 1559460, 1559886, 1559881, 1559862, 1559698, 1559691. 
### Tickets: - [*CVS-153064*](https://jira.devtools.intel.com/browse/CVS-153064) - CVS-145082 --------- Co-authored-by: Pavel Durandin --- .../include/openvino/pass/pattern/matcher.hpp | 39 +++++++++++++++---- .../include/intel_gpu/primitives/resample.hpp | 2 +- .../graph/impls/onednn/convolution_onednn.cpp | 2 +- .../fully_connected_kernel_bf_tiled.cpp | 3 ++ .../transformations/kv_cache_fusion.cpp | 2 +- 5 files changed, 38 insertions(+), 10 deletions(-) diff --git a/src/core/include/openvino/pass/pattern/matcher.hpp b/src/core/include/openvino/pass/pattern/matcher.hpp index bbd7e32b0a1802..7112ac9ff85e64 100644 --- a/src/core/include/openvino/pass/pattern/matcher.hpp +++ b/src/core/include/openvino/pass/pattern/matcher.hpp @@ -62,10 +62,31 @@ class OPENVINO_API Matcher { // Avoid implicit string construction from nullptr. Matcher(const std::shared_ptr pattern_node, std::nullptr_t name) = delete; - Matcher() = default; - Matcher(Output& pattern_node) : m_pattern_node{pattern_node} {} - - Matcher(Output& pattern_node, const std::string& name) : m_pattern_node(pattern_node), m_name{name} {} + Matcher() + : m_match_root{}, + m_pattern_node{}, + m_pattern_map{}, + m_pattern_value_maps{}, + m_matched_list{}, + m_name{""}, + m_strict_mode{false} {} + Matcher(Output& pattern_node) + : m_match_root{}, + m_pattern_node{pattern_node}, + m_pattern_map{}, + m_pattern_value_maps{}, + m_matched_list{}, + m_name{""}, + m_strict_mode{false} {} + + Matcher(Output& pattern_node, const std::string& name) + : m_match_root{}, + m_pattern_node{pattern_node}, + m_pattern_map{}, + m_pattern_value_maps{}, + m_matched_list{}, + m_name{name}, + m_strict_mode{false} {} /// \brief Constructs a Matcher object /// @@ -73,9 +94,13 @@ class OPENVINO_API Matcher { /// \param name is a string which is used for logging and disabling a matcher /// \param strict_mode forces a matcher to consider shapes and ET of nodes Matcher(const Output& pattern_node, const std::string& name, bool strict_mode) - : 
m_pattern_node(pattern_node), - m_name(name), - m_strict_mode(strict_mode) {} + : m_match_root{}, + m_pattern_node{pattern_node}, + m_pattern_map{}, + m_pattern_value_maps{}, + m_matched_list{}, + m_name{name}, + m_strict_mode{strict_mode} {} // Some matches should start on a node rather than an output. These three constructors // are transition until we work out the right way to do that. diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/resample.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/resample.hpp index 62d32a3619e329..f6e32661974cb8 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/resample.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/resample.hpp @@ -15,7 +15,7 @@ namespace cldnn { struct resample : public primitive_base { CLDNN_DECLARE_PRIMITIVE(resample) - resample() : primitive_base("", {}) {} + resample() : primitive_base("", {}), scales_port(0) {} using InterpolateOp = ov::op::util::InterpolateBase; diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp index a11ceef8b0f2dd..a2d3c007e29aa8 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp @@ -121,7 +121,7 @@ struct convolution_onednn : typed_primitive_onednn_impl { private: int _zero_point_mask; - dnnl::memory::data_type _wzp_data_type; + dnnl::memory::data_type _wzp_data_type = dnnl::memory::data_type::undef; protected: std::unique_ptr clone() const override { diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp index d0f881adcd88b1..0774c62add1643 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp +++ 
b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp @@ -846,9 +846,11 @@ void FullyConnected_bf_tiled::GetUpdateDispatchDataFunc(KernelData& kd) const { // quantized input is char type kd.internalBufferSizes.push_back(input_size); // half type of de_quan_scale and activation sum for each quantized group + OPENVINO_ASSERT(quantize_grp_size != 0, "Error: quantize_grp_size is zero."); kd.internalBufferSizes.push_back((input_size / quantize_grp_size) * 2 * 2); } + OPENVINO_ASSERT(quantize_grp_size != 0, "Error: quantize_grp_size is zero."); kd.kernels[0].params.workGroups.global = {std::max((input_size / quantize_grp_size), (size_t)1), 1, 1}; kd.kernels[0].params.workGroups.local = {16, 1, 1}; } @@ -983,6 +985,7 @@ KernelsData FullyConnected_bf_tiled::GetMultiKernelsData(const Params &params, const auto& fc_params = static_cast(params); size_t quantize_grp_size = get_dynamic_quantize_group_size(fc_params); + OPENVINO_ASSERT(quantize_grp_size != 0, "Error: quantize_grp_size is zero."); bool bProperInput = fc_params.inputs[0].GetLayout() == dl; if (!bProperInput && !fc_params.inputs[0].PitchesDifferFromLogicalDims()) { diff --git a/src/plugins/intel_gpu/src/plugin/transformations/kv_cache_fusion.cpp b/src/plugins/intel_gpu/src/plugin/transformations/kv_cache_fusion.cpp index b97389a7d18c76..8be42a1311094b 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations/kv_cache_fusion.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations/kv_cache_fusion.cpp @@ -63,7 +63,7 @@ KVCacheFusionMatcher::KVCacheFusionMatcher() { return false; // TODO: Support conversion internally - if (concat_node->get_output_element_type(0) != past_node->get_output_element_type(0)) + if (!concat_node || concat_node->get_output_element_type(0) != past_node->get_output_element_type(0)) return false; auto variable = past_node->get_variable(); From 778011b0efe59ff4a00cf61f2162d3e79b172785 Mon Sep 17 00:00:00 2001 From: Alexey Moskalev Date: Wed, 11 
Dec 2024 11:06:25 +0400 Subject: [PATCH 42/43] Create RELEASE.MD (#27370) The first draft to review. --- docs/RELEASE.MD | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 docs/RELEASE.MD diff --git a/docs/RELEASE.MD b/docs/RELEASE.MD new file mode 100644 index 00000000000000..b345431f3f2bcf --- /dev/null +++ b/docs/RELEASE.MD @@ -0,0 +1,29 @@ +# OpenVINO Release Management +The process described below reflects the approach to managing OpenVINO releases. + +## Release Milestones +- Planning +- Execution (development of new features) +- Stabilization (Feature Freeze, Code Freeze milestones) +- Validation +- Distribution + +### Planning +This phase takes 2-4 weeks and involves scoping the backlog, prioritizing it, analyzing, and making commitments by developers for timelines specified by the release manager. + +### Execution (development of new features) +- [OpenVINO Contributing Guide](https://github.com/openvinotoolkit/openvino/blob/master/CONTRIBUTING.md) +- [Code Contribution Guide](https://docs.openvino.ai/2024/about-openvino/contributing/code-contribution-guide.html) +- [OpenVINO First Good Issue](https://github.com/openvinotoolkit/openvino/issues/17502) + +### Stabilization (Feature Freeze, Code Freeze milestones) +- **Feature Freeze**: This milestone ensures that no new features are added to the software after a certain point. This allows the development team to focus on stabilizing and refining the existing features, fixing bugs, and improving performance without the risk of introducing new issues. +- **Code Freeze**: This milestone marks the point where no new code changes are allowed except for critical bug fixes. This helps in ensuring that the final product is stable and reliable, as it minimizes the risk of last-minute changes that could introduce new bugs or instability. 
+ +### Release Validation +- This is a continuous process executed on a regular basis with cadence based on testing type: nightly, bi-weekly, weekly. +- After Code Freeze, the testing team can perform final regression testing to ensure that recent changes have not introduced new bugs and that the software meets the required quality standards. + +### Distribution +- OpenVINO has different types of build distribution: Regular releases, Long-Term Support, Pre-release releases, Nightly builds. Read more here: [OpenVINO Release Policy](https://docs.openvino.ai/2024/about-openvino/release-notes-openvino/release-policy.html) +- Different distribution channels are supported. Explore different options here: [OpenVINO Download](https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/download.html) From 859958f4e22e76c98121361ec19ea8ddb7cb8233 Mon Sep 17 00:00:00 2001 From: Mingyu Kim Date: Wed, 11 Dec 2024 16:54:48 +0900 Subject: [PATCH 43/43] [GPU] Disable KV cache compression and FC scaling for GPU with systolic (#27988) ### Details: - GPU with systolic does not need FC scaling - GPU with systolic does not support kv cache compression yet --- .../include/intel_gpu/runtime/execution_config.hpp | 2 +- src/plugins/intel_gpu/src/plugin/plugin.cpp | 4 ++-- .../intel_gpu/src/runtime/execution_config.cpp | 8 +++++--- .../tests/functional/behavior/properties.cpp | 12 ++++++++++-- 4 files changed, 18 insertions(+), 8 deletions(-) diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp index 3e854e4c9c5ada..5e059b17da0e97 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/execution_config.hpp @@ -140,7 +140,7 @@ class ExecutionConfig { // Note that RT info property value has lower priority than values set by user via core.set_property or passed to compile_model call // So this method 
should be called after setting all user properties, but before apply_user_properties() call. - void apply_rt_info(const ov::RTMap& rt_info); + void apply_rt_info(const cldnn::device_info& info, const ov::RTMap& rt_info); std::string to_string() const; diff --git a/src/plugins/intel_gpu/src/plugin/plugin.cpp b/src/plugins/intel_gpu/src/plugin/plugin.cpp index 9f9c9692b57b42..f2fa9bcdeeab1b 100644 --- a/src/plugins/intel_gpu/src/plugin/plugin.cpp +++ b/src/plugins/intel_gpu/src/plugin/plugin.cpp @@ -190,7 +190,7 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< ExecutionConfig config = m_configs_map.at(device_id); config.set_user_property(orig_config); if (model->has_rt_info("runtime_options")) - config.apply_rt_info(model->get_rt_info("runtime_options")); + config.apply_rt_info(context->get_engine().get_device_info(), model->get_rt_info("runtime_options")); config.apply_user_properties(context->get_engine().get_device_info()); set_cache_info(model, config); @@ -281,7 +281,7 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr& ExecutionConfig config = m_configs_map.at(device_id); config.set_user_property(orig_config); if (model->has_rt_info("runtime_options")) - config.apply_rt_info(model->get_rt_info("runtime_options")); + config.apply_rt_info(ctx->get_engine().get_device_info(), model->get_rt_info("runtime_options")); config.apply_user_properties(ctx->get_engine().get_device_info()); ProgramBuilder prog(ctx->get_engine(), config); diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp index 804ad81f2d3735..a698ec7eb6c5a0 100644 --- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp @@ -262,10 +262,12 @@ void ExecutionConfig::apply_user_properties(const cldnn::device_info& info) { user_properties.clear(); } -void ExecutionConfig::apply_rt_info(const ov::RTMap& rt_info) { - 
apply_rt_info_property(ov::hint::kv_cache_precision, rt_info); +void ExecutionConfig::apply_rt_info(const cldnn::device_info& info, const ov::RTMap& rt_info) { + if (!info.supports_immad) { + apply_rt_info_property(ov::hint::kv_cache_precision, rt_info); + apply_rt_info_property(ov::hint::activations_scale_factor, rt_info); + } apply_rt_info_property(ov::hint::dynamic_quantization_group_size, rt_info); - apply_rt_info_property(ov::hint::activations_scale_factor, rt_info); } std::string ExecutionConfig::to_string() const { diff --git a/src/plugins/intel_gpu/tests/functional/behavior/properties.cpp b/src/plugins/intel_gpu/tests/functional/behavior/properties.cpp index 93a00262db35c2..4af05f30c6b4b0 100644 --- a/src/plugins/intel_gpu/tests/functional/behavior/properties.cpp +++ b/src/plugins/intel_gpu/tests/functional/behavior/properties.cpp @@ -2,7 +2,9 @@ // SPDX-License-Identifier: Apache-2.0 // +#include #include "openvino/runtime/properties.hpp" +#include "openvino/runtime/intel_gpu/properties.hpp" #include "base/ov_behavior_test_utils.hpp" #include "openvino/runtime/core.hpp" #include "common_test_utils/subgraph_builders/conv_pool_relu.hpp" @@ -43,11 +45,17 @@ TEST_F(TestPropertiesGPU, RTInfoPropertiesWithDefault) { model->set_rt_info("8.0", "runtime_options", ov::hint::activations_scale_factor.name()); OV_ASSERT_NO_THROW(compiled_model = core.compile_model(model, ov::test::utils::DEVICE_GPU)); - OV_ASSERT_NO_THROW(type = compiled_model.get_property(ov::hint::kv_cache_precision)); OV_ASSERT_NO_THROW(size = compiled_model.get_property(ov::hint::dynamic_quantization_group_size)); + ASSERT_EQ(size.as(), 0); + + // GPU with systolic does not support some of rt_info + auto capabilities = core.get_property(ov::test::utils::DEVICE_GPU, ov::device::capabilities); + if (find(capabilities.cbegin(), capabilities.cend(), ov::intel_gpu::capability::HW_MATMUL) != capabilities.cend()) + return; + + OV_ASSERT_NO_THROW(type = 
compiled_model.get_property(ov::hint::kv_cache_precision)); OV_ASSERT_NO_THROW(scale = compiled_model.get_property(ov::hint::activations_scale_factor)); ASSERT_EQ(type.as(), ov::element::f16); - ASSERT_EQ(size.as(), 0); ASSERT_EQ(scale.as(), 8.0f); }