diff --git a/modules/custom_operations/user_ie_extensions/tokenizer/python/pyproject.toml b/modules/custom_operations/pyproject.toml similarity index 52% rename from modules/custom_operations/user_ie_extensions/tokenizer/python/pyproject.toml rename to modules/custom_operations/pyproject.toml index 338bf5690..416bc6ccf 100644 --- a/modules/custom_operations/user_ie_extensions/tokenizer/python/pyproject.toml +++ b/modules/custom_operations/pyproject.toml @@ -6,27 +6,37 @@ requires-python = ">=3.8" authors = [ { name = "OpenVINO Developers", email = "openvino@intel.com" }, ] +classifiers = [ + 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9', + 'Programming Language :: Python :: 3.10', + 'Programming Language :: Python :: 3.11', +] dependencies = [ - "openvino", + "openvino>=2023.1", "numpy" ] [project.optional-dependencies] dev = [ + "black", "ruff", "pytest", ] transformers = [ - "transformers[sentencepiece,tiktoken]" -] -tiktoken = [ - "tiktoken" + "transformers[sentencepiece]" ] all = [ - "ov_tokenizer[dev,transformers,tiktoken]" + "ov_tokenizer[dev,transformers]" ] + +[tool.black] +line-length = 119 +target-version = ["py38", "py39", "py310", "py311", "py312"] + + [tool.ruff] ignore = ["C901", "E501", "E741", "W605"] select = ["C", "E", "F", "I", "W"] @@ -38,3 +48,11 @@ line-length = 119 [tool.ruff.isort] lines-after-imports = 2 + +[build-system] +requires = [ + "setuptools>=42", + "scikit-build~=0.17.0", + "cmake>=3.14" +] +build-backend = "setuptools.build_meta" \ No newline at end of file diff --git a/modules/custom_operations/setup.py b/modules/custom_operations/setup.py new file mode 100644 index 000000000..a8cd8d6fe --- /dev/null +++ b/modules/custom_operations/setup.py @@ -0,0 +1,15 @@ +from skbuild import setup +from skbuild import constants + +setup( + packages=["ov_tokenizer"], + package_dir={"": "user_ie_extensions/src/tokenizer/python"}, + cmake_install_dir="user_ie_extensions/src/tokenizer/python/ov_tokenizer/libs", + cmake_args=['-DCUSTOM_OPERATIONS:STRING=tokenizer', + '-DBUILD_FAST_TOKENIZERS=OFF'] +) + +# When building extension modules `cmake_install_dir` should always be set to the +# location of the package you are building extension modules for. +# Specifying the installation directory in the CMakeLists subtly breaks the relative +# paths in the helloTargets.cmake file to all of the library components.
\ No newline at end of file diff --git a/modules/custom_operations/user_ie_extensions/CMakeLists.txt b/modules/custom_operations/user_ie_extensions/CMakeLists.txt index c830c0a21..05a2d2469 100644 --- a/modules/custom_operations/user_ie_extensions/CMakeLists.txt +++ b/modules/custom_operations/user_ie_extensions/CMakeLists.txt @@ -1,10 +1,8 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 # - -if(POLICY CMP0079) - cmake_policy(SET CMP0079 NEW) -endif() +cmake_policy(SET CMP0057 NEW) +cmake_policy(SET CMP0079 NEW) if(POLICY CMP0057) cmake_policy(SET CMP0057 NEW) @@ -24,14 +22,17 @@ find_package(OpenCV COMPONENTS core) set(OP_REQ_TBB "complex_mul" "fft") +set(SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/src") +set(HEADERS "${CMAKE_CURRENT_SOURCE_DIR}/include") + # # Select specific operations # if(NOT CUSTOM_OPERATIONS) - file(GLOB op_src "${CMAKE_CURRENT_SOURCE_DIR}/*.cpp") - file(GLOB op_dirs LIST_DIRECTORIES true "${CMAKE_CURRENT_SOURCE_DIR}/*") - list(REMOVE_ITEM op_dirs "${CMAKE_CURRENT_SOURCE_DIR}/cmake") + file(GLOB op_src "${SOURCES}/*.cpp") + file(GLOB op_dirs LIST_DIRECTORIES true "${SOURCES}/*") + list(REMOVE_ITEM op_dirs "${SOURCES}/cmake") foreach(op IN LISTS op_src) get_filename_component(op_name ${op} NAME_WE) @@ -49,30 +50,31 @@ if(NOT CUSTOM_OPERATIONS) list(REMOVE_ITEM CUSTOM_OPERATIONS ov_extension) endif() -list(APPEND SRC "${CMAKE_CURRENT_SOURCE_DIR}/ov_extension.cpp") +list(APPEND SRC "${SOURCES}/ov_extension.cpp") # filter out some operations, requiring specific dependencies if(NOT OpenCV_FOUND) - list(REMOVE_ITEM SRC "${CMAKE_CURRENT_SOURCE_DIR}/fft.cpp") + list(REMOVE_ITEM SRC "${SOURCES}/fft.cpp") list(REMOVE_ITEM CUSTOM_OPERATIONS fft) endif() if(NOT TBB_FOUND) foreach(op IN LISTS OP_REQ_TBB) - list(REMOVE_ITEM SRC "${CMAKE_CURRENT_SOURCE_DIR}/${op}.cpp") + list(REMOVE_ITEM SRC "${SOURCES}/${op}.cpp") list(REMOVE_ITEM CUSTOM_OPERATIONS ${op}) endforeach() endif() message(" List of custom operations in ${TARGET_NAME} extension: ") foreach(op IN LISTS CUSTOM_OPERATIONS) - if(IS_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/${op}") - file(GLOB op_src "${CMAKE_CURRENT_SOURCE_DIR}/${op}/*.cpp") + if(IS_DIRECTORY "${SOURCES}/${op}") + file(GLOB op_src "${SOURCES}/${op}/*.cpp") list(APPEND SRC ${op_src}) - elseif(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/${op}.cpp") - list(APPEND SRC "${CMAKE_CURRENT_SOURCE_DIR}/${op}.cpp") + elseif(EXISTS "${SOURCES}/${op}.cpp") + list(APPEND SRC "${SOURCES}/${op}.cpp") else() + message("${SOURCES}/${op}") message(FATAL_ERROR "${TARGET_NAME} does not have operation with name '${op}'") endif() @@ -85,6 +87,12 @@ endforeach() add_library(${TARGET_NAME} SHARED ${SRC}) +set_target_properties(${TARGET_NAME} PROPERTIES + RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} # .exe and .dll + ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}/lib # .lib and .a + LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} # .so and .dylib +) + if(OpenCV_FOUND) target_link_libraries(${TARGET_NAME} PRIVATE ${OpenCV_LIBRARIES}) endif() @@ -94,11 +102,27 @@ if(TBB_FOUND) endif() # Left sentence_piece for backward compatibility -if(tokenizer IN_LIST CUSTOM_OPERATIONS) - add_subdirectory(tokenizer) +if("tokenizer" IN_LIST CUSTOM_OPERATIONS) + add_subdirectory(${SOURCES}/tokenizer) + if(extra_dlls) + add_custom_command(TARGET ${TARGET_NAME} POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy ${extra_dlls} $<TARGET_FILE_DIR:${TARGET_NAME}>) + install(FILES ${extra_dlls} DESTINATION .)
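+    # extra_dlls is set with PARENT_SCOPE by the tokenizer subdirectory's CMakeLists below;
+    # copying and installing the prebuilt DLLs next to the extension keeps it loadable at runtime.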
+ endif() endif() target_link_libraries(${TARGET_NAME} PRIVATE openvino::runtime) target_compile_definitions(${TARGET_NAME} PRIVATE ${CUSTOM_OPERATIONS}) target_include_directories(${TARGET_NAME} PUBLIC ./include/) + +if(DEFINED SKBUILD) + # Installing the extension module to the root of the package + install(TARGETS ${TARGET_NAME} RUNTIME DESTINATION .) + if(APPLE) + set_target_properties( + ${TARGET_NAME} PROPERTIES INSTALL_RPATH "@loader_path") + else() + set_target_properties(${TARGET_NAME} PROPERTIES INSTALL_RPATH "$ORIGIN") + endif() +endif() diff --git a/modules/custom_operations/user_ie_extensions/calculate_grid.hpp b/modules/custom_operations/user_ie_extensions/include/calculate_grid.hpp similarity index 100% rename from modules/custom_operations/user_ie_extensions/calculate_grid.hpp rename to modules/custom_operations/user_ie_extensions/include/calculate_grid.hpp diff --git a/modules/custom_operations/user_ie_extensions/complex_mul.hpp b/modules/custom_operations/user_ie_extensions/include/complex_mul.hpp similarity index 100% rename from modules/custom_operations/user_ie_extensions/complex_mul.hpp rename to modules/custom_operations/user_ie_extensions/include/complex_mul.hpp diff --git a/modules/custom_operations/user_ie_extensions/fft.hpp b/modules/custom_operations/user_ie_extensions/include/fft.hpp similarity index 100% rename from modules/custom_operations/user_ie_extensions/fft.hpp rename to modules/custom_operations/user_ie_extensions/include/fft.hpp diff --git a/modules/custom_operations/user_ie_extensions/grid_sample.hpp b/modules/custom_operations/user_ie_extensions/include/grid_sample.hpp similarity index 100% rename from modules/custom_operations/user_ie_extensions/grid_sample.hpp rename to modules/custom_operations/user_ie_extensions/include/grid_sample.hpp diff --git a/modules/custom_operations/user_ie_extensions/sparse_conv.hpp b/modules/custom_operations/user_ie_extensions/include/sparse_conv.hpp similarity index 100% rename from modules/custom_operations/user_ie_extensions/sparse_conv.hpp rename to modules/custom_operations/user_ie_extensions/include/sparse_conv.hpp diff --git a/modules/custom_operations/user_ie_extensions/sparse_conv_transpose.hpp b/modules/custom_operations/user_ie_extensions/include/sparse_conv_transpose.hpp similarity index 100% rename from modules/custom_operations/user_ie_extensions/sparse_conv_transpose.hpp rename to modules/custom_operations/user_ie_extensions/include/sparse_conv_transpose.hpp diff --git a/modules/custom_operations/user_ie_extensions/calculate_grid.cpp b/modules/custom_operations/user_ie_extensions/src/calculate_grid.cpp similarity index 100% rename from modules/custom_operations/user_ie_extensions/calculate_grid.cpp rename to modules/custom_operations/user_ie_extensions/src/calculate_grid.cpp diff --git a/modules/custom_operations/user_ie_extensions/complex_mul.cpp b/modules/custom_operations/user_ie_extensions/src/complex_mul.cpp similarity index 100% rename from modules/custom_operations/user_ie_extensions/complex_mul.cpp rename to modules/custom_operations/user_ie_extensions/src/complex_mul.cpp diff --git a/modules/custom_operations/user_ie_extensions/fft.cpp b/modules/custom_operations/user_ie_extensions/src/fft.cpp similarity index 100% rename from modules/custom_operations/user_ie_extensions/fft.cpp rename to modules/custom_operations/user_ie_extensions/src/fft.cpp diff --git a/modules/custom_operations/user_ie_extensions/grid_sample.cpp b/modules/custom_operations/user_ie_extensions/src/grid_sample.cpp similarity 
index 100% rename from modules/custom_operations/user_ie_extensions/grid_sample.cpp rename to modules/custom_operations/user_ie_extensions/src/grid_sample.cpp diff --git a/modules/custom_operations/user_ie_extensions/ov_extension.cpp b/modules/custom_operations/user_ie_extensions/src/ov_extension.cpp similarity index 100% rename from modules/custom_operations/user_ie_extensions/ov_extension.cpp rename to modules/custom_operations/user_ie_extensions/src/ov_extension.cpp diff --git a/modules/custom_operations/user_ie_extensions/sparse_conv.cpp b/modules/custom_operations/user_ie_extensions/src/sparse_conv.cpp similarity index 100% rename from modules/custom_operations/user_ie_extensions/sparse_conv.cpp rename to modules/custom_operations/user_ie_extensions/src/sparse_conv.cpp diff --git a/modules/custom_operations/user_ie_extensions/sparse_conv_transpose.cpp b/modules/custom_operations/user_ie_extensions/src/sparse_conv_transpose.cpp similarity index 100% rename from modules/custom_operations/user_ie_extensions/sparse_conv_transpose.cpp rename to modules/custom_operations/user_ie_extensions/src/sparse_conv_transpose.cpp diff --git a/modules/custom_operations/user_ie_extensions/tokenizer/CMakeLists.txt b/modules/custom_operations/user_ie_extensions/src/tokenizer/CMakeLists.txt similarity index 87% rename from modules/custom_operations/user_ie_extensions/tokenizer/CMakeLists.txt rename to modules/custom_operations/user_ie_extensions/src/tokenizer/CMakeLists.txt index 09da10fd3..4937702a0 100644 --- a/modules/custom_operations/user_ie_extensions/tokenizer/CMakeLists.txt +++ b/modules/custom_operations/user_ie_extensions/src/tokenizer/CMakeLists.txt @@ -1,16 +1,12 @@ # Copyright (C) 2018-2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 # - if(POLICY CMP0135) cmake_policy(SET CMP0135 NEW) endif() option(BUILD_FAST_TOKENIZERS OFF) -# to build only sentencepiece-static target -set_property(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY EXCLUDE_FROM_ALL ON) - # # Compile flags # @@ -112,10 +108,12 @@ else() FetchContent_MakeAvailable(fast_tokenizer) include("${fast_tokenizer_SOURCE_DIR}/FastTokenizer.cmake") + set(fast_tokenizer_SOURCE_DIR "${fast_tokenizer_SOURCE_DIR}" PARENT_SCOPE) + if(WIN32 AND X86_64) # we use re2 library in regex_normalization operation, so have to add to this list # because prebuilt fast_tokenizers package does not provide this library - list(APPEND FAST_TOKENIZER_LIBS re2) + set (RE2_LIBS re2) endif() endif() @@ -134,13 +132,32 @@ target_include_directories(${TARGET_NAME} PRIVATE # fast_tokenizer ${FAST_TOKENIZER_INCS}) +set_property(DIRECTORY ${sentencepiece_SOURCE_DIR} + PROPERTY EXCLUDE_FROM_ALL ON) + +set_property(DIRECTORY ${re2_SOURCE_DIR} + PROPERTY EXCLUDE_FROM_ALL ON) + if(CMAKE_CL_64) target_compile_definitions(sentencepiece-static PRIVATE _CRT_SECURE_NO_WARNINGS _SCL_SECURE_NO_WARNINGS) endif() -target_link_libraries(${TARGET_NAME} PRIVATE ${FAST_TOKENIZER_LIBS} sentencepiece-static) +target_link_libraries(${TARGET_NAME} PRIVATE ${FAST_TOKENIZER_LIBS} ${RE2_LIBS} sentencepiece-static) # string_view is used from cxx17 string(REPLACE " " ";" cxx_flags "${cxx_flags}") set_target_properties(${TARGET_NAME} PROPERTIES CXX_STANDARD 17 COMPILE_OPTIONS "${cxx_flags}") +# +# Post build steps to copy core_tokenizers dependencies +# + +if(WIN32 AND X86_64) + if(BUILD_FAST_TOKENIZERS) + # TODO + else() + set(extra_dlls "${fast_tokenizer_SOURCE_DIR}/lib/core_tokenizers.dll" + "${fast_tokenizer_SOURCE_DIR}/third_party/lib/icudt70.dll" + 
"${fast_tokenizer_SOURCE_DIR}/third_party/lib/icuuc70.dll" PARENT_SCOPE) + endif() +endif() \ No newline at end of file diff --git a/modules/custom_operations/user_ie_extensions/tokenizer/bpe_tokenizer.cpp b/modules/custom_operations/user_ie_extensions/src/tokenizer/bpe_tokenizer.cpp similarity index 100% rename from modules/custom_operations/user_ie_extensions/tokenizer/bpe_tokenizer.cpp rename to modules/custom_operations/user_ie_extensions/src/tokenizer/bpe_tokenizer.cpp diff --git a/modules/custom_operations/user_ie_extensions/tokenizer/bpe_tokenizer.hpp b/modules/custom_operations/user_ie_extensions/src/tokenizer/bpe_tokenizer.hpp similarity index 100% rename from modules/custom_operations/user_ie_extensions/tokenizer/bpe_tokenizer.hpp rename to modules/custom_operations/user_ie_extensions/src/tokenizer/bpe_tokenizer.hpp diff --git a/modules/custom_operations/user_ie_extensions/tokenizer/bytes_to_chars.cpp b/modules/custom_operations/user_ie_extensions/src/tokenizer/bytes_to_chars.cpp similarity index 100% rename from modules/custom_operations/user_ie_extensions/tokenizer/bytes_to_chars.cpp rename to modules/custom_operations/user_ie_extensions/src/tokenizer/bytes_to_chars.cpp diff --git a/modules/custom_operations/user_ie_extensions/tokenizer/bytes_to_chars.hpp b/modules/custom_operations/user_ie_extensions/src/tokenizer/bytes_to_chars.hpp similarity index 100% rename from modules/custom_operations/user_ie_extensions/tokenizer/bytes_to_chars.hpp rename to modules/custom_operations/user_ie_extensions/src/tokenizer/bytes_to_chars.hpp diff --git a/modules/custom_operations/user_ie_extensions/tokenizer/case_fold.cpp b/modules/custom_operations/user_ie_extensions/src/tokenizer/case_fold.cpp similarity index 100% rename from modules/custom_operations/user_ie_extensions/tokenizer/case_fold.cpp rename to modules/custom_operations/user_ie_extensions/src/tokenizer/case_fold.cpp diff --git a/modules/custom_operations/user_ie_extensions/tokenizer/case_fold.hpp b/modules/custom_operations/user_ie_extensions/src/tokenizer/case_fold.hpp similarity index 100% rename from modules/custom_operations/user_ie_extensions/tokenizer/case_fold.hpp rename to modules/custom_operations/user_ie_extensions/src/tokenizer/case_fold.hpp diff --git a/modules/custom_operations/user_ie_extensions/tokenizer/chars_to_bytes.cpp b/modules/custom_operations/user_ie_extensions/src/tokenizer/chars_to_bytes.cpp similarity index 100% rename from modules/custom_operations/user_ie_extensions/tokenizer/chars_to_bytes.cpp rename to modules/custom_operations/user_ie_extensions/src/tokenizer/chars_to_bytes.cpp diff --git a/modules/custom_operations/user_ie_extensions/tokenizer/chars_to_bytes.hpp b/modules/custom_operations/user_ie_extensions/src/tokenizer/chars_to_bytes.hpp similarity index 100% rename from modules/custom_operations/user_ie_extensions/tokenizer/chars_to_bytes.hpp rename to modules/custom_operations/user_ie_extensions/src/tokenizer/chars_to_bytes.hpp diff --git a/modules/custom_operations/user_ie_extensions/tokenizer/combine_segments.cpp b/modules/custom_operations/user_ie_extensions/src/tokenizer/combine_segments.cpp similarity index 100% rename from modules/custom_operations/user_ie_extensions/tokenizer/combine_segments.cpp rename to modules/custom_operations/user_ie_extensions/src/tokenizer/combine_segments.cpp diff --git a/modules/custom_operations/user_ie_extensions/tokenizer/combine_segments.hpp b/modules/custom_operations/user_ie_extensions/src/tokenizer/combine_segments.hpp similarity index 100% rename from 
modules/custom_operations/user_ie_extensions/tokenizer/combine_segments.hpp rename to modules/custom_operations/user_ie_extensions/src/tokenizer/combine_segments.hpp diff --git a/modules/custom_operations/user_ie_extensions/tokenizer/icu4c.patch b/modules/custom_operations/user_ie_extensions/src/tokenizer/icu4c.patch similarity index 100% rename from modules/custom_operations/user_ie_extensions/tokenizer/icu4c.patch rename to modules/custom_operations/user_ie_extensions/src/tokenizer/icu4c.patch diff --git a/modules/custom_operations/user_ie_extensions/tokenizer/normalize_unicode.cpp b/modules/custom_operations/user_ie_extensions/src/tokenizer/normalize_unicode.cpp similarity index 100% rename from modules/custom_operations/user_ie_extensions/tokenizer/normalize_unicode.cpp rename to modules/custom_operations/user_ie_extensions/src/tokenizer/normalize_unicode.cpp diff --git a/modules/custom_operations/user_ie_extensions/tokenizer/normalize_unicode.hpp b/modules/custom_operations/user_ie_extensions/src/tokenizer/normalize_unicode.hpp similarity index 100% rename from modules/custom_operations/user_ie_extensions/tokenizer/normalize_unicode.hpp rename to modules/custom_operations/user_ie_extensions/src/tokenizer/normalize_unicode.hpp diff --git a/modules/custom_operations/user_ie_extensions/tokenizer/python/README.md b/modules/custom_operations/user_ie_extensions/src/tokenizer/python/README.md similarity index 100% rename from modules/custom_operations/user_ie_extensions/tokenizer/python/README.md rename to modules/custom_operations/user_ie_extensions/src/tokenizer/python/README.md diff --git a/modules/custom_operations/user_ie_extensions/src/tokenizer/python/ov_tokenizer/__init__.py b/modules/custom_operations/user_ie_extensions/src/tokenizer/python/ov_tokenizer/__init__.py new file mode 100644 index 000000000..2b237f8eb --- /dev/null +++ b/modules/custom_operations/user_ie_extensions/src/tokenizer/python/ov_tokenizer/__init__.py @@ -0,0 +1,38 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +import os +import sys +import openvino +from openvino.runtime.utils.node_factory import NodeFactory + +from .convert_tokenizer import convert_tokenizer +from .node_factory import init_extension +from .str_pack import pack_strings, unpack_strings +from .utils import add_greedy_decoding, connect_models + +_ext_name = "user_ov_extensions" +_ext_libs_path = os.path.join(os.path.dirname(__file__), "libs") + +if sys.platform == "win32": + _ext_path = os.path.join(_ext_libs_path, f'{_ext_name}.dll') + if os.path.isdir(_ext_libs_path): + # On Windows, with Python >= 3.8, DLLs are no longer imported from the PATH. 
+ os.add_dll_directory(os.path.abspath(_ext_libs_path)) + else: + sys.exit(f'Error: extension library path {_ext_libs_path} not found') +elif sys.platform == "darwin": + _ext_path = os.path.join(_ext_libs_path, f'lib{_ext_name}.dylib') +elif sys.platform == "linux": + _ext_path = os.path.join(_ext_libs_path, f'lib{_ext_name}.so') +else: + sys.exit(f'Error: extension does not support platform {sys.platform}') + +old_core_init = openvino.runtime.Core.__init__ +def new_core_init(self, *k, **kw): + old_core_init(self, *k, **kw) + self.add_extension(_ext_path) +openvino.runtime.Core.__init__ = new_core_init + +_factory = NodeFactory() +_factory.add_extension(_ext_path) diff --git a/modules/custom_operations/user_ie_extensions/tokenizer/python/ov_tokenizer/constants.py b/modules/custom_operations/user_ie_extensions/src/tokenizer/python/ov_tokenizer/constants.py similarity index 100% rename from modules/custom_operations/user_ie_extensions/tokenizer/python/ov_tokenizer/constants.py rename to modules/custom_operations/user_ie_extensions/src/tokenizer/python/ov_tokenizer/constants.py diff --git a/modules/custom_operations/user_ie_extensions/tokenizer/python/ov_tokenizer/convert_tokenizer.py b/modules/custom_operations/user_ie_extensions/src/tokenizer/python/ov_tokenizer/convert_tokenizer.py similarity index 100% rename from modules/custom_operations/user_ie_extensions/tokenizer/python/ov_tokenizer/convert_tokenizer.py rename to modules/custom_operations/user_ie_extensions/src/tokenizer/python/ov_tokenizer/convert_tokenizer.py diff --git a/modules/custom_operations/user_ie_extensions/tokenizer/python/ov_tokenizer/hf_parser.py b/modules/custom_operations/user_ie_extensions/src/tokenizer/python/ov_tokenizer/hf_parser.py similarity index 97% rename from modules/custom_operations/user_ie_extensions/tokenizer/python/ov_tokenizer/hf_parser.py rename to modules/custom_operations/user_ie_extensions/src/tokenizer/python/ov_tokenizer/hf_parser.py index 401c8ea2b..27f8bc261 100644 --- a/modules/custom_operations/user_ie_extensions/tokenizer/python/ov_tokenizer/hf_parser.py +++ b/modules/custom_operations/user_ie_extensions/src/tokenizer/python/ov_tokenizer/hf_parser.py @@ -25,7 +25,7 @@ TOKENIZER_DECODER_NAME, TOKENIZER_ENCODER_NAME, ) -from .node_factory import factory +from . 
import _factory from .tokenizer_pipeline import ( BPETokenizationStep, BytesToCharsStep, @@ -116,7 +116,7 @@ def __init__(self, tokenizer_object: Any, number_of_inputs: int = 1) -> None: self.original_tokenizer = tokenizer_object with TemporaryDirectory() as tmpdir: tokenizer_object.save_pretrained(tmpdir) - with open(Path(tmpdir) / "tokenizer.json") as tj: + with open(Path(tmpdir) / "tokenizer.json", encoding="utf8") as tj: self.tokenizer_json = json.load(tj) self.pipeline = TokenizerPipeline() self.number_of_inputs = number_of_inputs @@ -365,7 +365,7 @@ def convert_sentencepiece_model_tokenizer( ) add_bos_token = getattr(hf_tokenizer, "add_bos_token", add_eos_token) or False - tokenizer_node = factory.create( + tokenizer_node = _factory.create( "SentencepieceTokenizer", [sp_model_node, input_node], { @@ -383,7 +383,7 @@ def convert_sentencepiece_model_tokenizer( default_value = make_constant_node(hf_tokenizer.pad_token_id or 0, values.element_type) broadcast = opset.broadcast(default_value, dense_shape) - scatternd_input_ids = factory.create( + scatternd_input_ids = _factory.create( "ScatterNDUpdate", [broadcast, indices, values], # FIXME: pad left side instead of right ) @@ -399,7 +399,7 @@ def convert_sentencepiece_model_tokenizer( outputs = scatternd_input_ids.outputs() if add_attention_mask: - attention_mask = factory.create( + attention_mask = _factory.create( "ScatterNDUpdate", [ broadcast, @@ -432,7 +432,7 @@ def convert_sentencepiece_model_tokenizer( def get_sp_decoder(sp_model_node: Node, streaming_decoder: bool = False) -> Model: token_ids = op.Parameter(Type.i32, PartialShape(["?", "?"])) # (batch, sequence) - decoder = factory.create( + decoder = _factory.create( "SentencepieceStreamDetokenizer" if streaming_decoder else "SentencepieceDetokenizer", [sp_model_node, token_ids], ).outputs() @@ -440,7 +440,7 @@ def get_sp_decoder(sp_model_node: Node, streaming_decoder: bool = False) -> Mode if streaming_decoder: decoder = RegexDecodingStep.replace_sp_spaces().get_ov_subgraph(decoder) - string_output = factory.create("StringTensorPack", decoder).outputs() + string_output = _factory.create("StringTensorPack", decoder).outputs() string_output[0].tensor.add_names({STRING_OUTPUT_NAME}) tokenizer_decoder = Model(string_output, [token_ids], TOKENIZER_DECODER_NAME) tokenizer_decoder.validate_nodes_and_infer_types() diff --git a/modules/custom_operations/user_ie_extensions/tokenizer/python/ov_tokenizer/node_factory.py b/modules/custom_operations/user_ie_extensions/src/tokenizer/python/ov_tokenizer/node_factory.py similarity index 100% rename from modules/custom_operations/user_ie_extensions/tokenizer/python/ov_tokenizer/node_factory.py rename to
modules/custom_operations/user_ie_extensions/src/tokenizer/python/ov_tokenizer/node_factory.py diff --git a/modules/custom_operations/user_ie_extensions/tokenizer/python/ov_tokenizer/str_pack.py b/modules/custom_operations/user_ie_extensions/src/tokenizer/python/ov_tokenizer/str_pack.py similarity index 100% rename from modules/custom_operations/user_ie_extensions/tokenizer/python/ov_tokenizer/str_pack.py rename to modules/custom_operations/user_ie_extensions/src/tokenizer/python/ov_tokenizer/str_pack.py diff --git a/modules/custom_operations/user_ie_extensions/tokenizer/python/ov_tokenizer/tiktoken_parser.py b/modules/custom_operations/user_ie_extensions/src/tokenizer/python/ov_tokenizer/tiktoken_parser.py similarity index 100% rename from modules/custom_operations/user_ie_extensions/tokenizer/python/ov_tokenizer/tiktoken_parser.py rename to modules/custom_operations/user_ie_extensions/src/tokenizer/python/ov_tokenizer/tiktoken_parser.py diff --git a/modules/custom_operations/user_ie_extensions/tokenizer/python/ov_tokenizer/tokenizer_pipeline.py b/modules/custom_operations/user_ie_extensions/src/tokenizer/python/ov_tokenizer/tokenizer_pipeline.py similarity index 96% rename from modules/custom_operations/user_ie_extensions/tokenizer/python/ov_tokenizer/tokenizer_pipeline.py rename to modules/custom_operations/user_ie_extensions/src/tokenizer/python/ov_tokenizer/tokenizer_pipeline.py index 74654344a..75e25d45f 100644 --- a/modules/custom_operations/user_ie_extensions/tokenizer/python/ov_tokenizer/tokenizer_pipeline.py +++ b/modules/custom_operations/user_ie_extensions/src/tokenizer/python/ov_tokenizer/tokenizer_pipeline.py @@ -22,7 +22,7 @@ TOKENIZER_DECODER_NAME, TOKENIZER_ENCODER_NAME, ) -from .node_factory import factory +from . import _factory from .str_pack import pack_string, pack_strings @@ -61,7 +61,7 @@ def create_string_constant_node(value: Union[str, List[str]]) -> op.Constant: else: # support only 1D strings for now ps = pack_strings(value) - return factory.create("StringTensorUnpack", op.Constant(ps).outputs()) + return _factory.create("StringTensorUnpack", op.Constant(ps).outputs()) @dataclass @@ -74,7 +74,7 @@ class NormalizeUnicode(NormalizationStep): normalization_form: str = "NFD" def get_ov_subgraph(self, input_nodes: List[Output]) -> List[Output]: - return factory.create( + return _factory.create( "NormalizeUnicode", input_nodes, {"normalization_form": self.normalization_form}, @@ -84,7 +84,7 @@ def get_ov_subgraph(self, input_nodes: List[Output]) -> List[Output]: @dataclass class CaseFoldStep(NormalizationStep): def get_ov_subgraph(self, input_nodes: List[Output]) -> List[Output]: - return factory.create("CaseFold", input_nodes).outputs() + return _factory.create("CaseFold", input_nodes).outputs() @dataclass @@ -122,7 +122,7 @@ def get_ov_subgraph(self, input_nodes: List[Output]) -> List[Output]: self.create_string_constant_node(self.replace_term), ) ) - return factory.create("RegexNormalization", input_nodes).outputs() + return _factory.create("RegexNormalization", input_nodes).outputs() @dataclass @@ -233,7 +233,7 @@ def digits_splitter(cls, behaviour="isolate") -> "RegexSplitStep": def get_ov_subgraph(self, input_nodes: List[Output]) -> List[Output]: input_nodes.extend(self.create_string_constant_node(self.split_pattern).outputs()) - return factory.create( + return _factory.create( "RegexSplit", input_nodes, { @@ -263,7 +263,7 @@ class BytesToCharsStep(PreTokenizatinStep): """Maps chars to other chars for Byte-level BPE Tokenizer""" def get_ov_subgraph(self, 
input_nodes: List[Output]) -> List[Output]: - return factory.create( + return _factory.create( "BytesToChars", input_nodes, ).outputs() @@ -307,7 +307,7 @@ def get_ov_subgraph(self, input_nodes: List[Output]) -> List[Output]: *as_node(self.unk_token_id).outputs(), ) ) - return factory.create( + return _factory.create( "WordpieceTokenizer", input_nodes, { @@ -379,7 +379,7 @@ def get_ov_subgraph(self, input_nodes: List[Output]) -> List[Output]: *self.create_string_constant_node(self.merges).outputs(), ) ) - return factory.create( + return _factory.create( "BPETokenizer", input_nodes, { @@ -587,7 +587,7 @@ def get_ov_subgraph(self, input_nodes): raise UserInputError(f"Unexpected node type in CombineSegments: {type(node)}") op_inputs.append(make_constant_node(self.segment_ids, Type.i32).output(0)) - return factory.create("CombineSegments", op_inputs).outputs() + return _factory.create("CombineSegments", op_inputs).outputs() @dataclass @@ -632,7 +632,7 @@ def get_ov_subgraph(self, input_nodes: List[Output]) -> List[Output]: names = [TOKEN_IDS_INPUT_NAME, TOKEN_TYPE_IDS_INPUT_NAME][: len(input_nodes) // 3] for i, name in enumerate(names): - cur_outputs = factory.create( + cur_outputs = _factory.create( "RaggedToDense", input_nodes[3 * i : 3 * (i + 1)] + max_length.outputs() + make_constant_node(0, Type.i32).outputs(), ).outputs() @@ -662,13 +662,13 @@ def get_vocab_node_outputs(self) -> Optional[List[Output]]: def get_ov_subgraph(self, input_nodes: List[Output]) -> List[Output]: input_nodes.extend(self.get_vocab_node_outputs()) - return factory.create("VocabDecoder", input_nodes, {}).outputs() + return _factory.create("VocabDecoder", input_nodes, {}).outputs() @dataclass class CharsToBytesStep(DecodingStep): def get_ov_subgraph(self, input_nodes: List[Output]) -> List[Output]: - return factory.create("CharsToBytes", input_nodes, {}).outputs() + return _factory.create("CharsToBytes", input_nodes, {}).outputs() @dataclass @@ -690,7 +690,7 @@ def get_ov_subgraph(self, input_nodes: List[Output]) -> List[Output]: *self.create_string_constant_node(self.replace_term).outputs(), ) ) - return factory.create("RegexNormalization", input_nodes).outputs() + return _factory.create("RegexNormalization", input_nodes).outputs() @classmethod def replace_sp_spaces(cls) -> "RegexDecodingStep": @@ -733,7 +733,7 @@ def get_encoder_ov_subgraph(self) -> Model: processing_outputs = [] for input_node in string_inputs: - input_node = factory.create("StringTensorUnpack", input_node.outputs()).outputs() + input_node = _factory.create("StringTensorUnpack", input_node.outputs()).outputs() for step in self.normalization_steps: input_node = step.get_ov_subgraph(input_node) input_node = self.add_ragged_dimension(input_node) @@ -783,7 +783,7 @@ def create_decoding_pipeline(self, input_nodes: List[Output]) -> List[Output]: pipeline_step = step.get_ov_subgraph(input_nodes) input_nodes = pipeline_step - return factory.create("StringTensorPack", input_nodes).outputs() + return _factory.create("StringTensorPack", input_nodes).outputs() def get_decoder_ov_subgraph(self) -> Model: input_node = op.Parameter(Type.i32, PartialShape(["?", "?"])) diff --git a/modules/custom_operations/user_ie_extensions/tokenizer/python/ov_tokenizer/utils.py b/modules/custom_operations/user_ie_extensions/src/tokenizer/python/ov_tokenizer/utils.py similarity index 100% rename from modules/custom_operations/user_ie_extensions/tokenizer/python/ov_tokenizer/utils.py rename to modules/custom_operations/user_ie_extensions/src/tokenizer/python/ov_tokenizer/utils.py 
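For context, the factory -> _factory renames in hf_parser.py and tokenizer_pipeline.py above all point to the new package-level singleton in ov_tokenizer/__init__.py: a single NodeFactory is created at import time with the compiled extension registered, and submodules import it instead of loading the library themselves. A minimal sketch of the pattern (the hard-coded library path is illustrative; the package resolves it from its bundled libs/ directory):

    from openvino.runtime.utils.node_factory import NodeFactory

    # One factory for the whole package, with the custom-ops extension registered,
    # so submodules can instantiate ops like StringTensorPack without extra setup.
    _factory = NodeFactory()
    _factory.add_extension("/path/to/libuser_ov_extensions.so")  # illustrative path

    # In a submodule, e.g. tokenizer_pipeline.py:
    # from . import _factory
    # outputs = _factory.create("StringTensorPack", input_nodes).outputs()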
diff --git a/modules/custom_operations/user_ie_extensions/tokenizer/python/tests/conftest.py b/modules/custom_operations/user_ie_extensions/src/tokenizer/python/tests/conftest.py similarity index 73% rename from modules/custom_operations/user_ie_extensions/tokenizer/python/tests/conftest.py rename to modules/custom_operations/user_ie_extensions/src/tokenizer/python/tests/conftest.py index 054388410..074efa64d 100644 --- a/modules/custom_operations/user_ie_extensions/tokenizer/python/tests/conftest.py +++ b/modules/custom_operations/user_ie_extensions/src/tokenizer/python/tests/conftest.py @@ -6,17 +6,17 @@ import pytest -def prebuild_extenson_path(): - ext_path = os.getenv("CUSTOM_OP_LIB") or os.getenv("OV_TOKENIZER_PREBUILD_EXTENSION_PATH") - if not ext_path: - raise EnvironmentError( - "No extension path found in the environment. " - "Export path to libuser_ov_extensions.so to CUSTOM_OP_LIB or OV_TOKENIZER_PREBUILD_EXTENSION_PATH variable." - ) - return ext_path +# def prebuild_extenson_path(): +# ext_path = os.getenv("CUSTOM_OP_LIB") or os.getenv("OV_TOKENIZER_PREBUILD_EXTENSION_PATH") +# if not ext_path: +# raise EnvironmentError( +# "No extension path found in the environment. " +# "Export path to libuser_ov_extensions.so to CUSTOM_OP_LIB or OV_TOKENIZER_PREBUILD_EXTENSION_PATH variable." +# ) +# return ext_path -os.environ["OV_TOKENIZER_PREBUILD_EXTENSION_PATH"] = prebuild_extenson_path() +# os.environ["OV_TOKENIZER_PREBUILD_EXTENSION_PATH"] = prebuild_extenson_path() PASS_RATES_FILE = Path(__file__).parent / "pass_rates.json" diff --git a/modules/custom_operations/user_ie_extensions/tokenizer/python/tests/pass_rates.json b/modules/custom_operations/user_ie_extensions/src/tokenizer/python/tests/pass_rates.json similarity index 100% rename from modules/custom_operations/user_ie_extensions/tokenizer/python/tests/pass_rates.json rename to modules/custom_operations/user_ie_extensions/src/tokenizer/python/tests/pass_rates.json diff --git a/modules/custom_operations/user_ie_extensions/src/tokenizer/python/tests/test.py b/modules/custom_operations/user_ie_extensions/src/tokenizer/python/tests/test.py new file mode 100644 index 000000000..34d0fb3af --- /dev/null +++ b/modules/custom_operations/user_ie_extensions/src/tokenizer/python/tests/test.py @@ -0,0 +1,27 @@ +import os +# import openvino +import ov_tokenizer + +from transformers import AutoTokenizer, AutoModelForSequenceClassification +from openvino import compile_model, convert_model +from ov_tokenizer import init_extension, convert_tokenizer, pack_strings, connect_models + +checkpoint = "mrm8488/bert-tiny-finetuned-sms-spam-detection" +hf_tokenizer = AutoTokenizer.from_pretrained(checkpoint) +hf_model = AutoModelForSequenceClassification.from_pretrained(checkpoint) + +text_input = ["Free money!!!"] +hf_input = hf_tokenizer(text_input, return_tensors="pt") +hf_output = hf_model(**hf_input) + +ov_tokenizer = convert_tokenizer(hf_tokenizer) +ov_model = convert_model(hf_model, example_input=hf_input.data) +combined_model = connect_models(ov_tokenizer, ov_model) +compiled_combined_model = compile_model(combined_model) + +openvino_output = compiled_combined_model(pack_strings(text_input)) + +print(f"OpenVINO logits: {openvino_output['logits']}") +# OpenVINO logits: [[ 1.2007061 -1.4698029]] +print(f"HuggingFace logits {hf_output.logits}") +# HuggingFace logits tensor([[ 1.2007, -1.4698]], grad_fn=<...>) \ No newline at end of file diff --git a/modules/custom_operations/user_ie_extensions/tokenizer/python/tests/tokenizers_test.py 
b/modules/custom_operations/user_ie_extensions/src/tokenizer/python/tests/tokenizers_test.py similarity index 100% rename from modules/custom_operations/user_ie_extensions/tokenizer/python/tests/tokenizers_test.py rename to modules/custom_operations/user_ie_extensions/src/tokenizer/python/tests/tokenizers_test.py diff --git a/modules/custom_operations/user_ie_extensions/tokenizer/ragged_tensor_pack.cpp b/modules/custom_operations/user_ie_extensions/src/tokenizer/ragged_tensor_pack.cpp similarity index 100% rename from modules/custom_operations/user_ie_extensions/tokenizer/ragged_tensor_pack.cpp rename to modules/custom_operations/user_ie_extensions/src/tokenizer/ragged_tensor_pack.cpp diff --git a/modules/custom_operations/user_ie_extensions/tokenizer/ragged_tensor_pack.hpp b/modules/custom_operations/user_ie_extensions/src/tokenizer/ragged_tensor_pack.hpp similarity index 100% rename from modules/custom_operations/user_ie_extensions/tokenizer/ragged_tensor_pack.hpp rename to modules/custom_operations/user_ie_extensions/src/tokenizer/ragged_tensor_pack.hpp diff --git a/modules/custom_operations/user_ie_extensions/tokenizer/ragged_to_dense.cpp b/modules/custom_operations/user_ie_extensions/src/tokenizer/ragged_to_dense.cpp similarity index 100% rename from modules/custom_operations/user_ie_extensions/tokenizer/ragged_to_dense.cpp rename to modules/custom_operations/user_ie_extensions/src/tokenizer/ragged_to_dense.cpp diff --git a/modules/custom_operations/user_ie_extensions/tokenizer/ragged_to_dense.hpp b/modules/custom_operations/user_ie_extensions/src/tokenizer/ragged_to_dense.hpp similarity index 100% rename from modules/custom_operations/user_ie_extensions/tokenizer/ragged_to_dense.hpp rename to modules/custom_operations/user_ie_extensions/src/tokenizer/ragged_to_dense.hpp diff --git a/modules/custom_operations/user_ie_extensions/tokenizer/regex_normalization.cpp b/modules/custom_operations/user_ie_extensions/src/tokenizer/regex_normalization.cpp similarity index 100% rename from modules/custom_operations/user_ie_extensions/tokenizer/regex_normalization.cpp rename to modules/custom_operations/user_ie_extensions/src/tokenizer/regex_normalization.cpp diff --git a/modules/custom_operations/user_ie_extensions/tokenizer/regex_normalization.hpp b/modules/custom_operations/user_ie_extensions/src/tokenizer/regex_normalization.hpp similarity index 100% rename from modules/custom_operations/user_ie_extensions/tokenizer/regex_normalization.hpp rename to modules/custom_operations/user_ie_extensions/src/tokenizer/regex_normalization.hpp diff --git a/modules/custom_operations/user_ie_extensions/tokenizer/regex_split.cpp b/modules/custom_operations/user_ie_extensions/src/tokenizer/regex_split.cpp similarity index 100% rename from modules/custom_operations/user_ie_extensions/tokenizer/regex_split.cpp rename to modules/custom_operations/user_ie_extensions/src/tokenizer/regex_split.cpp diff --git a/modules/custom_operations/user_ie_extensions/tokenizer/regex_split.hpp b/modules/custom_operations/user_ie_extensions/src/tokenizer/regex_split.hpp similarity index 100% rename from modules/custom_operations/user_ie_extensions/tokenizer/regex_split.hpp rename to modules/custom_operations/user_ie_extensions/src/tokenizer/regex_split.hpp diff --git a/modules/custom_operations/user_ie_extensions/tokenizer/sentence_piece.cpp b/modules/custom_operations/user_ie_extensions/src/tokenizer/sentence_piece.cpp similarity index 100% rename from modules/custom_operations/user_ie_extensions/tokenizer/sentence_piece.cpp 
rename to modules/custom_operations/user_ie_extensions/src/tokenizer/sentence_piece.cpp diff --git a/modules/custom_operations/user_ie_extensions/tokenizer/sentence_piece.hpp b/modules/custom_operations/user_ie_extensions/src/tokenizer/sentence_piece.hpp similarity index 100% rename from modules/custom_operations/user_ie_extensions/tokenizer/sentence_piece.hpp rename to modules/custom_operations/user_ie_extensions/src/tokenizer/sentence_piece.hpp diff --git a/modules/custom_operations/user_ie_extensions/tokenizer/string_tensor_pack.cpp b/modules/custom_operations/user_ie_extensions/src/tokenizer/string_tensor_pack.cpp similarity index 100% rename from modules/custom_operations/user_ie_extensions/tokenizer/string_tensor_pack.cpp rename to modules/custom_operations/user_ie_extensions/src/tokenizer/string_tensor_pack.cpp diff --git a/modules/custom_operations/user_ie_extensions/tokenizer/string_tensor_pack.hpp b/modules/custom_operations/user_ie_extensions/src/tokenizer/string_tensor_pack.hpp similarity index 100% rename from modules/custom_operations/user_ie_extensions/tokenizer/string_tensor_pack.hpp rename to modules/custom_operations/user_ie_extensions/src/tokenizer/string_tensor_pack.hpp diff --git a/modules/custom_operations/user_ie_extensions/tokenizer/string_tensor_unpack.cpp b/modules/custom_operations/user_ie_extensions/src/tokenizer/string_tensor_unpack.cpp similarity index 100% rename from modules/custom_operations/user_ie_extensions/tokenizer/string_tensor_unpack.cpp rename to modules/custom_operations/user_ie_extensions/src/tokenizer/string_tensor_unpack.cpp diff --git a/modules/custom_operations/user_ie_extensions/tokenizer/string_tensor_unpack.hpp b/modules/custom_operations/user_ie_extensions/src/tokenizer/string_tensor_unpack.hpp similarity index 100% rename from modules/custom_operations/user_ie_extensions/tokenizer/string_tensor_unpack.hpp rename to modules/custom_operations/user_ie_extensions/src/tokenizer/string_tensor_unpack.hpp diff --git a/modules/custom_operations/user_ie_extensions/tokenizer/tensorflow_translators.cpp b/modules/custom_operations/user_ie_extensions/src/tokenizer/tensorflow_translators.cpp similarity index 100% rename from modules/custom_operations/user_ie_extensions/tokenizer/tensorflow_translators.cpp rename to modules/custom_operations/user_ie_extensions/src/tokenizer/tensorflow_translators.cpp diff --git a/modules/custom_operations/user_ie_extensions/tokenizer/tensorflow_translators.hpp b/modules/custom_operations/user_ie_extensions/src/tokenizer/tensorflow_translators.hpp similarity index 100% rename from modules/custom_operations/user_ie_extensions/tokenizer/tensorflow_translators.hpp rename to modules/custom_operations/user_ie_extensions/src/tokenizer/tensorflow_translators.hpp diff --git a/modules/custom_operations/user_ie_extensions/tokenizer/tokenizer.hpp b/modules/custom_operations/user_ie_extensions/src/tokenizer/tokenizer.hpp similarity index 100% rename from modules/custom_operations/user_ie_extensions/tokenizer/tokenizer.hpp rename to modules/custom_operations/user_ie_extensions/src/tokenizer/tokenizer.hpp diff --git a/modules/custom_operations/user_ie_extensions/tokenizer/utils.cpp b/modules/custom_operations/user_ie_extensions/src/tokenizer/utils.cpp similarity index 100% rename from modules/custom_operations/user_ie_extensions/tokenizer/utils.cpp rename to modules/custom_operations/user_ie_extensions/src/tokenizer/utils.cpp diff --git a/modules/custom_operations/user_ie_extensions/tokenizer/utils.hpp 
b/modules/custom_operations/user_ie_extensions/src/tokenizer/utils.hpp similarity index 100% rename from modules/custom_operations/user_ie_extensions/tokenizer/utils.hpp rename to modules/custom_operations/user_ie_extensions/src/tokenizer/utils.hpp diff --git a/modules/custom_operations/user_ie_extensions/tokenizer/vocab_decoder.cpp b/modules/custom_operations/user_ie_extensions/src/tokenizer/vocab_decoder.cpp similarity index 100% rename from modules/custom_operations/user_ie_extensions/tokenizer/vocab_decoder.cpp rename to modules/custom_operations/user_ie_extensions/src/tokenizer/vocab_decoder.cpp diff --git a/modules/custom_operations/user_ie_extensions/tokenizer/vocab_decoder.hpp b/modules/custom_operations/user_ie_extensions/src/tokenizer/vocab_decoder.hpp similarity index 100% rename from modules/custom_operations/user_ie_extensions/tokenizer/vocab_decoder.hpp rename to modules/custom_operations/user_ie_extensions/src/tokenizer/vocab_decoder.hpp diff --git a/modules/custom_operations/user_ie_extensions/tokenizer/wordpiece_tokenizer.cpp b/modules/custom_operations/user_ie_extensions/src/tokenizer/wordpiece_tokenizer.cpp similarity index 100% rename from modules/custom_operations/user_ie_extensions/tokenizer/wordpiece_tokenizer.cpp rename to modules/custom_operations/user_ie_extensions/src/tokenizer/wordpiece_tokenizer.cpp diff --git a/modules/custom_operations/user_ie_extensions/tokenizer/wordpiece_tokenizer.hpp b/modules/custom_operations/user_ie_extensions/src/tokenizer/wordpiece_tokenizer.hpp similarity index 100% rename from modules/custom_operations/user_ie_extensions/tokenizer/wordpiece_tokenizer.hpp rename to modules/custom_operations/user_ie_extensions/src/tokenizer/wordpiece_tokenizer.hpp diff --git a/modules/custom_operations/user_ie_extensions/tokenizer/python/ov_tokenizer/__init__.py b/modules/custom_operations/user_ie_extensions/tokenizer/python/ov_tokenizer/__init__.py deleted file mode 100644 index ce757b861..000000000 --- a/modules/custom_operations/user_ie_extensions/tokenizer/python/ov_tokenizer/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (C) 2018-2023 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -from .convert_tokenizer import convert_tokenizer -from .node_factory import init_extension -from .str_pack import pack_strings, unpack_strings -from .utils import add_greedy_decoding, connect_models
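For context, the end-user flow this packaging change enables, distilled from setup.py and the new tests/test.py (a sketch: the pip invocation and extras name are assumptions based on the pyproject above):

    # Build and install from modules/custom_operations; scikit-build drives the
    # CMake build (-DCUSTOM_OPERATIONS=tokenizer) and bundles the extension library:
    #   pip install .[transformers]
    from openvino import compile_model
    from transformers import AutoTokenizer
    from ov_tokenizer import convert_tokenizer, pack_strings  # importing ov_tokenizer registers the extension

    hf_tokenizer = AutoTokenizer.from_pretrained("mrm8488/bert-tiny-finetuned-sms-spam-detection")
    tokenizer_model = convert_tokenizer(hf_tokenizer)
    compiled_tokenizer = compile_model(tokenizer_model)
    print(compiled_tokenizer(pack_strings(["Free money!!!"])))

With the extension bundled into the wheel, the CUSTOM_OP_LIB and OV_TOKENIZER_PREBUILD_EXTENSION_PATH environment variables from the old conftest.py are no longer required: the patched Core.__init__ in ov_tokenizer/__init__.py adds the extension to every Core it creates.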