From d102d39812ab8d6270787df8cc5b9d4336a7fe5d Mon Sep 17 00:00:00 2001
From: Ryad ZENINE
Date: Thu, 13 Feb 2020 14:40:17 +0100
Subject: [PATCH] Add support for png decoding on linux

Vendor libpng as a git submodule, build it on Linux and register a
torchvision::decode_png operator. The Python helpers decode_png and
read_png in torchvision/io/image.py wrap that operator.

---
 .circleci/config.yml                       |  5 +
 .circleci/config.yml.in                    |  5 +
 .gitmodules                                |  3 +
 .travis.yml                                |  1 +
 CMakeLists.txt                             | 19 +++-
 packaging/torchvision/meta.yaml            |  1 +
 setup.py                                   | 35 +++++++-
 test/test_image.py                         | 40 +++++++++
 third_party/libpng                         |  1 +
 torchvision/csrc/cpu/image/readpng_cpu.cpp | 92 ++++++++++++++++++++
 torchvision/csrc/cpu/image/readpng_cpu.h   |  6 +
 torchvision/csrc/image.h                   |  4 +
 torchvision/csrc/vision.cpp                |  6 +
 torchvision/io/image.py                    | 47 ++++++++++
 14 files changed, 260 insertions(+), 5 deletions(-)
 create mode 100644 .gitmodules
 create mode 100644 test/test_image.py
 create mode 160000 third_party/libpng
 create mode 100644 torchvision/csrc/cpu/image/readpng_cpu.cpp
 create mode 100644 torchvision/csrc/cpu/image/readpng_cpu.h
 create mode 100644 torchvision/csrc/image.h
 create mode 100644 torchvision/io/image.py

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 461bd6f9cb7..682e6291941 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -21,6 +21,9 @@ commands:
     description: "checkout merge branch"
     steps:
       - checkout
+      - run:
+          name: initialize submodules
+          command: git submodule update --init --recursive
 #     - run:
 #         name: Checkout merge branch
 #         command: |
@@ -83,6 +86,8 @@ jobs:
     resource_class: 2xlarge+
     steps:
       - checkout_merge
+      - run:
+          command: yum install -yq zlib-devel
       - run: packaging/build_wheel.sh
       - store_artifacts:
           path: dist
diff --git a/.circleci/config.yml.in b/.circleci/config.yml.in
index e3747134c6f..69e95748abe 100644
--- a/.circleci/config.yml.in
+++ b/.circleci/config.yml.in
@@ -21,6 +21,9 @@ commands:
     description: "checkout merge branch"
     steps:
       - checkout
+      - run:
+          name: initialize submodules
+          command: git submodule update --init --recursive
 #     - run:
 #         name: Checkout merge branch
 #         command: |
@@ -83,6 +86,8 @@ jobs:
     resource_class: 2xlarge+
     steps:
       - checkout_merge
+      - run:
+          command: yum install -yq zlib-devel
       - run: packaging/build_wheel.sh
       - store_artifacts:
           path: dist
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 00000000000..82602d36583
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "third_party/libpng"]
+	path = third_party/libpng
+	url = https://github.com/glennrp/libpng
diff --git a/.travis.yml b/.travis.yml
index 1b6ecb7a65b..0ad4f4b8e1a 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -26,6 +26,7 @@ matrix:
 
 before_install:
   - sudo apt-get update
+  - sudo apt-get install -y zlib1g-dev
   - wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh;
   - bash miniconda.sh -b -p $HOME/miniconda
   - export PATH="$HOME/miniconda/bin:$PATH"
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1f04d51131e..a4f7bfa2e8f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -10,6 +10,10 @@ if(WITH_CUDA)
   add_definitions(-D__CUDA_NO_HALF_OPERATORS__)
 endif()
 
+if(UNIX)
+  add_subdirectory("third_party/libpng")
+endif()
+
 find_package(Torch REQUIRED)
 find_package(pybind11 REQUIRED)
 
@@ -21,8 +25,19 @@ endif()
 file(GLOB MODELS_HEADERS torchvision/csrc/models/*.h)
 file(GLOB MODELS_SOURCES torchvision/csrc/models/*.h torchvision/csrc/models/*.cpp)
 
-add_library(${PROJECT_NAME} SHARED ${MODELS_SOURCES} ${OPERATOR_SOURCES})
-target_link_libraries(${PROJECT_NAME} PRIVATE ${TORCH_LIBRARIES} pybind11::pybind11)
+file(GLOB IMAGE_HEADERS torchvision/csrc/image.h)
+file(GLOB IMAGE_SOURCES torchvision/csrc/cpu/image/*.h torchvision/csrc/cpu/image/*.cpp)
+
+if(UNIX)
+  add_library(${PROJECT_NAME} SHARED ${MODELS_SOURCES} ${OPERATOR_SOURCES} ${IMAGE_SOURCES})
+  target_link_libraries(${PROJECT_NAME} PRIVATE ${TORCH_LIBRARIES} pybind11::pybind11 png)
+  target_include_directories(${PROJECT_NAME} PRIVATE
+    third_party/libpng ${CMAKE_CURRENT_BINARY_DIR}/third_party/libpng)
+else()
+  add_library(${PROJECT_NAME} SHARED ${MODELS_SOURCES} ${OPERATOR_SOURCES})
+  target_link_libraries(${PROJECT_NAME} PRIVATE ${TORCH_LIBRARIES} pybind11::pybind11)
+endif()
+
 set_target_properties(${PROJECT_NAME} PROPERTIES EXPORT_NAME TorchVision)
 
 target_include_directories(${PROJECT_NAME} INTERFACE
diff --git a/packaging/torchvision/meta.yaml b/packaging/torchvision/meta.yaml
index 1bc199e437b..ca15dbd66a6 100644
--- a/packaging/torchvision/meta.yaml
+++ b/packaging/torchvision/meta.yaml
@@ -8,6 +8,7 @@ source:
 requirements:
   build:
     - {{ compiler('c') }} # [win]
+    - zlib
 
   host:
     - python
diff --git a/setup.py b/setup.py
index 71d420573ed..f2aa02db838 100644
--- a/setup.py
+++ b/setup.py
@@ -83,9 +83,21 @@ def get_extensions():
 
     main_file = glob.glob(os.path.join(extensions_dir, '*.cpp'))
     source_cpu = glob.glob(os.path.join(extensions_dir, 'cpu', '*.cpp'))
+    source_image_cpu = glob.glob(os.path.join(extensions_dir, 'cpu', 'image', '*.cpp'))
     source_cuda = glob.glob(os.path.join(extensions_dir, 'cuda', '*.cu'))
 
     sources = main_file + source_cpu
+
+    libraries = []
+    extra_objects = []
+    extra_compile_args = {}
+    third_party_search_directories = []
+
+    if sys.platform.startswith('linux'):
+        sources = sources + source_image_cpu
+        libraries.append('png')
+        third_party_search_directories.append(os.path.join(cwd, "third_party/libpng"))
+
     extension = CppExtension
 
     compile_cpp_tests = os.getenv('WITH_CPP_MODELS_TEST', '0') == '1'
@@ -102,7 +114,6 @@ def get_extensions():
 
     define_macros = []
 
-    extra_compile_args = {}
     if (torch.cuda.is_available() and CUDA_HOME is not None) or os.getenv('FORCE_CUDA', '0') == '1':
         extension = CUDAExtension
         sources += source_cuda
@@ -142,9 +153,12 @@ def get_extensions():
         extension(
             'torchvision._C',
             sources,
-            include_dirs=include_dirs,
+            libraries=libraries,
+            library_dirs=third_party_search_directories,
+            include_dirs=include_dirs + third_party_search_directories,
             define_macros=define_macros,
             extra_compile_args=extra_compile_args,
+            extra_objects=extra_objects,
         )
     ]
     if compile_cpp_tests:
@@ -197,6 +211,21 @@ def run(self):
         distutils.command.clean.clean.run(self)
 
 
+def build_deps():
+    """Build the bundled libpng with CMake (Linux only); raise if the build fails."""
+    this_dir = os.path.dirname(os.path.abspath(__file__))
+    if sys.platform.startswith('linux'):
+        import subprocess
+        libpng_dir = os.path.join(this_dir, 'third_party', 'libpng')
+        subprocess.check_call(['cmake', '.'], cwd=libpng_dir)
+        subprocess.check_call(['cmake', '--build', '.'], cwd=libpng_dir)
+
+
+def build_ext_with_dependencies(self):
+    build_deps()
+    return BuildExtension.with_options(no_python_abi_suffix=True)(self)
+
+
 setup(
     # Metadata
     name=package_name,
@@ -218,7 +247,7 @@
     },
     ext_modules=get_extensions(),
     cmdclass={
-        'build_ext': BuildExtension.with_options(no_python_abi_suffix=True),
+        'build_ext': build_ext_with_dependencies,
         'clean': clean,
     }
 )
diff --git a/test/test_image.py b/test/test_image.py
new file mode 100644
index 00000000000..070b5857076
--- /dev/null
+++ b/test/test_image.py
@@ -0,0 +1,40 @@
+import os
+import unittest
+import sys
+
+import torch
+from PIL import Image
+if sys.platform.startswith('linux'):
+    from torchvision.io.image import read_png, decode_png
+import numpy as np
+
+IMAGE_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", "fakedata", "imagefolder")
+
+
+def get_images(directory, img_ext):
+    assert os.path.isdir(directory)
+    for root, _dirs, files in os.walk(directory):
+        for fl in files:
+            _, ext = os.path.splitext(fl)
+            if ext == img_ext:
+                yield os.path.join(root, fl)
+
+
+class ImageTester(unittest.TestCase):
+    @unittest.skipUnless(sys.platform.startswith("linux"), "Support only available on linux for now.")
+    def test_read_png(self):
+        for img_path in get_images(IMAGE_DIR, ".png"):
+            img_pil = torch.from_numpy(np.array(Image.open(img_path)))
+            img_lpng = read_png(img_path)
+            self.assertTrue(torch.equal(img_lpng, img_pil))
+
+    @unittest.skipUnless(sys.platform.startswith("linux"), "Support only available on linux for now.")
+    def test_decode_png(self):
+        for img_path in get_images(IMAGE_DIR, ".png"):
+            img_pil = torch.from_numpy(np.array(Image.open(img_path)))
+            size = os.path.getsize(img_path)
+            img_lpng = decode_png(torch.from_file(img_path, dtype=torch.uint8, size=size))
+            self.assertTrue(torch.equal(img_lpng, img_pil))
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/third_party/libpng b/third_party/libpng
new file mode 160000
index 00000000000..301f7a14295
--- /dev/null
+++ b/third_party/libpng
@@ -0,0 +1 @@
+Subproject commit 301f7a14295a3bdfaf406dbb5004d0784dc137ea
diff --git a/torchvision/csrc/cpu/image/readpng_cpu.cpp b/torchvision/csrc/cpu/image/readpng_cpu.cpp
new file mode 100644
index 00000000000..c6581f168b1
--- /dev/null
+++ b/torchvision/csrc/cpu/image/readpng_cpu.cpp
@@ -0,0 +1,92 @@
+#include "readpng_cpu.h"
+
+#include <png.h>
+#include <setjmp.h>
+#include <string>
+
+torch::Tensor decodePNG(const torch::Tensor& data) {
+  auto png_ptr =
+      png_create_read_struct(PNG_LIBPNG_VER_STRING, nullptr, nullptr, nullptr);
+  TORCH_CHECK(png_ptr, "libpng read structure allocation failed!")
+  auto info_ptr = png_create_info_struct(png_ptr);
+  if (!info_ptr) {
+    png_destroy_read_struct(&png_ptr, nullptr, nullptr);
+    // Seems redundant with the if statement. done here to avoid leaking memory.
+    TORCH_CHECK(info_ptr, "libpng info structure allocation failed!")
+  }
+
+  auto datap = data.accessor<unsigned char, 1>().data();
+
+  if (setjmp(png_jmpbuf(png_ptr)) != 0) {
+    png_destroy_read_struct(&png_ptr, &info_ptr, nullptr);
+    TORCH_CHECK(false, "Internal error.");
+  }
+  // The signature check reads 8 bytes, so shorter inputs are rejected first.
+  auto is_png = data.numel() >= 8 && !png_sig_cmp(datap, 0, 8);
+  if (!is_png) {
+    png_destroy_read_struct(&png_ptr, &info_ptr, nullptr);
+    TORCH_CHECK(is_png, "Content is not png!")
+  }
+
+  struct Reader {
+    png_const_bytep ptr;
+    png_size_t count; // bytes left in the input buffer
+  } reader;
+  reader.ptr = png_const_bytep(datap) + 8;
+  reader.count = png_size_t(data.numel()) - 8;
+
+  auto read_callback =
+      [](png_structp png_ptr, png_bytep output, png_size_t bytes) {
+        auto reader = static_cast<Reader*>(png_get_io_ptr(png_ptr));
+        // Never read past the end of the input; png_error jumps to the setjmp
+        // handler above, which releases the libpng structures.
+        if (reader->count < bytes) {
+          png_error(png_ptr, "Out of bound read in decode_png.");
+        }
+        std::copy(reader->ptr, reader->ptr + bytes, output);
+        reader->ptr += bytes;
+        reader->count -= bytes;
+      };
+  png_set_sig_bytes(png_ptr, 8);
+  png_set_read_fn(png_ptr, &reader, read_callback);
+  png_read_info(png_ptr, info_ptr);
+
+  png_uint_32 width, height;
+  int bit_depth, color_type;
+  auto retval = png_get_IHDR(
+      png_ptr,
+      info_ptr,
+      &width,
+      &height,
+      &bit_depth,
+      &color_type,
+      nullptr,
+      nullptr,
+      nullptr);
+
+  if (retval != 1) {
+    png_destroy_read_struct(&png_ptr, &info_ptr, nullptr);
+    TORCH_CHECK(retval == 1, "Could not read image metadata from content.")
+  }
+  if (color_type != PNG_COLOR_TYPE_RGB) {
+    png_destroy_read_struct(&png_ptr, &info_ptr, nullptr);
+    TORCH_CHECK(
+        color_type == PNG_COLOR_TYPE_RGB, "Non RGB images are not supported.")
+  }
+  // The output tensor holds 3 bytes per pixel; wider rows would overflow it.
+  if (bit_depth != 8) {
+    png_destroy_read_struct(&png_ptr, &info_ptr, nullptr);
+    TORCH_CHECK(bit_depth == 8, "Only 8 bit images are supported.")
+  }
+
+  auto tensor =
+      torch::empty({int64_t(height), int64_t(width), int64_t(3)}, torch::kU8);
+  auto ptr = tensor.accessor<uint8_t, 3>().data();
+  auto bytes = png_get_rowbytes(png_ptr, info_ptr);
+  for (decltype(height) i = 0; i < height; ++i) {
+    png_read_row(png_ptr, ptr, nullptr);
+    ptr += bytes;
+  }
+  png_destroy_read_struct(&png_ptr, &info_ptr, nullptr);
+  return tensor;
+}
diff --git a/torchvision/csrc/cpu/image/readpng_cpu.h b/torchvision/csrc/cpu/image/readpng_cpu.h
new file mode 100644
index 00000000000..d2151a43aa9
--- /dev/null
+++ b/torchvision/csrc/cpu/image/readpng_cpu.h
@@ -0,0 +1,6 @@
+#pragma once
+
+#include <torch/torch.h>
+#include <string>
+
+torch::Tensor decodePNG(const torch::Tensor& data);
diff --git a/torchvision/csrc/image.h b/torchvision/csrc/image.h
new file mode 100644
index 00000000000..bdb08113a8d
--- /dev/null
+++ b/torchvision/csrc/image.h
@@ -0,0 +1,4 @@
+#pragma once
+
+#include "cpu/image/readpng_cpu.h"
+
diff --git a/torchvision/csrc/vision.cpp b/torchvision/csrc/vision.cpp
index 8d8699ecc26..04992705cc3 100644
--- a/torchvision/csrc/vision.cpp
+++ b/torchvision/csrc/vision.cpp
@@ -11,6 +11,9 @@
 #include "ROIAlign.h"
 #include "ROIPool.h"
 #include "empty_tensor_op.h"
+#ifdef __linux__
+#include "image.h"
+#endif
 #include "nms.h"
 
 // If we are in a Windows environment, we need to define
@@ -49,4 +52,7 @@ static auto registry =
         .op("torchvision::ps_roi_align", &ps_roi_align)
         .op("torchvision::ps_roi_pool", &ps_roi_pool)
         .op("torchvision::deform_conv2d", &deform_conv2d)
+#ifdef __linux__
+        .op("torchvision::decode_png", &decodePNG)
+#endif
         .op("torchvision::_cuda_version", &_cuda_version);
diff --git a/torchvision/io/image.py b/torchvision/io/image.py
new file mode 100644
index 00000000000..bbf7470b097
--- /dev/null
+++ b/torchvision/io/image.py
@@ -0,0 +1,47 @@
+import torch
+from torch import nn, Tensor
+import os
+
+
+def decode_png(input):
+    # type: (Tensor) -> Tensor
+    """
+    Decodes a PNG image into a 3 dimensional RGB Tensor.
+    The values of the output tensor are uint8 between 0 and 255.
+
+    Arguments:
+        input (Tensor[1]): a one dimensional uint8 tensor containing
+            the raw bytes of the PNG image.
+
+    Returns:
+        output (Tensor[image_height, image_width, 3])
+    """
+    if not isinstance(input, torch.Tensor) or input.dim() != 1 or input.numel() == 0:
+        raise ValueError("Expected a non empty 1-dimensional tensor.")
+
+    if input.dtype != torch.uint8:
+        raise ValueError("Expected a torch.uint8 tensor.")
+    output = torch.ops.torchvision.decode_png(input)
+    return output
+
+
+def read_png(path):
+    # type: (str) -> Tensor
+    """
+    Reads a PNG image into a 3 dimensional RGB Tensor.
+    The values of the output tensor are uint8 between 0 and 255.
+
+    Arguments:
+        path (str): path of the PNG image.
+
+    Returns:
+        output (Tensor[image_height, image_width, 3])
+    """
+    if not os.path.isfile(path):
+        raise ValueError("Expected a valid file path.")
+
+    size = os.path.getsize(path)
+    if size == 0:
+        raise ValueError("Expected a non empty file.")
+    data = torch.from_file(path, dtype=torch.uint8, size=size)
+    return decode_png(data)