diff --git a/CMakeLists.txt b/CMakeLists.txt index f519fe98a6..387679292f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -43,6 +43,7 @@ if(${TC_BUILD_PYTHON}) include(UseCython) add_definitions(-DTC_HAS_PYTHON) + add_definitions(-DTC_BUILD_VISUALIZATION_CLIENT) else() message("Skipping python libraries.") endif() @@ -183,7 +184,7 @@ message(WARNING "OpenMP Libraries were not found") #**************************************************************************/ if (CLANG) set(CPP11_FLAGS "-std=c++11 -stdlib=libc++ -Wno-deprecated-register -Wno-enum-compare -Wno-conversion-null -Wno-constant-logical-operand -Wno-parentheses-equality -ftemplate-depth=900" CACHE STRING "C++11 enabling flags") -set(WERROR_FLAGS "-Werror -Wno-error=tautological-undefined-compare -Wno-error=reorder -Wno-error=exceptions -Wno-error=switch -Wno-error=sometimes-uninitialized -Wno-error=unused-lambda-capture -Wno-error=missing-braces -Wno-error=absolute-value -Wno-error=potentially-evaluated-expression -Wno-error=null-arithmetic -Wno-error=format -Wno-error=pessimizing-move -Wno-error=comment -Wno-error=main -Wno-error=constant-conversion -Wno-error=deprecated-declarations -Wno-error=return-type -Wno-error=inconsistent-missing-override -Wno-error=overloaded-virtual -Wno-error=unused-private-field -Wno-error=unused-variable -Wno-error=unused-local-typedef") +set(WERROR_FLAGS "-Werror -Wno-error=tautological-undefined-compare -Wno-error=reorder -Wno-error=exceptions -Wno-error=switch -Wno-error=sometimes-uninitialized -Wno-error=unused-lambda-capture -Wno-error=missing-braces -Wno-error=absolute-value -Wno-error=potentially-evaluated-expression -Wno-error=null-arithmetic -Wno-error=format -Wno-error=pessimizing-move -Wno-error=comment -Wno-error=main -Wno-error=constant-conversion -Wno-error=deprecated-declarations -Wno-error=return-type -Wno-error=inconsistent-missing-override -Wno-error=overloaded-virtual -Wno-error=unused-private-field -Wno-error=unused-variable -Wno-error=unused-local-typedef -Wno-error=unguarded-availability-new") else() set(CPP11_FLAGS "-std=c++11 -Wno-enum-compare -Wno-conversion-null -ftemplate-depth=900" CACHE STRING "C++11 enabling flags") set(WERROR_FLAGS "") @@ -411,21 +412,6 @@ include(copy_file) include(CMakeParseArguments) include(eval) -add_custom_target(external_dependencies) - -include(ExternalProject) -file(GLOB packages "${DEPS_CMAKE}/ExternalProject*.cmake") -foreach(package ${packages}) - message(STATUS "We found local package: ${package}") - get_filename_component(packagename "${package}" NAME_WE) - #package is of the form ExternalProjectXXX" - include(${package}) - STRING(SUBSTRING "${packagename}" 15 -1 depname) - message(STATUS "We found local package definition: ${depname}") - string(TOLOWER ${depname} depname) - set(package_${depname} requires_${depname} CACHE STRING "Package map") - add_dependencies(external_dependencies ex_${depname}) -endforeach() # This is an internal function and should not be used # Usage: @@ -738,7 +724,6 @@ function (make_boost_test NAME) endfunction() -# Core ML is only present on macOS 10.13 or higher if(APPLE) EXEC_PROGRAM(xcrun ARGS --show-sdk-version OUTPUT_VARIABLE mac_version RETURN_VALUE _xcrun_ret) @@ -878,6 +863,21 @@ endmacro() include(SharedLibraryFromStatic) include(MergeStaticLibraries) +add_custom_target(external_dependencies) + +include(ExternalProject) +file(GLOB packages "${DEPS_CMAKE}/ExternalProject*.cmake") +foreach(package ${packages}) + message(STATUS "We found local package: ${package}") + get_filename_component(packagename 
"${package}" NAME_WE) + #package is of the form ExternalProjectXXX" + include(${package}) + STRING(SUBSTRING "${packagename}" 15 -1 depname) + message(STATUS "We found local package definition: ${depname}") + string(TOLOWER ${depname} depname) + set(package_${depname} requires_${depname} CACHE STRING "Package map") + add_dependencies(external_dependencies ex_${depname}) +endforeach() include_directories(src) include_directories(SYSTEM src/external) diff --git a/build_capi.sh b/build_capi.sh index 1f36af4d23..7d1d6be02d 100755 --- a/build_capi.sh +++ b/build_capi.sh @@ -141,7 +141,6 @@ function build_capi { echo "Stripping local and debug symbols." strip -S -x ${install_dir}/lib*.* || echo "Non-fatal error stripping symbols." fi - } function build_capi_framework { @@ -149,7 +148,7 @@ function build_capi_framework { echo "Building C-API as macOS/iOS Framework" echo - run_configure --with-capi-framework ${ios_flag} --no-python --no-visualization -D TC_CAPI_FRAMEWORK_PATH=\"${framework_path}\" || exit 1 + run_configure --with-capi-framework ${ios_flag} --no-python --no-visualization -D TC_CAPI_FRAMEWORK_PATH=\"${framework_path}\" --release-opt-for-size || exit 1 mkdir -p ${target_dir} cd ${build_dir}/src/capi || exit 1 make -j ${jobs} || exit 1 diff --git a/build_python_wheel.sh b/build_python_wheel.sh index 82f528d931..c78d84ec00 100755 --- a/build_python_wheel.sh +++ b/build_python_wheel.sh @@ -110,14 +110,5 @@ rm -rf ${target_dir}/python mkdir -p ${target_dir}/python bash scripts/make_wheel.sh --skip_test --skip_cpp_test --build_number="$build_number" --num_procs=${jobs} --${build_mode} --target-dir="${install_dir}" -pushd ${build_mode}/src - -if [[ $apple -eq 1 ]]; then - find . -type f -name '*.dylib' -o -name '*.so' | xargs strip -x - -else - find . -type f -name '*.so' | xargs strip -s -fi - -find . -type f -name '*.dylib' -o -name '*.so' | xargs tar cvzf ${install_dir}/shared_objects.tar.gz diff --git a/gitlab_scripts/use_ccache.sh b/gitlab_scripts/use_ccache.sh index 7890079117..6cb4238738 100755 --- a/gitlab_scripts/use_ccache.sh +++ b/gitlab_scripts/use_ccache.sh @@ -1,4 +1,2 @@ -export CCACHE_COMPILERCHECK=content -export CCACHE_DIR=/var/ccache ccache -M 100.0G ccache -s diff --git a/scenario-tests/additional_requirements.txt b/scenario-tests/additional_requirements.txt index 31cb4c9c25..06f86460ce 100644 --- a/scenario-tests/additional_requirements.txt +++ b/scenario-tests/additional_requirements.txt @@ -1,6 +1,5 @@ beautifulsoup4 certifi==2015.04.28 -gensim==0.12.2 nltk==3.2 pyscreenshot==0.4 python-swiftclient diff --git a/scripts/make_wheel.sh b/scripts/make_wheel.sh index 705306a74a..7f05d36e10 100755 --- a/scripts/make_wheel.sh +++ b/scripts/make_wheel.sh @@ -272,11 +272,13 @@ package_wheel() { temp=`echo $WHEEL_PATH | perl -ne 'print m/(^.*-).*$/'` temp=${temp/-cpdarwin-/-cp35m-} - platform_tag="macosx_10_13_intel.macosx_10_13_x86_64" - mac_version=`sw_vers -productVersion` - if [[ $mac_version =~ ^10\.12(.\d+)? 
]]; then - platform_tag="macosx_10_12_intel.macosx_10_12_x86_64" - fi + platform_tag="macosx_10_12_intel.macosx_10_12_x86_64.macosx_10_13_intel.macosx_10_13_x86_64.macosx_10_14_intel.macosx_10_14_x86_64" + # sdk_version=`xcrun --show-sdk-version` + # if [[ $sdk_version =~ ^10\.13 ]]; then + # platform_tag="macosx_10_13_intel.macosx_10_12_x86_64" + # elif [[ $sdk_version =~ ^10\.12 ]]; then + # platform_tag="macosx_10_12_intel.macosx_10_12_x86_64" + # fi NEW_WHEEL_PATH=${temp}${platform_tag}".whl" mv ${WHEEL_PATH} ${NEW_WHEEL_PATH} diff --git a/scripts/requirements.txt b/scripts/requirements.txt index 474d36daf8..b15b9a6395 100644 --- a/scripts/requirements.txt +++ b/scripts/requirements.txt @@ -1,4 +1,4 @@ -coremltools==0.8 +coremltools==2.0b1 scipy==0.19.1 numpy==1.11.1 cython==0.24 @@ -16,4 +16,4 @@ scikit-learn==0.17.1 six==1.10.0 statsmodels==0.8.0 wheel==0.29.0 -mxnet==0.11 +mxnet==1.1.0 diff --git a/src/fileio/fileio_constants.hpp b/src/fileio/fileio_constants.hpp index 6c6691a779..683eacd982 100644 --- a/src/fileio/fileio_constants.hpp +++ b/src/fileio/fileio_constants.hpp @@ -85,6 +85,12 @@ extern size_t FILEIO_WRITER_BUFFER_SIZE; */ extern std::string S3_ENDPOINT; +/** + * \ingroup fileio + * The number of GPUs. + */ +extern int64_t NUM_GPUS; + /** * \ingroup fileio * Gets the alternative ssl certificate file and directory. diff --git a/src/fileio/fs_utils.cpp b/src/fileio/fs_utils.cpp index cccdf429ab..720fb22b1c 100644 --- a/src/fileio/fs_utils.cpp +++ b/src/fileio/fs_utils.cpp @@ -155,14 +155,10 @@ EXPORT file_status get_file_status(const std::string& path) { return file_status::MISSING; } } else if (boost::starts_with(path, "s3://")) { -#ifdef TC_BUILD_CAPI_IOS - log_and_throw("s3:// URLs not supported."); -#else std::pair ret = is_directory(path); if (ret.first == false) return file_status::MISSING; else if (ret.second == false) return file_status::REGULAR_FILE; else if (ret.second == true) return file_status::DIRECTORY; -#endif } else if (is_web_protocol(get_protocol(path))) { return file_status::REGULAR_FILE; // some other web protocol? @@ -208,9 +204,6 @@ get_directory_listing(const std::string& path) { // failure for some reason. return with nothing } } else if (boost::starts_with(path, "s3://")) { -#ifdef TC_BUILD_CAPI_IOS - log_and_throw("s3:// URLs not supported."); -#else list_objects_response response = list_directory(path); for (auto dir: response.directories) { ret.push_back({dir, file_status::DIRECTORY}); @@ -253,9 +246,6 @@ EXPORT bool create_directory(const std::string& path) { #ifdef TC_ENABLE_REMOTEFS } else if(boost::starts_with(path, "hdfs://")) { // hdfs -#ifdef TC_BUILD_CAPI_IOS - log_and_throw("hdfs:// URLs not supported."); -#else std::string host, port, hdfspath; std::tie(host, port, hdfspath) = parse_hdfs_url(path); try { @@ -342,9 +332,6 @@ bool delete_path_impl(const std::string& path, return false; } } else if (boost::starts_with(path, "s3://")) { -#ifdef TC_BUILD_CAPI_IOS - log_and_throw("s3:// URLs not supported."); -#else return delete_object(path).empty(); #endif } else { @@ -373,9 +360,6 @@ EXPORT bool delete_path_recursive(const std::string& path) { #ifdef TC_ENABLE_REMOTEFS } else if(boost::starts_with(path, "hdfs://")) { // hdfs -#ifdef TC_BUILD_CAPI_IOS - log_and_throw("hdfs:// URLs not supported."); -#else std::string host, port, hdfspath; std::tie(host, port, hdfspath) = parse_hdfs_url(path); try { @@ -387,11 +371,7 @@ EXPORT bool delete_path_recursive(const std::string& path) { // failure for some reason. 
return with nothing return false; } -#endif } else if(boost::starts_with(path, "s3://")) { -#ifdef TC_BUILD_CAPI_IOS - log_and_throw("s3:// URLs not supported."); -#else return delete_prefix(path).empty(); #endif } else { @@ -683,9 +663,6 @@ bool change_file_mode(const std::string path, short mode) { } if(boost::starts_with(path, "hdfs://")) { -#ifdef TC_BUILD_CAPI_IOS - log_and_throw("hdfs:// URLs not supported."); -#else #ifdef HAS_HADOOP // hdfs std::string host, port, hdfspath; @@ -701,7 +678,6 @@ bool change_file_mode(const std::string path, short mode) { } #else return false; -#endif #endif } else if (boost::starts_with(path, fileio::get_cache_prefix())) { // this is a cache file. There is no filesystem. diff --git a/src/fileio/union_fstream.cpp b/src/fileio/union_fstream.cpp index 6ac0b84e78..1c06ae73ce 100644 --- a/src/fileio/union_fstream.cpp +++ b/src/fileio/union_fstream.cpp @@ -52,9 +52,6 @@ union_fstream::union_fstream(std::string url, #ifdef TC_ENABLE_REMOTEFS } else if(boost::starts_with(url, "hdfs://")) { // HDFS file type -#ifdef TC_BUILD_CAPI_IOS - log_and_throw("hdfs:// URLs not supported."); -#else type = HDFS; std::string host, port, path; std::tie(host, port, path) = fileio::parse_hdfs_url(url); @@ -78,9 +75,6 @@ union_fstream::union_fstream(std::string url, } else if (boost::starts_with(url, "s3://")) { // the S3 file type currently works by download/uploading a local file // i.e. the s3_stream simply remaps a local file stream -#ifdef TC_BUILD_CAPI_IOS - log_and_throw_io_failure("Not implemented: compiled without support for s3:// URLs."); -#else type = STD; if (is_output_stream) { output_stream = std::make_shared(url, true); diff --git a/src/serialization/oarchive.hpp b/src/serialization/oarchive.hpp index 01168fe658..0cbc4c09d4 100644 --- a/src/serialization/oarchive.hpp +++ b/src/serialization/oarchive.hpp @@ -128,7 +128,7 @@ namespace turi { inline void direct_assign(const T& t) { if (out == NULL) { expand_buf(sizeof(T)); - (*reinterpret_cast(buf + off)) = t; + std::memcpy(buf + off, &t, sizeof(T)); off += sizeof(T); } else { diff --git a/src/serialization/vector.hpp b/src/serialization/vector.hpp index 150798b6e0..8b68126e1b 100644 --- a/src/serialization/vector.hpp +++ b/src/serialization/vector.hpp @@ -56,7 +56,7 @@ namespace turi { struct vector_serialize_impl { static void exec(OutArcType& oarc, const std::vector& vec) { oarc << size_t(vec.size()); - serialize(oarc, &(vec[0]),sizeof(ValueType)*vec.size()); + serialize(oarc, vec.data(),sizeof(ValueType)*vec.size()); } }; @@ -80,7 +80,7 @@ namespace turi { size_t len; iarc >> len; vec.clear(); vec.resize(len); - deserialize(iarc, &(vec[0]), sizeof(ValueType)*vec.size()); + deserialize(iarc, vec.data(), sizeof(ValueType)*vec.size()); } }; diff --git a/src/toolkits/image_deep_feature_extractor/image_deep_feature_extractor_toolkit.cpp b/src/toolkits/image_deep_feature_extractor/image_deep_feature_extractor_toolkit.cpp index 1eb03062cb..db0c74f88b 100644 --- a/src/toolkits/image_deep_feature_extractor/image_deep_feature_extractor_toolkit.cpp +++ b/src/toolkits/image_deep_feature_extractor/image_deep_feature_extractor_toolkit.cpp @@ -25,8 +25,8 @@ void image_deep_feature_extractor_toolkit::init_options(const std::mapextract_features(data[column_name]); + bool verbose, size_t batch_size) const { + return m_feature_extractor->extract_features(data[column_name], verbose, batch_size); } } // image_deep_feature_extractor diff --git a/src/toolkits/image_deep_feature_extractor/image_deep_feature_extractor_toolkit.hpp 
b/src/toolkits/image_deep_feature_extractor/image_deep_feature_extractor_toolkit.hpp index 2ae69cca52..73a5201b20 100644 --- a/src/toolkits/image_deep_feature_extractor/image_deep_feature_extractor_toolkit.hpp +++ b/src/toolkits/image_deep_feature_extractor/image_deep_feature_extractor_toolkit.hpp @@ -25,7 +25,7 @@ class EXPORT image_deep_feature_extractor_toolkit : public ml_model_base { void init_options(const std::map& options); - gl_sarray extract_features(gl_sframe data, const std::string& column_name, bool verbose) const; + gl_sarray extract_features(gl_sframe data, const std::string& column_name, bool verbose, size_t batch_size) const; inline size_t get_version() const { return -1; } @@ -41,7 +41,7 @@ class EXPORT image_deep_feature_extractor_toolkit : public ml_model_base { REGISTER_CLASS_MEMBER_FUNCTION(image_deep_feature_extractor_toolkit::init_options, "options"); - REGISTER_CLASS_MEMBER_FUNCTION(image_deep_feature_extractor_toolkit::extract_features, "data", "column_name", "verbose"); + REGISTER_CLASS_MEMBER_FUNCTION(image_deep_feature_extractor_toolkit::extract_features, "data", "column_name", "verbose", "batch_size"); END_CLASS_MEMBER_REGISTRATION diff --git a/src/toolkits/image_deep_feature_extractor/image_feature_extractor.hpp b/src/toolkits/image_deep_feature_extractor/image_feature_extractor.hpp index 1fb1a2c2bd..4447f8f5eb 100644 --- a/src/toolkits/image_deep_feature_extractor/image_feature_extractor.hpp +++ b/src/toolkits/image_deep_feature_extractor/image_feature_extractor.hpp @@ -30,7 +30,7 @@ class image_feature_extractor { // free to perform this computation in a more optimized fashion. The input // SArray may also contain flex_string values, in which case each string is // interpreted as a URL from which the image can be loaded. - virtual gl_sarray extract_features(gl_sarray images) const = 0; + virtual gl_sarray extract_features(gl_sarray images, bool verbose, size_t batch_size) const = 0; }; } // image_deep_feature_extractor diff --git a/src/toolkits/image_deep_feature_extractor/mlmodel_image_feature_extractor.hpp b/src/toolkits/image_deep_feature_extractor/mlmodel_image_feature_extractor.hpp index 891433a37b..d2a7e4922b 100644 --- a/src/toolkits/image_deep_feature_extractor/mlmodel_image_feature_extractor.hpp +++ b/src/toolkits/image_deep_feature_extractor/mlmodel_image_feature_extractor.hpp @@ -27,7 +27,7 @@ class mlmodel_image_feature_extractor: public image_feature_extractor { // image_feature_extractor interface const CoreML::Specification::Model& coreml_spec() const override; - gl_sarray extract_features(gl_sarray images) const override; + gl_sarray extract_features(gl_sarray images, bool verbose, size_t batch_size) const override; private: // Use PIMPL pattern to hide Objective C from this C++ header. 
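Reviewer note: the header changes above thread `verbose` and `batch_size` through the whole extractor interface, and the REGISTER_CLASS_MEMBER_FUNCTION call exposes them to Python under the argument names "data", "column_name", "verbose", "batch_size". A rough sketch of how the registered extension might be driven directly from Python, assuming the usual extension-registry calling convention; the model name, download path, image folder, and column name here are illustrative only and not part of this change:

    import turicreate as tc
    from turicreate import extensions

    # Illustrative input; any local folder of images works.
    sf = tc.image_analysis.load_images('./images')

    # Instantiate the registered C++ toolkit class and configure it.
    extractor = extensions.image_deep_feature_extractor()
    extractor.init_options({'model_name': 'resnet-50',
                            'download_path': '/tmp/turi_models'})  # assumed cache dir

    # The two new trailing arguments correspond to "verbose" and "batch_size".
    features = extractor.extract_features(sf, 'image', True, 64)
    print(features[0])  # one deep-feature vector per input image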
diff --git a/src/toolkits/image_deep_feature_extractor/mlmodel_image_feature_extractor.mm b/src/toolkits/image_deep_feature_extractor/mlmodel_image_feature_extractor.mm index 9a24fc4bab..f81c72f2e0 100644 --- a/src/toolkits/image_deep_feature_extractor/mlmodel_image_feature_extractor.mm +++ b/src/toolkits/image_deep_feature_extractor/mlmodel_image_feature_extractor.mm @@ -6,13 +6,17 @@ #include #include +#include #include #include #include +#include +#include #include #include #import +#include #include @@ -99,6 +103,7 @@ bool has_feature_layer_output_name(const CoreML::Specification::NeuralNetworkLay const std::map model_name_to_info = {{"resnet-50", {224, 224, 2048, "flatten0", "data", "Resnet50.mlmodel"}}, + {"VisionFeaturePrint_Screen", {299, 299, 2048, "output", "image_input", ""}}, {"squeezenet_v1.1", {227, 227, 1000, "pool10", "image", "https://docs-assets.developer.apple.com/coreml/models/SqueezeNet.mlmodel"}}}; @@ -118,34 +123,102 @@ static void checkNSError(NSError *error) { return model_info_entry->second; } +void build_vision_feature_print_screen_spec(const std::string& model_path) { + const neural_network_model_details& model_info = get_model_info("VisionFeaturePrint_Screen"); + + CoreML::Specification::Model spec = CoreML::Specification::Model(); + spec.set_specificationversion(CoreML::MLMODEL_SPECIFICATION_VERSION); + + auto* description = spec.mutable_description(); + + auto* input = description->add_input(); + input->set_name("image_input"); + auto* input_type = input->mutable_type()->mutable_imagetype(); + + input_type->set_width(model_info.input_width); + input_type->set_height(model_info.input_height); + + input_type->set_colorspace(CoreML::Specification::ImageFeatureType::BGR); + + auto* output = description->add_output(); + output->set_name("output"); + auto* output_type = output->mutable_type()->mutable_multiarraytype(); + output_type->set_datatype(CoreML::Specification::ArrayFeatureType::DOUBLE); + output_type->add_shape(model_info.feature_layer_size); + + auto vision_feature_print = spec.mutable_visionfeatureprint(); + auto scene = vision_feature_print->mutable_scene(); + scene->set_version(CoreML::Specification::CoreMLModels::VisionFeaturePrint_Scene_SceneVersion_SCENE_VERSION_1); + + // Save the model + CoreML::Result r = CoreML::Model(spec).save(model_path); + if(!r.good()) { + log_and_throw("Could not save model: " + r.message()); + } + +} static MLModel *create_model(const std::string& download_path, const std::string& model_name) { - const std::string modified_model_path = download_path + "/" + model_name + "_modified.mlmodel"; - if(! boost::filesystem::exists(modified_model_path)) { - std::string base_model_path; - const neural_network_model_details& model_info = get_model_info(model_name); - - if(turi::fileio::get_protocol(model_info.base_model_url) != "") { - base_model_path = download_path + "/" + model_name + ".mlmodel"; - logstream(LOG_PROGRESS) << "Downloading base mlmodel" << std::endl; - turi::download_url(model_info.base_model_url, base_model_path); + + const std::string compiled_modified_model_path = download_path + "/" + model_name + "_modified.mlmodelc"; + + // Create the compiled modified model, if we don't already have it + if(! 
boost::filesystem::exists(compiled_modified_model_path)) { + + // Create the modified model + const std::string modified_model_path = download_path + "/" + model_name + "_modified.mlmodel"; + if(model_name == "VisionFeaturePrint_Screen") { + build_vision_feature_print_screen_spec(modified_model_path); } else { - base_model_path = download_path + "/" + model_info.base_model_url; + std::string base_model_path; + const neural_network_model_details& model_info = get_model_info(model_name); + + if(turi::fileio::get_protocol(model_info.base_model_url) != "") { + base_model_path = download_path + "/" + model_name + ".mlmodel"; + logstream(LOG_PROGRESS) << "Downloading base mlmodel" << std::endl; + turi::download_url(model_info.base_model_url, base_model_path); + } else { + base_model_path = download_path + "/" + model_info.base_model_url; + } + + model_info.modify_neural_network(base_model_path, modified_model_path); } - model_info.modify_neural_network(base_model_path, modified_model_path); + @autoreleasepool { + NSError* error = nil; + + // Swallow output for the very verbose coremlcompiler + int stdoutBack = dup(STDOUT_FILENO); + int devnull = open("/dev/null", O_WRONLY); + dup2(devnull, STDOUT_FILENO); + + // Compile the modified model + NSString* temp = [NSString stringWithUTF8String:modified_model_path.c_str()]; + NSURL* specPath = [NSURL fileURLWithPath:temp]; + NSURL* modelPath = [MLModel compileModelAtURL:specPath error:&error]; + checkNSError(error); + + // Close all the file descriptors and revert back to normal + dup2(stdoutBack, STDOUT_FILENO); + close(devnull); + close(stdoutBack); + + // Copy the compiled modified model + temp = [NSString stringWithUTF8String:compiled_modified_model_path.c_str()]; + NSURL* compiledModelPath = [NSURL fileURLWithPath:temp]; + [[NSFileManager defaultManager] copyItemAtURL:modelPath toURL:compiledModelPath error:&error]; + checkNSError(error); + } } - // Load the model. + // Load the compiled modified model MLModel* result = nil; @autoreleasepool { NSError* error = nil; - NSString* temp = [NSString stringWithUTF8String:modified_model_path.c_str()]; - NSURL* specPath = [NSURL fileURLWithPath:temp]; - NSURL* modelPath = [MLModel compileModelAtURL:specPath error:&error]; - checkNSError(error); - result = [MLModel modelWithContentsOfURL:modelPath error:&error]; + NSString* temp = [NSString stringWithUTF8String:compiled_modified_model_path.c_str()]; + NSURL* compiledModelPath = [NSURL fileURLWithPath:temp]; + result = [MLModel modelWithContentsOfURL:compiledModelPath error:&error]; checkNSError(error); result = [result retain]; // Safe to retain now that no exceptions possible } @@ -162,8 +235,7 @@ static void handleCVReturn(CVReturn status) { } } - -static CVPixelBufferRef flex_image_to_CVPixelBuffer(const flex_image image) { +CVPixelBufferRef create_pixel_buffer_from_flex_image(const flex_image image) { // The code in this function is largely adapted from convertValueToImage here: // https://github.com/apple/coremltools/blob/master/coremlpython/CoreMLPythonUtils.mm @@ -243,7 +315,7 @@ static CVPixelBufferRef flex_image_to_CVPixelBuffer(const flex_image image) { } m_impl->name = model_name; - m_impl->model = create_model(download_path, model_name); // retained + m_impl->model = create_model(download_path, model_name); // Read the spec from the file written to produce the MLModel. // TODO: Just save this value before writing it to disk. 
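Reviewer note: the hunk that follows replaces the one-image-at-a-time prediction loop with batched, parallel evaluation. The batch bookkeeping is ceiling division plus a progress value floored to the nearest 0.25%; a small Python sketch of that arithmetic (the sizes are made up, only the formulas come from the code below):

    # Made-up sizes; only the formulas mirror the batching hunk that follows.
    data_size = 1000
    kBatchSize = 64

    # Ceiling division: the last batch may be short.
    batch_count = (data_size + kBatchSize - 1) // kBatchSize   # -> 16

    def batch_range(batch_index):
        batch_offset = batch_index * kBatchSize
        batch_end = min(data_size, batch_offset + kBatchSize)
        return batch_offset, batch_end

    print(batch_range(15))   # (960, 1000): the short final batch

    # Progress is floored to the nearest 0.25%: multiply by 400, integer-divide
    # by the batch count, then divide by 4.0.
    batches_completed = 7
    print((400 * batches_completed // batch_count) / 4.0)      # 43.75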
@@ -262,54 +334,175 @@ static CVPixelBufferRef flex_image_to_CVPixelBuffer(const flex_image image) { } gl_sarray -mlmodel_image_feature_extractor::extract_features(gl_sarray data) const { - const neural_network_model_details& model_info = get_model_info(m_impl->name); +mlmodel_image_feature_extractor::extract_features(gl_sarray data, bool verbose, size_t kBatchSize) const { ASSERT_EQ((int)data.dtype(), (int)flex_type_enum::IMAGE); + ASSERT_TRUE(kBatchSize >= 1); + + const neural_network_model_details& model_info = get_model_info(m_impl->name); + + BOOL use_only_cpu = (turi::fileio::NUM_GPUS == 0); std::vector result(data.size()); - @autoreleasepool { - NSError* error = nil; - for(size_t i = 0; i < data.size(); i++) { + + mutex mut; + + timer tt; + tt.start(); + table_printer table( + { {"Images Processed", 0}, {"Elapsed Time", 0}, {"Percent Complete", 0} }, 0); + if (verbose) { + logprogress_stream << "Analyzing and extracting image features." << std::endl; + table.print_header(); + } + + // Lambda converting one flex_image from `data` into a MLFeatureProvider to + // feed into the CoreML model. Must be called inside an autorelease pool. + auto convert_image_to_feature_provider = [&](size_t i) { flexible_type decoded_image = image_util::resize_image(data[i], model_info.input_width, model_info.input_height, 3, true); const flex_image& image = decoded_image.get(); - CVPixelBufferRef buffer = flex_image_to_CVPixelBuffer(image); - + CVPixelBufferRef buffer = create_pixel_buffer_from_flex_image(image); MLFeatureValue* image_feature = [MLFeatureValue featureValueWithPixelBuffer:buffer]; - NSString* input_name = [NSString stringWithUTF8String: model_info.input_name.c_str()]; - MLDictionaryFeatureProvider *input = [[MLDictionaryFeatureProvider alloc] initWithDictionary:@{input_name: image_feature} error:&error]; - checkNSError(error); - id model_prediction = [m_impl->model predictionFromFeatures:input error:&error]; - checkNSError(error); + CFRelease(buffer); + NSString* input_name = [NSString stringWithUTF8String: model_info.input_name.c_str()]; + NSError *error = nil; + MLDictionaryFeatureProvider *input = [[[MLDictionaryFeatureProvider alloc] initWithDictionary:@{input_name: image_feature} error:&error] autorelease]; + checkNSError(error); // Can throw, must autorelease before here. + return input; + }; + + // Lambda converting one MLFeatureProvider output from the CoreML model into + // a flex_vec value, written to `result[i]`. Must be called inside an + // autorelease pool. 
+ auto set_output_vector = + [&](size_t i, id model_prediction) { MLFeatureValue* deep_features = [model_prediction featureValueForName: [NSString stringWithUTF8String: model_info.feature_layer_output_name.c_str()]]; MLMultiArray* deep_features_values = [deep_features multiArrayValue]; // Santiy check prediction shape NSArray * shape = [deep_features_values shape]; - ASSERT_EQ(shape.count, (unsigned long)5); - ASSERT_EQ(shape[0].intValue, 1); - ASSERT_EQ(shape[1].intValue, 1); - ASSERT_EQ(shape[2].intValue, model_info.feature_layer_size); - ASSERT_EQ(shape[3].intValue, 1); - ASSERT_EQ(shape[4].intValue, 1); + size_t feature_dim = -1; + if(m_impl->name != "VisionFeaturePrint_Screen") { + ASSERT_EQ(shape.count, (unsigned long)5); + ASSERT_EQ(shape[0].intValue, 1); + ASSERT_EQ(shape[1].intValue, 1); + ASSERT_EQ(shape[2].intValue, model_info.feature_layer_size); + ASSERT_EQ(shape[3].intValue, 1); + ASSERT_EQ(shape[4].intValue, 1); + feature_dim = 2; + } else { + ASSERT_EQ(shape.count, (unsigned long)1); + ASSERT_EQ(shape[0].intValue, model_info.feature_layer_size); + feature_dim = 0; + } // Copy deep features to a flexible type vector - size_t deep_feature_length = shape[2].intValue; - size_t stride = deep_features_values.strides[2].intValue; + size_t deep_feature_length = shape[feature_dim].intValue; + size_t stride = deep_features_values.strides[feature_dim].intValue; flex_vec dest(deep_feature_length); double *srcPtr = (double *) deep_features_values.dataPointer; for(size_t j = 0; j < deep_feature_length; j++) { size_t offset = j * stride; dest[j] = srcPtr[offset]; } - result[i] = dest; + result[i] = std::move(dest); + }; + + // Lambda performing feature extraction on one batch of images, writing the + // output into `results`. Must be called inside an autorelease pool. + auto perform_batch = [&](size_t batch_index) { + const size_t batch_offset = batch_index * kBatchSize; + const size_t batch_end = std::min(data.size(), batch_offset + kBatchSize); + const size_t batch_size = batch_end - batch_offset; + + // Create the batch input for the CoreML model. + NSMutableArray> *inputs = + [NSMutableArray arrayWithCapacity:batch_size]; + for (size_t i = 0; i < batch_size; ++i) { + [inputs addObject: convert_image_to_feature_provider(batch_offset + i)]; + } + NSMutableArray> *outputs = + [NSMutableArray arrayWithCapacity:batch_size]; + + // The CoreML batch API only exists if the base SDK is new enough. +#ifdef HAS_CORE_ML_BATCH_INFERENCE + // Even when compiled with a new enough SDK, guard against older deployment + // targets at runtime. + if (@available(macOS 10.14, *)) { + // Invoke CoreML using the batch inference API for better performance. + MLArrayBatchProvider *image_batch = [[MLArrayBatchProvider alloc] initWithFeatureProviderArray: inputs]; + MLPredictionOptions* options = [[MLPredictionOptions alloc] init]; + [options setUsesCPUOnly:use_only_cpu]; + NSError *error = nil; + id features_batch = [m_impl->model predictionsFromBatch:image_batch options:options error:&error]; + [options release]; + [image_batch release]; + checkNSError(error); + + for (NSInteger i = 0; i < features_batch.featureProviderCount; ++i) { + [outputs addObject:[features_batch featureProviderAtIndex:i]]; + } + } else { +#else + { +#endif + // Once it's our turn to use CoreML, don't let any other threads in until + // we're done and ready to move on to the CPU-bound phase of processing. + std::lock_guard lock(mut); + for (size_t i = 0; i < batch_size; ++i) { + // Invoke the CoreML model. 
+ NSError *error = nil; + MLPredictionOptions* options = [[MLPredictionOptions alloc] init]; + [options setUsesCPUOnly:use_only_cpu]; + id features = [m_impl->model predictionFromFeatures:inputs[i] options:options error:&error]; + [options release]; + checkNSError(error); + + // Just collect the outputs for now. Delay any copying until after we + // release the mutex. + [outputs addObject:features]; + } + } + // Convert/copy the output of the CoreML model. + for (size_t i = 0; i < batch_size; ++i) { + set_output_vector(batch_offset + i, outputs[i]); + } + }; + + // Submit batches in parallel, one for each CPU core, so that: + // - CoreML is busy all the time, assuming each core can prepare a batch + // faster than CoreML can evaluate the other n - 1 batches. + // - Every core is busy, except when there is a backlog of batches. + // - The number of batches in flight is bounded (by the number of cores). + // - The worker threads do not contend or synchronize with one another, except + // within CoreML and when joining at the very end. + std::atomic batches_completed(0); + const size_t batch_count = (data.size() + kBatchSize - 1) / kBatchSize; + parallel_for(0, batch_count, [&](size_t batch_index) { + @autoreleasepool { + + if (verbose) { + std::ostringstream d; + // For pretty printing, floor percent done + // resolution to the nearest .25% interval. Do this by multiplying by + // 400, then do integer division by the total size, then float divide + // by 4.0 + d << (double(size_t(400 * batches_completed) / batch_count) / 4.0) << '%'; + table.print_progress_row(batches_completed, batches_completed * kBatchSize, + progress_time(tt), d.str()); + } + + perform_batch(batch_index); + batches_completed++; + } // end autoreleasepool + }); + + if (verbose) { + table.print_footer(); } - - } // end autoreleasepool - return gl_sarray(result, flex_type_enum::VECTOR); } -} // image_deep_feature_extractor +} // namespace image_deep_feature_extractor } // namespace turi diff --git a/src/unity/extensions/additional_sframe_utilities.cpp b/src/unity/extensions/additional_sframe_utilities.cpp index 4b11934e9b..d7419e3f84 100644 --- a/src/unity/extensions/additional_sframe_utilities.cpp +++ b/src/unity/extensions/additional_sframe_utilities.cpp @@ -5,9 +5,11 @@ */ #include #include +#include #include #include #include +#include #include #include #include @@ -17,60 +19,57 @@ using namespace turi; template -void copy_image_to_memory(const image_type& img, T *outptr, +void copy_image_to_memory(const image_type& input, T *outptr, const std::vector& outstrides, + const std::vector& outshape, bool channel_last) { ASSERT_EQ(outstrides.size(), 3); - size_t index_h, index_w, index_c; + ASSERT_EQ(outshape.size(), 3); + size_t stride_h, stride_w, stride_c; + size_t height, width, channels; if (channel_last) { // Format: HWC - index_h = 0; - index_w = 1; - index_c = 2; + stride_h = outstrides[0]; + stride_w = outstrides[1]; + stride_c = outstrides[2]; + height = outshape[0]; + width = outshape[1]; + channels = outshape[2]; } else { // Format: CHW - index_c = 0; - index_h = 1; - index_w = 2; + stride_c = outstrides[0]; + stride_h = outstrides[1]; + stride_w = outstrides[2]; + channels = outshape[0]; + height = outshape[1]; + width = outshape[2]; } - // Decode if needed - if (!img.is_decoded()) { - char* buf = NULL; - size_t length = 0; - if (img.m_format == Format::JPG) { - decode_jpeg((const char*)img.get_image_data(), img.m_image_data_size, &buf, length); - } else if (img.m_format == Format::PNG) { - decode_png((const 
char*)img.get_image_data(), img.m_image_data_size, &buf, length); - } else { - ASSERT_MSG(false, "Unsupported image format"); - } - size_t cnt = 0; - for (size_t i = 0; i < img.m_height; ++i) { - for (size_t j = 0; j < img.m_width; ++j) { - for (size_t k = 0; k < img.m_channels; ++k) { - outptr[i * outstrides[index_h] + j * outstrides[index_w] + k * outstrides[index_c]] = static_cast(buf[cnt++]); - } - } - } - delete[] buf; - } else { - size_t cnt = 0; - const unsigned char* raw_data = img.get_image_data(); - for (size_t i = 0; i < img.m_height; ++i) { - for (size_t j = 0; j < img.m_width; ++j) { - for (size_t k = 0; k < img.m_channels; ++k) { - outptr[i * outstrides[index_h] + j * outstrides[index_w] + k * outstrides[index_c]] = static_cast(raw_data[cnt++]); - } + // Resize. + flexible_type resized = image_util::resize_image(input, width, height, + channels, /* decode */ true); + const image_type& img = resized.get(); + + // Copy. + size_t cnt = 0; + const unsigned char* raw_data = img.get_image_data(); + for (size_t i = 0; i < img.m_height; ++i) { + for (size_t j = 0; j < img.m_width; ++j) { + for (size_t k = 0; k < img.m_channels; ++k) { + outptr[i * stride_h + j * stride_w + k * stride_c] = + static_cast(raw_data[cnt++]); } } } + + // Further optimization is possible (but not trivial) by combining the resize + // operation and the copy operation, removing an intermediate buffer. } void copy_to_memory(const sframe_rows::row& data, float* outptr, const std::vector& outstrides, - const std::vector& field_length_p) { + const std::vector& outshape) { ASSERT_GE(data.size(), 1); for (size_t i = 0; i < data.size(); ++i) { @@ -82,7 +81,7 @@ void copy_to_memory(const sframe_rows::row& data, if (type == flex_type_enum::IMAGE) { ASSERT_MSG(data.size() == 1, "Image data only support one input field"); const image_type& img = data[0].get(); - copy_image_to_memory(img, outptr, outstrides, false); + copy_image_to_memory(img, outptr, outstrides, outshape, false); return; } else if (data.size() == 1 && (type == flex_type_enum::FLOAT || type == flex_type_enum::INTEGER)) { // Case 2: Single value type (should really get rid of this special case) @@ -91,21 +90,20 @@ void copy_to_memory(const sframe_rows::row& data, return; } else if (data.size() == 1 && type == flex_type_enum::LIST) { // Case 3: 2D arrays: list of vectors or list of lists of values - // field_length defines shape of the 2d array - ASSERT_EQ(field_length_p.size(), 2); + ASSERT_EQ(outshape.size(), 2); const flex_list& dim0_lst = data[0].to(); - ASSERT_EQ(dim0_lst.size(), field_length_p[0]); + ASSERT_EQ(dim0_lst.size(), outshape[0]); for (size_t i = 0; i < dim0_lst.size(); ++i) { auto dim1_type = dim0_lst[i].get_type(); if (dim1_type == flex_type_enum::VECTOR) { const flex_vec& dim1_vec = dim0_lst[i].to(); - ASSERT_EQ(dim1_vec.size(), field_length_p[1]); + ASSERT_EQ(dim1_vec.size(), outshape[1]); for (size_t j = 0; j < dim1_vec.size(); ++j) { outptr[outstrides[0] * i + outstrides[1] * j] = (float)(dim1_vec[j]); } } else if (dim1_type == flex_type_enum::LIST) { const flex_list& dim1_lst = dim0_lst[i].to(); - ASSERT_EQ(dim1_lst.size(), field_length_p[1]); + ASSERT_EQ(dim1_lst.size(), outshape[1]); for (size_t j = 0; j < dim1_lst.size(); ++j) { auto value_type = dim1_lst[j].get_type(); if (value_type == flex_type_enum::INTEGER || @@ -122,12 +120,12 @@ void copy_to_memory(const sframe_rows::row& data, } else { // Case 4: Array type or mixed types ASSERT_EQ(outstrides.size(), 1); + ASSERT_EQ(outshape.size(), 1); size_t pos = 0; for (size_t i = 0; 
i < data.size(); ++i) { auto type = data[i].get_type(); if (type == flex_type_enum::VECTOR) { const flex_vec& v = data[i].to(); - ASSERT_EQ(v.size(), field_length_p[i]); for (size_t j = 0; j < v.size(); ++j) { outptr[outstrides[0] * pos] = (float)(v[j]); ++pos; @@ -140,13 +138,14 @@ void copy_to_memory(const sframe_rows::row& data, ASSERT_MSG(false, "Unsupported type"); } } + ASSERT_EQ(pos, outshape[0]); } return; } void sframe_load_to_numpy(turi::gl_sframe input, size_t outptr_addr, - std::vector outstrides, - std::vector field_length, - size_t begin, size_t end) { + std::vector outstrides, + std::vector outshape, + size_t begin, size_t end) { if (!input.is_materialized()) { input.materialize(); } @@ -154,27 +153,41 @@ void sframe_load_to_numpy(turi::gl_sframe input, size_t outptr_addr, ASSERT_MSG(input.num_columns() > 0, "SFrame has no column"); float* outptr = reinterpret_cast(outptr_addr); + ASSERT_EQ(outstrides.size(), outshape.size()); ASSERT_GE(outstrides.size(), 1); for (size_t& stride: outstrides) { stride /= sizeof(float); } + // we consume the first index. copy_to_memory takes the rest - std::vector descendent_strides(outstrides.begin() + 1, outstrides.end()); - for (const auto& row : input.range_iterator(begin, end)) { - copy_to_memory(row, outptr, descendent_strides, field_length); - outptr += outstrides[0]; - } + size_t row_stride = outstrides[0]; + outstrides.erase(outstrides.begin()); + outshape.erase(outshape.begin()); + + const size_t num_rows = end - begin; + in_parallel([&](size_t worker_idx, size_t num_workers) { + // Compute the input range and output address for this thread. + size_t worker_begin = begin + num_rows * worker_idx / num_workers; + size_t worker_end = begin + num_rows * (worker_idx + 1) / num_workers; + float* worker_out = outptr + row_stride * (worker_begin - begin); + + for (const auto& row : input.range_iterator(worker_begin, worker_end)) { + copy_to_memory(row, worker_out, outstrides, outshape); + worker_out += row_stride; + } + }); } // Loads image into row-major array with shape HWC (height, width, channel) void image_load_to_numpy(const image_type& img, size_t outptr_addr, const std::vector& outstrides) { unsigned char *outptr = reinterpret_cast(outptr_addr); - copy_image_to_memory(img, outptr, outstrides, true); + copy_image_to_memory(img, outptr, outstrides, + {img.m_height, img.m_width, img.m_channels}, true); } BEGIN_FUNCTION_REGISTRATION -REGISTER_FUNCTION(sframe_load_to_numpy, "input", "outptr_addr", "outstrides", "field_length", "begin", "end"); +REGISTER_FUNCTION(sframe_load_to_numpy, "input", "outptr_addr", "outstrides", "outshape", "begin", "end"); REGISTER_FUNCTION(image_load_to_numpy, "img", "outptr_addr", "outstrides"); END_FUNCTION_REGISTRATION diff --git a/src/unity/lib/image_util.cpp b/src/unity/lib/image_util.cpp index 88df99fbd6..f9a8773f26 100644 --- a/src/unity/lib/image_util.cpp +++ b/src/unity/lib/image_util.cpp @@ -220,13 +220,11 @@ std::vector get_directory_files(std::string url, bool recursive) { path_status_vec_t path_status_vec = fileio::get_directory_listing(url); std::vector ret; for (const auto& path_status : path_status_vec) { - if (path_status.first[0] != '.') { - if (recursive && path_status.second == fileio::file_status::DIRECTORY) { - auto tmp = get_directory_files(path_status.first, recursive); - ret.insert(ret.end(), tmp.begin(), tmp.end()); - } else if (path_status.second == fileio::file_status::REGULAR_FILE){ - ret.push_back(path_status.first); - } + if (recursive && path_status.second == 
fileio::file_status::DIRECTORY) { + auto tmp = get_directory_files(path_status.first, recursive); + ret.insert(ret.end(), tmp.begin(), tmp.end()); + } else if (path_status.second == fileio::file_status::REGULAR_FILE){ + ret.push_back(path_status.first); } } return ret; @@ -340,25 +338,19 @@ flexible_type resize_image(const flexible_type& input, size_t resized_width, std::string error = "Cannot resize non-image type"; log_and_throw(error); } - const flex_image& src_image = image.get(); - // is this resize a no opt? - if (src_image.m_width == resized_width && src_image.m_height == resized_height && src_image.m_channels == resized_channels && src_image.is_decoded() == decode) { - return image; + flex_image image = input.get(); + auto has_desired_size = [&] { + return image.m_width == resized_width && image.m_height == resized_height && image.m_channels == resized_channels; + }; + + // Is this resize a no-op? + if (has_desired_size() && image.is_decoded() == decode) { + return input; } - char* resized_data; - if (src_image.is_decoded()) { - // skip decoding - image_util_detail::resize_image_impl((const char*)src_image.get_image_data(), - src_image.m_width, src_image.m_height, src_image.m_channels, resized_width, - resized_height, resized_channels, &resized_data); - } else { - // make a copy and decode - flexible_type tmp = image; - flex_image& decoded_image = tmp.mutable_get(); - image_util_detail::decode_image_impl(decoded_image); - image_util_detail::resize_image_impl((const char*)decoded_image.get_image_data(), - decoded_image.m_width, decoded_image.m_height, decoded_image.m_channels, resized_width, - resized_height, resized_channels, &resized_data); + + // Decode if necessary. + if (!image.is_decoded()) { + image_util_detail::decode_image_impl(image); } // Resize if necessary. @@ -379,9 +371,10 @@ flexible_type resize_image(const flexible_type& input, size_t resized_width, // Encode if necessary. 
if (!decode) { - image_util_detail::encode_image_impl(dst_img); + image_util_detail::encode_image_impl(image); } - return dst_img; + + return image; }; diff --git a/src/unity/lib/unity_sarray.cpp b/src/unity/lib/unity_sarray.cpp index f33404b140..30b3c67cc0 100644 --- a/src/unity/lib/unity_sarray.cpp +++ b/src/unity/lib/unity_sarray.cpp @@ -2902,7 +2902,6 @@ std::shared_ptr unity_sarray::plot(const std::string& path_to_client using namespace turi; using namespace turi::visualization; - logprogress_stream << "Materializing SArray" << std::endl; this->materialize(); if (this->size() == 0) { diff --git a/src/unity/lib/version_number.hpp b/src/unity/lib/version_number.hpp index a45b8d99b6..9ec8e4f2be 100644 --- a/src/unity/lib/version_number.hpp +++ b/src/unity/lib/version_number.hpp @@ -3,4 +3,4 @@ * Use of this source code is governed by a BSD-3-clause license that can * be found in the LICENSE.txt file or at https://opensource.org/licenses/BSD-3-Clause */ -#define __UNITY_VERSION__ "4.3a1"//#{{VERSION_STRING}} +#define __UNITY_VERSION__ "5.0b1"//#{{VERSION_STRING}} diff --git a/src/unity/python/setup.py b/src/unity/python/setup.py index 497f4be888..c2dd7911a6 100644 --- a/src/unity/python/setup.py +++ b/src/unity/python/setup.py @@ -13,7 +13,7 @@ from setuptools.command.install import install PACKAGE_NAME="turicreate" -VERSION='4.3.2'#{{VERSION_STRING}} +VERSION='5.0b1'#{{VERSION_STRING}} # Prevent distutils from thinking we are a pure python package class BinaryDistribution(Distribution): @@ -178,8 +178,8 @@ def run(self): "decorator >= 4.0.9", "prettytable == 0.7.2", "requests >= 2.9.1", - "mxnet >= 0.11, < 1.2.0", - "coremltools == 0.8", + "mxnet >= 1.1.0, < 1.2.0", + "coremltools == 2.0b1", "pillow >= 3.3.0", "pandas >= 0.19.0", "numpy" diff --git a/src/unity/python/turicreate/CMakeLists.txt b/src/unity/python/turicreate/CMakeLists.txt index da460708db..172ac803c7 100644 --- a/src/unity/python/turicreate/CMakeLists.txt +++ b/src/unity/python/turicreate/CMakeLists.txt @@ -43,6 +43,7 @@ ADD_CUSTOM_COMMAND( ) add_subdirectory(cython) +add_subdirectory(toolkits) set_property(DIRECTORY PROPERTY INSTALLATION_EXTENSIONS "${INSTALLATION_EXTENSIONS}") set_property(DIRECTORY PROPERTY INSTALLATION_BINARY_TARGETS "${INSTALLATION_BINARY_TARGETS}") @@ -59,11 +60,6 @@ if (APPLE) "${CMAKE_BINARY_DIR}/src/visualization/${CMAKE_BUILD_TYPE}/Turi Create Visualization.app" ) add_dependencies(visualization_client_app visualization_client) - add_custom_target( - DEPENDS unity_toolkits - COMMENT "cp tcmps dylib" - COMMAND cp -a "${CMAKE_BINARY_DIR}/src/unity/toolkits/tcmps/${CMAKE_BUILD_TYPE}/libtcmps.dylib" ${CMAKE_CURRENT_BINARY_DIR} - ) elseif(LINUX) add_custom_target( visualization_client_app ALL diff --git a/src/unity/python/turicreate/__init__.py b/src/unity/python/turicreate/__init__.py index a47e452472..c51dcdc4a0 100644 --- a/src/unity/python/turicreate/__init__.py +++ b/src/unity/python/turicreate/__init__.py @@ -79,6 +79,8 @@ import turicreate.toolkits.style_transfer as style_transfer import turicreate.toolkits.activity_classifier as activity_classifier +from turicreate.toolkits.image_analysis.image_analysis import load_images + from turicreate.toolkits import evaluation # internal util diff --git a/src/unity/python/turicreate/cython/cy_server.pyx b/src/unity/python/turicreate/cython/cy_server.pyx index f3feb9dc04..570c5a0c7f 100644 --- a/src/unity/python/turicreate/cython/cy_server.pyx +++ b/src/unity/python/turicreate/cython/cy_server.pyx @@ -63,6 +63,11 @@ class GraphLabServer(object): """ Return 
the logger object. """ raise NotImplementedError + def log_progress_enabled(self): + """ Return True if progress is enabled else False. """ + raise NotImplementedError + + cdef void print_status(const string& status_string) nogil: with gil: print_callback(cpp_to_str(status_string).rstrip()) @@ -83,6 +88,7 @@ class EmbeddedServer(GraphLabServer): root_path = os.path.abspath(os.path.join(root_path, os.pardir)) # sframe/ self.root_path = root_path self.started = False + self._log_progress_enabled = False if not self.unity_log: self.unity_log = default_local_conf.get_unity_log() @@ -122,11 +128,17 @@ class EmbeddedServer(GraphLabServer): def get_logger(self): return self.logger + def log_progress_enabled(self): + """ Return True if progress is enabled else False. """ + raise NotImplementedError + def set_log_progress(self, enable): if enable: set_log_progress_callback(print_status) + self._log_progress_enabled = True else: set_log_progress(False) + self._log_progress_enabled = False class QuietProgress(object): """ @@ -136,7 +148,10 @@ class QuietProgress(object): def __init__(self, verbose): self.verbose = verbose def __enter__(self): + server = _connect.main.get_server() + self.log_progress_enabled = server.log_progress_enabled if not self.verbose: - _connect.main.get_server().set_log_progress(False) + server.set_log_progress(False) + def __exit__(self, type, value, traceback): - _connect.main.get_server().set_log_progress(False) + _connect.main.get_server().set_log_progress(self.log_progress_enabled) diff --git a/src/unity/python/turicreate/mx/_mx_sframe_iter.py b/src/unity/python/turicreate/mx/_mx_sframe_iter.py index 8879ead946..bdb8a1ce3e 100644 --- a/src/unity/python/turicreate/mx/_mx_sframe_iter.py +++ b/src/unity/python/turicreate/mx/_mx_sframe_iter.py @@ -23,15 +23,15 @@ from turicreate import extensions as sf_extension -def _copy_from_sframe(sf, buf, start, end, field_length, bias=0): +def _copy_from_sframe(sf, buf, start, end, shape, bias=0): assert isinstance(sf, SFrame) - sf_extension.sframe_load_to_numpy(sf, buf.ctypes.data + buf.strides[0] * bias, buf.strides, field_length, start, end) + sf_extension.sframe_load_to_numpy(sf, buf.ctypes.data + buf.strides[0] * bias, buf.strides, shape, start, end) -def _copy_from_sarray(sa, buf, start, end, field_length, bias=0): +def _copy_from_sarray(sa, buf, start, end, shape, bias=0): assert isinstance(sa, SArray) sf = SFrame({'__tmp__': sa}) - _copy_from_sframe(sf, buf, start, end, [field_length], bias) + _copy_from_sframe(sf, buf, start, end, shape, bias) def _init_data(data, allow_empty, default_name): @@ -115,12 +115,10 @@ def __init__(self, sframe, data_field, label_field=None, batch_size=1, data_name self.label_sframe = sframe[label_field] # allocate ndarray - inferred_shape = self.infer_shape() - data_shape = list(inferred_shape["final_shape"]) + data_shape = list(self.infer_shape()) data_shape.insert(0, batch_size) self.data_shape = tuple(data_shape) self.label_shape = (batch_size, ) - self.field_length = inferred_shape["field_length"] self.data_ndarray = np.zeros(self.data_shape, dtype=np.float32) self.label_ndarray = np.zeros(self.label_shape, dtype=np.float32) self.data_mx_ndarray = None @@ -176,7 +174,7 @@ def _infer_column_shape(self, sarray): return (first_image.channels, first_image.height, first_image.width) def infer_shape(self): - ret = {"field_length": [], "final_shape": None} + ret = None features = self.data_sframe.column_names() assert len(features) > 0 if len(features) > 1: @@ -187,22 +185,16 @@ def infer_shape(self): 
if len(colshape) != 1: raise ValueError('Only one column is allowed if input is image typed') shape += colshape[0] - ret["field_length"].append(colshape[0]) - ret["final_shape"] = (shape,) + ret = (shape,) else: - col_shape = self._infer_column_shape(self.data_sframe[features[0]]) - ret["final_shape"] = col_shape - length = 1 - for x in col_shape: - length = length * x - ret["field_length"].append(length) + ret = self._infer_column_shape(self.data_sframe[features[0]]) return ret def _copy(self, start, end, bias=0): - _copy_from_sframe(self.data_sframe, self.data_ndarray, start, end, self.field_length, bias) + _copy_from_sframe(self.data_sframe, self.data_ndarray, start, end, self.data_shape, bias) self.data_mx_ndarray = None if self.label_field is not None: - _copy_from_sarray(self.label_sframe, self.label_ndarray, start, end, 1, bias) + _copy_from_sarray(self.label_sframe, self.label_ndarray, start, end, (self.batch_size, 1), bias) self.label_mx_ndarray = None def iter_next(self): @@ -256,6 +248,8 @@ class SFrameImageIter(SFrameIter): label field in SFrame batch_size : int, optional batch size + image_shape : tuple, optional + if specified, each image will be resized to this (channel, height, width) mean_r : float, optional normalize the image by subtracting the mean value of r channel, or the first channel for mean_g : float, optional @@ -284,10 +278,11 @@ class SFrameImageIter(SFrameIter): Notes ----- - - Image column must contain images of the same size. + - Image column must contain images of the same size if image_shape is not provided. """ def __init__(self, sframe, data_field, label_field=None, batch_size=1, + image_shape=None, data_name='data', label_name='softmax_label', mean_r=0.0, mean_g=0.0, @@ -296,6 +291,10 @@ def __init__(self, sframe, data_field, label_field=None, batch_size=1, scale=1.0, random_flip=False, **kwargs): + if image_shape is not None and len(image_shape) != 3: + raise ValueError('image_shape must be a (channels, height, width) tuple') + self.image_shape = image_shape + super(SFrameImageIter, self).__init__(sframe, data_field, label_field, batch_size, data_name, label_name) @@ -340,6 +339,9 @@ def _infer_column_shape(self, sarray): if not dtype is Image: raise TypeError('Data column must be image type') + if self.image_shape is not None: + return self.image_shape + first_image = sarray.head(1)[0] if first_image is None: raise ValueError('Column cannot contain missing value') diff --git a/src/unity/python/turicreate/test/test_image_classifier.py b/src/unity/python/turicreate/test/test_image_classifier.py index 1728f74ba2..11ee346114 100644 --- a/src/unity/python/turicreate/test/test_image_classifier.py +++ b/src/unity/python/turicreate/test/test_image_classifier.py @@ -18,10 +18,12 @@ import coremltools import platform -def _get_data(num_examples = 100): +def _get_data(num_examples = 100, label_type = int): from PIL import Image as _PIL_Image import numpy as np + assert(label_type in [str, int]) + rs = np.random.RandomState(1234) _format = {'JPG': 0, 'PNG': 1, 'RAW': 2, 'UNDEFINED': 3} @@ -47,12 +49,20 @@ def from_pil_image(pil_img): return img images = [] - random_labels = [rs.randint(0,5) for i in range(num_examples)] + if label_type == int: + random_labels = [rs.randint(0,5) for _ in range(num_examples)] + else: + random_labels = [rs.choice(['a', 'b', 'c', 'd', 'e']) for _ in range(num_examples)] for i in range(num_examples): img_shape = tuple(rs.randint(100, 1000, size=2)) + (3,) img = rs.randint(255, size=img_shape) - label = random_labels[i] + # Give a 
slight color hint about the label + if label_type == int: + label = int(random_labels[i]) + else: + label = ord(random_labels[i]) - ord('a') + img = (img + [label * 3, 0, -label * 3]).clip(0, 255) pil_img = _PIL_Image.fromarray(img, mode='RGB') images.append(from_pil_image(pil_img)) @@ -63,14 +73,15 @@ def from_pil_image(pil_img): class ImageClassifierTest(unittest.TestCase): @classmethod - def setUpClass(self, model='resnet-50', input_image_shape=(3, 224, 224), tol=0.02, num_examples = 100): + def setUpClass(self, model = 'resnet-50', input_image_shape = (3, 224, 224), tol=0.02, + num_examples = 100, label_type = int): self.feature = 'awesome_image' self.target = 'awesome_label' self.input_image_shape = input_image_shape self.pre_trained_model = model self.tolerance = tol - self.sf = _get_data(num_examples) + self.sf = _get_data(num_examples = num_examples, label_type = label_type) self.model = tc.image_classifier.create(self.sf, target=self.target, model=self.pre_trained_model, seed=42) @@ -161,8 +172,8 @@ def test_export_coreml_with_predict(self): coreml_model = coremltools.models.MLModel(filename) img = self.sf[0:1][self.feature][0] img_fixed = tc.image_analysis.resize(img, *reversed(self.input_image_shape)) - import PIL - pil_img = PIL.Image.fromarray(img_fixed.pixel_data) + from PIL import Image + pil_img = Image.fromarray(img_fixed.pixel_data) if _mac_ver() >= (10, 13): classes = self.model.classifier.classes @@ -238,6 +249,16 @@ def setUpClass(self): input_image_shape=(3, 227, 227), tol=0.005, num_examples = 200) +# TODO: if on skip OS, test negative case +@unittest.skipIf(_mac_ver() < (10,14), 'VisionFeaturePrint_Screen only supported on macOS 10.14+') +class VisionFeaturePrintScreenTest(ImageClassifierTest): + @classmethod + def setUpClass(self): + super(VisionFeaturePrintScreenTest, self).setUpClass(model='VisionFeaturePrint_Screen', + input_image_shape=(3, 299, 299), + tol=0.005, num_examples = 100, + label_type = str) + @unittest.skipIf(tc.util._num_available_cuda_gpus() == 0, 'Requires CUDA GPU') @pytest.mark.gpu diff --git a/src/unity/python/turicreate/test/test_image_similarity.py b/src/unity/python/turicreate/test/test_image_similarity.py index 1d23b3c409..869635a90c 100644 --- a/src/unity/python/turicreate/test/test_image_similarity.py +++ b/src/unity/python/turicreate/test/test_image_similarity.py @@ -17,7 +17,7 @@ from turicreate.toolkits._main import ToolkitError as _ToolkitError -def _get_data(): +def _get_data(image_length): from PIL import Image as _PIL_Image random = np.random.RandomState(100) @@ -45,7 +45,7 @@ def from_pil_image(pil_img): return img num_examples = 100 - dims = (224, 224) + dims = (image_length, image_length) total_dims = dims[0] * dims[1] images = [] for i in range(num_examples): @@ -64,13 +64,13 @@ def rand_image(): class ImageSimilarityTest(unittest.TestCase): @classmethod - def setUpClass(self, model = 'resnet-50'): + def setUpClass(self, input_image_shape = (3,224,224), model = 'resnet-50'): """ The setup class method for the basic test case with all default values. 
""" self.feature = 'awesome_image' self.label = None - self.input_image_shape = (3, 224, 224) + self.input_image_shape = input_image_shape self.pre_trained_model = model ## Create the model @@ -80,7 +80,7 @@ def setUpClass(self, model = 'resnet-50'): } # Model - self.sf = _get_data() + self.sf = _get_data(self.input_image_shape[2]) self.model = tc.image_similarity.create(self.sf, feature=self.feature, label=None, model=self.pre_trained_model) self.nn_model = self.model.feature_extractor @@ -197,7 +197,7 @@ def test_export_coreml(self): # Compare distances coreml_distances = np.array(sorted(coreml_ret['distance'])) tc_distances = tc_ret['distance'].to_numpy() - self.assertListAlmostEquals(tc_distances, coreml_distances, 0.02) + self.assertListAlmostEquals(tc_distances, coreml_distances, 0.025) def test_save_and_load(self): with test_util.TempDirectory() as filename: @@ -218,6 +218,11 @@ def test_save_and_load(self): self.test_export_coreml() print("Export coreml passed") +class ImageSimilaritySqueezeNetTest(ImageSimilarityTest): + @classmethod + def setUpClass(self): + super(ImageSimilaritySqueezeNetTest, self).setUpClass(model='squeezenet_v1.1', + input_image_shape=(3, 227, 227)) @unittest.skipIf(tc.util._num_available_cuda_gpus() == 0, 'Requires CUDA GPU') @pytest.mark.gpu diff --git a/src/unity/python/turicreate/test/test_object_detector.py b/src/unity/python/turicreate/test/test_object_detector.py index fa4812b28b..1c1d926f82 100644 --- a/src/unity/python/turicreate/test/test_object_detector.py +++ b/src/unity/python/turicreate/test/test_object_detector.py @@ -290,6 +290,7 @@ def test_export_coreml(self): @unittest.skipIf(_mac_ver() < (10, 14), "Non-maximum suppression is only supported on MacOS 10.14+.") def test_export_coreml_with_non_maximum_suppression(self): + from PIL import Image filename = tempfile.mkstemp('bingo.mlmodel')[1] self.model.export_coreml(filename, include_non_maximum_suppression=True) @@ -317,8 +318,6 @@ def test_export_coreml_with_non_maximum_suppression(self): model2 = tc.object_detector.create(sf, max_iterations=1) model2.export_coreml(filename2, include_non_maximum_suppression=True) - - @unittest.skipIf(sys.platform != 'darwin' or _mac_ver() >= (10, 14), "GPU selection should fail on macOS 10.13 or below") def test_no_gpu_support_on_unsupported_macos(self): diff --git a/src/unity/python/turicreate/test/test_style_transfer.py b/src/unity/python/turicreate/test/test_style_transfer.py index 8ea7e287d9..ec96f7196b 100644 --- a/src/unity/python/turicreate/test/test_style_transfer.py +++ b/src/unity/python/turicreate/test/test_style_transfer.py @@ -71,12 +71,13 @@ def setUpClass(self): self.pre_trained_model = 'resnet-16' ## Create the model # Model - self.style_sf = _get_data(feature=self.feature, num_examples=_NUM_STYLES) - self.content_sf = _get_data(feature=self.feature) + self.style_sf = _get_data(feature=self.style_feature, num_examples=_NUM_STYLES) + self.content_sf = _get_data(feature=self.content_feature) self.num_styles = _NUM_STYLES self.model = tc.style_transfer.create(self.style_sf, self.content_sf, - feature=self.feature, + style_feature=self.style_feature, + content_feature=self.content_feature, max_iterations=0, model=self.pre_trained_model) @@ -144,7 +145,13 @@ def test_stylize_success(self): # Check the structure of the output _raise_error_if_not_sframe(stylized_out) - self.assertEqual(len(stylized_out), len(sf)) + if style is None: + num_styles = self.num_styles + elif isinstance(style, list): + num_styles = len(style) + else: + num_styles = 1 + 
self.assertEqual(len(stylized_out), len(sf)*num_styles) # Check if input and output image have the same shape input_size = (sf[self.content_feature][0].width, sf[self.content_feature][0].height) @@ -199,8 +206,8 @@ def test_export_coreml(self): if _mac_ver() >= (10, 13): index_data = np.zeros(self.num_styles) index_data[0] = 1 - coreml_output = coreml_model.predict({self.feature: pil_img, 'index':index_data}, usesCPUOnly = True) - img = coreml_output[coreml_output.keys()[0]] + coreml_output = coreml_model.predict({self.content_feature: pil_img, 'index':index_data}, usesCPUOnly = True) + img = next(iter(coreml_output.values())) img = np.asarray(img) img = img[..., 0:3] @@ -215,14 +222,6 @@ def test_export_coreml(self): model2 = tc.style_transfer.create(self.style_sf, self.content_sf, max_iterations=1) model2.export_coreml(filename2) - @unittest.skipIf(sys.platform != 'darwin', 'Only supported on Mac') - def test_no_gpu_mac_support(self): - num_gpus = tc.config.get_num_gpus() - tc.config.set_num_gpus(1) - with self.assertRaises(_ToolkitError): - tc.style_transfer.create(self.style_sf, self.content_sf, max_iterations=1) - tc.config.set_num_gpus(num_gpus) - def test_repr(self): model = self.model self.assertEqual(type(str(model)), str) @@ -239,14 +238,14 @@ def test_save_and_load(self): print("Get styles passed") -@unittest.skipIf(tc.util._num_available_gpus() == 0, 'Requires GPU') +@unittest.skipIf(tc.util._num_available_cuda_gpus() == 0, 'Requires CUDA GPU') @pytest.mark.gpu class StyleTransferGPUTest(unittest.TestCase): @classmethod def setUpClass(self): - self.feature = 'image' - self.style_sf = _get_data(feature=self.feature) - self.content_sf = _get_data(feature=self.feature) + self.style_feature = self.content_feature = 'image' + self.style_sf = _get_data(feature=self.style_feature) + self.content_sf = _get_data(feature=self.content_feature) def test_gpu_save_load_export(self): diff --git a/src/unity/python/turicreate/toolkits/_image_feature_extractor.py b/src/unity/python/turicreate/toolkits/_image_feature_extractor.py index dbd220e941..cbc45df61c 100644 --- a/src/unity/python/turicreate/toolkits/_image_feature_extractor.py +++ b/src/unity/python/turicreate/toolkits/_image_feature_extractor.py @@ -28,12 +28,13 @@ def _create_feature_extractor(model_name): os.makedirs(download_path) if(model_name == 'resnet-50'): - # TODO: save converted model on developer.apple.com - from turicreate.toolkits import _pre_trained_models - mxnetResNet = _pre_trained_models.ResNetImageClassifier() - feature_extractor = MXFeatureExtractor(mxnetResNet) - mlModel = feature_extractor.get_coreml_model() - mlModel.save(download_path + "/Resnet50.mlmodel") + mlmodel_resnet_save_path = download_path + "/Resnet50.mlmodel" + if not os.path.exists(mlmodel_resnet_save_path): + from turicreate.toolkits import _pre_trained_models + mxnetResNet = _pre_trained_models.ResNetImageClassifier() + feature_extractor = MXFeatureExtractor(mxnetResNet) + mlModel = feature_extractor.get_coreml_model() + mlModel.save(mlmodel_resnet_save_path) result = extensions.__dict__["image_deep_feature_extractor"]() result.init_options({'model_name': model_name, 'download_path': download_path}) @@ -103,7 +104,7 @@ def _get_mx_module(mxmodel, data_layer, feature_layer, context, model.set_params(arg_params, aux_params) return model - def extract_features(self, dataset, feature, batch_size=512, verbose=False): + def extract_features(self, dataset, feature, batch_size=64, verbose=False): """ Parameters ---------- @@ -111,48 +112,74 @@ def 
extract_features(self, dataset, feature, batch_size=512, verbose=False): SFrame of images """ from ..mx import SFrameImageIter as _SFrameImageIter + from six.moves.queue import Queue as _Queue + from threading import Thread as _Thread import turicreate as _tc import array if len(dataset) == 0: return _tc.SArray([], array.array) - # Resize images if needed - preprocessed_dataset = _tc.SFrame() - if verbose: - print("Resizing images...") - preprocessed_dataset[feature] = _tc.image_analysis.resize( - dataset[feature], *tuple(reversed(self.image_shape))) - batch_size = min(len(dataset), batch_size) # Make a data iterator - dataIter = _SFrameImageIter(sframe=preprocessed_dataset, data_field=[feature], batch_size=batch_size) + dataIter = _SFrameImageIter(sframe=dataset, data_field=[feature], batch_size=batch_size, image_shape=self.image_shape) # Setup the MXNet model model = MXFeatureExtractor._get_mx_module(self.ptModel.mxmodel, self.data_layer, self.feature_layer, self.context, self.image_shape, batch_size) out = _tc.SArrayBuilder(dtype = array.array) - num_processed = 0 + progress = { 'num_processed' : 0, 'total' : len(dataset) } if verbose: print("Performing feature extraction on resized images...") - while dataIter.has_next: - if dataIter.data_shape[1:] != self.image_shape: - raise RuntimeError("Expected image of size %s. Got %s instead." % ( - self.image_shape, dataIter.data_shape[1:])) - model.forward(next(dataIter)) + + # Encapsulates the work done by the MXNet model for a single batch + def handle_request(batch): + model.forward(batch) mx_out = [array.array('d',m) for m in model.get_outputs()[0].asnumpy()] - if dataIter.getpad() != 0: + if batch.pad != 0: # If batch size is not evenly divisible by the length, it will loop back around. # We don't want that. - mx_out = mx_out[:-dataIter.getpad()] + mx_out = mx_out[:-batch.pad] + return mx_out + + # Copies the output from MXNet into the SArrayBuilder and emits progress + def consume_response(mx_out): out.append_multiple(mx_out) - num_processed += batch_size - num_processed = min(len(dataset), num_processed) + progress['num_processed'] += len(mx_out) if verbose: print('Completed {num_processed:{width}d}/{total:{width}d}'.format( - num_processed = num_processed, total=len(dataset), width = len(str(len(dataset))))) + width = len(str(progress['total'])), **progress)) + + # Create a dedicated thread for performing MXNet work, using two FIFO + # queues for communication back and forth with this thread, with the + # goal of keeping MXNet busy throughout. + request_queue = _Queue() + response_queue = _Queue() + def mx_worker(): + while True: + batch = request_queue.get() # Consume request + if batch is None: + # No more work remains. Allow the thread to finish. + return + response_queue.put(handle_request(batch)) # Produce response + mx_worker_thread = _Thread(target=mx_worker) + mx_worker_thread.start() + + try: + # Attempt to have two requests in progress at any one time (double + # buffering), so that the iterator is creating one batch while MXNet + # performs inference on the other. + if dataIter.has_next: + request_queue.put(next(dataIter)) # Produce request + while dataIter.has_next: + request_queue.put(next(dataIter)) # Produce request + consume_response(response_queue.get()) + consume_response(response_queue.get()) + finally: + # Tell the worker thread to shut down. 
+ request_queue.put(None) return out.close() diff --git a/src/unity/python/turicreate/toolkits/_mxnet_utils.py b/src/unity/python/turicreate/toolkits/_mxnet_utils.py index 170019e9fa..4ce96ffb19 100644 --- a/src/unity/python/turicreate/toolkits/_mxnet_utils.py +++ b/src/unity/python/turicreate/toolkits/_mxnet_utils.py @@ -127,19 +127,3 @@ def load_net_params_from_state(net_params, state, ctx = None): #net_params[k].set_data(net_params_dict[k]) net_params[k]._load_init(net_params_dict[k], ctx) return net_params - - -# mean subtraction -def subtract_imagenet_mean(batch): - """Subtract ImageNet mean from RGB image""" - from mxnet import nd - batch = batch * 255.0 - batch = nd.swapaxes(batch,0, 1) - (r, g, b) = nd.split(batch, num_outputs=3, axis=0) - mean_values = [123.68, 116.779, 103.939] - r = r - mean_values[0] - g = g - mean_values[1] - b = b - mean_values[2] - batch = nd.concat(r, g, b, dim=0) - batch = nd.swapaxes(batch,0, 1) - return batch diff --git a/src/unity/python/turicreate/toolkits/activity_classifier/_sframe_sequence_iterator.py b/src/unity/python/turicreate/toolkits/activity_classifier/_sframe_sequence_iterator.py index a436928fee..e5b24afd4f 100644 --- a/src/unity/python/turicreate/toolkits/activity_classifier/_sframe_sequence_iterator.py +++ b/src/unity/python/turicreate/toolkits/activity_classifier/_sframe_sequence_iterator.py @@ -51,7 +51,8 @@ def _load_into_numpy(sf, np_array, start, end, strides=None, shape=None): np_array[:] = 0.0 np_array_2d = np_array.reshape((np_array.shape[0], np_array.shape[1] * np_array.shape[2])) _extensions.sframe_load_to_numpy(sf, np_array.ctypes.data, - np_array_2d.strides, np_array_2d.shape[1:], start, end) + np_array_2d.strides, np_array_2d.shape, + start, end) class SFrameSequenceIter(_mx.io.DataIter): diff --git a/src/unity/python/turicreate/toolkits/image_analysis/image_analysis.py b/src/unity/python/turicreate/toolkits/image_analysis/image_analysis.py index 65142a7723..6d881d318c 100644 --- a/src/unity/python/turicreate/toolkits/image_analysis/image_analysis.py +++ b/src/unity/python/turicreate/toolkits/image_analysis/image_analysis.py @@ -56,7 +56,7 @@ def load_images(url, format='auto', with_path=True, recursive=True, ignore_failu """ from ... import extensions as _extensions from ...util import _make_internal_url - return _extensions.load_images(_make_internal_url(url), format, with_path, + return _extensions.load_images(url, format, with_path, recursive, ignore_failure, random_order) diff --git a/src/unity/python/turicreate/toolkits/image_classifier/image_classifier.py b/src/unity/python/turicreate/toolkits/image_classifier/image_classifier.py index 85f6f25e23..feaa3b21a9 100644 --- a/src/unity/python/turicreate/toolkits/image_classifier/image_classifier.py +++ b/src/unity/python/turicreate/toolkits/image_classifier/image_classifier.py @@ -20,6 +20,7 @@ from turicreate.toolkits._main import ToolkitError as _ToolkitError from turicreate.toolkits._model import PythonProxy as _PythonProxy from turicreate import config as _tc_config +from .._internal_utils import _mac_ver from .. import _mxnet_utils from .. import _pre_trained_models from .. import _image_feature_extractor @@ -27,7 +28,8 @@ _numeric_param_check_range) def create(dataset, target, feature = None, model = 'resnet-50', - max_iterations=10, verbose=True, seed=None): + validation_set='auto', max_iterations = 10, verbose = True, + seed = None, batch_size=64): """ Create a :class:`ImageClassifier` model. 
@@ -53,14 +55,30 @@ def create(dataset, target, feature = None, model = 'resnet-50', feature. model : string optional - Uses a pretrained model to bootstrap an image classifier + Uses a pretrained model to bootstrap an image classifier: - "resnet-50" : Uses a pretrained resnet model. + Exported Core ML model will be ~90M. + - "squeezenet_v1.1" : Uses a pretrained squeezenet model. + Exported Core ML model will be ~4.7M. + + - "VisionFeaturePrint_Screen": Uses an OS internal feature extractor. + Only available on iOS, tvOS 12.0+, + macOS 10.14+. + Exported Core ML model will be ~41K. Models are downloaded from the internet if not available locally. Once downloaded, the models are cached for future use. + validation_set : SFrame, optional + A dataset for monitoring the model's generalization performance. + The format of this SFrame must be the same as the training set. + By default this argument is set to 'auto' and a validation set is + automatically sampled and used for progress printing. If + validation_set is set to None, then no additional metrics + are computed. The default value is 'auto'. + max_iterations : float, optional The maximum number of allowed passes through the data. More passes over the data can result in a more accurately trained model. Consider @@ -74,6 +92,10 @@ def create(dataset, target, feature = None, model = 'resnet-50', Seed for random number generation. Set this value to ensure that the same model is created every time. + batch_size : int, optional + If you are getting memory errors, try decreasing this value. If you + have a powerful computer, increasing this value may improve performance. + Returns ------- out : ImageClassifier @@ -102,7 +124,7 @@ def create(dataset, target, feature = None, model = 'resnet-50', # Check model parameter allowed_models = list(_pre_trained_models.MODELS.keys()) if _mac_ver() >= (10,14): - allowed_models.append('sceneVisionFeaturePrint_v1') + allowed_models.append('VisionFeaturePrint_Screen') _tkutl._check_categorical_option_type('model', model, allowed_models) # Check dataset parameter @@ -113,6 +135,12 @@ def create(dataset, target, feature = None, model = 'resnet-50', if target not in dataset.column_names(): raise _ToolkitError("Target column '%s' does not exist" % target) + if(batch_size < 1): + raise ValueError("'batch_size' must be greater than or equal to 1") + + if not (isinstance(validation_set, _tc.SFrame) or validation_set == 'auto' or validation_set is None): + raise TypeError("Unrecognized value for 'validation_set'.") + if feature is None: feature = _tkutl._find_only_image_column(dataset) @@ -121,8 +149,15 @@ def create(dataset, target, feature = None, model = 'resnet-50', # Extract features extracted_features = _tc.SFrame({ target: dataset[target], - '__image_features__': feature_extractor.extract_features(dataset, feature, verbose=verbose), + '__image_features__': feature_extractor.extract_features(dataset, feature, verbose=verbose, batch_size=batch_size), }) + if isinstance(validation_set, _tc.SFrame): + extracted_features_validation = _tc.SFrame({ + target: validation_set[target], + '__image_features__': feature_extractor.extract_features(validation_set, feature, verbose=verbose, batch_size=batch_size), + }) + else: + extracted_features_validation = validation_set # Train a classifier using the extracted features extracted_features[target] = dataset[target] @@ -130,10 +165,15 @@ def create(dataset, target, feature = None, model = 'resnet-50', features=['__image_features__'], target=target,
max_iterations=max_iterations, + validation_set=extracted_features_validation, seed=seed, verbose=verbose) - input_image_shape = _pre_trained_models.MODELS[model].input_image_shape + # set input image shape + if model in _pre_trained_models.MODELS: + input_image_shape = _pre_trained_models.MODELS[model].input_image_shape + else: # model == VisionFeaturePrint_Screen + input_image_shape = (3, 299, 299) # Save the model state = { @@ -197,7 +237,11 @@ def _load_version(cls, state, version): state['classes'] = state['classifier'].classes # Load pre-trained model & feature extractor - state['feature_extractor'] = _image_feature_extractor._create_feature_extractor(state['model']) + model_name = state['model'] + if model_name == "VisionFeaturePrint_Screen" and _mac_ver() < (10,14): + raise _ToolkitError("Cannot load model on this operating system. This model uses VisionFeaturePrint_Screen, " \ + "which is only supported on macOS 10.14 and higher.") + state['feature_extractor'] = _image_feature_extractor._create_feature_extractor(model_name) state['input_image_shape'] = tuple([int(i) for i in state['input_image_shape']]) return ImageClassifier(state) @@ -256,7 +300,7 @@ def _get_summary_struct(self): section_titles = ['Schema', 'Training summary'] return([model_fields, training_fields], section_titles) - def predict(self, dataset, output_type='class'): + def predict(self, dataset, output_type='class', batch_size=64): """ Return predictions for ``dataset``, using the trained logistic regression model. Predictions can be generated as class labels, @@ -293,6 +337,10 @@ class as a vector. The probability of the first class (sorted - 'class': Class prediction. For multi-class classification, this returns the class with maximum probability. + batch_size : int, optional + If you are getting memory errors, try decreasing this value. If you + have a powerful computer, increasing this value may improve performance. + Returns ------- out : SArray @@ -311,16 +359,18 @@ class as a vector. The probability of the first class (sorted """ if not isinstance(dataset, (_tc.SFrame, _tc.SArray, _tc.Image)): raise TypeError('dataset must be either an SFrame, SArray or turicreate.Image') + if(batch_size < 1): + raise ValueError("'batch_size' must be greater than or equal to 1") + if isinstance(dataset, _tc.SArray): dataset = _tc.SFrame({self.feature: dataset}) elif isinstance(dataset, _tc.Image): dataset = _tc.SFrame({self.feature: [dataset]}) - extracted_features = self._extract_features(dataset) + extracted_features = self._extract_features(dataset, batch_size=batch_size) return self.classifier.predict(extracted_features, output_type=output_type) - def classify(self, dataset): + def classify(self, dataset, batch_size=64): """ Return a classification, for each example in the ``dataset``, using the trained logistic regression model. The output SFrame contains predictions @@ -335,6 +385,10 @@ def classify(self, dataset): names as the features used for model training, but does not require a target column. Additional columns are ignored. + batch_size : int, optional + If you are getting memory errors, try decreasing this value. If you + have a powerful computer, increasing this value may improve performance.
+ Returns ------- out : SFrame @@ -351,16 +405,18 @@ def classify(self, dataset): """ if not isinstance(dataset, (_tc.SFrame, _tc.SArray, _tc.Image)): raise TypeError('dataset must be either an SFrame, SArray or turicreate.Image') + if(batch_size < 1): + raise ValueError("'batch_size' must be greater than or equal to 1") if isinstance(dataset, _tc.SArray): dataset = _tc.SFrame({self.feature: dataset}) elif isinstance(dataset, _tc.Image): dataset = _tc.SFrame({self.feature: [dataset]}) - extracted_features = self._extract_features(dataset) + extracted_features = self._extract_features(dataset, batch_size=batch_size) return self.classifier.classify(extracted_features) - def predict_topk(self, dataset, output_type="probability", k=3): + def predict_topk(self, dataset, output_type="probability", k=3, batch_size=64): """ Return top-k predictions for the ``dataset``, using the trained model. Predictions are returned as an SFrame with three columns: `id`, @@ -417,6 +473,8 @@ def predict_topk(self, dataset, output_type="probability", k=3): """ if not isinstance(dataset, (_tc.SFrame, _tc.SArray, _tc.Image)): raise TypeError('dataset must be either an SFrame, SArray or turicreate.Image') + if(batch_size < 1): + raise ValueError("'batch_size' must be greater than or equal to 1") if isinstance(dataset, _tc.SArray): dataset = _tc.SFrame({self.feature: dataset}) @@ -426,7 +484,7 @@ def predict_topk(self, dataset, output_type="probability", k=3): extracted_features = self._extract_features(dataset) return self.classifier.predict_topk(extracted_features, output_type = output_type, k = k) - def evaluate(self, dataset, metric='auto', verbose=True): + def evaluate(self, dataset, metric='auto', verbose=True, batch_size=64): """ Evaluate the model by making predictions of target values and comparing these to actual values. @@ -457,6 +515,10 @@ def evaluate(self, dataset, metric='auto', verbose=True): verbose : bool, optional If True, prints progress updates and model details. + batch_size : int, optional + If you are getting memory errors, try decreasing this value. If you + have a powerful computer, increasing this value may improve performance. 
+ Returns ------- out : dict @@ -475,13 +537,16 @@ def evaluate(self, dataset, metric='auto', verbose=True): >>> results = model.evaluate(data) >>> print results['accuracy'] """ - extracted_features = self._extract_features(dataset, verbose=verbose) + if(batch_size < 1): + raise ValueError("'batch_size' must be greater than or equal to 1") + + extracted_features = self._extract_features(dataset, verbose=verbose, batch_size=batch_size) extracted_features[self.target] = dataset[self.target] return self.classifier.evaluate(extracted_features, metric = metric) - def _extract_features(self, dataset, verbose=False): + def _extract_features(self, dataset, verbose=False, batch_size=64): return _tc.SFrame({ - '__image_features__': self.feature_extractor.extract_features(dataset, self.feature, verbose=verbose) + '__image_features__': self.feature_extractor.extract_features(dataset, self.feature, verbose=verbose, batch_size=batch_size) }) def export_coreml(self, filename): @@ -496,79 +561,191 @@ def export_coreml(self, filename): -------- >>> model.export_coreml('myModel.mlmodel') """ - ptModel = _pre_trained_models.MODELS[self.model]() - feature_extractor = _image_feature_extractor.MXFeatureExtractor(ptModel) - - coreml_model = feature_extractor.get_coreml_model() - spec = coreml_model.get_spec() - nn_spec = spec.neuralNetworkClassifier - num_classes = self.num_classes - - # Replace the softmax layer with new coeffients - fc_layer = nn_spec.layers[-2] - fc_layer_params = fc_layer.innerProduct - fc_layer_params.outputChannels = self.classifier.num_classes - inputChannels = fc_layer_params.inputChannels - fc_layer_params.hasBias = True - - coefs = self.classifier.coefficients - weights = fc_layer_params.weights - bias = fc_layer_params.bias - del weights.floatValue[:] - del bias.floatValue[:] - - import numpy as np - W = np.array(coefs[coefs['index'] != None]['value'], ndmin = 2).reshape( - inputChannels, num_classes - 1, order = 'F') - b = coefs[coefs['index'] == None]['value'] - Wa = np.hstack((np.zeros((inputChannels, 1)), W)) - weights.floatValue.extend(Wa.flatten(order = 'F')) - bias.floatValue.extend([0.0] + list(b)) - - # Replace the classifier with the new classes - class_labels = self.classifier.classes - - probOutput = spec.description.output[0] - classLabel = spec.description.output[1] - probOutput.type.dictionaryType.MergeFromString(b'') - if type(class_labels[0]) == int: - nn_spec.ClearField('int64ClassLabels') - probOutput.type.dictionaryType.int64KeyType.MergeFromString(b'') - classLabel.type.int64Type.MergeFromString(b'') - del nn_spec.int64ClassLabels.vector[:] - for c in class_labels: - nn_spec.int64ClassLabels.vector.append(c) - else: - nn_spec.ClearField('stringClassLabels') - probOutput.type.dictionaryType.stringKeyType.MergeFromString(b'') - classLabel.type.stringType.MergeFromString(b'') - del nn_spec.stringClassLabels.vector[:] - for c in class_labels: - nn_spec.stringClassLabels.vector.append(c) - import coremltools - prob_name = self.target + 'Probability' - label_name = self.target - old_output_name = spec.neuralNetworkClassifier.layers[-1].name - coremltools.models.utils.rename_feature(spec, 'classLabel', label_name) - coremltools.models.utils.rename_feature(spec, old_output_name, prob_name) - if spec.neuralNetworkClassifier.layers[-1].name == old_output_name: - spec.neuralNetworkClassifier.layers[-1].name = prob_name - if spec.neuralNetworkClassifier.labelProbabilityLayerName == old_output_name: - spec.neuralNetworkClassifier.labelProbabilityLayerName = prob_name - 
coremltools.models.utils.rename_feature(spec, 'data', self.feature) - spec.neuralNetworkClassifier.preprocessing[0].featureName = self.feature - - mlmodel = coremltools.models.MLModel(spec) - model_type = 'image classifier (%s)' % self.model - mlmodel.short_description = _coreml_utils._mlmodel_short_description(model_type) - mlmodel.input_description[self.feature] = u'Input image' - mlmodel.output_description[prob_name] = 'Prediction probabilities' - mlmodel.output_description[label_name] = 'Class label of top prediction' - _coreml_utils._set_model_metadata(mlmodel, self.__class__.__name__, { + # First define three internal helper functions + + + # Internal helper function + def _create_vision_feature_print_screen(): + prob_name = self.target + 'Probability' + + # + # Setup the top level (pipeline classifier) spec + # + top_spec = coremltools.proto.Model_pb2.Model() + top_spec.specificationVersion = 3 + + desc = top_spec.description + desc.output.add().name = prob_name + desc.output.add().name = self.target + + desc.predictedFeatureName = self.target + desc.predictedProbabilitiesName = prob_name + + input = desc.input.add() + input.name = self.feature + input.type.imageType.width = 299 + input.type.imageType.height = 299 + BGR_VALUE = coremltools.proto.FeatureTypes_pb2.ImageFeatureType.ColorSpace.Value('BGR') + input.type.imageType.colorSpace = BGR_VALUE + + # + # Scene print feature extractor + # + pipelineClassifier = top_spec.pipelineClassifier + scene_print = pipelineClassifier.pipeline.models.add() + scene_print.specificationVersion = 3 + scene_print.visionFeaturePrint.scene.version = 1 + + input = scene_print.description.input.add() + input.name = self.feature + input.type.imageType.width = 299 + input.type.imageType.height = 299 + input.type.imageType.colorSpace = BGR_VALUE + + output = scene_print.description.output.add() + output.name = "output_name" + DOUBLE_ARRAY_VALUE = coremltools.proto.FeatureTypes_pb2.ArrayFeatureType.ArrayDataType.Value('DOUBLE') + output.type.multiArrayType.dataType = DOUBLE_ARRAY_VALUE + output.type.multiArrayType.shape.append(2048) + + # + # Neural Network Classifier, which is just logistic regression, in order to use GPUs + # + temp = top_spec.pipelineClassifier.pipeline.models.add() + temp.specificationVersion = 3 + + # Empty inner product layer + nn_spec = temp.neuralNetworkClassifier + feature_layer = nn_spec.layers.add() + feature_layer.name = "feature_layer" + feature_layer.input.append("output_name") + feature_layer.output.append("softmax_input") + fc_layer_params = feature_layer.innerProduct + fc_layer_params.inputChannels = 2048 + + # Softmax layer + softmax = nn_spec.layers.add() + softmax.name = "softmax" + softmax.softmax.MergeFromString(b'') + softmax.input.append("softmax_input") + softmax.output.append(prob_name) + + input = temp.description.input.add() + input.name = "output_name" + input.type.multiArrayType.dataType = DOUBLE_ARRAY_VALUE + input.type.multiArrayType.shape.append(2048) + + # Set outputs + desc = temp.description + prob_output = desc.output.add() + prob_output.name = prob_name + label_output = desc.output.add() + label_output.name = self.target + + if type(self.classifier.classes[0]) == int: + prob_output.type.dictionaryType.int64KeyType.MergeFromString(b'') + label_output.type.int64Type.MergeFromString(b'') + else: + prob_output.type.dictionaryType.stringKeyType.MergeFromString(b'') + label_output.type.stringType.MergeFromString(b'') + + temp.description.predictedFeatureName = self.target + 
temp.description.predictedProbabilitiesName = prob_name + + return top_spec + + + # Internal helper function + def _update_last_two_layers(nn_spec): + # Replace the softmax layer with new coeffients + num_classes = self.num_classes + fc_layer = nn_spec.layers[-2] + fc_layer_params = fc_layer.innerProduct + fc_layer_params.outputChannels = self.classifier.num_classes + inputChannels = fc_layer_params.inputChannels + fc_layer_params.hasBias = True + + coefs = self.classifier.coefficients + weights = fc_layer_params.weights + bias = fc_layer_params.bias + del weights.floatValue[:] + del bias.floatValue[:] + + import numpy as np + W = np.array(coefs[coefs['index'] != None]['value'], ndmin = 2).reshape( + inputChannels, num_classes - 1, order = 'F') + b = coefs[coefs['index'] == None]['value'] + Wa = np.hstack((np.zeros((inputChannels, 1)), W)) + weights.floatValue.extend(Wa.flatten(order = 'F')) + bias.floatValue.extend([0.0] + list(b)) + + # Internal helper function + def _set_inputs_outputs_and_metadata(spec, nn_spec): + # Replace the classifier with the new classes + class_labels = self.classifier.classes + + probOutput = spec.description.output[0] + classLabel = spec.description.output[1] + probOutput.type.dictionaryType.MergeFromString(b'') + if type(class_labels[0]) == int: + nn_spec.ClearField('int64ClassLabels') + probOutput.type.dictionaryType.int64KeyType.MergeFromString(b'') + classLabel.type.int64Type.MergeFromString(b'') + del nn_spec.int64ClassLabels.vector[:] + for c in class_labels: + nn_spec.int64ClassLabels.vector.append(c) + else: + nn_spec.ClearField('stringClassLabels') + probOutput.type.dictionaryType.stringKeyType.MergeFromString(b'') + classLabel.type.stringType.MergeFromString(b'') + del nn_spec.stringClassLabels.vector[:] + for c in class_labels: + nn_spec.stringClassLabels.vector.append(c) + + prob_name = self.target + 'Probability' + label_name = self.target + old_output_name = nn_spec.layers[-1].name + coremltools.models.utils.rename_feature(spec, 'classLabel', label_name) + coremltools.models.utils.rename_feature(spec, old_output_name, prob_name) + if nn_spec.layers[-1].name == old_output_name: + nn_spec.layers[-1].name = prob_name + if nn_spec.labelProbabilityLayerName == old_output_name: + nn_spec.labelProbabilityLayerName = prob_name + coremltools.models.utils.rename_feature(spec, 'data', self.feature) + if len(nn_spec.preprocessing) > 0: + nn_spec.preprocessing[0].featureName = self.feature + + mlmodel = coremltools.models.MLModel(spec) + model_type = 'image classifier (%s)' % self.model + mlmodel.short_description = _coreml_utils._mlmodel_short_description(model_type) + mlmodel.input_description[self.feature] = u'Input image' + mlmodel.output_description[prob_name] = 'Prediction probabilities' + mlmodel.output_description[label_name] = 'Class label of top prediction' + _coreml_utils._set_model_metadata(mlmodel, self.__class__.__name__, { 'model': self.model, 'target': self.target, 'features': self.feature, 'max_iterations': str(self.max_iterations), }, version=ImageClassifier._PYTHON_IMAGE_CLASSIFIER_VERSION) + + return mlmodel + + + # main part of the export_coreml function + if self.model in _pre_trained_models.MODELS: + ptModel = _pre_trained_models.MODELS[self.model]() + feature_extractor = _image_feature_extractor.MXFeatureExtractor(ptModel) + + coreml_model = feature_extractor.get_coreml_model() + spec = coreml_model.get_spec() + nn_spec = spec.neuralNetworkClassifier + else: # model == VisionFeaturePrint_Screen + spec = 
_create_vision_feature_print_screen() + nn_spec = spec.pipelineClassifier.pipeline.models[1].neuralNetworkClassifier + + _update_last_two_layers(nn_spec) + mlmodel = _set_inputs_outputs_and_metadata(spec, nn_spec) mlmodel.save(filename) + + diff --git a/src/unity/python/turicreate/toolkits/image_similarity/image_similarity.py b/src/unity/python/turicreate/toolkits/image_similarity/image_similarity.py index 64f42930f6..8dda98d8d8 100644 --- a/src/unity/python/turicreate/toolkits/image_similarity/image_similarity.py +++ b/src/unity/python/turicreate/toolkits/image_similarity/image_similarity.py @@ -23,7 +23,8 @@ _numeric_param_check_range) -def create(dataset, label = None, feature = None, model = 'resnet-50', verbose = True): +def create(dataset, label = None, feature = None, model = 'resnet-50', verbose = True, + batch_size = 64): """ Create a :class:`ImageSimilarityModel` model. @@ -47,6 +48,7 @@ def create(dataset, label = None, feature = None, model = 'resnet-50', verbose = Uses a pretrained model to bootstrap an image similarity model - "resnet-50" : Uses a pretrained resnet model. + - "squeezenet_v1.1" : Uses a pretrained squeezenet model. Models are downloaded from the internet if not available locally. Once downloaded, the models are cached for future use. @@ -54,6 +56,10 @@ def create(dataset, label = None, feature = None, model = 'resnet-50', verbose = verbose : bool, optional If True, print progress updates and model details. + batch_size : int, optional + If you are getting memory errors, try decreasing this value. If you + have a powerful computer, increasing this value may improve performance. + Returns ------- out : ImageSimilarityModel @@ -98,6 +104,8 @@ def create(dataset, label = None, feature = None, model = 'resnet-50', verbose = raise _ToolkitError("Row label column '%s' does not exist" % label) if (feature is not None) and (feature not in dataset.column_names()): raise _ToolkitError("Image feature column '%s' does not exist" % feature) + if(batch_size < 1): + raise ValueError("'batch_size' must be greater than or equal to 1") # Set defaults if feature is None: @@ -107,7 +115,8 @@ def create(dataset, label = None, feature = None, model = 'resnet-50', verbose = # Extract features extracted_features = _tc.SFrame({ - '__image_features__': feature_extractor.extract_features(dataset, feature, verbose=verbose), + '__image_features__': feature_extractor.extract_features(dataset, feature, verbose=verbose, + batch_size=batch_size), }) # Train a similarity model using the extracted features @@ -249,12 +258,13 @@ def _get_summary_struct(self): section_titles = ['Schema', 'Training summary'] return([model_fields, training_fields], section_titles) - def _extract_features(self, dataset, verbose): + def _extract_features(self, dataset, verbose, batch_size = 64): return _tc.SFrame({ - '__image_features__': self.feature_extractor.extract_features(dataset, self.feature, verbose=verbose) + '__image_features__': self.feature_extractor.extract_features(dataset, self.feature, verbose=verbose, + batch_size=batch_size) }) - def query(self, dataset, label=None, k=5, radius=None, verbose=True): + def query(self, dataset, label=None, k=5, radius=None, verbose=True, batch_size=64): """ For each image, retrieve the nearest neighbors from the model's stored data. In general, the query dataset does not need to be the same as @@ -288,6 +298,10 @@ def query(self, dataset, label=None, k=5, radius=None, verbose=True): verbose: bool, optional If True, print progress updates and model details. 
+ batch_size : int, optional + If you are getting memory errors, try decreasing this value. If you + have a powerful computer, increasing this value may improve performance. + Returns ------- out : SFrame @@ -325,13 +339,15 @@ def query(self, dataset, label=None, k=5, radius=None, verbose=True): """ if not isinstance(dataset, (_tc.SFrame, _tc.SArray, _tc.Image)): raise TypeError('dataset must be either an SFrame, SArray or turicreate.Image') + if(batch_size < 1): + raise ValueError("'batch_size' must be greater than or equal to 1") if isinstance(dataset, _tc.SArray): dataset = _tc.SFrame({self.feature: dataset}) elif isinstance(dataset, _tc.Image): dataset = _tc.SFrame({self.feature: [dataset]}) - extracted_features = self._extract_features(dataset, verbose=verbose) + extracted_features = self._extract_features(dataset, verbose=verbose, batch_size=batch_size) if label is not None: extracted_features[label] = dataset[label] return self.similarity_model.query(extracted_features, label, k, radius, verbose) diff --git a/src/unity/python/turicreate/toolkits/style_transfer/_model.py b/src/unity/python/turicreate/toolkits/style_transfer/_model.py index 598edae406..91536e53dd 100644 --- a/src/unity/python/turicreate/toolkits/style_transfer/_model.py +++ b/src/unity/python/turicreate/toolkits/style_transfer/_model.py @@ -17,13 +17,11 @@ class InstanceNorm(HybridBlock): """ Conditional Instance Norm """ - def __init__(self, epsilon=1e-5, center=True, scale=True, - beta_initializer='zeros', gamma_initializer='ones', - in_channels=0, num_styles=0, **kwargs): + def __init__(self, in_channels, num_styles, batch_size, epsilon=1e-5, + center=True, scale=True, beta_initializer='zeros', + gamma_initializer='ones', **kwargs): super(InstanceNorm, self).__init__(**kwargs) self._kwargs = {'eps': epsilon} - if in_channels != 0: - self.in_channels = in_channels self.gamma = self.params.get('gamma', grad_req='write' if scale else 'null', shape=(num_styles, in_channels, ), init=gamma_initializer, allow_deferred_init=True) @@ -32,19 +30,33 @@ def __init__(self, epsilon=1e-5, center=True, scale=True, allow_deferred_init=True) self.num_styles = num_styles self.in_channels = in_channels - + self.batch_size = batch_size def hybrid_forward(self, F, X, style_idx, gamma, beta): - if F == _mx.sym: # for coreml + if F == _mx.sym and self.batch_size == 0: # for coreml gamma = _mx.sym.Embedding(data=style_idx, input_dim=self.num_styles, output_dim=self.in_channels) beta = _mx.sym.Embedding(data=style_idx, input_dim=self.num_styles, output_dim=self.in_channels) return F.InstanceNorm(X, gamma, beta, name='_fwd', **self._kwargs) - res = [] - for idx, style in enumerate(style_idx): - res.append(F.InstanceNorm(X[idx:idx+1], gamma[int(style)], beta[int(style)], name='_fwd', **self._kwargs)) + em_gamma = F.take(gamma, indices=style_idx, axis=0) + em_beta = F.take(beta, indices=style_idx, axis=0) + + sp_gammas = F.split(em_gamma, axis=0, num_outputs=self.batch_size, squeeze_axis=True) + sp_betas = F.split(em_beta, axis=0, num_outputs=self.batch_size, squeeze_axis=True) + + if self.batch_size == 1: + return F.InstanceNorm(X, sp_gammas, sp_betas, name='_fwd', **self._kwargs) + else: + Xs = F.split(X, axis=0, num_outputs=self.batch_size) + + res = [] + for idx in range(self.batch_size): + gamma0 = sp_gammas[idx] + beta0 = sp_betas[idx] + X_slice = Xs[idx] + res.append(F.InstanceNorm(X_slice, gamma0, beta0, name='_fwd', **self._kwargs)) - return _mx.nd.concat(*res, dim=0) + return F.concat(*res, dim=0) class ResidualBlock(HybridBlock): @@ 
-52,14 +64,26 @@ class ResidualBlock(HybridBlock): Residual network """ - def __init__(self, num_styles): + def __init__(self, num_styles, batch_size): super(ResidualBlock, self).__init__() with self.name_scope(): self.conv1 = _nn.Conv2D(128, 3, 1, 1, in_channels=128, use_bias=False) - self.inst_norm1 = InstanceNorm(in_channels=128, num_styles=num_styles) + self.inst_norm1 = InstanceNorm(in_channels=128, num_styles=num_styles, batch_size=batch_size) self.conv2 = _nn.Conv2D(128, 3, 1, 1, in_channels=128, use_bias=False) - self.inst_norm2 = InstanceNorm(in_channels=128, num_styles=num_styles) + self.inst_norm2 = InstanceNorm(in_channels=128, num_styles=num_styles, batch_size=batch_size) + + self._batch_size = batch_size + + @property + def batch_size(self): + return self._batch_size + + @batch_size.setter + def batch_size(self, batch_size): + self.inst_norm1.batch_size = batch_size + self.inst_norm2.batch_size = batch_size + self._batch_size = batch_size def hybrid_forward(self, F, x, style_idx): h1 = self.conv1(x) @@ -80,35 +104,51 @@ def gram_matrix(y): class Transformer(HybridBlock): - def __init__(self, num_styles): + def __init__(self, num_styles, batch_size): super(Transformer, self).__init__(prefix='transformer_') self.num_styles = num_styles block = ResidualBlock + self.scale255 = False with self.name_scope(): self.conv1 = _nn.Conv2D(32, 9, 1, 4, in_channels=3, use_bias=False) - self.inst_norm1 = InstanceNorm(in_channels=32, num_styles=num_styles) + self.inst_norm1 = InstanceNorm(in_channels=32, num_styles=num_styles, batch_size=batch_size) self.conv2 = _nn.Conv2D(64, 3, 2, 1, in_channels=32, use_bias=False) - self.inst_norm2 = InstanceNorm(in_channels=64, num_styles=num_styles) + self.inst_norm2 = InstanceNorm(in_channels=64, num_styles=num_styles, batch_size=batch_size) self.conv3 = _nn.Conv2D(128, 3, 2, 1, in_channels=64, use_bias=False) - self.inst_norm3 = InstanceNorm(in_channels=128, num_styles=num_styles) + self.inst_norm3 = InstanceNorm(in_channels=128, num_styles=num_styles, batch_size=batch_size) - self.residual1 = block(num_styles) - self.residual2 = block(num_styles) - self.residual3 = block(num_styles) - self.residual4 = block(num_styles) - self.residual5 = block(num_styles) + self.residual1 = block(num_styles, batch_size=batch_size) + self.residual2 = block(num_styles, batch_size=batch_size) + self.residual3 = block(num_styles, batch_size=batch_size) + self.residual4 = block(num_styles, batch_size=batch_size) + self.residual5 = block(num_styles, batch_size=batch_size) self.decoder_conv1 = _nn.Conv2D(64, 3, 1, 1, in_channels=128, use_bias=False) - self.inst_norm4 = InstanceNorm(in_channels=64, num_styles=num_styles) + self.inst_norm4 = InstanceNorm(in_channels=64, num_styles=num_styles, batch_size=batch_size) self.decoder_conv2 = _nn.Conv2D(32, 3, 1, 1, in_channels=64, use_bias=False) - self.inst_norm5 = InstanceNorm(in_channels=32, num_styles=num_styles) + self.inst_norm5 = InstanceNorm(in_channels=32, num_styles=num_styles, batch_size=batch_size) self.decoder_conv3 = _nn.Conv2D(3, 9, 1, 4, in_channels=32, use_bias=False) - self.inst_norm6 = InstanceNorm(in_channels=3, num_styles=num_styles) + self.inst_norm6 = InstanceNorm(in_channels=3, num_styles=num_styles, batch_size=batch_size) + + @property + def batch_size(self): + return self._batch_size + + @batch_size.setter + def batch_size(self, batch_size): + inst_norm_layers = [ + self.inst_norm1, self.inst_norm2, self.inst_norm3, + self.inst_norm4, self.inst_norm5, self.inst_norm6, + self.residual1, self.residual2, 
self.residual3, + self.residual4, self.residual5, + ] + for layer in inst_norm_layers: + layer.batch_size = batch_size def hybrid_forward(self, F, X, style_idx): h1 = self.conv1(X) @@ -141,10 +181,12 @@ def hybrid_forward(self, F, X, style_idx): d3 = self.decoder_conv3(d2) d3 = self.inst_norm6(d3, style_idx) - if F == _mx.sym: - return F.Activation(d3, 'sigmoid') * 255.0 - return F.Activation(d3, 'sigmoid') + z = F.Activation(d3, 'sigmoid') + if self.scale255: + return z * 255 + else: + return z class Vgg16(HybridBlock): @@ -166,7 +208,6 @@ def __init__(self): self.conv4_2 = _nn.Conv2D(in_channels=512, channels=512, kernel_size=3, padding=1) self.conv4_3 = _nn.Conv2D(in_channels=512, channels=512, kernel_size=3, padding=1) - def hybrid_forward(self, F, X): h = F.Activation(self.conv1_1(X), act_type='relu') h = F.Activation(self.conv1_2(h), act_type='relu') diff --git a/src/unity/python/turicreate/toolkits/style_transfer/_sframe_loader.py b/src/unity/python/turicreate/toolkits/style_transfer/_sframe_loader.py index b814c164d5..e57ffab7ee 100644 --- a/src/unity/python/turicreate/toolkits/style_transfer/_sframe_loader.py +++ b/src/unity/python/turicreate/toolkits/style_transfer/_sframe_loader.py @@ -43,7 +43,7 @@ class SFrameSTIter(_mx.io.DataIter): def __init__(self, sframe, batch_size, shuffle, feature_column, input_shape, num_epochs=None, repeat_each_image=1, - loader_type='stretch', aug_params={}): + loader_type='stretch', aug_params={}, sequential=True): if sframe[feature_column].dtype != _tc.Image: raise _ToolkitError('Feature column must be of type Image') @@ -92,7 +92,13 @@ def __init__(self, sframe, batch_size, shuffle, feature_column, self.sframe = sframe.copy() # Convert images to raw to eliminate overhead of decoding - self.sframe[_TMP_COL_PREP_IMAGE] = self.sframe[self.feature_column].apply(img_prep_fn) + if sequential: + builder = _tc.SArrayBuilder(_tc.Image) + for img in self.sframe[self.feature_column]: + builder.append(img_prep_fn(img)) + self.sframe[_TMP_COL_PREP_IMAGE] = builder.close() + else: + self.sframe[_TMP_COL_PREP_IMAGE] = self.sframe[self.feature_column].apply(img_prep_fn) self._provide_data = [ _mx.io.DataDesc(name='image', diff --git a/src/unity/python/turicreate/toolkits/style_transfer/style_transfer.py b/src/unity/python/turicreate/toolkits/style_transfer/style_transfer.py index 1ee4fbbb64..1ba88c12cb 100644 --- a/src/unity/python/turicreate/toolkits/style_transfer/style_transfer.py +++ b/src/unity/python/turicreate/toolkits/style_transfer/style_transfer.py @@ -13,22 +13,26 @@ from turicreate.toolkits import _coreml_utils from turicreate.toolkits._internal_utils import _raise_error_if_not_sframe from .. import _mxnet_utils -from ._model import Transformer as _Transformer -from ._model import Vgg16 as _Vgg16 -from ._model import gram_matrix as _gram_matrix from ._utils import _seconds_as_string from .. 
import _pre_trained_models from turicreate.toolkits._model import CustomModel as _CustomModel from turicreate.toolkits._main import ToolkitError as _ToolkitError -from mxnet import gluon as _gluon from turicreate.toolkits._model import PythonProxy as _PythonProxy import turicreate as _tc import numpy as _np -import mxnet as _mx import math as _math import six as _six +def _vgg16_data_prep(batch): + """ + Takes images scaled to [0, 1] and returns them appropriately scaled and + mean-subtracted for VGG-16 + """ + from mxnet import nd + mean = nd.array([123.68, 116.779, 103.939], ctx=batch.context) + return nd.broadcast_sub(255 * batch, mean.reshape((-1, 1, 1))) + def create(style_dataset, content_dataset, style_feature=None, content_feature=None, max_iterations=None, model='resnet-16', verbose=True, batch_size = 6, **kwargs): @@ -40,20 +44,25 @@ def create(style_dataset, content_dataset, style_feature=None, style_dataset: SFrame Input style images. The columns named by the ``style_feature`` parameters will be extracted for training the model. + content_dataset : SFrame Input content images. The columns named by the ``content_feature`` parameters will be extracted for training the model. + style_feature: string Name of the column containing the input images in style SFrame. 'None' (the default) indicates the only image column in the style SFrame should be used as the feature. + content_feature: string Name of the column containing the input images in content SFrame. 'None' (the default) indicates the only image column in the content SFrame should be used as the feature. + max_iterations : int The number of training iterations. If 'None' (the default), then it will be automatically determined based on the amount of data you provide. + model : string optional Style transfer model to use: @@ -82,6 +91,10 @@ def create(style_dataset, content_dataset, style_feature=None, -------- .. 
sourcecode:: python + # Create datasets + >>> content_dataset = turicreate.image_analysis.load_images('content_images/') + >>> style_dataset = turicreate.image_analysis.load_images('style_images/') + # Train a style transfer model >>> model = turicreate.style_transfer.create(content_dataset, style_dataset) @@ -91,7 +104,6 @@ def create(style_dataset, content_dataset, style_feature=None, # Visualize the stylized images >>> stylized_images.explore() - """ if len(style_dataset) == 0: raise _ToolkitError("style_dataset SFrame cannot be empty") @@ -120,11 +132,12 @@ def create(style_dataset, content_dataset, style_feature=None, 'lr': 0.001, 'content_loss_mult': 1.0, 'style_loss_mult': [1e-4, 1e-4, 1e-4, 1e-4], # conv 1-4 layers - 'fine_tune_all_params': False, + 'finetune_all_params': False, 'print_loss_breakdown': False, 'input_shape': (256, 256), 'training_content_loader_type': 'stretch', 'use_augmentation': False, + 'sequential_image_processing': False, # Only used if use_augmentaion is True 'aug_resize': 0, 'aug_rand_crop': 0.9, @@ -175,25 +188,29 @@ def create(style_dataset, content_dataset, style_feature=None, content_images_loader = _SFrameSTIter(content_dataset, batch_size, shuffle=True, feature_column=content_feature, input_shape=input_shape, num_epochs=max_iterations, - loader_type='stretch', aug_params=params) - style_images_loader = _SFrameSTIter(style_dataset, batch_size, shuffle=False, - feature_column=style_feature, input_shape=input_shape, - loader_type='stretch') + loader_type='stretch', aug_params=params, + sequential=params['sequential_image_processing']) ctx = _mxnet_utils.get_mxnet_context(max_devices=params['batch_size']) num_styles = len(style_dataset) # TRANSFORMER MODEL + from ._model import Transformer as _Transformer transformer_model_path = _pre_trained_models.STYLE_TRANSFER_BASE_MODELS[model]().get_model_path() - transformer = _Transformer(num_styles) + transformer = _Transformer(num_styles, batch_size_each) transformer.collect_params().initialize(ctx=ctx) transformer.load_params(transformer_model_path, ctx, allow_missing=True) + # For some reason, the transformer fails to hybridize for training, so we + # avoid this until resolved + # transformer.hybridize() # VGG MODEL + from ._model import Vgg16 as _Vgg16 vgg_model_path = _pre_trained_models.STYLE_TRANSFER_BASE_MODELS['Vgg16']().get_model_path() vgg_model = _Vgg16() vgg_model.collect_params().initialize(ctx=ctx) vgg_model.load_params(vgg_model_path, ctx=ctx, ignore_extra=True) + vgg_model.hybridize() # TRAINER from mxnet import gluon as _gluon @@ -219,60 +236,102 @@ def create(style_dataset, content_dataset, style_feature=None, else: print('Using CPU to create model') - # Print progress table header - column_names = ['Iteration', 'Loss', 'Elapsed Time'] - num_columns = len(column_names) - column_width = max(map(lambda x: len(x), column_names)) + 2 - hr = '+' + '+'.join(['-' * column_width] * num_columns) + '+' - print(hr) - print(('| {:<{width}}' * num_columns + '|').format(*column_names, width=column_width-1)) - print(hr) + # + # Pre-compute gram matrices for style images + # + if verbose: + print('Analyzing visual features of the style images') + + style_images_loader = _SFrameSTIter(style_dataset, batch_size, shuffle=False, num_epochs=1, + feature_column=style_feature, input_shape=input_shape, + loader_type='stretch', + sequential=params['sequential_image_processing']) + num_layers = len(params['style_loss_mult']) + gram_chunks = [[] for _ in range(num_layers)] + for s_batch in style_images_loader: + s_data = 
_gluon.utils.split_and_load(s_batch.data[0], ctx_list=ctx, batch_axis=0) + results = [] + for s in s_data: + vgg16_s = _vgg16_data_prep(s) + ret = vgg_model(vgg16_s) + grams = [_gram_matrix(x) for x in ret] + for i, gram in enumerate(grams): + if gram.context != _mx.cpu(0): + gram = gram.as_in_context(_mx.cpu(0)) + gram_chunks[i].append(gram) + del style_images_loader + + grams = [ + # The concatenated styles may be padded, so we slice overflow + _mx.nd.concat(*chunks, dim=0)[:num_styles] + for chunks in gram_chunks + ] + + # A context->grams look-up table, where all the gram matrices have been + # distributed + ctx_grams = {} + if ctx[0] == _mx.cpu(0): + ctx_grams[_mx.cpu(0)] = grams + else: + for ctx0 in ctx: + ctx_grams[ctx0] = [gram.as_in_context(ctx0) for gram in grams] + + # + # Training loop + # + vgg_content_loss_layer = params['vgg16_content_loss_layer'] + rs = _np.random.RandomState(1234) while iterations < max_iterations: content_images_loader.reset() for c_batch in content_images_loader: - s_batch = style_images_loader.next() c_data = _gluon.utils.split_and_load(c_batch.data[0], ctx_list=ctx, batch_axis=0) - s_data = _gluon.utils.split_and_load(s_batch.data[0], ctx_list=ctx, batch_axis=0) - indices_data = _gluon.utils.split_and_load(_mx.nd.array(s_batch.indices, dtype=_np.int64), - ctx_list=[_mx.cpu(0)]*len(ctx), batch_axis=0) Ls = [] curr_content_loss = [] curr_style_loss = [] with _mx.autograd.record(): - for c, s, indices in zip(c_data, s_data, indices_data): - stylized = transformer(c, indices.asnumpy()) + for c in c_data: + # Randomize styles to train + indices = _mx.nd.array(rs.randint(num_styles, size=batch_size_each), + dtype=_np.int64, ctx=c.context) + + # Generate pastiche + p = transformer(c, indices) # mean subtraction - s = _mxnet_utils.subtract_imagenet_mean(s) - stylized = _mxnet_utils.subtract_imagenet_mean(stylized) - c = _mxnet_utils.subtract_imagenet_mean(c) + vgg16_p = _vgg16_data_prep(p) + vgg16_c = _vgg16_data_prep(c) # vgg forward - style_vgg_outputs = vgg_model(s) - stylized_vgg_outputs = vgg_model(stylized) - content_vgg_outputs = vgg_model(c) + p_vgg_outputs = vgg_model(vgg16_p) + + c_vgg_outputs = vgg_model(vgg16_c) + c_content_layer = c_vgg_outputs[vgg_content_loss_layer] + p_content_layer = p_vgg_outputs[vgg_content_loss_layer] # Calculate Loss # Style Loss between style image and stylized image # Ls = sum of L2 norm of gram matrix of vgg16's conv layers - style_loss = 0.0 - for style_vgg_output, stylized_vgg_output, style_loss_mult in zip(style_vgg_outputs, stylized_vgg_outputs, _style_loss_mult): - gram_style_vgg = _gram_matrix(style_vgg_output) - gram_stylized_vgg = _gram_matrix(stylized_vgg_output) + style_losses = [] + for gram, p_vgg_output, style_loss_mult in zip(ctx_grams[c.context], p_vgg_outputs, _style_loss_mult): + gram_s_vgg = gram[indices] + gram_p_vgg = _gram_matrix(p_vgg_output) - style_loss = style_loss + style_loss_mult * mse_loss(gram_style_vgg, gram_stylized_vgg) + style_losses.append(style_loss_mult * mse_loss(gram_s_vgg, gram_p_vgg)) + + style_loss = _mx.nd.add_n(*style_losses) # Content Loss between content image and stylized image - # Lc = L2 norm of vgg16's 3rd conv layer - vgg_content_loss_layer = params['vgg16_content_loss_layer'] - content_loss = _content_loss_mult * mse_loss(content_vgg_outputs[vgg_content_loss_layer], - stylized_vgg_outputs[vgg_content_loss_layer]) + # Lc = L2 norm at a single layer in vgg16 + content_loss = _content_loss_mult * mse_loss(c_content_layer, + p_content_layer) 
curr_content_loss.append(content_loss) curr_style_loss.append(style_loss) - total_loss = content_loss + style_loss + # Divide loss by large number to get into a more legible + # range + total_loss = (content_loss + style_loss) / 10000.0 Ls.append(total_loss) for L in Ls: L.backward() @@ -285,8 +344,19 @@ def create(style_dataset, content_dataset, style_feature=None, smoothed_loss = 0.9 * smoothed_loss + 0.1 * cur_loss iterations += 1 trainer.step(batch_size) + + if verbose and iterations == 1: + # Print progress table header + column_names = ['Iteration', 'Loss', 'Elapsed Time'] + num_columns = len(column_names) + column_width = max(map(lambda x: len(x), column_names)) + 2 + hr = '+' + '+'.join(['-' * column_width] * num_columns) + '+' + print(hr) + print(('| {:<{width}}' * num_columns + '|').format(*column_names, width=column_width-1)) + print(hr) + cur_time = _time.time() - if verbose and cur_time > last_time + 10: + if verbose and (cur_time > last_time + 10 or iterations == max_iterations): # Print progress table row elapsed_time = cur_time - start_time print("| {cur_iter:<{width}}| {loss:<{width}.3f}| {time:<{width}.1f}|".format( @@ -295,15 +365,13 @@ def create(style_dataset, content_dataset, style_feature=None, if params['print_loss_breakdown']: print_content_loss = _np.mean([L.asnumpy()[0] for L in curr_content_loss]) print_style_loss = _np.mean([L.asnumpy()[0] for L in curr_style_loss]) - print('Total Loss: {:6.3f}| Content Loss: {:6.3f} | Style Loss: {:6.3f}'.format(cur_loss, print_content_loss, print_style_loss)) + print('Total Loss: {:6.3f} | Content Loss: {:6.3f} | Style Loss: {:6.3f}'.format(cur_loss, print_content_loss, print_style_loss)) last_time = cur_time if iterations == max_iterations: + print(hr) break training_time = _time.time() - start_time - if verbose: - print(hr) - style_sa = style_dataset[style_feature] idx_column = _tc.SArray(range(0, style_sa.shape[0])) style_sframe = _tc.SFrame({"style": idx_column, style_feature: style_sa}) @@ -369,9 +437,10 @@ def _get_version(self): @classmethod def _load_version(cls, state, version): + from ._model import Transformer as _Transformer _tkutl._model_version_check(version, cls._PYTHON_STYLE_TRANSFER_VERSION) - net = _Transformer(state['num_styles']) + net = _Transformer(state['num_styles'], state['batch_size']) ctx = _mxnet_utils.get_mxnet_context(max_devices=state['batch_size']) net_params = net.collect_params() @@ -587,6 +656,9 @@ def stylize(self, images, style=None, verbose=True, max_size=800, batch_size = 4 # for smaller images loader_type = 'pad' + self._model.batch_size = batch_size_each + self._model.hybridize() + ctx = _mxnet_utils.get_mxnet_context(max_devices=batch_size_each) batch_size = max(num_mxnet_gpus, 1) * batch_size_each last_time = 0 @@ -609,6 +681,10 @@ def stylize(self, images, style=None, verbose=True, max_size=800, batch_size = 4 if input_shape[1] > max_w: input_shape = (input_shape[0], max_w) + # If we find large images, let's switch to sequential iterator + # pre-processing, to prevent memory issues. 
+ sequential = max(max_h, max_w) > 2000 + if verbose and output_size != 1: print('Stylizing {} image(s) using {} style(s)'.format(dataset_size, len(style))) if oversized_count > 0: @@ -620,7 +696,8 @@ def stylize(self, images, style=None, verbose=True, max_size=800, batch_size = 4 input_shape=input_shape, num_epochs=1, loader_type=loader_type, - repeat_each_image=len(style)) + repeat_each_image=len(style), + sequential=sequential) sb = _tc.SFrameBuilder([int, int, _tc.Image], column_names=['row_id', 'style', 'stylized_{}'.format(self.content_feature)]) @@ -632,10 +709,11 @@ def stylize(self, images, style=None, verbose=True, max_size=800, batch_size = 4 else: c_data = _gluon.utils.split_and_load(batch.data[0], ctx_list=ctx, batch_axis=0) indices_data = _gluon.utils.split_and_load(_mx.nd.array(batch.repeat_indices, dtype=_np.int64), - ctx_list=[_mx.cpu(0)]*len(ctx), batch_axis=0) + ctx_list=ctx, batch_axis=0) outputs = [] for b_img, b_indices in zip(c_data, indices_data): - b_batch_styles = [style[idx] for idx in b_indices.asnumpy()] + mx_style = _mx.nd.array(style, dtype=_np.int64, ctx=b_indices.context) + b_batch_styles = mx_style[b_indices] output = self._model(b_img, b_batch_styles) outputs.append(output) @@ -715,7 +793,12 @@ def export_coreml(self, path, image_shape=(256, 256)): c_image = _mx.sym.Variable('image', shape=image_shape, dtype=_np.float32) + # signal that we want the transformer to prepare for coreml export + # using a zero batch size + transformer.batch_size = 0 + transformer.scale255 = True sym_out = transformer(c_image, index) + mod = _mx.mod.Module(symbol=sym_out, data_names=["image", "index"], label_names=None) mod.bind(data_shapes=zip(["image", "index"], [image_shape, (1,)]), for_training=False, @@ -732,10 +815,10 @@ def export_coreml(self, path, image_shape=(256, 256)): mod.set_params(sym_weight_dict, sym_weight_dict) index_dim = (1, self.num_styles) + coreml_model = _mxnet_converter.convert(mod, input_shape=[(self.content_feature, image_shape), ('index', index_dim)], + mode=None, preprocessor_args=None, builder=None, verbose=False) - coreml_model = _mxnet_converter.convert(mod, input_shape=[('image', image_shape), ('index', index_dim)], - mode=None, preprocessor_args=None, builder=None, verbose=True) - + transformer.scale255 = False spec = coreml_model.get_spec() image_input = spec.description.input[0] image_output = spec.description.output[0] diff --git a/src/unity/python/turicreate/version_info.py b/src/unity/python/turicreate/version_info.py index 63f71317bf..5e754a0593 100644 --- a/src/unity/python/turicreate/version_info.py +++ b/src/unity/python/turicreate/version_info.py @@ -11,7 +11,7 @@ from __future__ import absolute_import as _ # python egg version -__version__ = '4.3.2'#{{VERSION_STRING}} +__version__ = '5.0b1'#{{VERSION_STRING}} version = __version__ build_number = '0'#{{BUILD_NUMBER}} git_sha = 'NA'#{{GIT_SHA}} diff --git a/src/unity/toolkits/supervised_learning/supervised_learning.cpp b/src/unity/toolkits/supervised_learning/supervised_learning.cpp index fefd7ef47f..a6d5065f08 100644 --- a/src/unity/toolkits/supervised_learning/supervised_learning.cpp +++ b/src/unity/toolkits/supervised_learning/supervised_learning.cpp @@ -145,21 +145,24 @@ void supervised_learning_model_base::init(const sframe& X, const sframe& y, this->state["num_features"] = feature_column_names.size(); this->state["num_unpacked_features"] = feature_names.size(); + // Turned off temporarily until we can find a better way to hide for image classification + bool simple_mode = true; + 
   // Check the number of dimensions in this dataset is small, otherwise warn the
   // user. (see #3001 for context)
-  /*
-  // Turned off temporarily until we can find a better way to hide for image classification.
-  size_t num_dims = get_number_of_coefficients(this->ml_mdata);
-  if(num_dims >= X.num_rows()) {
-    std::stringstream ss;
-    ss << "WARNING: The number of feature dimensions in this problem is "
-       << "very large in comparison with the number of examples. Unless "
-       << "an appropriate regularization value is set, this model "
-       << "may not provide accurate predictions for a validation/test set."
-       << std::endl;
-    logprogress_stream << ss.str() << std::endl;
+  if (not simple_mode) {
+    size_t num_dims = get_number_of_coefficients(this->ml_mdata);
+    if(num_dims >= X.num_rows()) {
+      std::stringstream ss;
+      ss << "WARNING: The number of feature dimensions in this problem is "
+         << "very large in comparison with the number of examples. Unless "
+         << "an appropriate regularization value is set, this model "
+         << "may not provide accurate predictions for a validation/test set."
+         << std::endl;
+      logprogress_stream << ss.str() << std::endl;
+    }
   }
-  */

   ml_data valid_data;
   if (valid_X.num_rows() > 0) {
@@ -175,8 +178,9 @@ void supervised_learning_model_base::init(const sframe& X, const sframe& y,
   // Finally call the model-specific init function.
   model_specific_init(data, valid_data);

-  // Raise error if mean and variance are not finite.
-  check_feature_means_and_variances(this->ml_mdata, show_extra_warnings);
+  // Raise error if mean and variance are not finite
+  check_feature_means_and_variances(this->ml_mdata,
+                                    show_extra_warnings && (not simple_mode));

   // One class classification error message.
   if(this->is_classifier()) {
diff --git a/src/visualization/Turi Create Visualization.xcodeproj/project.pbxproj b/src/visualization/Turi Create Visualization.xcodeproj/project.pbxproj
index 51820f848a..610f3ddf88 100644
--- a/src/visualization/Turi Create Visualization.xcodeproj/project.pbxproj
+++ b/src/visualization/Turi Create Visualization.xcodeproj/project.pbxproj
@@ -16,29 +16,12 @@
     FC245BED1F4F541D009F54C6 /* VegaContainer.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC245BEC1F4F541D009F54C6 /* VegaContainer.swift */; };
     FC245BF31F4F7FFB009F54C6 /* JSON.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC245BF21F4F7FFB009F54C6 /* JSON.swift */; };
     FC245BF51F4F8110009F54C6 /* Pipe.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC245BF41F4F8110009F54C6 /* Pipe.swift */; };
-    FC3E34CD206EEF3100B6B69A /* package.json in Resources */ = {isa = PBXBuildFile; fileRef = FC3E34CC206EEF3100B6B69A /* package.json */; };
-    FC3E34D1206EF08500B6B69A /* index.js in Resources */ = {isa = PBXBuildFile; fileRef = FC3E34D0206EF08500B6B69A /* index.js */; };
-    FC3E34D3206EF09800B6B69A /* index.css in Resources */ = {isa = PBXBuildFile; fileRef = FC3E34D2206EF09800B6B69A /* index.css */; };
-    FC3E34D5206EF12300B6B69A /* build in Resources */ = {isa = PBXBuildFile; fileRef = FC3E34D4206EF12300B6B69A /* build */; };
-    FC3E34DD206EF4EB00B6B69A /* index.js in Resources */ = {isa = PBXBuildFile; fileRef = FC3E34DC206EF4EB00B6B69A /* index.js */; };
-    FC3E34DF206EF4F600B6B69A /* index.css in Resources */ = {isa = PBXBuildFile; fileRef = FC3E34DE206EF4F600B6B69A /* index.css */; };
-    FC3E34E1206EF50100B6B69A /* index.js in Resources */ = {isa = PBXBuildFile; fileRef = FC3E34E0206EF50100B6B69A /* index.js */; };
-    FC3E34E3206EF50E00B6B69A /* index.js in Resources */ = {isa = PBXBuildFile; fileRef = FC3E34E2206EF50E00B6B69A /* index.js */; };
-    FC3E34E6206EF52000B6B69A /* index.css in Resources */ = {isa = PBXBuildFile; fileRef = FC3E34E5206EF52000B6B69A /* index.css */; };
     FC5F0B3C1F5F3A3E0001BCEE /* Error.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC5F0B3B1F5F3A3E0001BCEE /* Error.swift */; };
-    FC78046020729FF9004BE45B /* index.js in Resources */ = {isa = PBXBuildFile; fileRef = FC78045F20729FF9004BE45B /* index.js */; };
-    FC7804672072A061004BE45B /* index.js in Resources */ = {isa = PBXBuildFile; fileRef = FC7804662072A061004BE45B /* index.js */; };
-    FC7804692072A075004BE45B /* index.js in Resources */ = {isa = PBXBuildFile; fileRef = FC7804682072A075004BE45B /* index.js */; };
-    FC78046C2072A0F6004BE45B /* index.js in Resources */ = {isa = PBXBuildFile; fileRef = FC78046B2072A0F6004BE45B /* index.js */; };
-    FC78046E2072A106004BE45B /* index.css in Resources */ = {isa = PBXBuildFile; fileRef = FC78046D2072A106004BE45B /* index.css */; };
-    FC7804702072A115004BE45B /* index.css in Resources */ = {isa = PBXBuildFile; fileRef = FC78046F2072A115004BE45B /* index.css */; };
-    FC7804722072A14B004BE45B /* index.css in Resources */ = {isa = PBXBuildFile; fileRef = FC7804712072A14B004BE45B /* index.css */; };
-    FC7804742072A168004BE45B /* index.css in Resources */ = {isa = PBXBuildFile; fileRef = FC7804732072A168004BE45B /* index.css */; };
     FC9066011F425B4800AE3881 /* AppData.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC9066001F425B4800AE3881 /* AppData.swift */; };
-    FCB8DFDE206EF548001A7089 /* index.css in Resources */ = {isa = PBXBuildFile; fileRef = FCB8DFDD206EF548001A7089 /* index.css */; };
     FCB8DFE1206EF950001A7089 /* index.html in Resources */ = {isa = PBXBuildFile; fileRef = FCB8DFE0206EF950001A7089 /* index.html */; };
     FCB8DFE3206F1756001A7089 /* index.css in Resources */ = {isa = PBXBuildFile; fileRef = FCB8DFE2206F1756001A7089 /* index.css */; };
     FCB8DFE5206F187D001A7089 /* index.js in Resources */ = {isa = PBXBuildFile; fileRef = FCB8DFE4206F187D001A7089 /* index.js */; };
+    FCE3954420B4C2CB0029FDB5 /* build in Resources */ = {isa = PBXBuildFile; fileRef = FCE3954320B4C2CB0029FDB5 /* build */; };
 /* End PBXBuildFile section */

 /* Begin PBXFileReference section */
@@ -56,7 +39,6 @@
     FC3E34CC206EEF3100B6B69A /* package.json */ = {isa = PBXFileReference; lastKnownFileType = text.json; path = package.json; sourceTree = "<group>"; };
     FC3E34D0206EF08500B6B69A /* index.js */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.javascript; path = index.js; sourceTree = "<group>"; };
     FC3E34D2206EF09800B6B69A /* index.css */ = {isa = PBXFileReference; lastKnownFileType = text.css; path = index.css; sourceTree = "<group>"; };
-    FC3E34D4206EF12300B6B69A /* build */ = {isa = PBXFileReference; lastKnownFileType = folder; path = build; sourceTree = "<group>"; };
     FC3E34DC206EF4EB00B6B69A /* index.js */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.javascript; path = index.js; sourceTree = "<group>"; };
     FC3E34DE206EF4F600B6B69A /* index.css */ = {isa = PBXFileReference; lastKnownFileType = text.css; path = index.css; sourceTree = "<group>"; };
     FC3E34E0206EF50100B6B69A /* index.js */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.javascript; path = index.js; sourceTree = "<group>"; };
@@ -76,6 +58,7 @@
     FCB8DFE0206EF950001A7089 /* index.html */ = {isa = PBXFileReference; lastKnownFileType = text.html; path = index.html; sourceTree = "<group>"; };
     FCB8DFE2206F1756001A7089 /* index.css */ = {isa = PBXFileReference; lastKnownFileType = text.css; path = index.css; sourceTree = "<group>"; };
    FCB8DFE4206F187D001A7089 /* index.js */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.javascript; path = index.js; sourceTree = "<group>"; };
+    FCE3954320B4C2CB0029FDB5 /* build */ = {isa = PBXFileReference; lastKnownFileType = folder; path = build; sourceTree = "<group>"; };
 /* End PBXFileReference section */

 /* Begin PBXFrameworksBuildPhase section */
@@ -136,7 +119,7 @@
     FC1A87821FB1464D00A67DAD /* user_interface */ = {
       isa = PBXGroup;
       children = (
-        FC3E34D4206EF12300B6B69A /* build */,
+        FCE3954320B4C2CB0029FDB5 /* build */,
         FCB8DFDF206EF93A001A7089 /* public */,
         FC3E34CF206EF04F00B6B69A /* src */,
         FC3E34CC206EEF3100B6B69A /* package.json */,
@@ -318,27 +301,10 @@
       buildActionMask = 2147483647;
       files = (
         FCB8DFE5206F187D001A7089 /* index.js in Resources */,
-        FC7804742072A168004BE45B /* index.css in Resources */,
-        FC3E34D5206EF12300B6B69A /* build in Resources */,
-        FC7804722072A14B004BE45B /* index.css in Resources */,
         FCB8DFE1206EF950001A7089 /* index.html in Resources */,
-        FC3E34CD206EEF3100B6B69A /* package.json in Resources */,
-        FC3E34D1206EF08500B6B69A /* index.js in Resources */,
         FCB8DFE3206F1756001A7089 /* index.css in Resources */,
-        FC3E34E6206EF52000B6B69A /* index.css in Resources */,
+        FCE3954420B4C2CB0029FDB5 /* build in Resources */,
         9226E7C81F366F2E00C33A64 /* Assets.xcassets in Resources */,
-        FC3E34D3206EF09800B6B69A /* index.css in Resources */,
-        FC7804672072A061004BE45B /* index.js in Resources */,
-        FC3E34E1206EF50100B6B69A /* index.js in Resources */,
-        FC7804702072A115004BE45B /* index.css in Resources */,
-        FC78046C2072A0F6004BE45B /* index.js in Resources */,
-        FC7804692072A075004BE45B /* index.js in Resources */,
-        FCB8DFDE206EF548001A7089 /* index.css in Resources */,
-        FC3E34E3206EF50E00B6B69A /* index.js in Resources */,
-        FC78046020729FF9004BE45B /* index.js in Resources */,
-        FC78046E2072A106004BE45B /* index.css in Resources */,
-        FC3E34DF206EF4F600B6B69A /* index.css in Resources */,
-        FC3E34DD206EF4EB00B6B69A /* index.js in Resources */,
         9226E7CB1F366F2E00C33A64 /* Main.storyboard in Resources */,
       );
       runOnlyForDeploymentPostprocessing = 0;
diff --git a/src/visualization/Turi Create Visualization/src/user_interface/src/elements/Explore/Table/index.js b/src/visualization/Turi Create Visualization/src/user_interface/src/elements/Explore/Table/index.js
index 3da67081d7..e5b6c9b1c4 100644
--- a/src/visualization/Turi Create Visualization/src/user_interface/src/elements/Explore/Table/index.js
+++ b/src/visualization/Turi Create Visualization/src/user_interface/src/elements/Explore/Table/index.js
@@ -590,13 +590,24 @@ class TcTable extends Component {
          rows.push(
            {empty_cells_2}
          );
+
+         var empty_cells_3 = [];
+         empty_cells_3.push( );
+
+         for(var x = 1; x < cells.length;x++){
+           empty_cells_3.push( );
+         }
+
+         rows.push(
+           {empty_cells_3}
+         );
        }
      }

      var n = Math.floor(Math.min(...row_ids)/this.step_size);
-      this.set_higher = n + 2;
+      this.set_higher = n + 3;
      this.set_lower = n;

      var parent_context = this;
diff --git a/src/visualization/Turi Create Visualization/src/user_interface/src/elements/Explore/Table/sticky-table/index.js b/src/visualization/Turi Create Visualization/src/user_interface/src/elements/Explore/Table/sticky-table/index.js
index 085b8be567..daba80fc92 100644
--- a/src/visualization/Turi Create Visualization/src/user_interface/src/elements/Explore/Table/sticky-table/index.js
+++ b/src/visualization/Turi Create Visualization/src/user_interface/src/elements/Explore/Table/sticky-table/index.js
@@ -329,7 +329,7 @@ class StickyTable extends PureComponent {
     if(document.getElementById("data_container")){
-      document.getElementById("data_container").style.height = this.getModeHeights()*3 - 30 + "px"
+      document.getElementById("data_container").style.height = this.getModeHeights()*4 - 30 + "px"
       document.getElementById("data_container").style.width = (this.xScrollbar.clientWidth - 30) + "px";
       document.getElementById("data_container").style.left = 15 + "px";
       document.getElementById("data_container").style.top = column_offset_top + this.getModeHeights() + 15 + "px";
diff --git a/userguide/style_transfer/export-coreml.md b/userguide/style_transfer/export-coreml.md
index 3c625f5530..dc239c9788 100644
--- a/userguide/style_transfer/export-coreml.md
+++ b/userguide/style_transfer/export-coreml.md
@@ -39,7 +39,7 @@ Now, you can stylize your images using:

 let mlModel = MyStyleTransferModel()
 let visionModel = try VNCoreMLModel(for: mlModel)

-let styleTranser = VNCoreMLRequest(model: visionModel, completionHandler: { (request, error) in
+let styleTransfer = VNCoreMLRequest(model: visionModel, completionHandler: { (request, error) in
     guard let results = request.results else { return }
     for case let styleTransferedImage as VNPixelBufferObservation in results {
@@ -47,4 +47,4 @@ let styleTranser = VNCoreMLRequest(model: visionModel, completionHandler: { (req
         imageLayer.contents = CIImage(cvPixelBuffer: styleTransferedImage.pixelBuffer, options: [:])
     }
 })
-```
+```
\ No newline at end of file
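
Addendum (not part of the patch above): a minimal sketch of how the `styleTransfer` request from the user-guide snippet might be driven for a single frame. It assumes an input `CVPixelBuffer` named `inputBuffer`; everything except the Vision API calls is illustrative.

```swift
import Vision
import CoreVideo

/// Runs the `styleTransfer` VNCoreMLRequest (built in the snippet above) on one frame.
/// The completion handler attached to `styleTransfer` receives the
/// VNPixelBufferObservation results and updates `imageLayer`.
func stylize(_ inputBuffer: CVPixelBuffer) {
    let handler = VNImageRequestHandler(cvPixelBuffer: inputBuffer, options: [:])
    do {
        try handler.perform([styleTransfer])
    } catch {
        print("Style transfer failed: \(error)")
    }
}
```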