diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index dfc5d019051a9..58b6aa05afed6 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -732,9 +732,6 @@ static void setSpecializationConstants(const std::shared_ptr &InputImpl, ur_program_handle_t Prog, const AdapterPtr &Adapter) { - // Set ITT annotation specialization constant if needed. - enableITTAnnotationsIfNeeded(Prog, Adapter); - std::lock_guard Lock{InputImpl->get_spec_const_data_lock()}; const std::map> &SpecConstData = InputImpl->get_spec_const_data_ref(); @@ -769,15 +766,6 @@ ur_program_handle_t ProgramManager::getBuiltURProgram( const ContextImplPtr &ContextImpl, const DeviceImplPtr &DeviceImpl, const std::string &KernelName, const NDRDescT &NDRDesc, bool JITCompilationIsRequired) { - KernelProgramCache &Cache = ContextImpl->getKernelProgramCache(); - - std::string CompileOpts; - std::string LinkOpts; - - applyOptionsFromEnvironment(CompileOpts, LinkOpts); - - SerializedObj SpecConsts; - // Check if we can optimize program builds for sub-devices by using a program // built for the root device DeviceImplPtr RootDevImpl = DeviceImpl; @@ -824,19 +812,36 @@ ur_program_handle_t ProgramManager::getBuiltURProgram( AllImages.push_back(&Img); std::copy(ImageDeps.begin(), ImageDeps.end(), std::back_inserter(AllImages)); - auto BuildF = [this, &Img, &Context, &ContextImpl, &Device, &CompileOpts, + return getBuiltURProgram(Img, Context, {Device}, DeviceImagesToLink, + AllImages); +} + +ur_program_handle_t ProgramManager::getBuiltURProgram( + const RTDeviceBinaryImage &Img, const context &Context, + const std::vector &Devs, + const std::set &DeviceImagesToLink, + const std::vector &AllImages, + const std::shared_ptr &DeviceImageImpl, + const SerializedObj &SpecConsts) { + std::string CompileOpts; + std::string LinkOpts; + applyOptionsFromEnvironment(CompileOpts, LinkOpts); + auto BuildF = [this, &Img, &DeviceImageImpl, &Context, &Devs, &CompileOpts, &LinkOpts, SpecConsts, &DeviceImagesToLink, &AllImages] { + const ContextImplPtr &ContextImpl = getSyclObjImpl(Context); const AdapterPtr &Adapter = ContextImpl->getAdapter(); - applyOptionsFromImage(CompileOpts, LinkOpts, Img, {Device}, Adapter); + applyOptionsFromImage(CompileOpts, LinkOpts, Img, Devs, Adapter); // Should always come last! appendCompileEnvironmentVariablesThatAppend(CompileOpts); appendLinkEnvironmentVariablesThatAppend(LinkOpts); + auto [NativePrg, DeviceCodeWasInCache] = getOrCreateURProgram( - Img, AllImages, Context, {Device}, CompileOpts + LinkOpts, SpecConsts); + Img, {AllImages}, Context, Devs, CompileOpts + LinkOpts, SpecConsts); - if (!DeviceCodeWasInCache) { - if (Img.supportsSpecConstants()) - enableITTAnnotationsIfNeeded(NativePrg, Adapter); + if (!DeviceCodeWasInCache && Img.supportsSpecConstants()) { + enableITTAnnotationsIfNeeded(NativePrg, Adapter); + if (DeviceImageImpl) + setSpecializationConstants(DeviceImageImpl, NativePrg, Adapter); } UrFuncInfo programReleaseInfo; @@ -864,34 +869,28 @@ ur_program_handle_t ProgramManager::getBuiltURProgram( for (RTDeviceBinaryImage *BinImg : DeviceImagesToLink) { if (UseDeviceLibs) DeviceLibReqMask |= getDeviceLibReqMask(*BinImg); - device_image_plain DevImagePlain = - getDeviceImageFromBinaryImage(BinImg, Context, Device); - const std::shared_ptr &DeviceImageImpl = - detail::getSyclObjImpl(DevImagePlain); - - SerializedObj ImgSpecConsts = - DeviceImageImpl->get_spec_const_blob_ref(); - ur_program_handle_t NativePrg = - createURProgram(*BinImg, Context, {Device}); + ur_program_handle_t NativePrg = createURProgram(*BinImg, Context, Devs); if (BinImg->supportsSpecConstants()) - setSpecializationConstants(DeviceImageImpl, NativePrg, Adapter); + enableITTAnnotationsIfNeeded(NativePrg, Adapter); ProgramsToLink.push_back(NativePrg); } } - std::vector Devs = { - getSyclObjImpl(Device).get()->getHandleRef()}; - ; + + std::vector URDevices; + for (auto Dev : Devs) + URDevices.push_back(getSyclObjImpl(Dev).get()->getHandleRef()); + ProgramPtr BuiltProgram = build( - std::move(ProgramManaged), ContextImpl, CompileOpts, LinkOpts, Devs, - DeviceLibReqMask, ProgramsToLink, + std::move(ProgramManaged), ContextImpl, CompileOpts, LinkOpts, + URDevices, DeviceLibReqMask, ProgramsToLink, /*CreatedFromBinary*/ Img.getFormat() != SYCL_DEVICE_BINARY_TYPE_SPIRV); + // Those extra programs won't be used anymore, just the final linked result for (ur_program_handle_t Prg : ProgramsToLink) Adapter->call(Prg); - emitBuiltProgramInfo(BuiltProgram.get(), ContextImpl); { @@ -902,29 +901,37 @@ ur_program_handle_t ProgramManager::getBuiltURProgram( } } - ContextImpl->addDeviceGlobalInitializer(BuiltProgram.get(), {Device}, &Img); + ContextImpl->addDeviceGlobalInitializer(BuiltProgram.get(), Devs, &Img); // Save program to persistent cache if it is not there if (!DeviceCodeWasInCache) { - PersistentDeviceCodeCache::putItemToDisc({Device}, AllImages, SpecConsts, + PersistentDeviceCodeCache::putItemToDisc(Devs, AllImages, SpecConsts, CompileOpts + LinkOpts, BuiltProgram.get()); } + return BuiltProgram.release(); }; + if (!SYCLConfig::get()) + return BuildF(); + uint32_t ImgId = Img.getImageID(); - const ur_device_handle_t UrDevice = Dev->getHandleRef(); - auto CacheKey = std::make_pair(std::make_pair(std::move(SpecConsts), ImgId), - std::set{UrDevice}); + std::set URDevicesSet; + std::transform(Devs.begin(), Devs.end(), + std::inserter(URDevicesSet, URDevicesSet.begin()), + [](const device &Dev) { + return getSyclObjImpl(Dev).get()->getHandleRef(); + }); + auto CacheKey = + std::make_pair(std::make_pair(SpecConsts, ImgId), URDevicesSet); + const ContextImplPtr &ContextImpl = getSyclObjImpl(Context); + KernelProgramCache &Cache = ContextImpl->getKernelProgramCache(); auto GetCachedBuildF = [&Cache, &CacheKey]() { return Cache.getOrInsertProgram(CacheKey); }; - if (!SYCLConfig::get()) - return BuildF(); - auto EvictFunc = [&Cache, &CacheKey](ur_program_handle_t Program, bool isBuilt) { return Cache.registerProgramFetch(CacheKey, Program, isBuilt); @@ -932,35 +939,69 @@ ur_program_handle_t ProgramManager::getBuiltURProgram( auto BuildResult = Cache.getOrBuild(GetCachedBuildF, BuildF, EvictFunc); + // getOrBuild is not supposed to return nullptr assert(BuildResult != nullptr && "Invalid build result"); ur_program_handle_t ResProgram = BuildResult->Val; - auto Adapter = ContextImpl->getAdapter(); + // Here we have multiple devices a program is built for, so add the program to + // the cache for all subsets of provided list of devices. + const AdapterPtr &Adapter = ContextImpl->getAdapter(); // If we linked any extra device images, then we need to // cache them as well. - for (const RTDeviceBinaryImage *BImg : DeviceImagesToLink) { - // CacheKey is captured by reference by GetCachedBuildF, so we can simply - // update it here and re-use that lambda. - CacheKey.first.second = BImg->getImageID(); - bool DidInsert = Cache.insertBuiltProgram(CacheKey, ResProgram); + auto CacheLinkedImages = [&Adapter, &Cache, &CacheKey, &ResProgram, + &DeviceImagesToLink] { + for (const RTDeviceBinaryImage *BImg : DeviceImagesToLink) { + // CacheKey is captured by reference by GetCachedBuildF, so we can simply + // update it here and re-use that lambda. + CacheKey.first.second = BImg->getImageID(); + bool DidInsert = Cache.insertBuiltProgram(CacheKey, ResProgram); + // Add to the eviction list. + Cache.registerProgramFetch(CacheKey, ResProgram, DidInsert); + if (DidInsert) { + // For every cached copy of the program, we need to increment its + // refcount + Adapter->call(ResProgram); + } + } + }; + CacheLinkedImages(); - // Add to the eviction list. - Cache.registerProgramFetch(CacheKey, ResProgram, DidInsert); - if (DidInsert) - // For every cached copy of the program, we need to increment its refcount - Adapter->call(ResProgram); + if (URDevicesSet.size() > 1) { + // emplace all subsets of the current set of devices into the cache. + // Set of all devices is not included in the loop as it was already added + // into the cache. + for (int Mask = 1; Mask < (1 << URDevicesSet.size()) - 1; ++Mask) { + std::set Subset; + int Index = 0; + for (auto It = URDevicesSet.begin(); It != URDevicesSet.end(); + ++It, ++Index) { + if (Mask & (1 << Index)) { + Subset.insert(*It); + } + } + // Change device in the cache key to reduce copying of spec const data. + CacheKey.second = Subset; + bool DidInsert = Cache.insertBuiltProgram(CacheKey, ResProgram); + if (DidInsert) { + // For every cached copy of the program, we need to increment its + // refcount + Adapter->call(ResProgram); + } + CacheLinkedImages(); + // getOrBuild is not supposed to return nullptr + assert(BuildResult != nullptr && "Invalid build result"); + } } // If caching is enabled, one copy of the program handle will be // stored in the cache, and one handle is returned to the // caller. In that case, we need to increase the ref count of the // program. - ContextImpl->getAdapter()->call(ResProgram); + Adapter->call(ResProgram); return ResProgram; } - // When caching is enabled, the returned UrProgram and UrKernel will // already have their ref count incremented. std::tupleget_context())->getAdapter(); - // Device is not used when creating program from SPIRV, so passing only one - // device is OK. ur_program_handle_t Prog = createURProgram(*InputImpl->get_bin_image_ref(), InputImpl->get_context(), Devs); @@ -2605,160 +2644,23 @@ device_image_plain ProgramManager::build(const device_image_plain &DeviceImage, getSyclObjImpl(DeviceImage); const context Context = InputImpl->get_context(); - const ContextImplPtr ContextImpl = getSyclObjImpl(Context); - KernelProgramCache &Cache = ContextImpl->getKernelProgramCache(); - - std::string CompileOpts; - std::string LinkOpts; - applyOptionsFromEnvironment(CompileOpts, LinkOpts); - const RTDeviceBinaryImage *ImgPtr = InputImpl->get_bin_image_ref(); const RTDeviceBinaryImage &Img = *ImgPtr; SerializedObj SpecConsts = InputImpl->get_spec_const_blob_ref(); - // TODO: Unify this code with getBuiltPIProgram - auto BuildF = [this, &Context, &Img, &Devs, &CompileOpts, &LinkOpts, - &InputImpl, SpecConsts] { - ContextImplPtr ContextImpl = getSyclObjImpl(Context); - const AdapterPtr &Adapter = ContextImpl->getAdapter(); - applyOptionsFromImage(CompileOpts, LinkOpts, Img, Devs, Adapter); - // Should always come last! - appendCompileEnvironmentVariablesThatAppend(CompileOpts); - appendLinkEnvironmentVariablesThatAppend(LinkOpts); - - // Device is not used when creating program from SPIRV, so passing only one - // device is OK. - auto [NativePrg, DeviceCodeWasInCache] = getOrCreateURProgram( - Img, {&Img}, Context, Devs, CompileOpts + LinkOpts, SpecConsts); - - if (!DeviceCodeWasInCache && - InputImpl->get_bin_image_ref()->supportsSpecConstants()) - setSpecializationConstants(InputImpl, NativePrg, Adapter); - - UrFuncInfo programReleaseInfo; - auto programRelease = - programReleaseInfo.getFuncPtrFromModule(ur::getURLoaderLibrary()); - ProgramPtr ProgramManaged(NativePrg, programRelease); - - // Link a fallback implementation of device libraries if they are not - // supported by a device compiler. - // Pre-compiled programs are supposed to be already linked. - // If device image is not SPIR-V, DeviceLibReqMask will be 0 which means - // no fallback device library will be linked. - uint32_t DeviceLibReqMask = 0; - if (Img.getFormat() == SYCL_DEVICE_BINARY_TYPE_SPIRV && - !SYCLConfig::get()) - DeviceLibReqMask = getDeviceLibReqMask(Img); - - // TODO: Add support for dynamic linking with kernel bundles - std::vector ExtraProgramsToLink; - std::vector URDevices; - for (auto Dev : Devs) { - URDevices.push_back(getSyclObjImpl(Dev).get()->getHandleRef()); - } - ProgramPtr BuiltProgram = - build(std::move(ProgramManaged), ContextImpl, CompileOpts, LinkOpts, - URDevices, DeviceLibReqMask, ExtraProgramsToLink); - - emitBuiltProgramInfo(BuiltProgram.get(), ContextImpl); - - { - std::lock_guard Lock(MNativeProgramsMutex); - NativePrograms.insert({BuiltProgram.get(), &Img}); - } - - ContextImpl->addDeviceGlobalInitializer(BuiltProgram.get(), Devs, &Img); - - // Save program to persistent cache if it is not there - if (!DeviceCodeWasInCache) - PersistentDeviceCodeCache::putItemToDisc( - Devs, {&Img}, SpecConsts, CompileOpts + LinkOpts, BuiltProgram.get()); - - return BuiltProgram.release(); - }; - - if (!SYCLConfig::get()) { - auto ResProgram = BuildF(); - DeviceImageImplPtr ExecImpl = std::make_shared( - InputImpl->get_bin_image_ref(), Context, Devs, bundle_state::executable, - InputImpl->get_kernel_ids_ptr(), ResProgram, - InputImpl->get_spec_const_data_ref(), - InputImpl->get_spec_const_blob_ref()); - - return createSyclObjFromImpl(ExecImpl); - } - - uint32_t ImgId = Img.getImageID(); - std::set URDevicesSet; - std::transform(Devs.begin(), Devs.end(), - std::inserter(URDevicesSet, URDevicesSet.begin()), - [](const device &Dev) { - return getSyclObjImpl(Dev).get()->getHandleRef(); - }); - auto CacheKey = std::make_pair(std::make_pair(std::move(SpecConsts), ImgId), - URDevicesSet); - - // CacheKey is captured by reference so when we overwrite it later we can - // reuse this function. - auto GetCachedBuildF = [&Cache, &CacheKey]() { - return Cache.getOrInsertProgram(CacheKey); - }; - - auto EvictFunc = [&Cache, &CacheKey](ur_program_handle_t Program, - bool isBuilt) { - return Cache.registerProgramFetch(CacheKey, Program, isBuilt); - }; - - auto BuildResult = - Cache.getOrBuild(GetCachedBuildF, BuildF, EvictFunc); - // getOrBuild is not supposed to return nullptr - assert(BuildResult != nullptr && "Invalid build result"); - - ur_program_handle_t ResProgram = BuildResult->Val; - - // Here we have multiple devices a program is built for, so add the program to - // the cache for all subsets of provided list of devices. - const AdapterPtr &Adapter = ContextImpl->getAdapter(); - auto CacheSubsets = [ResProgram, &Adapter]() { - Adapter->call(ResProgram); - return ResProgram; - }; - - if (URDevicesSet.size() > 1) { - // emplace all subsets of the current set of devices into the cache. - // Set of all devices is not included in the loop as it was already added - // into the cache. - for (int Mask = 1; Mask < (1 << URDevicesSet.size()) - 1; ++Mask) { - std::set Subset; - int Index = 0; - for (auto It = URDevicesSet.begin(); It != URDevicesSet.end(); - ++It, ++Index) { - if (Mask & (1 << Index)) { - Subset.insert(*It); - } - } - // Change device in the cache key to reduce copying of spec const data. - CacheKey.second = Subset; - Cache.getOrBuild(GetCachedBuildF, CacheSubsets, EvictFunc); - // getOrBuild is not supposed to return nullptr - assert(BuildResult != nullptr && "Invalid build result"); - } - } - - // devive_image_impl shares ownership of PIProgram with, at least, program - // cache. The ref counter will be descremented in the destructor of - // device_image_impl - Adapter->call(ResProgram); + // TODO: Add support for dynamic linking with kernel bundles + ur_program_handle_t ResProgram = + getBuiltURProgram(Img, Context, Devs, /*DeviceImagesToLink*/ {}, {&Img}, + InputImpl, SpecConsts); DeviceImageImplPtr ExecImpl = std::make_shared( InputImpl->get_bin_image_ref(), Context, Devs, bundle_state::executable, InputImpl->get_kernel_ids_ptr(), ResProgram, InputImpl->get_spec_const_data_ref(), InputImpl->get_spec_const_blob_ref()); - return createSyclObjFromImpl(ExecImpl); } diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index 0586a41a83540..75689aadfb290 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -147,11 +147,25 @@ class ProgramManager { const NDRDescT &NDRDesc = {}, bool JITCompilationIsRequired = false); - ur_program_handle_t getBuiltURProgram(const context &Context, - const device &Device, - const std::string &KernelName, - const property_list &PropList, - bool JITCompilationIsRequired = false); + /// Builds a program from a given set of images or retrieves that program from + /// cache. + /// \param Img is the main image the program is built with. + /// \param Context is the context the program is built for. + /// \param Devs is a vector of devices the program is built for. + /// \param DeviceImagesToLink is a set of image dependencies required by the + /// main image. + /// \param AllImages is a vector of all images the program is built with. + /// \param DeviceImageImpl is an optional device_image_impl pointer that + /// represents the main image. + /// \param SpecConsts is an optional parameter containing spec constant values + /// the program should be built with. + ur_program_handle_t getBuiltURProgram( + const RTDeviceBinaryImage &Img, const context &Context, + const std::vector &Devs, + const std::set &DeviceImagesToLink, + const std::vector &AllImages, + const std::shared_ptr &DeviceImageImpl = nullptr, + const SerializedObj &SpecConsts = {}); std::tuple