From 066bd1841323a3a688f1b98f5a6dbcf10073e0d7 Mon Sep 17 00:00:00 2001 From: "Cai, Justin" Date: Thu, 31 Oct 2024 15:14:40 +0000 Subject: [PATCH 1/7] [SYCL] Add dummy image generation for virtual functions --- .../sycl-post-link/virtual-functions/dummy.ll | 37 +++++++++++++ llvm/tools/sycl-post-link/sycl-post-link.cpp | 54 +++++++++++++++++-- 2 files changed, 87 insertions(+), 4 deletions(-) create mode 100644 llvm/test/tools/sycl-post-link/virtual-functions/dummy.ll diff --git a/llvm/test/tools/sycl-post-link/virtual-functions/dummy.ll b/llvm/test/tools/sycl-post-link/virtual-functions/dummy.ll new file mode 100644 index 0000000000000..8361ed22c3537 --- /dev/null +++ b/llvm/test/tools/sycl-post-link/virtual-functions/dummy.ll @@ -0,0 +1,37 @@ +; RUN: sycl-post-link -split=auto -properties -S < %s -o %t.table +; RUN: FileCheck %s --input-file=%t.table --check-prefix=CHECK-TABLE +; RUN: FileCheck %s --input-file=%t_0.ll --check-prefix=CHECK-FP64-SPLIT +; RUN: FileCheck %s --input-file=%t_1.ll --check-prefix=CHECK-FP64-DUMMY +; RUN: FileCheck %s --input-file=%t_1.prop --check-prefix=CHECK-FP64-DUMMY-PROPS +; RUN: FileCheck %s --input-file=%t_2.ll --check-prefix=CHECK-FP32-SPLIT + +; CHECK-TABLE: _0.prop +; CHECK-TABLE-NEXT: _1.prop +; CHECK-TABLE-NEXT: _2.prop + +; CHECK-FP64-SPLIT: define spir_func void @bar() +; CHECK-FP32-SPLIT: define spir_func void @foo() + +; CHECK-FP64-DUMMY: define spir_func void @bar() +; CHECK-FP64-DUMMY-NEXT: entry: +; CHECK-FP64-DUMMY-NEXT: ret void + +; CHECK-FP64-DUMMY-PROPS: dummy=1 + +define spir_func void @foo() #1 { + %x = alloca float + ret void +} + +define spir_func void @bar() #1 !sycl_used_aspects !1 { + %x = alloca double + %d = load double, ptr %x + %res = fadd double %d, %d + ret void +} + +attributes #1 = { "sycl-module-id"="v.cpp" "indirectly-callable"="setA" } + +!sycl_aspects = !{!0} +!0 = !{!"fp64", i32 6} +!1 = !{i32 6} diff --git a/llvm/tools/sycl-post-link/sycl-post-link.cpp b/llvm/tools/sycl-post-link/sycl-post-link.cpp index 3800c5875e44f..c68c5ebf46953 100644 --- a/llvm/tools/sycl-post-link/sycl-post-link.cpp +++ b/llvm/tools/sycl-post-link/sycl-post-link.cpp @@ -306,7 +306,8 @@ std::string saveModuleIR(Module &M, int I, StringRef Suff) { std::string saveModuleProperties(module_split::ModuleDesc &MD, const GlobalBinImageProps &GlobProps, int I, - StringRef Suff, StringRef Target = "") { + StringRef Suff, StringRef Target = "", + bool IsDummy = false) { auto PropSet = computeModuleProperties(MD.getModule(), MD.entries(), GlobProps); @@ -318,6 +319,10 @@ std::string saveModuleProperties(module_split::ModuleDesc &MD, NewSuff += Target; } + if (IsDummy) { + PropSet.add(PropSetRegTy::SYCL_VIRTUAL_FUNCTIONS, "dummy", 1); + } + std::error_code EC; std::string SCFile = makeResultFileName(".prop", I, NewSuff); raw_fd_ostream SCOut(SCFile, EC); @@ -416,7 +421,8 @@ void addTableRow(util::SimpleTable &Table, // IR component saving is skipped, and this file name is recorded as such in // the result. void saveModule(std::vector> &OutTables, - module_split::ModuleDesc &MD, int I, StringRef IRFilename) { + module_split::ModuleDesc &MD, int I, StringRef IRFilename, + bool IsDummy = false) { IrPropSymFilenameTriple BaseTriple; StringRef Suffix = getModuleSuffix(MD); MD.saveSplitInformationAsMetadata(); @@ -440,8 +446,8 @@ void saveModule(std::vector> &OutTables, GlobalBinImageProps Props = {EmitKernelParamInfo, EmitProgramMetadata, EmitExportedSymbols, EmitImportedSymbols, DeviceGlobals}; - CopyTriple.Prop = - saveModuleProperties(MD, Props, I, Suffix, OutputFile.Target); + CopyTriple.Prop = saveModuleProperties(MD, Props, I, Suffix, + OutputFile.Target, IsDummy); } addTableRow(*Table, CopyTriple); } @@ -741,6 +747,36 @@ bool isTargetCompatibleWithModule(const std::string &Target, return true; } +std::optional +makeDummy(module_split::ModuleDesc &MD) { + bool hasVirtualFunctions = false; + bool hasOptionalKernelFeatures = false; + for (Function &F : MD.getModule().functions()) { + if (F.hasFnAttribute("indirectly-callable")) + hasVirtualFunctions = true; + if (F.getMetadata("sycl_used_aspects")) + hasOptionalKernelFeatures = true; + if (hasVirtualFunctions && hasOptionalKernelFeatures) + break; + } + if (!hasVirtualFunctions || !hasOptionalKernelFeatures) + return {}; + + auto MDCopy = MD.clone(); + + for (Function &F : MDCopy.getModule().functions()) { + if (!F.hasFnAttribute("indirectly-callable")) + continue; + + F.erase(F.begin(), F.end()); + BasicBlock *newBB = BasicBlock::Create(F.getContext(), "entry", &F); + IRBuilder<> builder(newBB); + builder.CreateRetVoid(); + } + + return MDCopy; +} + std::vector> processInputModule(std::unique_ptr M) { // Construct the resulting table which will accumulate all the outputs. @@ -893,6 +929,16 @@ processInputModule(std::unique_ptr M) { ++ID; } + + bool dummyEmitted = false; + for (module_split::ModuleDesc &IrMD : MMs) { + if (auto Dummy = makeDummy(IrMD)) { + saveModule(Tables, *Dummy, ID, OutIRFileName, /*IsDummy*/ true); + dummyEmitted = true; + } + } + if (dummyEmitted) + ++ID; } return Tables; } From 1f932ebef2f2589fd4711065ebb47ae04f0706ae Mon Sep 17 00:00:00 2001 From: "Cai, Justin" Date: Thu, 14 Nov 2024 00:13:57 -0800 Subject: [PATCH 2/7] [SYCL] Add runtime support for dummy images for virtual functions --- sycl/source/detail/device_binary_image.cpp | 6 +- sycl/source/detail/device_binary_image.hpp | 2 +- .../program_manager/program_manager.cpp | 18 ++- .../VirtualFunctions/RuntimeLinking.cpp | 137 +++++++++++++++++- sycl/unittests/helpers/MockDeviceImage.hpp | 2 +- 5 files changed, 149 insertions(+), 16 deletions(-) diff --git a/sycl/source/detail/device_binary_image.cpp b/sycl/source/detail/device_binary_image.cpp index 2be48d4a38fce..a0e099fd5f6fa 100644 --- a/sycl/source/detail/device_binary_image.cpp +++ b/sycl/source/detail/device_binary_image.cpp @@ -52,7 +52,7 @@ std::ostream &operator<<(std::ostream &Out, const DeviceBinaryProperty &P) { break; } case SYCL_PROPERTY_TYPE_STRING: - Out << P.asCString(); + Out << P.asStringView(); break; default: assert(false && "Unsupported property"); @@ -77,14 +77,14 @@ ByteArray DeviceBinaryProperty::asByteArray() const { return {Data, Prop->ValSize}; } -const char *DeviceBinaryProperty::asCString() const { +std::string_view DeviceBinaryProperty::asStringView() const { assert((Prop->Type == SYCL_PROPERTY_TYPE_STRING || Prop->Type == SYCL_PROPERTY_TYPE_BYTE_ARRAY) && "property type mismatch"); assert(Prop->ValSize > 0 && "property size mismatch"); // Byte array stores its size in first 8 bytes size_t Shift = Prop->Type == SYCL_PROPERTY_TYPE_BYTE_ARRAY ? 8 : 0; - return ur::cast(Prop->ValAddr) + Shift; + return {ur::cast(Prop->ValAddr) + Shift, Prop->ValSize}; } void RTDeviceBinaryImage::PropertyRange::init(sycl_device_binary Bin, diff --git a/sycl/source/detail/device_binary_image.hpp b/sycl/source/detail/device_binary_image.hpp index 203427b89ca45..6c773c0ea47af 100644 --- a/sycl/source/detail/device_binary_image.hpp +++ b/sycl/source/detail/device_binary_image.hpp @@ -73,7 +73,7 @@ class DeviceBinaryProperty { uint32_t asUint32() const; ByteArray asByteArray() const; - const char *asCString() const; + std::string_view asStringView() const; protected: friend std::ostream &operator<<(std::ostream &Out, diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 0c188b9fef718..60f51a1cbaa3e 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -671,10 +671,11 @@ ProgramManager::collectDependentDeviceImagesForVirtualFunctions( std::set HandledSets; std::queue WorkList; for (const sycl_device_binary_property &VFProp : Img.getVirtualFunctions()) { - std::string StrValue = DeviceBinaryProperty(VFProp).asCString(); + std::string_view StrValue = DeviceBinaryProperty(VFProp).asStringView(); // Device image passed to this function is expected to contain SYCL kernels // and therefore it may only use virtual function sets, but cannot provide - // them. We expect to see just a single property here + // them. Additionally, it cannot be a dummy image. + // We expect to see just a single property here assert(std::string(VFProp->Name) == "uses-virtual-functions-set" && "Unexpected virtual function property"); for (const auto &SetName : detail::split_string(StrValue, ',')) { @@ -695,9 +696,14 @@ ProgramManager::collectDependentDeviceImagesForVirtualFunctions( // virtual-functions-set properties, but their handling is the same: we // just grab all sets they reference and add them for consideration if // we haven't done so already. + bool isDummyImage = false; for (const sycl_device_binary_property &VFProp : BinImage->getVirtualFunctions()) { - std::string StrValue = DeviceBinaryProperty(VFProp).asCString(); + if (VFProp->Name == std::string_view("dummy-image")) { + isDummyImage = true; + continue; + } + std::string_view StrValue = DeviceBinaryProperty(VFProp).asStringView(); for (const auto &SetName : detail::split_string(StrValue, ',')) { if (HandledSets.insert(SetName).second) WorkList.push(SetName); @@ -710,7 +716,7 @@ ProgramManager::collectDependentDeviceImagesForVirtualFunctions( // However, if device image provides virtual function set and it is // incompatible, then we should link its "dummy" version to avoid link // errors about unresolved external symbols. - if (doesDevSupportDeviceRequirements(Dev, *BinImage)) + if (doesDevSupportDeviceRequirements(Dev, *BinImage) + isDummyImage == 1) DeviceImagesToLink.insert(BinImage); } } @@ -1797,7 +1803,9 @@ void ProgramManager::addImages(sycl_device_binaries DeviceBinary) { // Record mapping between virtual function sets and device images for (const sycl_device_binary_property &VFProp : Img->getVirtualFunctions()) { - std::string StrValue = DeviceBinaryProperty(VFProp).asCString(); + if (VFProp->Name == std::string_view("dummy-image")) + continue; + std::string_view StrValue = DeviceBinaryProperty(VFProp).asStringView(); for (const auto &SetName : detail::split_string(StrValue, ',')) m_VFSet2BinImage[SetName].insert(Img.get()); } diff --git a/sycl/unittests/Extensions/VirtualFunctions/RuntimeLinking.cpp b/sycl/unittests/Extensions/VirtualFunctions/RuntimeLinking.cpp index 61e0c5f5f32a5..aa3c1c413ea2e 100644 --- a/sycl/unittests/Extensions/VirtualFunctions/RuntimeLinking.cpp +++ b/sycl/unittests/Extensions/VirtualFunctions/RuntimeLinking.cpp @@ -18,6 +18,7 @@ class KernelD; class KernelE; class KernelF; class KernelG; +class KernelH; } // namespace VirtualFunctionsTest @@ -39,6 +40,7 @@ KERNEL_INFO(KernelD) KERNEL_INFO(KernelE) KERNEL_INFO(KernelF) KERNEL_INFO(KernelG) +KERNEL_INFO(KernelH) #undef KERNEL_INFO @@ -48,9 +50,13 @@ KERNEL_INFO(KernelG) static sycl::unittest::MockDeviceImage generateImage(std::initializer_list KernelNames, - const std::string &VFSets, bool UsesVFSets, unsigned char Magic) { + const std::string &VFSets, bool UsesVFSets, unsigned char Magic, + bool IsDummyImage = false, + std::vector Aspects = {}) { sycl::unittest::MockPropertySet PropSet; - std::vector Props; + + // Construct virtual function properties + std::vector VFProps; uint64_t PropSize = VFSets.size(); std::vector Storage(/* bytes for size */ 8 + PropSize + /* null terminator */ 1); @@ -64,9 +70,22 @@ generateImage(std::initializer_list KernelNames, sycl::unittest::MockProperty Prop(PropName, Storage, SYCL_PROPERTY_TYPE_BYTE_ARRAY); - Props.push_back(Prop); - PropSet.insert(__SYCL_PROPERTY_SET_SYCL_VIRTUAL_FUNCTIONS, std::move(Props)); + VFProps.push_back(Prop); + if (IsDummyImage) + VFProps.emplace_back("dummy-image", std::vector(4), + SYCL_PROPERTY_TYPE_UINT32); + + PropSet.insert(__SYCL_PROPERTY_SET_SYCL_VIRTUAL_FUNCTIONS, + std::move(VFProps)); + + // Construct device requirement properties + std::vector DeviceRequirmentsProps; + DeviceRequirmentsProps.emplace_back(sycl::unittest::makeAspectsProp(Aspects)); + PropSet.insert(__SYCL_PROPERTY_SET_SYCL_DEVICE_REQUIREMENTS, + std::move(DeviceRequirmentsProps)); + + // Assemble final device image std::vector Bin{Magic}; std::vector Entries = @@ -99,6 +118,9 @@ static constexpr unsigned PROGRAM_E0 = 37; static constexpr unsigned PROGRAM_F = 41; static constexpr unsigned PROGRAM_F0 = 47; static constexpr unsigned PROGRAM_F1 = 53; +static constexpr unsigned PROGRAM_H = 59; +static constexpr unsigned PROGRAM_H0 = 61; +static constexpr unsigned PROGRAM_H0d = 67; // Device images with no entires are ignored by SYCL RT during registration. // Therefore, we have to provide some kernel names to make the test work, even @@ -128,10 +150,16 @@ static sycl::unittest::MockDeviceImage Imgs[] = { generateImage({"KernelF"}, "set-f", /* uses vf set */ true, PROGRAM_F), generateImage({"DummyKernel7"}, "set-f", /* provides vf set */ false, PROGRAM_F0), - generateImage({"KernelG"}, "set-f", /* uses vf set */ true, PROGRAM_F1)}; + generateImage({"KernelG"}, "set-f", /* uses vf set */ true, PROGRAM_F1), + generateImage({"KernelH"}, "set-h", /* uses vf set */ true, PROGRAM_H, + false, {}), + generateImage({"DummyKernel7"}, "set-h", /* provides vf set */ false, + PROGRAM_H0, false, {sycl::aspect::fp64}), + generateImage({"DummyKernel7d"}, "set-h", /* provides vf set */ false, + PROGRAM_H0d, true, {sycl::aspect::fp64})}; // Registers mock devices images in the SYCL RT -static sycl::unittest::MockDeviceImageArray<15> ImgArray{Imgs}; +static sycl::unittest::MockDeviceImageArray ImgArray{Imgs}; TEST(VirtualFunctions, SingleKernelUsesSingleVFSet) { sycl::unittest::UrMock<> Mock; @@ -262,4 +290,101 @@ TEST(VirtualFunctions, TwoKernelsShareTheSameSet) { PROGRAM_F * PROGRAM_F0 * PROGRAM_F1); } +struct MockDeviceData { + std::string Extensions; + ur_device_handle_t getHandle() { + return reinterpret_cast(this); + } + static MockDeviceData *fromHandle(ur_device_handle_t handle) { + return reinterpret_cast(handle); + } +}; + +MockDeviceData MockDevices[] = { + {"cl_khr_fp64"}, + {""}, +}; + +static ur_result_t redefinedDeviceGet(void *pParams) { + auto params = *static_cast(pParams); + if (*params.ppNumDevices) { + **params.ppNumDevices = static_cast(std::size(MockDevices)); + return UR_RESULT_SUCCESS; + } + + if (*params.pphDevices) { + assert(*params.pNumEntries <= std::size(MockDevices)); + for (uint32_t i = 0; i < *params.pNumEntries; ++i) { + (*params.pphDevices)[i] = MockDevices[i].getHandle(); + } + } + + return UR_RESULT_SUCCESS; +} + +static ur_result_t redefinedDeviceGetInfo(void *pParams) { + auto *params = reinterpret_cast(pParams); + if (*params->ppropName == UR_DEVICE_INFO_EXTENSIONS) { + const std::string &Extensions = + MockDeviceData::fromHandle(*params->phDevice)->Extensions; + if (*params->ppPropValue) { + assert(*params->ppropSize >= Extensions.size() + 1); + std::memcpy(*params->ppPropValue, Extensions.data(), + Extensions.size() + 1); + } + if (*params->ppPropSizeRet && + **params->ppPropSizeRet < Extensions.size() + 1) + **params->ppPropSizeRet = Extensions.size() + 1; + return UR_RESULT_SUCCESS; + } + return UR_RESULT_SUCCESS; +} + +TEST(VirtualFunctions, DummyImages) { + sycl::unittest::UrMock<> Mock; + setupRuntimeLinkingMock(); + mock::getCallbacks().set_after_callback("urDeviceGet", &redefinedDeviceGet); + mock::getCallbacks().set_after_callback("urDeviceGetInfo", + &redefinedDeviceGetInfo); + + sycl::platform Plt = sycl::platform(); + sycl::queue Q(sycl::aspect_selector({sycl::aspect::fp64})); + EXPECT_TRUE(Q.get_device().has(sycl::aspect::fp64)); + + CapturedLinkingData.clear(); + + // KernelF uses set "set-h" that is also used by KernelG + Q.single_task([=]() {}); + // When we submit this kernel, we expect that two programs were created (one + // for KernelH, another providing "set-h" + EXPECT_EQ(CapturedLinkingData.NumOfUrProgramCreateCalls, 2u); + // Both programs should be linked together. + EXPECT_EQ(CapturedLinkingData.NumOfUrProgramLinkCalls, 1u); + // The module providing set-h is set up to use fp64, + // and since the device support fp64, we link the + // non-dummy version that provides set-h. + EXPECT_TRUE( + CapturedLinkingData.LinkedProgramsContains({PROGRAM_H, PROGRAM_H0})); + EXPECT_EQ(CapturedLinkingData.ProgramUsedToCreateKernel, + PROGRAM_H * PROGRAM_H0); + + CapturedLinkingData.clear(); + + EXPECT_EQ(Plt.get_devices().size(), 2); + sycl::queue Q2(sycl::aspect_selector({}, {sycl::aspect::fp64})); + + // We now repeat what we did launching KernelH but on another + // device that does not support fp64. + Q2.single_task([=]() {}); + EXPECT_EQ(CapturedLinkingData.NumOfUrProgramCreateCalls, 2u); + EXPECT_EQ(CapturedLinkingData.NumOfUrProgramLinkCalls, 1u); + + // However, this time, we expect the dummy image to be linked + // as the device does not support fp64. + EXPECT_TRUE( + CapturedLinkingData.LinkedProgramsContains({PROGRAM_H, PROGRAM_H0d})); + EXPECT_EQ(CapturedLinkingData.ProgramUsedToCreateKernel, + PROGRAM_H * PROGRAM_H0d); +} + // TODO: Add test cases for kernel_bundle usage diff --git a/sycl/unittests/helpers/MockDeviceImage.hpp b/sycl/unittests/helpers/MockDeviceImage.hpp index fea80d6b08c3e..0c2d375c3e810 100644 --- a/sycl/unittests/helpers/MockDeviceImage.hpp +++ b/sycl/unittests/helpers/MockDeviceImage.hpp @@ -558,7 +558,7 @@ inline MockProperty makeAspectsProp(const std::vector &Aspects) { uint64_t ValDataSize = ValData.size(); std::uninitialized_copy(&ValDataSize, &ValDataSize + sizeof(uint64_t), ValData.data()); - auto *AspectsPtr = reinterpret_cast(&Aspects[0]); + auto *AspectsPtr = reinterpret_cast(Aspects.data()); std::uninitialized_copy(AspectsPtr, AspectsPtr + Aspects.size(), ValData.data() + BYTES_FOR_SIZE); return {"aspects", ValData, SYCL_PROPERTY_TYPE_BYTE_ARRAY}; From 85de9eee6185f9bfa11849f70b9907afa65d0ac7 Mon Sep 17 00:00:00 2001 From: "Cai, Justin" Date: Thu, 14 Nov 2024 15:38:40 +0000 Subject: [PATCH 3/7] Add comment --- .../detail/program_manager/program_manager.cpp | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 60f51a1cbaa3e..b9abc06ae566e 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -710,12 +710,23 @@ ProgramManager::collectDependentDeviceImagesForVirtualFunctions( } } - // TODO: Complete this part about handling of incompatible device images. // If device image uses the same virtual function set, then we only // link it if it is compatible. // However, if device image provides virtual function set and it is // incompatible, then we should link its "dummy" version to avoid link // errors about unresolved external symbols. + // Note: we only link when exactly one of + // doesDevSupportDeviceRequirements(Dev, *BinImage) and + // isDummyImage is true. We don't want to link every dummy image, + // otherwise we could run into linking errors defining the same symbol + // multiple times. For every image providing virtual functions that has + // a dummy image, the dummy image will have the same device requirements + // as the original image. So when the dummy image does support the + // device requirements, we know that the corresponding image providing + // actual definitions will be linked and not the dummy. And vice versa: + // when the dummy image does not support the device requirements, we + // know the corresponding image providing virtual functions was not + // linked and we must link the dummy image. if (doesDevSupportDeviceRequirements(Dev, *BinImage) + isDummyImage == 1) DeviceImagesToLink.insert(BinImage); } From 0bb5394270c44c504168ef8c774a41298268f6bf Mon Sep 17 00:00:00 2001 From: "Cai, Justin" Date: Thu, 14 Nov 2024 15:40:23 +0000 Subject: [PATCH 4/7] Add comment --- sycl/unittests/Extensions/VirtualFunctions/RuntimeLinking.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/unittests/Extensions/VirtualFunctions/RuntimeLinking.cpp b/sycl/unittests/Extensions/VirtualFunctions/RuntimeLinking.cpp index aa3c1c413ea2e..82cdb2c622fa8 100644 --- a/sycl/unittests/Extensions/VirtualFunctions/RuntimeLinking.cpp +++ b/sycl/unittests/Extensions/VirtualFunctions/RuntimeLinking.cpp @@ -154,9 +154,9 @@ static sycl::unittest::MockDeviceImage Imgs[] = { generateImage({"KernelH"}, "set-h", /* uses vf set */ true, PROGRAM_H, false, {}), generateImage({"DummyKernel7"}, "set-h", /* provides vf set */ false, - PROGRAM_H0, false, {sycl::aspect::fp64}), + PROGRAM_H0, /* isDummy */ false, {sycl::aspect::fp64}), generateImage({"DummyKernel7d"}, "set-h", /* provides vf set */ false, - PROGRAM_H0d, true, {sycl::aspect::fp64})}; + PROGRAM_H0d, /* isDummy */ true, {sycl::aspect::fp64})}; // Registers mock devices images in the SYCL RT static sycl::unittest::MockDeviceImageArray ImgArray{Imgs}; From 132793913f8673deab325c2e6c40a8735961df46 Mon Sep 17 00:00:00 2001 From: "Cai, Justin" Date: Fri, 22 Nov 2024 14:36:57 -0800 Subject: [PATCH 5/7] [SYCL] Add e2e test for optional kernel features and virtual functions --- .../include/llvm/SYCLLowerIR/ModuleSplitter.h | 4 + llvm/lib/SYCLLowerIR/ModuleSplitter.cpp | 9 ++ .../SYCLLowerIR/SYCLPropagateAspectsUsage.cpp | 16 +++ .../sycl-post-link/virtual-functions/dummy.ll | 2 +- llvm/tools/sycl-post-link/sycl-post-link.cpp | 114 ++++++++++++------ sycl/source/detail/device_binary_image.cpp | 45 ++++++- .../program_manager/program_manager.cpp | 7 +- .../vf-optional-kernel-features.cpp | 68 +++++++++++ 8 files changed, 227 insertions(+), 38 deletions(-) create mode 100644 sycl/test-e2e/VirtualFunctions/vf-optional-kernel-features.cpp diff --git a/llvm/include/llvm/SYCLLowerIR/ModuleSplitter.h b/llvm/include/llvm/SYCLLowerIR/ModuleSplitter.h index e622db50dd364..1f6ecb54a0cce 100644 --- a/llvm/include/llvm/SYCLLowerIR/ModuleSplitter.h +++ b/llvm/include/llvm/SYCLLowerIR/ModuleSplitter.h @@ -130,6 +130,7 @@ class ModuleDesc { EntryPointGroup EntryPoints; bool IsTopLevel = false; mutable std::optional Reqs; + bool IsDummyImage = false; public: struct Properties { @@ -225,6 +226,9 @@ class ModuleDesc { void saveSplitInformationAsMetadata(); + ModuleDesc makeDummy() const; + bool isDummyImage() { return IsDummyImage; } + #ifndef NDEBUG void verifyESIMDProperty() const; void dump() const; diff --git a/llvm/lib/SYCLLowerIR/ModuleSplitter.cpp b/llvm/lib/SYCLLowerIR/ModuleSplitter.cpp index 904424f93dae6..ade3955600564 100644 --- a/llvm/lib/SYCLLowerIR/ModuleSplitter.cpp +++ b/llvm/lib/SYCLLowerIR/ModuleSplitter.cpp @@ -816,6 +816,15 @@ void ModuleDesc::saveSplitInformationAsMetadata() { SpecConstantsPass::SPEC_CONST_DEFAULT_VAL_MODULE_MD_STRING); } +ModuleDesc ModuleDesc::makeDummy() const { + ModuleDesc MD(CloneModule(getModule())); + MD.EntryPoints = EntryPoints; + MD.IsTopLevel = IsTopLevel; + MD.Reqs = Reqs; + MD.IsDummyImage = true; + return MD; +} + void EntryPointGroup::saveNames(std::vector &Dest) const { Dest.reserve(Dest.size() + Functions.size()); std::transform(Functions.begin(), Functions.end(), diff --git a/llvm/lib/SYCLLowerIR/SYCLPropagateAspectsUsage.cpp b/llvm/lib/SYCLLowerIR/SYCLPropagateAspectsUsage.cpp index 126a03bdf03bf..122fc10bfd6e6 100644 --- a/llvm/lib/SYCLLowerIR/SYCLPropagateAspectsUsage.cpp +++ b/llvm/lib/SYCLLowerIR/SYCLPropagateAspectsUsage.cpp @@ -665,6 +665,22 @@ void processDeclaredVirtualFunctionSets( StringMap> &VirtualFunctionSets) { if (!F->hasFnAttribute("calls-indirectly")) return; + + // "Construction" kernels which reference the vtable + // can be marked with calls-indirectly attribute by SYCLVirtualFunctionAnalysis pass. + bool hasVirtualCall = false; + for (const Instruction &I : instructions(F)) { + const auto *CI = dyn_cast(&I); + if (!CI) + continue; + if (CI->isIndirectCall() && CI->hasFnAttr("virtual-call")) { + hasVirtualCall = true; + break; + } + } + if (!hasVirtualCall) + return; + Attribute CallsIndirectlyAttr = F->getFnAttribute("calls-indirectly"); SmallVector DeclaredVirtualFunctionSetNames; CallsIndirectlyAttr.getValueAsString().split(DeclaredVirtualFunctionSetNames, diff --git a/llvm/test/tools/sycl-post-link/virtual-functions/dummy.ll b/llvm/test/tools/sycl-post-link/virtual-functions/dummy.ll index 8361ed22c3537..c2f7a3f81f85b 100644 --- a/llvm/test/tools/sycl-post-link/virtual-functions/dummy.ll +++ b/llvm/test/tools/sycl-post-link/virtual-functions/dummy.ll @@ -16,7 +16,7 @@ ; CHECK-FP64-DUMMY-NEXT: entry: ; CHECK-FP64-DUMMY-NEXT: ret void -; CHECK-FP64-DUMMY-PROPS: dummy=1 +; CHECK-FP64-DUMMY-PROPS: dummy-image=1 define spir_func void @foo() #1 { %x = alloca float diff --git a/llvm/tools/sycl-post-link/sycl-post-link.cpp b/llvm/tools/sycl-post-link/sycl-post-link.cpp index 28a644aad1873..7edaf6fab8228 100644 --- a/llvm/tools/sycl-post-link/sycl-post-link.cpp +++ b/llvm/tools/sycl-post-link/sycl-post-link.cpp @@ -58,6 +58,7 @@ #include "llvm/Transforms/Scalar/DCE.h" #include "llvm/Transforms/Scalar/EarlyCSE.h" #include "llvm/Transforms/Scalar/SROA.h" +#include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/GlobalStatus.h" #include @@ -295,18 +296,41 @@ void saveModuleIR(Module &M, StringRef OutFilename) { MPM.run(M, MAM); } -std::string saveModuleIR(Module &M, int I, StringRef Suff) { - DUMP_ENTRY_POINTS(M, EmitOnlyKernelsAsEntryPoints, "saving IR"); +std::unique_ptr makeDummyImageIR(const Module &M) { + auto MCopy = CloneModule(M); + for (Function &F : MCopy->functions()) { + if (!F.hasFnAttribute("indirectly-callable")) + continue; + + F.erase(F.begin(), F.end()); + BasicBlock *newBB = BasicBlock::Create(F.getContext(), "entry", &F); + IRBuilder<> builder(newBB); + if (F.getReturnType()->isVoidTy()) + builder.CreateRetVoid(); + else + builder.CreateRet(UndefValue::get(F.getReturnType())); + } + return MCopy; +} + +std::string saveModuleIR(module_split::ModuleDesc &MD, int I, StringRef Suff) { + std::unique_ptr Storage; + Module *M = &MD.getModule(); + if (MD.isDummyImage()) { + Storage = makeDummyImageIR(MD.getModule()); + M = Storage.get(); + } + + DUMP_ENTRY_POINTS(*M, EmitOnlyKernelsAsEntryPoints, "saving IR"); StringRef FileExt = (OutputAssembly) ? ".ll" : ".bc"; std::string OutFilename = makeResultFileName(FileExt, I, Suff); - saveModuleIR(M, OutFilename); + saveModuleIR(*M, OutFilename); return OutFilename; } std::string saveModuleProperties(module_split::ModuleDesc &MD, const GlobalBinImageProps &GlobProps, int I, - StringRef Suff, StringRef Target = "", - bool IsDummy = false) { + StringRef Suff, StringRef Target = "") { auto PropSet = computeModuleProperties(MD.getModule(), MD.entries(), GlobProps); @@ -318,9 +342,8 @@ std::string saveModuleProperties(module_split::ModuleDesc &MD, NewSuff += Target; } - if (IsDummy) { - PropSet.add(PropSetRegTy::SYCL_VIRTUAL_FUNCTIONS, "dummy", 1); - } + if (MD.isDummyImage()) + PropSet.add(PropSetRegTy::SYCL_VIRTUAL_FUNCTIONS, "dummy-image", 1); std::error_code EC; std::string SCFile = makeResultFileName(".prop", I, NewSuff); @@ -420,8 +443,7 @@ void addTableRow(util::SimpleTable &Table, // IR component saving is skipped, and this file name is recorded as such in // the result. void saveModule(std::vector> &OutTables, - module_split::ModuleDesc &MD, int I, StringRef IRFilename, - bool IsDummy = false) { + module_split::ModuleDesc &MD, int I, StringRef IRFilename) { IrPropSymFilenameTriple BaseTriple; StringRef Suffix = getModuleSuffix(MD); MD.saveSplitInformationAsMetadata(); @@ -430,7 +452,7 @@ void saveModule(std::vector> &OutTables, BaseTriple.Ir = IRFilename.str(); } else { MD.cleanup(); - BaseTriple.Ir = saveModuleIR(MD.getModule(), I, Suffix); + BaseTriple.Ir = saveModuleIR(MD, I, Suffix); } if (DoSymGen) { // save the names of the entry points - the symbol table @@ -445,8 +467,8 @@ void saveModule(std::vector> &OutTables, GlobalBinImageProps Props = {EmitKernelParamInfo, EmitProgramMetadata, EmitExportedSymbols, EmitImportedSymbols, DeviceGlobals}; - CopyTriple.Prop = saveModuleProperties(MD, Props, I, Suffix, - OutputFile.Target, IsDummy); + CopyTriple.Prop = + saveModuleProperties(MD, Props, I, Suffix, OutputFile.Target); } addTableRow(*Table, CopyTriple); } @@ -746,11 +768,10 @@ bool isTargetCompatibleWithModule(const std::string &Target, return true; } -std::optional -makeDummy(module_split::ModuleDesc &MD) { +bool hasVirtualFunctionsAndOptionalKernelFeatures(const Module &M) { bool hasVirtualFunctions = false; bool hasOptionalKernelFeatures = false; - for (Function &F : MD.getModule().functions()) { + for (const Function &F : M.functions()) { if (F.hasFnAttribute("indirectly-callable")) hasVirtualFunctions = true; if (F.getMetadata("sycl_used_aspects")) @@ -758,24 +779,42 @@ makeDummy(module_split::ModuleDesc &MD) { if (hasVirtualFunctions && hasOptionalKernelFeatures) break; } - if (!hasVirtualFunctions || !hasOptionalKernelFeatures) - return {}; - - auto MDCopy = MD.clone(); - - for (Function &F : MDCopy.getModule().functions()) { - if (!F.hasFnAttribute("indirectly-callable")) - continue; - - F.erase(F.begin(), F.end()); - BasicBlock *newBB = BasicBlock::Create(F.getContext(), "entry", &F); - IRBuilder<> builder(newBB); - builder.CreateRetVoid(); - } - - return MDCopy; + return hasVirtualFunctions && hasOptionalKernelFeatures; } +// std::optional +// makeDummy(module_split::ModuleDesc &MD) { +// bool hasVirtualFunctions = false; +// bool hasOptionalKernelFeatures = false; +// for (Function &F : M.functions()) { +// if (F.hasFnAttribute("indirectly-callable")) +// hasVirtualFunctions = true; +// if (F.getMetadata("sycl_used_aspects")) +// hasOptionalKernelFeatures = true; +// if (hasVirtualFunctions && hasOptionalKernelFeatures) +// break; +// } +// if (!hasVirtualFunctions || !hasOptionalKernelFeatures) +// return {}; + +// auto MDCopy = MD.clone(); + +// for (Function &F : MDCopy.getModule().functions()) { +// if (!F.hasFnAttribute("indirectly-callable")) +// continue; + +// F.erase(F.begin(), F.end()); +// BasicBlock *newBB = BasicBlock::Create(F.getContext(), "entry", &F); +// IRBuilder<> builder(newBB); +// if (F.getReturnType()->isVoidTy()) +// builder.CreateRetVoid(); +// else +// builder.CreateRet(UndefValue::get(F.getReturnType())); +// } + +// return MDCopy; +// } + std::vector> processInputModule(std::unique_ptr M) { // Construct the resulting table which will accumulate all the outputs. @@ -924,11 +963,16 @@ processInputModule(std::unique_ptr M) { ++ID; } + // For kernels with virtual functions and optional kernel features, generate + // a dummy image to avoid link errors. A dummy image for a set of virtual + // functions is a module with the same set of virtual functions, but with + // those function bodies replaced with just a return. bool dummyEmitted = false; for (module_split::ModuleDesc &IrMD : MMs) { - if (auto Dummy = makeDummy(IrMD)) { - saveModule(Tables, *Dummy, ID, OutIRFileName, /*IsDummy*/ true); - dummyEmitted = true; + if ((dummyEmitted = hasVirtualFunctionsAndOptionalKernelFeatures( + IrMD.getModule()))) { + auto DummyImage = IrMD.makeDummy(); + saveModule(Tables, DummyImage, ID, OutIRFileName); } } if (dummyEmitted) diff --git a/sycl/source/detail/device_binary_image.cpp b/sycl/source/detail/device_binary_image.cpp index a0e099fd5f6fa..41a5088c8ea39 100644 --- a/sycl/source/detail/device_binary_image.cpp +++ b/sycl/source/detail/device_binary_image.cpp @@ -20,6 +20,35 @@ namespace sycl { inline namespace _V1 { namespace detail { +void printAspects(std::ostream &Out, ByteArray BA) { + BA.dropBytes(8); + Out << "["; + for (int i = 0; !BA.empty(); ++i) { + auto Aspect = BA.consume(); + switch (Aspect) { +#define __SYCL_ASPECT(ASPECT, ID) \ + case sycl::aspect::ASPECT: \ + Out << #ASPECT; \ + break; +#define __SYCL_ASPECT_DEPRECATED(ASPECT, ID, MESSAGE) \ + case sycl::aspect::ASPECT: \ + Out << #ASPECT; \ + break; +#include +#include +#undef __SYCL_ASPECT +#undef __SYCL_ASPECT_DEPRECATED + default: + Out << "unknown (" << static_cast(Aspect) << ")"; + break; + } + if (i != 0) + Out << ", "; + } + Out << "]"; + return; +} + std::ostream &operator<<(std::ostream &Out, const DeviceBinaryProperty &P) { switch (P.Prop->Type) { case SYCL_PROPERTY_TYPE_UINT32: @@ -42,6 +71,20 @@ std::ostream &operator<<(std::ostream &Out, const DeviceBinaryProperty &P) { Out << P.asUint32(); break; case SYCL_PROPERTY_TYPE_BYTE_ARRAY: { + // Special case for aspects, print out the aspect names + if (P.Prop->Name == std::string_view("aspects")) { + printAspects(Out, P.asByteArray()); + break; + } + + // Special case for these properties, print out the value as a string + if (P.Prop->Name == std::string_view("virtual-functions-set") || + P.Prop->Name == std::string_view("uses-virtual-functions-set")) { + Out << P.asStringView(); + break; + } + + // Otherwise, print out the byte array as hex ByteArray BA = P.asByteArray(); std::ios_base::fmtflags FlagsBackup = Out.flags(); Out << std::hex; @@ -84,7 +127,7 @@ std::string_view DeviceBinaryProperty::asStringView() const { assert(Prop->ValSize > 0 && "property size mismatch"); // Byte array stores its size in first 8 bytes size_t Shift = Prop->Type == SYCL_PROPERTY_TYPE_BYTE_ARRAY ? 8 : 0; - return {ur::cast(Prop->ValAddr) + Shift, Prop->ValSize}; + return {ur::cast(Prop->ValAddr) + Shift, Prop->ValSize - Shift}; } void RTDeviceBinaryImage::PropertyRange::init(sycl_device_binary Bin, diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 639359a5a87dd..e73dbc371b432 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -732,8 +732,10 @@ ProgramManager::collectDependentDeviceImagesForVirtualFunctions( // when the dummy image does not support the device requirements, we // know the corresponding image providing virtual functions was not // linked and we must link the dummy image. - if (doesDevSupportDeviceRequirements(Dev, *BinImage) + isDummyImage == 1) + if (doesDevSupportDeviceRequirements(Dev, *BinImage) + isDummyImage == + 1) { DeviceImagesToLink.insert(BinImage); + } } } @@ -1765,6 +1767,9 @@ void ProgramManager::addImages(sycl_device_binaries DeviceBinary) { else Img = std::make_unique(RawImg); + if (std::getenv("SYCL_PRINT_IMAGES")) + Img->print(); + static uint32_t SequenceID = 0; // Fill the kernel argument mask map diff --git a/sycl/test-e2e/VirtualFunctions/vf-optional-kernel-features.cpp b/sycl/test-e2e/VirtualFunctions/vf-optional-kernel-features.cpp new file mode 100644 index 0000000000000..a86bc8153f71e --- /dev/null +++ b/sycl/test-e2e/VirtualFunctions/vf-optional-kernel-features.cpp @@ -0,0 +1,68 @@ +#include + +namespace syclext = sycl::ext::oneapi::experimental; + +struct set_fp64; + +struct Base { + bool fooCalled = false; + SYCL_EXT_ONEAPI_FUNCTION_PROPERTY(syclext::indirectly_callable) + virtual void foo() { fooCalled = true; } + + bool barCalled = false; + SYCL_EXT_ONEAPI_FUNCTION_PROPERTY(syclext::indirectly_callable_in) + virtual void bar() { + // this virtual function uses double + volatile double d = 3.14; + barCalled = true; + } +}; + +class Constructor; +class Use; +class UseFP64; + +int main() { + // Selected device may not support 'fp64' aspect + sycl::queue Q; + + Base *Obj = sycl::malloc_device(1, Q); + + Q.single_task([=]() { + // Even though at LLVM IR level this kernel does reference 'Base::foo' + // and 'Base::bar' through global variable containing `vtable` for `Base`, + // we do not consider the kernel to be using `fp64` optional feature. + new (Obj) Base; + }).wait(); + + Q.single_task(syclext::properties{syclext::assume_indirect_calls}, [=]() { + // This kernel is not considered to be using any optional features, because + // virtual functions in default set do not use any. + Obj->foo(); + }).wait(); + + if (Q.get_device().has(sycl::aspect::fp64)) { + Q.single_task(syclext::properties{syclext::assume_indirect_calls_to}, + [=]() { + // This kernel is considered to be using 'fp64' optional feature, because + // there is a virtual function in 'set_fp64' which uses double. + Obj->bar(); + }).wait(); + } + + int nfails = 0; + if (!Obj->fooCalled) { + std::cerr << "Error: 'foo' was not called\n"; + ++nfails; + } + if (Q.get_device().has(sycl::aspect::fp64) && !Obj->barCalled) { + std::cerr << "Error: 'bar' was not called\n"; + ++nfails; + } + if (!Q.get_device().has(sycl::aspect::fp64) && Obj->barCalled) { + std::cerr << "Error: 'bar' was called, but should not have been\n"; + ++nfails; + } + + return 0; +} \ No newline at end of file From 11dba6b70d90eec05151936add1ee303e31cca5e Mon Sep 17 00:00:00 2001 From: "Cai, Justin" Date: Fri, 22 Nov 2024 22:40:53 +0000 Subject: [PATCH 6/7] Add run lines --- .../VirtualFunctions/vf-optional-kernel-features.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sycl/test-e2e/VirtualFunctions/vf-optional-kernel-features.cpp b/sycl/test-e2e/VirtualFunctions/vf-optional-kernel-features.cpp index a86bc8153f71e..83cdfc7b5a94e 100644 --- a/sycl/test-e2e/VirtualFunctions/vf-optional-kernel-features.cpp +++ b/sycl/test-e2e/VirtualFunctions/vf-optional-kernel-features.cpp @@ -1,4 +1,7 @@ -#include +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +#include +#include namespace syclext = sycl::ext::oneapi::experimental; From bbde10204dd4fb39844f76d07650f0c7eaf4f31e Mon Sep 17 00:00:00 2001 From: "Cai, Justin" Date: Tue, 26 Nov 2024 13:23:47 -0800 Subject: [PATCH 7/7] malloc_device -> malloc_shared --- sycl/test-e2e/VirtualFunctions/vf-optional-kernel-features.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/test-e2e/VirtualFunctions/vf-optional-kernel-features.cpp b/sycl/test-e2e/VirtualFunctions/vf-optional-kernel-features.cpp index 83cdfc7b5a94e..5a1dfbc0912a6 100644 --- a/sycl/test-e2e/VirtualFunctions/vf-optional-kernel-features.cpp +++ b/sycl/test-e2e/VirtualFunctions/vf-optional-kernel-features.cpp @@ -29,7 +29,7 @@ int main() { // Selected device may not support 'fp64' aspect sycl::queue Q; - Base *Obj = sycl::malloc_device(1, Q); + Base *Obj = sycl::malloc_shared(1, Q); Q.single_task([=]() { // Even though at LLVM IR level this kernel does reference 'Base::foo'