Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SWDEV-438299 - Remove unused varibles #56

Open
wants to merge 27 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
4bc515a
SWDEV-441487 - Strix Halo Support and Strix support in staging
jiabaxie Jan 17, 2024
031addf
SWDEV-431367 - fix float compare for atomicMax/Min where -0.0 < +0.0
cjatin Nov 16, 2023
de1c5b2
SWDEV-441065 - Fix device accessable host memory copy path
Jan 18, 2024
4254e38
SWDEV-441201,SWDEV-441727 - Move BlitProgram creation after trap hand…
iassiour Jan 18, 2024
7edfd70
SWDEV-439419 - Unlock host ptr when device ptr equals host ptr
Jan 3, 2024
551cdcd
SWDEV-425605 - Add new comgr compile to reloc
cjatin Jan 18, 2024
251727e
SWDEV-422207 - Added debug env to dump graph during Instantiation
Dec 8, 2023
2856566
SWDEV-437832 - Adding device property to check if the device is accel…
kjayapra-amd Dec 15, 2023
fdcd9d5
SWDEV-442126 - Fix use_after_free case in ExtractFatBinaryUsingCOMGR
iassiour Jan 19, 2024
56ebdf2
Revert "SWDEV-425605 - Add new comgr compile to reloc"
cjatin Jan 22, 2024
77e059c
SWDEV-433312 - Return invalidArgument in cuMemSetAccess for hipMemAcc…
iassiour Jan 22, 2024
b181dbb
SWDEV-420140 - Prefer forward slashes for path seperators in compiler…
vikramRH Jan 11, 2024
4440bac
SWDEV-442421 - Fixed case where hipIpcGetMemHandle erroneously return…
iassiour Jan 22, 2024
c87ac8c
SWDEV-311271 - Add extra logic to reduce memory usage
gandryey Jan 22, 2024
1416b9f
SWDEV-433820 - Optimize queue initialization (ROCM)
AlexXAmd Nov 29, 2023
7ff3663
SWDEV-441937 - Use the disable linemarker option for generating the p…
raramakr Jan 18, 2024
7681681
SWDEV-311271 - Move mempool clean-up into hipEventSynchronize
gandryey Jan 22, 2024
b5224f2
SWDEV-425605 - Add new comgr compile to reloc
cjatin Jan 18, 2024
e657b5a
SWDEV-441482 - Add missing entry point
gandryey Jan 22, 2024
74edd40
SWDEV-425605 - remove exe_options, it was unused
cjatin Jan 19, 2024
eeece54
SWDEV-439637 - Fix undefined symbol error(github id- #3382)
Jan 3, 2024
a99b163
SWDEV-439628 - hipGraphExecKernelNodeSetParams to update graph kernel…
Jan 8, 2024
47f90e1
SWDEV-440718 - Suffix hip bin name with _6.
agunashe Jan 11, 2024
dc16383
SWDEV-440029 - Fix hipMemcpyFrom/ToSymbol directions allowed
satyanveshd Jan 22, 2024
84ff2da
SWDEV-368741 - Check for uninstalled hsa-amd-aqlprofile to avoid Segm…
Oct 4, 2023
8ff39a5
SWDEV-295298: hide warp sync builtins with a macro in ROCm 6.1
ssahasra Jan 24, 2024
bb23603
SWDEV-438299 - Remove unused variables
ipanfilo Feb 9, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions hipamd/hip-config-amd.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,12 @@ if( CLANGRT_Error )
else()
# Add support for __fp16 and _Float16, explicitly link with compiler-rt
if( "${CLANGRT_BUILTINS_FETCH_EXIT_CODE}" STREQUAL "0" )
# The HIP_CXX_COMPILER by default prefers backward slashes for path seperators on windows.
# Prefer forward slashes here to avoid escaping issues on certain build systems.
if(WIN32)
string(REPLACE "\\" "/" CLANGRT_BUILTINS ${CLANGRT_BUILTINS})
endif()

# CLANG_RT Builtins found Successfully Set interface link libraries property
set_property(TARGET hip::host APPEND PROPERTY INTERFACE_LINK_LIBRARIES "${CLANGRT_BUILTINS}")
set_property(TARGET hip::device APPEND PROPERTY INTERFACE_LINK_LIBRARIES "${CLANGRT_BUILTINS}")
Expand Down
42 changes: 34 additions & 8 deletions hipamd/include/hip/amd_detail/amd_hip_atomic.h
Original file line number Diff line number Diff line change
Expand Up @@ -612,11 +612,17 @@ float atomicMin(float* addr, float val) {
#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
return unsafeAtomicMin(addr, val);
#else
typedef union u_hold {
float a;
unsigned int b;
} u_hold_t;
u_hold_t u{val};
bool neg_zero = 0x80000000U == u.b;
#if __has_builtin(__hip_atomic_load) && \
__has_builtin(__hip_atomic_compare_exchange_strong)
float value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
bool done = false;
while (!done && value > val) {
while (!done && (value > val || (neg_zero && value == 0.0f))) {
done = __hip_atomic_compare_exchange_strong(addr, &value, val,
__ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}
Expand All @@ -625,7 +631,7 @@ float atomicMin(float* addr, float val) {
unsigned int *uaddr = (unsigned int *)addr;
unsigned int value = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
bool done = false;
while (!done && __uint_as_float(value) > val) {
while (!done && (__uint_as_float(value) > val || (neg_zero && __uint_as_float(value) == 0.0f))) {
done = __atomic_compare_exchange_n(uaddr, &value, __float_as_uint(val), false,
__ATOMIC_RELAXED, __ATOMIC_RELAXED);
}
Expand Down Expand Up @@ -658,11 +664,17 @@ double atomicMin(double* addr, double val) {
#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
return unsafeAtomicMin(addr, val);
#else
typedef union u_hold {
double a;
unsigned long long b;
} u_hold_t;
u_hold_t u{val};
bool neg_zero = 0x8000000000000000ULL == u.b;
#if __has_builtin(__hip_atomic_load) && \
__has_builtin(__hip_atomic_compare_exchange_strong)
double value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
bool done = false;
while (!done && value > val) {
while (!done && (value > val || (neg_zero && value == 0.0))) {
done = __hip_atomic_compare_exchange_strong(addr, &value, val,
__ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}
Expand All @@ -671,7 +683,8 @@ double atomicMin(double* addr, double val) {
unsigned long long *uaddr = (unsigned long long *)addr;
unsigned long long value = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
bool done = false;
while (!done && __longlong_as_double(value) > val) {
while (!done &&
(__longlong_as_double(value) > val || (neg_zero && __longlong_as_double(value) == 0.0))) {
done = __atomic_compare_exchange_n(uaddr, &value, __double_as_longlong(val), false,
__ATOMIC_RELAXED, __ATOMIC_RELAXED);
}
Expand Down Expand Up @@ -856,11 +869,17 @@ float atomicMax(float* addr, float val) {
#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
return unsafeAtomicMax(addr, val);
#else
typedef union u_hold {
float a;
unsigned int b;
} u_hold_t;
u_hold_t u{val};
bool neg_zero = 0x80000000U == u.b;
#if __has_builtin(__hip_atomic_load) && \
__has_builtin(__hip_atomic_compare_exchange_strong)
float value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
bool done = false;
while (!done && value < val) {
while (!done && (value < val || (neg_zero && value == 0.0f))) {
done = __hip_atomic_compare_exchange_strong(addr, &value, val,
__ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}
Expand All @@ -869,7 +888,7 @@ float atomicMax(float* addr, float val) {
unsigned int *uaddr = (unsigned int *)addr;
unsigned int value = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
bool done = false;
while (!done && __uint_as_float(value) < val) {
while (!done && (__uint_as_float(value) < val || (neg_zero && __uint_as_float(value) == 0.0f))) {
done = __atomic_compare_exchange_n(uaddr, &value, __float_as_uint(val), false,
__ATOMIC_RELAXED, __ATOMIC_RELAXED);
}
Expand Down Expand Up @@ -902,11 +921,17 @@ double atomicMax(double* addr, double val) {
#if defined(__AMDGCN_UNSAFE_FP_ATOMICS__)
return unsafeAtomicMax(addr, val);
#else
typedef union u_hold {
double a;
unsigned long long b;
} u_hold_t;
u_hold_t u{val};
bool neg_zero = 0x8000000000000000ULL == u.b;
#if __has_builtin(__hip_atomic_load) && \
__has_builtin(__hip_atomic_compare_exchange_strong)
double value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
bool done = false;
while (!done && value < val) {
while (!done && (value < val || (neg_zero && value == 0.0))) {
done = __hip_atomic_compare_exchange_strong(addr, &value, val,
__ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
}
Expand All @@ -915,7 +940,8 @@ double atomicMax(double* addr, double val) {
unsigned long long *uaddr = (unsigned long long *)addr;
unsigned long long value = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
bool done = false;
while (!done && __longlong_as_double(value) < val) {
while (!done &&
(__longlong_as_double(value) < val || (neg_zero && __longlong_as_double(value) == 0.0))) {
done = __atomic_compare_exchange_n(uaddr, &value, __double_as_longlong(val), false,
__ATOMIC_RELAXED, __ATOMIC_RELAXED);
}
Expand Down
4 changes: 3 additions & 1 deletion hipamd/include/hip/amd_detail/amd_warp_functions.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,14 +103,16 @@ unsigned long long int __ballot64(int predicate) {
return __builtin_amdgcn_uicmp(predicate, 0, ICMP_NE);
}

// See amd_warp_sync_functions.h for an explanation of this preprocessor flag.
#ifdef HIP_ENABLE_WARP_SYNC_BUILTINS
// Since threads in a wave do not make independent progress, __activemask()
// always returns the exact active mask, i.e, all active threads in the wave.

__device__
inline
unsigned long long __activemask() {
return __ballot(true);
}
#endif // HIP_ENABLE_WARP_SYNC_BUILTINS

__device__ static inline unsigned int __lane_id() {
return __builtin_amdgcn_mbcnt_hi(
Expand Down
11 changes: 11 additions & 0 deletions hipamd/include/hip/amd_detail/amd_warp_sync_functions.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,15 @@ THE SOFTWARE.

#pragma once

// Warp sync builtins (with explicit mask argument) introduced in ROCm 6.1 as a
// preview to allow end-users to adapt to the new interface involving 64-bit
// masks. These are disabled by default, and can be enabled by setting the macro
// below. The builtins will be enabled unconditionally in ROCm 6.2.
//
// This arrangement also applies to the __activemask() builtin defined in
// amd_warp_functions.h.
#ifdef HIP_ENABLE_WARP_SYNC_BUILTINS

#if !defined(__HIPCC_RTC__)
#include "amd_warp_functions.h"
#include "hip_assert.h"
Expand Down Expand Up @@ -259,3 +268,5 @@ T __shfl_xor_sync(MaskT mask, T var, int laneMask,

#undef __hip_do_sync
#undef __hip_check_mask

#endif // HIP_ENABLE_WARP_SYNC_BUILTINS
2 changes: 2 additions & 0 deletions hipamd/include/hip/amd_detail/hip_api_trace.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -936,6 +936,7 @@ typedef hipError_t (*t_hipGraphExecExternalSemaphoresWaitNodeSetParams)(hipGraph
typedef hipError_t (*t_hipGraphAddNode)(hipGraphNode_t *pGraphNode, hipGraph_t graph,
const hipGraphNode_t *pDependencies, size_t numDependencies,
hipGraphNodeParams *nodeParams);
typedef hipError_t (*t_hipExtGetLastError)();

// HIP Compiler dispatch table
struct HipCompilerDispatchTable {
Expand Down Expand Up @@ -1396,4 +1397,5 @@ struct HipDispatchTable {
t_hipGraphExecExternalSemaphoresSignalNodeSetParams hipGraphExecExternalSemaphoresSignalNodeSetParams_fn;
t_hipGraphExecExternalSemaphoresWaitNodeSetParams hipGraphExecExternalSemaphoresWaitNodeSetParams_fn;
t_hipGraphAddNode hipGraphAddNode_fn;
t_hipExtGetLastError hipExtGetLastError_fn;
};
3 changes: 3 additions & 0 deletions hipamd/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake")

if(BUILD_SHARED_LIBS)
add_library(amdhip64 SHARED)
if(WIN32)
set_target_properties(amdhip64 PROPERTIES RUNTIME_OUTPUT_NAME "amdhip64_${HIP_VERSION_MAJOR}")
endif()
# Windows doesn't have a strip utility, so CMAKE_STRIP won't be set.
if((CMAKE_BUILD_TYPE STREQUAL "Release") AND NOT ("${CMAKE_STRIP}" STREQUAL ""))
add_custom_command(TARGET amdhip64 POST_BUILD COMMAND ${CMAKE_STRIP} $<TARGET_FILE:amdhip64>)
Expand Down
1 change: 0 additions & 1 deletion hipamd/src/amdhip.def
Original file line number Diff line number Diff line change
Expand Up @@ -427,7 +427,6 @@ hipMemcpy2DAsync_spt
hipMemcpyFromSymbolAsync_spt
hipMemcpyToSymbolAsync_spt
hipMemcpyFromArray_spt
hipMemcpy2DToArray_spt
hipMemcpy2DFromArrayAsync_spt
hipMemcpy2DToArrayAsync_spt
hipDrvGetErrorName
Expand Down
3 changes: 3 additions & 0 deletions hipamd/src/hip_api_trace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -756,6 +756,7 @@ hipError_t hipGraphExternalSemaphoresWaitNodeSetParams(
hipGraphNode_t hNode, const hipExternalSemaphoreWaitNodeParams* nodeParams);
hipError_t hipModuleLaunchCooperativeKernelMultiDevice(hipFunctionLaunchParams* launchParamsList,
unsigned int numDevices, unsigned int flags);
hipError_t hipExtGetLastError();
} // namespace hip

namespace hip {
Expand Down Expand Up @@ -808,6 +809,7 @@ void UpdateDispatchTable(HipDispatchTable* ptrDispatchTable) {
ptrDispatchTable->hipCtxSetSharedMemConfig_fn = hip::hipCtxSetSharedMemConfig;
ptrDispatchTable->hipCtxSynchronize_fn = hip::hipCtxSynchronize;
ptrDispatchTable->hipDestroyExternalMemory_fn = hip::hipDestroyExternalMemory;
ptrDispatchTable->hipDestroyExternalSemaphore_fn = hip::hipDestroyExternalSemaphore;
ptrDispatchTable->hipDestroySurfaceObject_fn = hip::hipDestroySurfaceObject;
ptrDispatchTable->hipDestroyTextureObject_fn = hip::hipDestroyTextureObject;
ptrDispatchTable->hipDeviceCanAccessPeer_fn = hip::hipDeviceCanAccessPeer;
Expand Down Expand Up @@ -1224,6 +1226,7 @@ void UpdateDispatchTable(HipDispatchTable* ptrDispatchTable) {
ptrDispatchTable->hipGetStreamDeviceId_fn = hip::hipGetStreamDeviceId;
ptrDispatchTable->hipDrvGraphAddMemsetNode_fn = hip::hipDrvGraphAddMemsetNode;
ptrDispatchTable->hipGetDevicePropertiesR0000_fn = hip::hipGetDevicePropertiesR0000;
ptrDispatchTable->hipExtGetLastError_fn = hip::hipExtGetLastError;
}

#if HIP_ROCPROFILER_REGISTER > 0
Expand Down
6 changes: 3 additions & 3 deletions hipamd/src/hip_context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ void setCurrentDevice(unsigned int index) {

hip::Stream* getStream(hipStream_t stream, bool wait) {
if (stream == nullptr) {
return getNullStream();
return getNullStream(wait);
} else {
hip::Stream* hip_stream = reinterpret_cast<hip::Stream*>(stream);
if (wait && !(hip_stream->Flags() & hipStreamNonBlocking)) {
Expand Down Expand Up @@ -128,9 +128,9 @@ int getDeviceID(amd::Context& ctx) {
}

// ================================================================================================
hip::Stream* getNullStream() {
hip::Stream* getNullStream(bool wait ) {
Device* device = getCurrentDevice();
return device ? device->NullStream() : nullptr;
return device ? device->NullStream(wait) : nullptr;
}

hipError_t hipInit(unsigned int flags) {
Expand Down
10 changes: 7 additions & 3 deletions hipamd/src/hip_device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,16 +30,18 @@
namespace hip {

// ================================================================================================
hip::Stream* Device::NullStream() {
hip::Stream* Device::NullStream(bool wait) {
if (null_stream_ == nullptr) {
null_stream_ = new Stream(this, Stream::Priority::Normal, 0, true);
}

if (null_stream_ == nullptr) {
return nullptr;
}
// Wait for all active streams before executing commands on the default
iHipWaitActiveStreams(null_stream_);
if (wait == true) {
// Wait for all active streams before executing commands on the default
iHipWaitActiveStreams(null_stream_);
}
return null_stream_;
}

Expand Down Expand Up @@ -463,6 +465,8 @@ hipError_t ihipGetDeviceProperties(hipDeviceProp_tR0600* props, int device) {
deviceProps.timelineSemaphoreInteropSupported = 0;
deviceProps.unifiedFunctionPointers = 0;

deviceProps.integrated = info.accelerator_;

*props = deviceProps;
return hipSuccess;
}
Expand Down
2 changes: 1 addition & 1 deletion hipamd/src/hip_embed_pch.sh
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ __hipRTC_header_size:
EOF

set -x
$LLVM_DIR/bin/clang -O3 --hip-path=$HIP_INC_DIR/.. -std=c++14 -nogpulib --hip-version=4.4 -isystem $HIP_INC_DIR -isystem $HIP_BUILD_INC_DIR -isystem $HIP_AMD_INC_DIR --cuda-device-only -D__HIPCC_RTC__ -x hip $tmp/hipRTC_header.h -E -o $tmp/hiprtc &&
$LLVM_DIR/bin/clang -O3 --hip-path=$HIP_INC_DIR/.. -std=c++14 -nogpulib --hip-version=4.4 -isystem $HIP_INC_DIR -isystem $HIP_BUILD_INC_DIR -isystem $HIP_AMD_INC_DIR --cuda-device-only -D__HIPCC_RTC__ -x hip $tmp/hipRTC_header.h -E -P -o $tmp/hiprtc &&
cat $macroFile >> $tmp/hiprtc &&
$LLVM_DIR/bin/llvm-mc -o $tmp/hiprtc_header.o $tmp/hipRTC_header.mcin --filetype=obj &&
$LLVM_DIR/bin/clang $tmp/hiprtc_header.o -o $rtc_shared_lib_out -shared &&
Expand Down
9 changes: 6 additions & 3 deletions hipamd/src/hip_event.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,6 @@ hipError_t Event::synchronize() {
event_->awaitCompletion();
}
}
// Release freed memory for all memory pools on the device
hip_device->ReleaseFreedMemory();
return hipSuccess;
}

Expand Down Expand Up @@ -443,7 +441,12 @@ hipError_t hipEventSynchronize(hipEvent_t event) {
if (hip::Stream::StreamCaptureOngoing(e->GetCaptureStream()) == true) {
HIP_RETURN(hipErrorStreamCaptureUnsupported);
}
HIP_RETURN(e->synchronize());

hipError_t status = e->synchronize();
// Release freed memory for all memory pools on the device
g_devices[e->deviceId()]->ReleaseFreedMemory();

HIP_RETURN(status);
}

hipError_t ihipEventQuery(hipEvent_t event) {
Expand Down
13 changes: 4 additions & 9 deletions hipamd/src/hip_fatbin.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
Copyright (c) 2023 - 2024 Advanced Micro Devices, Inc. All rights reserved.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -115,10 +115,9 @@ void ListAllDeviceWithNoCOFromBundle(const std::unordered_map<std::string,
}

hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const std::vector<hip::Device*>& devices) {
amd_comgr_data_t data_object;
amd_comgr_data_t data_object {0};
amd_comgr_status_t comgr_status = AMD_COMGR_STATUS_SUCCESS;
hipError_t hip_status = hipSuccess;
amd_comgr_code_object_info_t* query_list_array = nullptr;

// If image was passed as a pointer to our hipMod* api, we can try to extract the file name
// if it was mapped by the app. Otherwise use the COMGR data API.
Expand Down Expand Up @@ -266,12 +265,6 @@ hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const std::vector<hip::Devi
fatbin_dev_info_[device->deviceId()]->program_
= new amd::Program(*(device->asContext()));
}

if ((comgr_status = amd_comgr_release_data(data_object)) != AMD_COMGR_STATUS_SUCCESS) {
LogPrintfError("Releasing COMGR data failed with status %d ", comgr_status);
return hipErrorInvalidValue;
}

} while(0);

// Clean up file and memory resouces if hip_status failed for some reason.
Expand All @@ -292,7 +285,9 @@ hipError_t FatBinaryInfo::ExtractFatBinaryUsingCOMGR(const std::vector<hip::Devi
fdesc_ = 0;
fsize_ = 0;
}
}

if (data_object.handle) {
if ((comgr_status = amd_comgr_release_data(data_object)) != AMD_COMGR_STATUS_SUCCESS) {
LogPrintfError("Releasing COMGR data failed with status %d ", comgr_status);
return hipErrorInvalidValue;
Expand Down
Loading