Skip to content

Commit

Permalink
Fixed 5xxx bugs
Browse files Browse the repository at this point in the history
  • Loading branch information
tmrlvi committed Jan 18, 2022
1 parent 2427f08 commit 2ff278a
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 1 deletion.
2 changes: 1 addition & 1 deletion plugins/opencl/resources/kaspa-opencl.cl
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ uint32_t STATIC inline amul4bit(constant uint32_t packed_vec1[32], uint32_t pack
for (int i=0; i<QUARTER_MATRIX_SIZE; i++) {
#if __PLATFORM__ == NVIDIA_CUDA && (__COMPUTE_MAJOR__ > 6 || (__COMPUTE_MAJOR__ == 6 && __COMPUTE_MINOR__ >= 1))
asm("dp4a.u32.u32" " %0, %1, %2, %3;": "=r" (res): "r" (packed_vec1[i]), "r" (packed_vec2[i]), "r" (res));
#elif (__FORCE_AMD_V_DOT4_U32_U8__ == 1) || ((__GFXIP_MAJOR__ == 9) && (__GFXIP_MINOR__ == 6 || __GFXIP_MINOR__ == 8)) || ((__GFXIP_MAJOR__ == 10) && (__GFXIP_MINOR__ == 11 || __GFXIP_MINOR__ == 12 || __GFXIP_MINOR__ >= 3)) || __GFXIP_MAJOR__ > 10
#elif defined(__gfx906__) || defined(__gfx908__) || defined(__gfx1011__) || defined(__gfx1012__) || defined(__gfx1030__) || defined(__gfx1031__) || defined(__gfx1032__)
__asm__("v_dot4_u32_u8" " %0, %1, %2, %3;": "=v" (res): "r" (packed_vec1[i]), "r" (packed_vec2[i]), "r" (res));
#else
res += ((constant char4 *)packed_vec1)[i].x*((char4 *)packed_vec2)[i].x;
Expand Down
15 changes: 15 additions & 0 deletions plugins/opencl/src/worker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,12 @@ impl OpenCLGPUWorker {
"",
)
.unwrap_or_else(|_| panic!("{}::Program::create_and_build_from_binary failed", name)),
"gfx1010" => Program::create_and_build_from_binary(
&context,
&[include_bytes!("../resources/bin/gfx1010_kaspa-opencl.bin")],
"",
)
.unwrap_or_else(|_| panic!("{}::Program::create_and_build_from_binary failed", name)),
"gfx1011" => Program::create_and_build_from_binary(
&context,
&[include_bytes!("../resources/bin/gfx1011_kaspa-opencl.bin")],
Expand Down Expand Up @@ -384,6 +390,15 @@ fn from_source(context: &Context, device: &Device, options: &str) -> Result<Prog
Err(_) => String::new(),
};

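// Hack: for AMD devices, append `-D OPENCL_PLATFORM_AMD` and a `-D __<device name>__` define to the compile options passed to the OpenCL build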
compile_options += &match device.pcie_id_amd() {
Ok(_) => {
let device_name = device.name().unwrap_or_else(|_| "Unknown".into());
format!("-D OPENCL_PLATFORM_AMD -D __{}__", device_name)
},
Err(_) => String::new(),
};

info!("Build OpenCL with {}", compile_options);

Program::create_and_build_from_source(context, PROGRAM_SOURCE, compile_options.as_str())
Expand Down

0 comments on commit 2ff278a

Please sign in to comment.