Skip to content

Commit

Permalink
Work around a bug in inline asm
Browse files Browse the repository at this point in the history
  • Loading branch information
kali committed Jan 31, 2024
1 parent d3991af commit c057192
Showing 1 changed file with 8 additions and 12 deletions.
20 changes: 8 additions & 12 deletions linalg/src/arm64/arm64fp16/max.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,9 @@ reduce_impl_wrap!(
unsafe fn run(buf: &[f16]) -> f16 {
let len = buf.len();
let ptr = buf.as_ptr();
let mut out: u16 = f16::MIN.to_bits();
/*
let mut out: u16;
std::arch::asm!("
ins v0.h[0], {min:w}
dup v0.8h, v0.h[0]
dup v1.8h, v0.h[0]
dup v2.8h, v0.h[0]
Expand All @@ -36,17 +37,12 @@ reduce_impl_wrap!(
fmax v2.8h, v2.8h, v3.8h
fmax v0.8h, v0.8h, v2.8h
fmaxv h0, v0.8h
*/
std::arch::asm!("
dup v0.8h, v0.h[0]
2:
subs {len}, {len}, 32
bne 2b
",
// using v0 as inout triggers https://github.com/rust-lang/rust/issues/120374
min = in(reg) f16::MIN.to_bits(),
ptr = inout(reg) ptr => _,
len = inout(reg) len => _,
// ptr = inout(reg) ptr => _,
inout("v0") out, out("v1") _, out("v2") _, out("v3") _,
out("v0") out, out("v1") _, out("v2") _, out("v3") _,
out("v4") _, out("v5") _, out("v6") _, out("v7") _,);
f16::from_bits(out)
}
Expand All @@ -59,7 +55,7 @@ reduce_impl_wrap!(
);

#[cfg(test)]
mod test_arm64simd_max_f16_32n {
mod test_arm64fp16_max_f16_32n {
use super::*;
max_frame_tests!(crate::arm64::has_fp16(), f16, arm64fp16_max_f16_32n);
}

0 comments on commit c057192

Please sign in to comment.