Skip to content

Commit

Permalink
Add rcp, sqrt, rsqrt
Browse files Browse the repository at this point in the history
  • Loading branch information
vosen committed Aug 21, 2024
1 parent fc713f2 commit c16bae3
Show file tree
Hide file tree
Showing 2 changed files with 151 additions and 0 deletions.
50 changes: 50 additions & 0 deletions ptx_parser/src/ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,30 @@ gen::generate_instruction_type!(
src2: T,
}
},
// `Rcp` instruction entry: its `type` is computed as `Type::from(data.type_)`,
// its modifier payload is `RcpData`, and it takes two arguments, `dst` and `src`.
Rcp {
type: { Type::from(data.type_) },
data: RcpData,
arguments<T>: {
dst: T,
src: T,
}
},
// `Sqrt` instruction entry: same shape as `Rcp` and it reuses the `RcpData`
// payload rather than declaring a dedicated data type. Arguments are `dst` and
// `src`; `type` is computed as `Type::from(data.type_)`.
Sqrt {
type: { Type::from(data.type_) },
data: RcpData,
arguments<T>: {
dst: T,
src: T,
}
},
// `Rsqrt` instruction entry: `type` is computed as `Type::from(data.type_)`,
// the modifier payload is `RsqrtData`, and the arguments are `dst` and `src`.
Rsqrt {
type: { Type::from(data.type_) },
data: RsqrtData,
arguments<T>: {
dst: T,
src: T,
}
},
Trap { }
}
);
Expand Down Expand Up @@ -1117,3 +1141,29 @@ pub struct MinMaxFloat {
pub nan: bool,
pub type_: ScalarType,
}

/// Three-way selector: approximate, full, or an explicit rounding mode.
///
/// NOTE(review): judging by the name this presumably describes the variant of
/// a floating-point `div` instruction; no use site is visible in this change,
/// so confirm against the caller.
#[derive(Copy, Clone, Eq, PartialEq)]
pub enum DivFloatKind {
/// Approximate variant.
Approx,
/// Full variant.
Full,
/// Variant carrying an explicit rounding mode.
Rounding(RoundingMode),
}

/// Modifier payload shared by the `Rcp` and `Sqrt` instruction variants.
#[derive(Copy, Clone)]
pub struct RcpData {
/// Whether the instruction is the approximate form or a rounded form.
pub kind: RcpKind,
/// State of the `.ftz` modifier when the instruction form records one;
/// `None` when the form has no such modifier.
pub flush_to_zero: Option<bool>,
/// Scalar type named by the instruction.
pub type_: ScalarType,
}

/// Computation kind stored in [`RcpData`].
#[derive(Copy, Clone, Eq, PartialEq)]
pub enum RcpKind {
/// Approximate form (no rounding mode attached).
Approx,
/// Rounded form, carrying the rounding mode to apply.
Full(RoundingMode),
}

/// Modifier payload of the `Rsqrt` instruction variant.
#[derive(Copy, Clone)]
pub struct RsqrtData {
/// State of the `.ftz` modifier when the instruction form records one;
/// `None` otherwise.
pub flush_to_zero: Option<bool>,
/// Scalar type named by the instruction.
pub type_: ScalarType,
}
101 changes: 101 additions & 0 deletions ptx_parser/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2244,6 +2244,107 @@ derive_parser!(
}
ScalarType = { .f16, .f16x2, .bf16, .bf16x2 };

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-rcp
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-rcp-approx-ftz-f64
// Approximate reciprocal. `.type` ranges over the set declared at the end of
// this definition and the parsed `ftz` flag is always stored as `Some(ftz)`.
// NOTE(review): presumably `{.ftz}` marks the modifier as optional, in which
// case this rule also accepts `rcp.approx.f64` without `.ftz`; the second link
// above covers only the `rcp.approx.ftz.f64` form — confirm this leniency is
// intended.
rcp.approx{.ftz}.type d, a => {
ast::Instruction::Rcp {
data: ast::RcpData {
kind: ast::RcpKind::Approx,
flush_to_zero: Some(ftz),
type_
},
arguments: RcpArgs { dst: d, src: a }
}
}
// Rounded f32 reciprocal: the parsed `.rnd` modifier is converted with
// `.into()` and wrapped in `RcpKind::Full`.
rcp.rnd{.ftz}.f32 d, a => {
ast::Instruction::Rcp {
data: ast::RcpData {
kind: ast::RcpKind::Full(rnd.into()),
flush_to_zero: Some(ftz),
type_: f32
},
arguments: RcpArgs { dst: d, src: a }
}
}
// Rounded f64 reciprocal: this form has no `.ftz` modifier, so
// `flush_to_zero` is `None`.
rcp.rnd.f64 d, a => {
ast::Instruction::Rcp {
data: ast::RcpData {
kind: ast::RcpKind::Full(rnd.into()),
flush_to_zero: None,
type_: f64
},
arguments: RcpArgs { dst: d, src: a }
}
}
// Value sets for the `.type` and `.rnd` modifiers used by the rules above.
.type: ScalarType = { .f32, .f64 };
.rnd: RawRoundingMode = { .rn, .rz, .rm, .rp };
ScalarType = { .f32, .f64 };

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-sqrt
// All `sqrt` rules build `ast::Instruction::Sqrt` but reuse the `ast::RcpData`
// / `ast::RcpKind` payload types.
// Approximate f32 square root; the parsed `ftz` flag is stored as `Some(ftz)`.
sqrt.approx{.ftz}.f32 d, a => {
ast::Instruction::Sqrt {
data: ast::RcpData {
kind: ast::RcpKind::Approx,
flush_to_zero: Some(ftz),
type_: f32
},
arguments: SqrtArgs { dst: d, src: a }
}
}
// Rounded f32 square root: the parsed `.rnd` modifier is converted with
// `.into()` and wrapped in `RcpKind::Full`.
sqrt.rnd{.ftz}.f32 d, a => {
ast::Instruction::Sqrt {
data: ast::RcpData {
kind: ast::RcpKind::Full(rnd.into()),
flush_to_zero: Some(ftz),
type_: f32
},
arguments: SqrtArgs { dst: d, src: a }
}
}
// Rounded f64 square root: this form has no `.ftz` modifier, so
// `flush_to_zero` is `None`.
sqrt.rnd.f64 d, a => {
ast::Instruction::Sqrt {
data: ast::RcpData {
kind: ast::RcpKind::Full(rnd.into()),
flush_to_zero: None,
type_: f64
},
arguments: SqrtArgs { dst: d, src: a }
}
}
// Value set for the `.rnd` modifier used by the rules above.
.rnd: RawRoundingMode = { .rn, .rz, .rm, .rp };
ScalarType = { .f32, .f64 };

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-rsqrt
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-rsqrt-approx-ftz-f64
// Approximate f32 reciprocal square root; the parsed `ftz` flag is stored as
// `Some(ftz)`.
rsqrt.approx{.ftz}.f32 d, a => {
ast::Instruction::Rsqrt {
data: ast::RsqrtData {
flush_to_zero: Some(ftz),
type_: f32
},
arguments: RsqrtArgs { dst: d, src: a }
}
}
// Approximate f64 reciprocal square root without `.ftz`: there is no modifier
// to record, so `flush_to_zero` is `None`.
rsqrt.approx.f64 d, a => {
ast::Instruction::Rsqrt {
data: ast::RsqrtData {
flush_to_zero: None,
type_: f64
},
arguments: RsqrtArgs { dst: d, src: a }
}
}
// `rsqrt.approx.ftz.f64` spells `.ftz` out as a mandatory part of the opcode.
// Record it as `Some(true)`: storing `None` here would produce exactly the
// same AST as `rsqrt.approx.f64` and silently drop the flush-to-zero request.
rsqrt.approx.ftz.f64 d, a => {
ast::Instruction::Rsqrt {
data: ast::RsqrtData {
flush_to_zero: Some(true),
type_: f64
},
arguments: RsqrtArgs { dst: d, src: a }
}
}
ScalarType = { .f32, .f64 };

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#control-flow-instructions-ret
ret{.uni} => {
Instruction::Ret { data: RetData { uniform: uni } }
Expand Down

0 comments on commit c16bae3

Please sign in to comment.