Skip to content

Commit

Permalink
Update PE skeleton
Browse files Browse the repository at this point in the history
  • Loading branch information
minseongg committed Nov 1, 2024
1 parent 5c048b8 commit 2f20e3c
Show file tree
Hide file tree
Showing 5 changed files with 87 additions and 51 deletions.
128 changes: 83 additions & 45 deletions hazardflow-designs/src/gemmini/execute/systolic_array/pe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,86 +8,84 @@ use super::*;
#[derive(Debug, Clone, Copy)]
pub struct PeRowData {
/// A.
///
/// Represents the activation value, held as an `INPUT_BITS`-wide `U`.
pub a: U<INPUT_BITS>,
}

/// PE column data signals.
///
/// Both fields are `OUTPUT_BITS` wide.
#[derive(Debug, Clone, Copy)]
pub struct PeColData {
/// B.
///
/// Represents the weight value (in OS dataflow) or the above PE's MAC result (in WS dataflow).
pub b: U<OUTPUT_BITS>,

/// D.
///
/// Represents the preloading bias value (in OS dataflow) or the preloading weight value (in WS dataflow).
pub d: U<OUTPUT_BITS>,
}

/// PE column control signals.
///
/// NOTE: The column data and control signals should be separated to handle the `flush` operation.
/// <https://github.com/ucb-bar/gemmini/blob/be2e9f26181658895ebc7ca7f7d6be6210f5cdef/src/main/scala/gemmini/ExecuteController.scala#L189-L207>
#[derive(Debug, Clone, Copy)]
pub struct PeColControl {
/// ID.
///
/// Identifier for the matrix multiplication operation (not used in the PE logic).
pub id: U<ID_BITS>,

/// Is this last row?
///
/// Indicates whether the current row is the last row (not used in the PE logic).
pub last: bool,

/// PE control signals.
pub control: PeControl,
}

/// Represents which register to use to preload the value.
#[derive(Debug, Default, Clone, Copy, HEq)]
pub enum Propagate {
/// Use `Reg1` for preloading and `Reg2` for computation.
#[default]
Reg1,
/// PE control signals.
#[derive(Debug, Clone, Copy)]
pub struct PeControl {
/// Represents the dataflow.
pub dataflow: Dataflow,

/// Use `Reg2` for preloading and `Reg1` for computation.
Reg2,
/// Indicates which register to use for preloading the value.
pub propagate: Propagate,

/// The number of bits by which the accumulated result of matrix multiplication is right-shifted when leaving the
/// systolic array, used to scale down the result.
pub shift: U<5>,
}

/// Represents the dataflow.
#[derive(Debug, Default, Clone, Copy, HEq)]
pub enum Dataflow {
/// Output Stationary.
/// Output stationary.
#[default]
OS,

/// Weight Stationary.
/// Weight stationary.
WS,
}

impl From<U<1>> for Dataflow {
fn from(value: U<1>) -> Self {
Dataflow::from(value[0])
}
}

impl From<bool> for Dataflow {
fn from(value: bool) -> Self {
match value {
false => Self::OS,
true => Self::WS,
}
}
}

/// PE control data.
#[derive(Debug, Clone, Copy)]
pub struct PeControl {
/// Dataflow.
pub dataflow: Dataflow,

/// Propagate.
pub propagate: Propagate,
/// Indicates which register to use for preloading the value.
#[derive(Debug, Default, Clone, Copy, HEq)]
pub enum Propagate {
/// Use register 1 for preloading (and register 2 for the MAC unit input).
#[default]
Reg1,

/// Shift.
pub shift: U<5>,
/// Use register 2 for preloading (and register 1 for the MAC unit input).
Reg2,
}

/// PE state registers.
///
/// Each register stores values based on the dataflow and propagate signal:
///
/// - WS dataflow, preload: weight value for the next operation.
/// - WS dataflow, compute: weight value for the current operation.
/// - OS dataflow, preload: bias value for the next operation.
/// - OS dataflow, compute: partial sum value for the current operation.
///
/// NOTE: In OS dataflow, it outputs the matmul result when a change in the propagate value is detected.
#[derive(Debug, Default, Clone, Copy)]
pub struct PeS {
/// Register 1.
Expand All @@ -96,7 +94,9 @@ pub struct PeS {
/// Register 2.
pub reg2: U<32>,

/// The propagate value from the previous input.
///
/// NOTE: In the PE logic, it is only used to check whether the current propagate value differs from the previous one.
pub propagate: Propagate,
}

Expand All @@ -105,20 +105,58 @@ impl PeS {
pub fn new(reg1: U<32>, reg2: U<32>, propagate: Propagate) -> Self {
Self { reg1, reg2, propagate }
}

/// Creates a new PE state for OS dataflow.
///
/// # Arguments
///
/// - `preload`: Bias value for the next operation.
/// - `partial_sum`: MAC result of the current operation.
/// - `propagate`: Propagate value.
pub fn new_os(preload: U<32>, partial_sum: U<32>, propagate: Propagate) -> Self {
match propagate {
Propagate::Reg1 => PeS::new(preload, partial_sum, propagate),
Propagate::Reg2 => PeS::new(partial_sum, preload, propagate),
}
}

/// Creates a new PE state for WS dataflow.
///
/// # Arguments
///
/// - `preload`: Weight value for the next operation.
/// - `weight`: Weight value for the current operation.
/// - `propagate`: Propagate value.
pub fn new_ws(preload: U<32>, weight: U<32>, propagate: Propagate) -> Self {
match propagate {
Propagate::Reg1 => PeS::new(preload, weight, propagate),
Propagate::Reg2 => PeS::new(weight, preload, propagate),
}
}
}

/// MAC unit (computes `a * b + c`).
///
/// It preserves the signedness of operands: `a` and `b` are sign-extended from 8 to 32 bits, and `c` is
/// reinterpreted as a signed 32-bit value, before the multiply-add is evaluated.
fn mac(a: U<8>, b: U<8>, c: U<32>) -> U<OUTPUT_BITS> {
    // Sign-extend the 8-bit operands, then reinterpret the 32-bit patterns as `i32` so that the
    // multiplication and addition below are signed.
    let a = u32::from(a.sext::<32>()) as i32;
    let b = u32::from(b.sext::<32>()) as i32;
    let c = u32::from(c) as i32;

    // NOTE(review): `a * b` always fits in `i32`, but adding `c` can exceed the `i32` range; plain `+`
    // panics on overflow when overflow checks are enabled. Confirm the intended wrap-around behavior.
    (a * b + c).into_u()
}

/// Performs right-shift (`val >> shamt`) and then clips to `OUTPUT_BITS`.
///
/// It preserves the signedness of `val`.
///
/// The shift goes through `rounding_shift` and the narrowing from 32 bits to `OUTPUT_BITS` goes through
/// `clip_with_saturation`.
///
/// NOTE(review): an earlier wording described this as `(val >> shamt).clippedToWidthOf(20)`; presumably
/// `OUTPUT_BITS` is 20. Confirm against the constant's definition.
fn shift_and_clip(val: U<32>, shamt: U<5>) -> U<OUTPUT_BITS> {
    let shifted = rounding_shift(val, shamt);
    // Use the named width rather than a literal so the clip always matches the return type.
    super::arithmetic::clip_with_saturation::<32, OUTPUT_BITS>(shifted)
}

/// PE.
///
/// NOTE: It is assumed that all valid signals for the input interfaces have the same value.
#[synthesize]
pub fn pe(
_in_left: Valid<PeRowData>,
Expand Down
2 changes: 1 addition & 1 deletion hazardflow-designs/src/gemmini/isa/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ impl<const ACC_SCALE_BITS: usize> ConfigExRs1<ACC_SCALE_BITS> {
transpose_a: rs1s.clip_const::<CONFIG_EX_RS1_TRANSPOSE_A_WIDTH>(transpose_a_offset)[0],
set_only_strides: rs1s.clip_const::<CONFIG_EX_RS1_SET_ONLY_STRIDES_WIDTH>(set_only_strides_offset)[0],
activation: rs1s.clip_const::<CONFIG_EX_RS1_ACTIVATION_WIDTH>(activation_offset),
dataflow: Dataflow::from(rs1s.clip_const::<CONFIG_EX_RS1_DATAFLOW_WIDTH>(dataflow_offset)),
dataflow: if rs1s[dataflow_offset] { Dataflow::WS } else { Dataflow::OS },
cmd_type: ConfigCmd::from(rs1s.clip_const::<CONFIG_EX_RS1_CMD_TYPE_WIDTH>(0)),
}
}
Expand Down
4 changes: 2 additions & 2 deletions hazardflow-designs/src/std/value/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@ use crate::prelude::*;

mod array;
mod bounded;
mod integer;
mod option;
mod uint;

pub use array::*;
pub use bounded::*;
pub use integer::*;
pub use option::*;
pub use uint::*;

/// Don't care value.
///
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use hazardflow_macro::magic;
use super::Array;
use crate::prelude::*;

/// An unsigned integer with bitwidth `N`.
///
/// The lower bits of the integer are represented by the lower index of the array, and vice versa. In other words, the
/// least significant bit of the integer is the 0th element of the array, and the most significant bit is the
Expand Down
2 changes: 0 additions & 2 deletions scripts/make_submission.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,4 @@ rm -rf hw2.zip hw3.zip
# Creates new submissions.
zip hw2.zip -j hazardflow-designs/src/cpu/fetch.rs hazardflow-designs/src/cpu/decode.rs hazardflow-designs/src/cpu/exe.rs hazardflow-designs/src/cpu/branch_predictor/bht.rs hazardflow-designs/src/cpu/branch_predictor/btb.rs
zip hw3.zip -j hazardflow-designs/src/cpu/fetch.rs hazardflow-designs/src/cpu/decode.rs hazardflow-designs/src/cpu/exe.rs hazardflow-designs/src/cpu/branch_predictor/bht.rs hazardflow-designs/src/cpu/branch_predictor/btb.rs hazardflow-designs/src/cpu/riscv_isa.rs
zip hw4.zip -j hazardflow-designs/src/gemmini/execute/systolic_array/pe.rs
zip hw5.zip -j hazardflow-designs/src/gemmini/execute/systolic_array/mesh.rs hazardflow-designs/src/gemmini/execute/systolic_array/transposer.rs
zip hw6.zip -j hazardflow-designs/src/gemmini/execute/systolic_array/mesh_with_delays.rs

0 comments on commit 2f20e3c

Please sign in to comment.