Skip to content

Commit

Permalink
#3579: add documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
ntarafdar committed Nov 21, 2023
1 parent 259327b commit 36a571b
Show file tree
Hide file tree
Showing 10 changed files with 163 additions and 4 deletions.
3 changes: 3 additions & 0 deletions docs/source/tt_metal/apis/kernel_apis/compute/add_tiles.rst
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
add_tiles
=========


.. doxygenfunction:: add_tiles_init_nof()
.. doxygenfunction:: add_tiles_init()
.. doxygenfunction:: add_tiles(uint32_t icb0, uint32_t icb1, uint32_t itile0, uint32_t itile1, uint32_t idst)
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
add_tiles_bcast
===============

.. doxygenfunction:: add_bcast_cols_init_short()
.. doxygenfunction:: add_bcast_rows_init_short()
.. doxygenfunction:: add_tiles_bcast(uint32_t icb0, uint32_t icb1, uint32_t itile0, uint32_t itile1, uint32_t idst)
6 changes: 6 additions & 0 deletions docs/source/tt_metal/apis/kernel_apis/compute/compute.rst
Original file line number Diff line number Diff line change
Expand Up @@ -64,5 +64,11 @@ Compute APIs
gez_tile
nez_tile

cb_wait_front
cb_pop_front
cb_reserve_back
cb_push_back

binary_op_init_funcs

.. only:: not html
2 changes: 2 additions & 0 deletions docs/source/tt_metal/apis/kernel_apis/compute/mul_tiles.rst
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
mul_tiles
=========

.. doxygenfunction:: mul_tiles_init_f()
.. doxygenfunction:: mul_tiles_init()
.. doxygenfunction:: mul_tiles(uint32_t icb0, uint32_t icb1, uint32_t itile0, uint32_t itile1, uint32_t idst)
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
mul_tiles_bcast
===============

.. doxygenfunction:: mul_bcast_cols_init_short()
.. doxygenfunction:: mul_bcast_rows_init_short()
.. doxygenfunction:: mul_tiles_bcast(uint32_t icb0, uint32_t icb1, uint32_t itile0, uint32_t itile1, uint32_t idst)
.. doxygenfunction:: mul_tiles_bcast_scalar_init_short()
.. doxygenfunction:: mul_tiles_bcast_scalar(uint32_t icb0, uint32_t icb1, uint32_t itile0, uint32_t itile1, uint32_t idst)
2 changes: 2 additions & 0 deletions docs/source/tt_metal/apis/kernel_apis/compute/sub_tiles.rst
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
sub_tiles
=========

.. doxygenfunction:: sub_tiles_init_nof()
.. doxygenfunction:: sub_tiles_init()
.. doxygenfunction:: sub_tiles( uint32_t icb0, uint32_t icb1, uint32_t itile0, uint32_t itile1, uint32_t idst)
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
sub_tiles_bcast
===============


.. doxygenfunction:: sub_bcast_cols_init_short()
.. doxygenfunction:: sub_tiles_bcast(uint32_t icb0, uint32_t icb1, uint32_t itile0, uint32_t itile1, uint32_t idst)
20 changes: 20 additions & 0 deletions tt_metal/include/compute_kernel_api/bcast.h
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,20 @@ ALWI void add_tiles_bcast_cols(uint32_t icb0, uint32_t icb1, uint32_t itile0, ui
UNPACK(( llk_unpack_AB<BroadcastType::COL>(icb0, icb1, itile0, itile1) ));
}



/**
* Associated init function that must be called before calling a bcast op.
*
* Return value: None
*
*
* | Argument | Description | Type | Valid Range | Required |
* |----------------|---------------------------------------------------------------|---------------|------------------------------------------------|----------|
* | icb0 | The identifier of the circular buffer (CB) containing A | uint32_t | 0 to 31 | True |
* | icb1 | The identifier of the circular buffer (CB) containing B | uint32_t | 0 to 31 | True |
* | ocb | The identifier of the circular buffer (CB) containing output | uint32_t | 0 to 31 | False |
*/
template<EltwiseBinaryType tBcastOp, BroadcastType tBcastDim>
void init_bcast(uint32_t icb0, uint32_t icb1, uint32_t ocb = 16)
{
Expand Down Expand Up @@ -119,6 +133,10 @@ void init_bcast(uint32_t icb0, uint32_t icb1, uint32_t ocb = 16)
MATH(( llk_math_pack_sync_init<SyncHalf>() ));
}


/*
Internal helper function for all broadcast ops
*/
template<EltwiseBinaryType tBcastOp, BroadcastType tBcastDim>
ALWI void any_tiles_bcast(uint32_t icb0, uint32_t icb1, uint32_t itile0, uint32_t itile1, uint32_t idst)
{
Expand Down Expand Up @@ -190,6 +208,7 @@ ALWI void mul_tiles_bcast(uint32_t icb0, uint32_t icb1, uint32_t itile0, uint32_

/**
* Performs a first-call or switch-from-another-op tile hw reconfiguration step needed for add_bcast_rows to be executed correctly.
* Required to be called before add_tiles_bcast if using row as broadcast type
*/
ALWI void add_bcast_rows_init_short()
{
Expand Down Expand Up @@ -232,6 +251,7 @@ ALWI void add_bcast_rows_init_short_post_matmul()

/**
* Performs a first-call or switch-from-another-op tile hw reconfiguration step needed for add_bcast_cols to be executed correctly.
* Required to be called before add_tiles_bcast if using column as broadcast type
*/
ALWI void add_bcast_cols_init_short()
{
Expand Down
83 changes: 79 additions & 4 deletions tt_metal/include/compute_kernel_api/cb_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,25 +8,100 @@
#include "compute_kernel_api/common_globals.h"
namespace ckernel {

// documented in dataflow_api.h
/**
* A blocking call that waits for the specified number of tiles to be available in the specified circular buffer (CB).
* This call is used by the consumer of the CB to wait for the producer to fill the CB with at least the specified number
* of tiles. Important note: in case multiple calls of cb_wait_front(n) are issued without a paired cb_pop_front() call,
* n is expected to be incremented by the user to be equal to a cumulative total of tiles. Example: 4 calls of
* cb_wait_front(8) followed by a cb_pop_front(32) would produce incorrect behavior. Instead 4 calls of cb_wait_front()
* waiting on 8, 16, 24, 32 tiles should be issued.
*
* Important note: number of tiles used in all cb_* calls must evenly divide the cb size and must be the same number in
* all cb_wait_front calls in the same kernel. Example 1: cb_wait_front(32), cb_wait_front(40), cb_pop_front(32+8) tiles
* on a CB of size 64 would produce incorrect behavior. Example 2: cb_wait_front(3) on a cb of size 32 would also
* produce incorrect behavior. These limitations are due to performance optimizations in the CB implementation.
*
* Important note: CB total size must be an exact (integer) multiple of the argument passed to this call.
*
* NOTE(review): the wait is issued through the UNPACK(( ... )) macro, so presumably only the unpack side of the
* compute kernel executes it - confirm against the UNPACK macro definition.
*
* Return value: None
*
* | Argument | Description | Type | Valid Range | Required |
* |-----------|--------------------------------------|----------|---------------------------------------------------------------------------------------------------|----------|
* | cbid | The index of the circular buffer (CB) | uint32_t | 0 to 31 | True |
* | ntiles | The number of tiles to wait for | uint32_t | Must be less than or equal to the size of the CB (the total number of tiles that fit into the CB) | True |
*/
ALWI void cb_wait_front(uint32_t cbid, uint32_t ntiles) {
UNPACK(( llk_wait_tiles(cbid, ntiles) ));
}

// documented in dataflow_api.h
/**
* Pops a specified number of tiles from the front of the specified CB. This
* also frees this number of tiles in the circular buffer. This call is used by
* the consumer to free up the space in the CB.
*
* We use the convention that the producer pushes tiles into the "back" of the
* CB queue and the consumer consumes tiles from the "front" of the CB queue.
*
* Note that the act of reading the tile data from the CB does not free up
* the space in the CB. Waiting on available tiles and popping them is
* separated in order to allow the consumer to: 1) read the tile data from the
* CB via multiple reads of sub-tiles 2) access the tiles (or their sub-tiles)
* that are visible to the consumer by random access of the valid section of
* the CB
*
* NOTE(review): the pop is issued through the UNPACK(( ... )) macro, so presumably only the unpack side of the
* compute kernel executes it - confirm against the UNPACK macro definition.
*
* Return value: None
*
* | Argument | Description | Type | Valid Range | Required |
* |-----------|--------------------------------------|----------|---------------------------------------------------------------------------------------------------|----------|
* | cbid | The index of the circular buffer (CB) | uint32_t | 0 to 31 | True |
* | ntiles | The number of tiles to be popped | uint32_t | Must be less than or equal to the size of the CB (the total number of tiles that fit into the CB) | True |
*/
ALWI void cb_pop_front(uint32_t cbid, uint32_t ntiles) {
UNPACK(( llk_pop_tiles(cbid, ntiles) ));
}


// documented in dataflow_api.h
/**
* A blocking call that waits for the specified number of tiles to be free in the specified circular buffer. This call
* is used by the producer to wait for the consumer to consume (i.e. free up) the specified number of tiles.
*
* CB total size must be an exact (integer) multiple of the argument passed to this call.
*
* NOTE(review): the wait is issued through the PACK(( ... )) macro, so presumably only the pack side of the
* compute kernel executes it - confirm against the PACK macro definition. The meaning of the three `false`
* template arguments of llk_wait_for_free_tiles is not visible here.
*
* Return value: None
*
* | Argument | Description | Type | Valid Range | Required |
* |-----------|--------------------------------------|----------|---------------------------------------------------------------------------------------------------|----------|
* | cbid | The index of the circular buffer (CB) | uint32_t | 0 to 31 | True |
* | ntiles | The number of free tiles to wait for | uint32_t | Must be less than or equal to the size of the CB (the total number of tiles that fit into the CB) | True |
*/
ALWI void cb_reserve_back(uint32_t cbid, uint32_t ntiles)
{
PACK(( llk_wait_for_free_tiles<false,false,false>(cbid,ntiles) ));
}


// documented in dataflow_api.h
/**
* Pushes a given number of tiles in the back of the specified CB’s queue.
* Decreases the available space in the circular buffer by this number of
* tiles. This call is used by the producer to make the tiles visible to the
* consumer of the CB.
*
* We use the convention that the producer pushes tiles into the “back” of the
* CB queue and the consumer consumes tiles from the “front” of the CB queue.
*
* Note that the act of writing the tile data into the CB does not make the
* tiles visible to the consumer. Writing of the tiles and pushing is separated
* to allow the producer to: 1) write the tile data to the CB via multiple
* writes of sub-tiles 2) modify tiles (or sub-tiles) by random access of the
* valid section of the CB
*
* Return value: None
*
* | Argument | Description | Type | Valid Range | Required |
* |-----------|--------------------------------------|----------|---------------------------------------------------------------------------------------------------|----------|
* | cbid | The index of the circular buffer (CB) | uint32_t | 0 to 31 | True |
* | ntiles | The number of tiles to be pushed | uint32_t | Must be less than or equal to the size of the CB (the total number of tiles that fit into the CB) | True |
*/
ALWI void cb_push_back(uint32_t cbid, uint32_t ntiles)
{
PACK(( llk_push_tiles<false,false>(cbid, ntiles) ));
Expand Down
43 changes: 43 additions & 0 deletions tt_metal/include/compute_kernel_api/eltwise_binary.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,14 @@

namespace ckernel {

/**
* Init function for all binary ops
* Followed by the specific init required with an opcode (binary_op_specific_init).
* | Argument | Description | Type | Valid Range | Required |
* |----------------|----------------------------------------------------------|----------|------------------------------------------------|----------|
* | icb0 | The identifier of the circular buffer (CB) containing A | uint32_t | 0 to 31 | True |
* | icb1 | The identifier of the circular buffer (CB) containing B | uint32_t | 0 to 31 | True |
*/
ALWI void binary_op_init_common(uint32_t icb0, uint32_t icb1)
{
UNPACK(( llk_setup_operands() ));
Expand All @@ -38,7 +46,16 @@ ALWI void binary_op_init_common(uint32_t icb0, uint32_t icb1)
}


/**
* Please refer to documentation for any_init.
* The "_f" suffix means high fidelity with respect to accuracy: this variant passes
* MATH_FIDELITY to the LLK eltwise-binary (ELWMUL) math init.
* This is set during createprogram.
*
* Only the MATH-side init is issued here; no packer setup is performed by this function.
*
* Return value: None
*/
ALWI void mul_tiles_init_f() { MATH(( llk_math_eltwise_binary_init<ELWMUL, NONE, MATH_FIDELITY>() )); }

/**
* Please refer to documentation for any_init.
*/
ALWI void mul_tiles_init() {
MATH(( llk_math_eltwise_binary_init<ELWMUL, NONE, MATH_FIDELITY>() ));
PACK(( llk_init_packer_dest_offset_registers<SyncHalf,DstTileFaceLayout::RowMajor,false>() ));
Expand All @@ -50,7 +67,17 @@ ALWI void mul_tiles_init() {
#endif
}

/**
* Please refer to documentation for any_init.
* The "_nof" suffix means low fidelity with respect to accuracy: this variant calls the LLK
* eltwise-binary (ELWADD) math init without a MATH_FIDELITY template argument.
* This is set during createprogram.
*
* Only the MATH-side init is issued here; no packer setup is performed by this function.
*
* Return value: None
*/
ALWI void add_tiles_init_nof() { MATH(( llk_math_eltwise_binary_init<ELWADD, NONE>() )); }


/**
* Please refer to documentation for any_init.
*/
ALWI void add_tiles_init() {
MATH(( llk_math_eltwise_binary_init<ELWADD, NONE>() ));
PACK(( llk_init_packer_dest_offset_registers<SyncHalf,DstTileFaceLayout::RowMajor,false>() ));
Expand All @@ -62,7 +89,17 @@ ALWI void add_tiles_init() {
#endif
}

/**
* Please refer to documentation for any_init.
* The "_nof" suffix means low fidelity with respect to accuracy: this variant calls the LLK
* eltwise-binary (ELWSUB) math init without a MATH_FIDELITY template argument.
* This is set during createprogram.
*
* Only the MATH-side init is issued here; no packer setup is performed by this function.
*
* Return value: None
*/
ALWI void sub_tiles_init_nof() { MATH(( llk_math_eltwise_binary_init<ELWSUB, NONE>() )); }


/**
* Please refer to documentation for any_init.
*/
ALWI void sub_tiles_init() {
MATH(( llk_math_eltwise_binary_init<ELWSUB, NONE>() ));
PACK(( llk_init_packer_dest_offset_registers<SyncHalf,DstTileFaceLayout::RowMajor,false>() ));
Expand Down Expand Up @@ -165,6 +202,12 @@ ALWI void sub_tiles( uint32_t icb0, uint32_t icb1, uint32_t itile0, uint32_t iti
}

template<bool full_init = false>
/**
* Init function with a specified op
* | Argument | Description | Type | Valid Range | Required |
* |----------------|----------------------------------------------------------|----------|------------------------------------------------|----------|
* | op_code | op code corresponding to op | int | 0 to 31 | True |
*/
ALWI void binary_op_specific_init(int op_code) // TODO(AP): better naming
{
#ifdef ELTWISE_OP
Expand Down

0 comments on commit 36a571b

Please sign in to comment.