-
Notifications
You must be signed in to change notification settings - Fork 31
C Interface
The TBLIS C interface packages the tensor (or matrix, vector, or scalar) specification into an object:
/* A scalar object: a single value together with a tag naming its type. */
typedef struct
{
/* One of TYPE_SINGLE (= TYPE_FLOAT), TYPE_DOUBLE, TYPE_SCOMPLEX, or
 * TYPE_DCOMPLEX; selects how the bytes of `scalar` are interpreted. */
type_t type;
/* Storage for the value itself (real or complex, single or double precision). */
/* implementation-defined (16 bytes) */ scalar __attribute__((aligned(8)));
} tblis_scalar;
/* A vector operand: the one-dimensional specialization of tblis_tensor. */
typedef struct
{
/* Element type of `data` and of `scalar`: TYPE_SINGLE (= TYPE_FLOAT),
 * TYPE_DOUBLE, TYPE_SCOMPLEX, or TYPE_DCOMPLEX. */
type_t type;
/* Non-zero if the operand is to be conjugated during the operation, zero otherwise. */
int conj;
/* Scalar parameter attached to this operand. */
/* implementation-defined (16 bytes) */ scalar __attribute__((aligned(8)));
/* Pointer to the origin element (index zero). */
void* data;
/* Length of the vector (corresponds to len[0] of a tensor). */
len_type n;
/* Stride between consecutive elements, in units of the data type
 * (corresponds to stride[0] of a tensor). */
stride_type inc;
} tblis_vector;
/* A matrix operand: the two-dimensional specialization of tblis_tensor. */
typedef struct
{
/* Element type of `data` and of `scalar`: TYPE_SINGLE (= TYPE_FLOAT),
 * TYPE_DOUBLE, TYPE_SCOMPLEX, or TYPE_DCOMPLEX. */
type_t type;
/* Non-zero if the operand is to be conjugated during the operation, zero otherwise. */
int conj;
/* Scalar parameter attached to this operand. */
/* implementation-defined (16 bytes) */ scalar __attribute__((aligned(8)));
/* Pointer to the origin element (all index values zero). */
void* data;
/* Lengths of the two indices: m = len[0], n = len[1]. */
len_type m, n;
/* Strides of the two indices, in units of the data type:
 * rs = stride[0], cs = stride[1]. */
stride_type rs, cs;
} tblis_matrix;
/* A general tensor operand: element type, scaling/conjugation, and memory layout. */
typedef struct
{
/* Element type of `data` and of `scalar`: TYPE_SINGLE (= TYPE_FLOAT),
 * TYPE_DOUBLE, TYPE_SCOMPLEX, or TYPE_DCOMPLEX. */
type_t type;
/* Non-zero if the operand is to be conjugated during the operation, zero otherwise. */
int conj;
/* Scalar parameter attached to this operand. */
/* implementation-defined (16 bytes) */ scalar __attribute__((aligned(8)));
/* Pointer to the origin element (the element with all index values zero). */
void* data;
/* Number of dimensions (indices); also the length of the `len` and `stride` arrays. */
int ndim;
/* len[k] is the length of index k. */
len_type* len;
/* stride[k] is the distance in memory, in units of the data type, between
 * entries with consecutive values of index k. */
stride_type* stride;
} tblis_tensor;
The scalar
field holds a real or complex, single- or double-precision scalar value. It denotes the scalar parameter $\alpha$ or $\beta$ of the operation, or the actual contents of a scalar object. The type
parameter may have one of the following values, specifying the actual type of *(void*)&scalar
and data[i]
: TYPE_SINGLE = TYPE_FLOAT
, TYPE_DOUBLE
, TYPE_SCOMPLEX
, or TYPE_DCOMPLEX
. The conj
field is non-zero if the operand is to be conjugated during the operation or zero otherwise.
The tensor data and structure are specified by the data
, ndim
, len
, and stride
fields. data
is a pointer to the origin element of the tensor (i.e. the element with all index values zero). The ndim
parameter gives the number of dimensions (i.e. the number of indices), which is also the length of the len
and stride
arrays. The len
array gives the length for each index in order. Finally, the stride
array gives the layout of the tensor in memory. In general, stride[k]
is the number of positions (in units of the data type) in memory between entries with consecutive values in index k
. Usually, either the first (or the last) index has a stride of 1, and successive (preceding) indices have strides which are the product of the lengths of all preceding (successive) indices. For example a tensor with len = {10,2,6,32}
would usually have either stride = {1,10,20,120}
or stride = {384,192,32,1}
. The first method is called generalized column-major order and the second is generalized row-major order. However, the strides may also be any arbitrary values such that each element of the tensor has a distinct location.
For matrices and vectors the parameters are essentially the same with the specializations ndim = 1
, n = len[0]
, and inc = stride[0]
for a vector and ndim = 2
, m = len[0]
, n = len[1]
, rs = stride[0]
, and cs = stride[1]
for a matrix.
Scalar, vector, matrix, and tensor objects can be initialized with one of the following helper functions:
/*
 * Helper initializers for scalar, vector, matrix, and tensor objects.
 * "[sdcz]" stands for one suffix letter (s, d, c, or z), which selects
 * type = float, double, scomplex, or dcomplex respectively.
 * The "_scaled_" variants additionally take a `scalar` argument.
 */
void tblis_init_scalar_[sdcz](tblis_scalar* s, type value);
void tblis_init_vector_scaled_[sdcz](tblis_vector* v, type scalar,
len_type n, type* data, stride_type inc);
void tblis_init_vector_[sdcz](tblis_vector* v, len_type n, type* data,stride_type inc);
void tblis_init_matrix_scaled_[sdcz](tblis_matrix* mat, type scalar,
len_type m, len_type n, type* data,
stride_type rs, stride_type cs);
void tblis_init_matrix_[sdcz](tblis_matrix* mat, len_type m, len_type n, type* data,
stride_type rs, stride_type cs);
void tblis_init_tensor_scaled_[sdcz](tblis_tensor* t, type scalar, unsigned ndim,
len_type* len, type* data, stride_type* stride);
void tblis_init_tensor_[sdcz](tblis_tensor* t, unsigned ndim, len_type* len, type* data,
stride_type* stride);
where the letter s
, d
, c
, or z
indicates type = float
, type = double
, type = scomplex
, or type = dcomplex
respectively.
The seven basic [tensor operations](Tensor-Operations) (and their matrix and vector counterparts) are accessible via:
Operation | Interface |
---|---|
add | void tblis_vector_add(const tblis_comm* comm, const tblis_config* cfg,
const tblis_vector* A, tblis_vector* B);
void tblis_matrix_add(const tblis_comm* comm, const tblis_config* cfg,
const tblis_matrix* A, tblis_matrix* B);
void tblis_tensor_add(const tblis_comm* comm, const tblis_config* cfg,
const tblis_tensor* A, const label_type* idx_A,
tblis_tensor* B, const label_type* idx_B); |
dot | void tblis_vector_dot(const tblis_comm* comm, const tblis_config* cfg,
const tblis_vector* A, const tblis_vector* B,
tblis_scalar* result);
void tblis_matrix_dot(const tblis_comm* comm, const tblis_config* cfg,
const tblis_matrix* A, const tblis_matrix* B,
tblis_scalar* result);
void tblis_tensor_dot(const tblis_comm* comm, const tblis_config* cfg,
const tblis_tensor* A, const label_type* idx_A,
const tblis_tensor* B, const label_type* idx_B,
tblis_scalar* result); |
reduce | void tblis_vector_reduce(const tblis_comm* comm, const tblis_config* cfg,
reduce_t op, const tblis_vector* A,
tblis_scalar* result, len_type* idx);
void tblis_matrix_reduce(const tblis_comm* comm, const tblis_config* cfg,
reduce_t op, const tblis_matrix* A,
tblis_scalar* result, len_type* idx);
void tblis_tensor_reduce(const tblis_comm* comm, const tblis_config* cfg,
reduce_t op,
const tblis_tensor* A, const label_type* idx_A,
tblis_scalar* result, len_type* idx); |
scale | void tblis_vector_scale(const tblis_comm* comm, const tblis_config* cfg,
tblis_vector* A);
void tblis_matrix_scale(const tblis_comm* comm, const tblis_config* cfg,
tblis_matrix* A);
void tblis_tensor_scale(const tblis_comm* comm, const tblis_config* cfg,
tblis_tensor* A, const label_type* idx_A); |
set | void tblis_vector_set(const tblis_comm* comm, const tblis_config* cfg,
const tblis_scalar* alpha, tblis_vector* A);
void tblis_matrix_set(const tblis_comm* comm, const tblis_config* cfg,
const tblis_scalar* alpha, tblis_matrix* A);
void tblis_tensor_set(const tblis_comm* comm, const tblis_config* cfg,
const tblis_scalar* alpha,
tblis_tensor* A, const label_type* idx_A); |
shift | void tblis_vector_shift(const tblis_comm* comm, const tblis_config* cfg,
const tblis_scalar* alpha, tblis_vector* A);
void tblis_matrix_shift(const tblis_comm* comm, const tblis_config* cfg,
const tblis_scalar* alpha, tblis_matrix* A);
void tblis_tensor_shift(const tblis_comm* comm, const tblis_config* cfg,
const tblis_scalar* alpha,
tblis_tensor* A, const label_type* idx_A); |
mult | void tblis_matrix_mult(const tblis_comm* comm, const tblis_config* cfg,
const tblis_matrix* A,
const tblis_matrix* B,
tblis_matrix* C);
void tblis_tensor_mult(const tblis_comm* comm, const tblis_config* cfg,
const tblis_tensor* A, const label_type* idx_A,
const tblis_tensor* B, const label_type* idx_B,
tblis_tensor* C, const label_type* idx_C); |
where op
is one of REDUCE_SUM
, REDUCE_MAX
, REDUCE_MAX_ABS
, REDUCE_MIN
, REDUCE_MIN_ABS
, REDUCE_NORM_1 = REDUCE_SUM_ABS
, REDUCE_NORM_2
, or REDUCE_NORM_INF = REDUCE_MAX_ABS
. For dot
and reduce
the floating-point result is stored in result
while for reduce
with a max or min reduction the position of the extremal value is also returned in idx
.
In all of the tensor interfaces, the idx_[ABC]
arrays give the index strings of each tensor, from which the exact operation to be performed can be inferred via generalized Einstein summation. When label_type
is char
(the default), then these parameters may be string literals. The scalar
and conj
parameters are reset on output objects to one and zero respectively.
Lastly, there are additional parameters comm
and cfg
in each function. The cfg
parameter should almost always be NULL
unless you really know what you are doing. The comm
parameter may also be NULL
to signal that TBLIS should perform the operation in parallel (if a thread model is enabled), but it may also be the global value tblis_single
to request single-threaded execution, or a valid tci_comm_t
(aliased as tblis_comm
) object created with the TCI library which is packaged with TBLIS (see also Threading).
TODO: update to use constructors
// Source tensor A: 10 x 9 x 2 x 5, single precision, generalized column-major.
float data_A[10*9*2*5];
len_type len_A[] = {10, 9, 2, 5};
stride_type stride_A[] = {1, 10, 90, 180};
tblis_tensor A;
A.type = TYPE_FLOAT;
A.ndim = 4;
A.len = len_A;
A.stride = stride_A;
A.data = data_A;
// A.conj is left unset on purpose: it does not have to be set for real types.
*(float*)&A.scalar = 2.0f;
// ... fill data_A here ...

// Destination tensor B: 5 x 9 x 2, single precision, generalized column-major.
float data_B[5*9*2];
len_type len_B[] = {5, 9, 2};
stride_type stride_B[] = {1, 5, 45};
tblis_tensor B;
B.type = TYPE_FLOAT;
B.ndim = 3;
B.len = len_B;
B.stride = stride_B;
B.data = data_B;
// B.conj is left unset on purpose: it does not have to be set for real types.
*(float*)&B.scalar = 0.0f;

// B is overwritten with a scaled, permuted trace of A.
tblis_tensor_add(NULL, NULL, &A, "ijkl", &B, "ljk");
// Tensor contraction example: C[abcd] += A[cebf] B[afed].
// All three tensors are double precision in generalized column-major order.

// A is indexed "cebf": c = 10, e = 9, b = 2, f = 5.
len_type len_A[] = {10, 9, 2, 5};
stride_type stride_A[] = { 1,10,90,180};
double data_A[10*9*2*5];
tblis_tensor A;
A.type = TYPE_DOUBLE;
*(double*)&A.scalar = 1.0;
A.data = data_A;
A.ndim = 4;
A.len = len_A;
A.stride = stride_A;

// B is indexed "afed": a = 7, f = 5, e = 9, d = 8.
len_type len_B[] = {7,5, 9, 8};
stride_type stride_B[] = {1,7,35,315};
double data_B[7*5*9*8];
tblis_tensor B;
B.type = TYPE_DOUBLE;
*(double*)&B.scalar = 1.0;
B.data = data_B;
B.ndim = 4;
B.len = len_B;
B.stride = stride_B;

// C is indexed "abcd": a = 7, b = 2, c = 10, d = 8.
len_type len_C[] = {7,2,10, 8};
stride_type stride_C[] = {1,7,14,140};
double data_C[7*2*10*8];
tblis_tensor C;
C.type = TYPE_DOUBLE;
// A scalar of 1.0 on the output keeps the existing contents of C, hence "+=".
*(double*)&C.scalar = 1.0;
C.data = data_C;
C.ndim = 4;
C.len = len_C;
C.stride = stride_C;

// initialize data_A, data_B, and data_C (C is accumulated into, so it is read)...

// this computes C[abcd] += A[cebf] B[afed]
tblis_tensor_mult(NULL, NULL, &A, "cebf", &B, "afed", &C, "abcd");