Add nonnegative_int type #1533

Merged · 6 commits · Jan 17, 2025
Changes from 3 commits
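The diff below switches call sites from raw ints to the new `nonnegative_int` wrapper. The wrapper's own definition is not among the files shown here, so as orientation only, here is a minimal sketch of the kind of type the call sites imply (a checked constructor plus `get_value()`); everything beyond those two pieces is an assumption about the real class:

```cpp
// Sketch only: inferred from call sites like nonnegative_int{0} and
// ff_dim.value.get_value() in this PR. The actual FlexFlow class likely has
// more (comparison operators, hashing, fmt/ostream support, etc.).
#include <stdexcept>

class nonnegative_int {
public:
  explicit nonnegative_int(int value) : value_(value) {
    if (value < 0) {
      throw std::invalid_argument("nonnegative_int requires value >= 0");
    }
  }

  int get_value() const {
    return value_;
  }

private:
  int value_;
};
```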
2 changes: 1 addition & 1 deletion lib/kernels/include/kernels/array_shape.h
@@ -3,7 +3,7 @@

#include "legion_dim.h"
#include "op-attrs/tensor_shape.dtg.h"
#include "utils/stack_vector.h"
#include "utils/stack_vector/stack_vector.h"
#include "utils/visitable.h"
#include <cstddef>
#include <optional>
2 changes: 1 addition & 1 deletion lib/kernels/src/legion_dim.cc
@@ -7,7 +7,7 @@ legion_dim_t add_to_legion_dim(legion_dim_t legion_dim, int value) {
}

legion_dim_t legion_dim_from_ff_dim(ff_dim_t ff_dim, int num_dimensions) {
return legion_dim_t(num_dimensions - ff_dim.value - 1);
return legion_dim_t(num_dimensions - ff_dim.value.get_value() - 1);
}

} // namespace FlexFlow
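The conversion itself is unchanged apart from the `get_value()` unwrap: it simply mirrors the dimension index. A self-contained toy sketch (stand-in structs, not the real FlexFlow types) showing the mapping for a 4-dimensional shape:

```cpp
#include <cassert>

// Toy stand-ins purely to illustrate the index mirroring; the real types
// live in the op-attrs and kernels libraries.
struct nonnegative_int { int v; int get_value() const { return v; } };
struct ff_dim_t { nonnegative_int value; };
struct legion_dim_t { int v; explicit legion_dim_t(int v_) : v(v_) {} };

legion_dim_t legion_dim_from_ff_dim(ff_dim_t ff_dim, int num_dimensions) {
  return legion_dim_t(num_dimensions - ff_dim.value.get_value() - 1);
}

int main() {
  // With 4 dimensions, ff index 0 maps to legion index 3 and vice versa.
  assert(legion_dim_from_ff_dim(ff_dim_t{nonnegative_int{0}}, 4).v == 3);
  assert(legion_dim_from_ff_dim(ff_dim_t{nonnegative_int{3}}, 4).v == 0);
}
```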
2 changes: 1 addition & 1 deletion lib/kernels/test/src/test_concat_kernel.cc
@@ -7,7 +7,7 @@ TEST_SUITE(FF_TEST_SUITE) {
TEST_CASE("Test concat kernel forward and backward") {
size_t num_inputs = 3;
size_t size_per_input = 100;
ff_dim_t concat_axis = ff_dim_t(0);
ff_dim_t concat_axis = ff_dim_t{nonnegative_int{0}};

ManagedPerDeviceFFHandle managed_handle{};
ManagedFFStream managed_stream{};
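The test changes above follow the same pattern used throughout this PR: `ff_dim_t` is now constructed from a `nonnegative_int` rather than a raw int. Judging from these call sites, the relevant part of the type is roughly a single-field struct; this is a sketch inferred from usage, not the actual header contents:

```cpp
// Inferred from call sites such as ff_dim_t{nonnegative_int{0}} and
// ff_dim.value.get_value(); the real header has additional members.
struct ff_dim_t {
  nonnegative_int value;
};

// Old call sites:  ff_dim_t(0)
// New call sites:
ff_dim_t concat_axis = ff_dim_t{nonnegative_int{0}};
```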
3 changes: 2 additions & 1 deletion lib/kernels/test/src/test_transpose_kernel.cc
@@ -7,7 +7,8 @@ TEST_SUITE(FF_TEST_SUITE) {
TEST_CASE("Test Transpose Kernel Operations") {
std::size_t num_dims = 2;

std::vector<ff_dim_t> perm = {ff_dim_t(0), ff_dim_t(1)};
std::vector<ff_dim_t> perm = {ff_dim_t{nonnegative_int{0}},
ff_dim_t{nonnegative_int{1}}};

ManagedPerDeviceFFHandle managed_handle{};
ManagedFFStream managed_stream{};
@@ -3,9 +3,9 @@

#include "kernels/legion_dim.h"
#include "op-attrs/datatype.h"
#include "op-attrs/ff_dim.h"
#include "op-attrs/ff_dim_t.h"
#include "op-attrs/tensor_shape.dtg.h"
#include "utils/stack_vector.h"
#include "utils/stack_vector/stack_vector.h"
#include "utils/visitable.h"
#include <cstddef>

5 changes: 3 additions & 2 deletions lib/local-execution/src/legion_tensor_shape.cc
@@ -1,14 +1,15 @@
#include "local-execution/legion_tensor_shape.h"
#include "kernels/legion_dim.h"
#include "op-attrs/tensor_shape.h"

namespace FlexFlow {

legion_dim_t legion_dim_from_ff_dim(ff_dim_t ff_dim, size_t num_dims) {
return legion_dim_t(num_dims - ff_dim.value - 1);
return legion_dim_t(num_dims - ff_dim.value.get_value() - 1);
}

legion_dim_t legion_dim_from_ff_dim(ff_dim_t ff_dim, TensorShape const &shape) {
return legion_dim_t(num_dims(shape) - ff_dim.value - 1);
return legion_dim_from_ff_dim(ff_dim, num_dims(shape));
}

} // namespace FlexFlow
14 changes: 7 additions & 7 deletions lib/local-execution/src/ops/linear.cc
@@ -1,7 +1,7 @@
#include "linear.h"
#include "kernels/linear_kernels.h"
#include "local-execution/task_argument_accessor.h"
#include "op-attrs/ff_dim.h"
#include "op-attrs/ff_dim_t.h"
#include "op-attrs/get_output_shapes.h"
#include "utils/exception.h"
#include "utils/hash-utils.h"
@@ -66,8 +66,8 @@ static DeviceSpecificDeviceStates
auto input = acc.get_tensor<Permissions::RO>(INPUT);
auto weight = acc.get_tensor<Permissions::RO>(WEIGHT);
auto output = acc.get_tensor<Permissions::WO>(OUTPUT);
int out_dim = output.shape.at(ff_dim_t{0});
int batch_size = output.shape.at(ff_dim_t{1});
int out_dim = output.shape.at(ff_dim_t{nonnegative_int{0}});
int batch_size = output.shape.at(ff_dim_t{nonnegative_int{1}});

float *one_ptr;

@@ -96,8 +96,8 @@ static std::optional<float> forward_task_impl(TaskArgumentAccessor const &acc) {
ProfilingSettings profiling = acc.get_argument<ProfilingSettings>(PROFILING);
auto attrs = acc.get_argument<LinearAttrs>(ATTRS);

int in_dim = input.shape.at(ff_dim_t{0}) + 1;
int out_dim = output.shape.at(ff_dim_t{0}) + 1;
int in_dim = input.shape.at(ff_dim_t{nonnegative_int{0}}) + 1;
int out_dim = output.shape.at(ff_dim_t{nonnegative_int{0}}) + 1;
int batch_size = output.shape.get_volume() / out_dim;

float const *bias_ptr = NULL;
@@ -140,8 +140,8 @@ static std::optional<float>
bias_ptr = bias.get_float_ptr();
}

int in_dim = input.shape.at(ff_dim_t{0}) + 1;
int out_dim = output.shape.at(ff_dim_t{0}) + 1;
int in_dim = input.shape.at(ff_dim_t{nonnegative_int{0}}) + 1;
int out_dim = output.shape.at(ff_dim_t{nonnegative_int{0}}) + 1;
int batch_size = output.shape.get_volume() / out_dim;

return profile(backward_kernel,
16 changes: 8 additions & 8 deletions lib/local-execution/src/ops/pool_2d.cc
@@ -30,14 +30,14 @@ static DeviceSpecificDeviceStates
auto input = acc.get_tensor<Permissions::RO>(INPUT);
auto output = acc.get_tensor<Permissions::WO>(OUTPUT);

int input_w = input.shape.at(ff_dim_t(0)) + 1;
int input_h = input.shape.at(ff_dim_t(1)) + 1;
int input_c = input.shape.at(ff_dim_t(2)) + 1;
int input_n = input.shape.at(ff_dim_t(3)) + 1;
int output_w = output.shape.at(ff_dim_t(0)) + 1;
int output_h = output.shape.at(ff_dim_t(1)) + 1;
int output_c = output.shape.at(ff_dim_t(2)) + 1;
int output_n = output.shape.at(ff_dim_t(3)) + 1;
int input_w = input.shape.at(ff_dim_t{nonnegative_int{0}}) + 1;
int input_h = input.shape.at(ff_dim_t{nonnegative_int{1}}) + 1;
int input_c = input.shape.at(ff_dim_t{nonnegative_int{2}}) + 1;
int input_n = input.shape.at(ff_dim_t{nonnegative_int{3}}) + 1;
int output_w = output.shape.at(ff_dim_t{nonnegative_int{0}}) + 1;
int output_h = output.shape.at(ff_dim_t{nonnegative_int{1}}) + 1;
int output_c = output.shape.at(ff_dim_t{nonnegative_int{2}}) + 1;
int output_n = output.shape.at(ff_dim_t{nonnegative_int{3}}) + 1;

printf("init pool (input): n(%d) c(%d) h(%d) "
"w(%d)\n",
14 changes: 7 additions & 7 deletions lib/local-execution/src/ops/reverse.cc
@@ -53,11 +53,11 @@ static std::optional<float> forward_task_impl(TaskArgumentAccessor const &acc) {
coord_t in_blk_size = 1, reverse_dim_size = 1, num_out_blks = 1;
for (int i = 0; i < output.shape.get_dim(); i++) {
if (i < axis.value) {
in_blk_size *= output.shape.at(ff_dim_t(i));
in_blk_size *= output.shape.at(ff_dim_t{nonnegative_int{i}});
} else if (i == axis.value) {
reverse_dim_size = output.shape.at(ff_dim_t(i));
reverse_dim_size = output.shape.at(ff_dim_t{nonnegative_int{i}});
} else {
num_out_blks *= output.shape.at(ff_dim_t(i));
num_out_blks *= output.shape.at(ff_dim_t{nonnegative_int{i}});
}
}

@@ -79,15 +79,15 @@ static std::optional<float>
auto output_grad = acc.get_tensor_grad<Permissions::RO>(OUTPUT);
auto attrs = acc.get_argument<ReverseAttrs>(ATTRS);

int axis = input_grad.shape.get_dim() - attrs.axis.value - 1;
int axis = input_grad.shape.get_dim() - attrs.axis.value.get_value() - 1;
coord_t in_blk_size = 1, reverse_dim_size = 1, num_out_blks = 1;
for (int i = 0; i < input_grad.shape.get_dim(); i++) {
if (i < axis) {
in_blk_size *= input_grad.shape.at(ff_dim_t(i));
in_blk_size *= input_grad.shape.at(ff_dim_t{nonnegative_int{i}});
} else if (i == axis) {
reverse_dim_size = input_grad.shape.at(ff_dim_t(i));
reverse_dim_size = input_grad.shape.at(ff_dim_t{nonnegative_int{i}});
} else {
num_out_blks *= input_grad.shape.at(ff_dim_t(i));
num_out_blks *= input_grad.shape.at(ff_dim_t{nonnegative_int{i}});
}
}

9 changes: 7 additions & 2 deletions lib/local-execution/src/ops/softmax.cc
@@ -64,8 +64,13 @@ static DeviceSpecificDeviceStates
int output_c = output.shape.at(legion_dim_t(2));
int output_n = output.shape.at(legion_dim_t(3));

SoftmaxPerDeviceState per_device_state = init_kernel(
handle, attrs.dim.value, output_n, output_c, output_h, output_w);
SoftmaxPerDeviceState per_device_state =
init_kernel(handle,
attrs.dim.value.get_value(),
output_n,
output_c,
output_h,
output_w);

return DeviceSpecificDeviceStates{
DeviceSpecific<SoftmaxPerDeviceState>::create(per_device_state)};
13 changes: 6 additions & 7 deletions lib/local-execution/src/ops/split.cc
@@ -47,11 +47,11 @@ OpTaskInvocation backward(SplitAttrs const &attrs) {
void calc_block_size(coord_t &num_blocks,
coord_t &block_size,
ArrayShape const &array_shape,
int axis) {
ff_dim_t axis) {
num_blocks = 1;
block_size = 1;
for (int d = 0; d < array_shape.num_elements(); d++) {
if (d <= axis) {
if (d <= axis.value.get_value()) {
block_size *= array_shape.at(legion_dim_t(d));
} else {
num_blocks *= array_shape.at(legion_dim_t(d));
@@ -66,12 +66,12 @@ static std::optional<float> forward_task_impl(TaskArgumentAccessor const &acc) {
auto attrs = acc.get_argument<SplitAttrs>(ATTRS);

coord_t num_blocks, in_block_size, out_block_size[MAX_NUM_OUTPUTS];
calc_block_size(num_blocks, in_block_size, input.shape, attrs.axis.value);
calc_block_size(num_blocks, in_block_size, input.shape, attrs.axis);

for (int i = 0; i < attrs.splits.size(); i++) {
coord_t out_num_blocks;
calc_block_size(
out_num_blocks, out_block_size[i], output.shape, attrs.axis.value);
out_num_blocks, out_block_size[i], output.shape, attrs.axis);
}
float *output_float_ptr = output.get_float_ptr();
return profile(forward_kernel,
@@ -94,12 +94,11 @@ static std::optional<float>
auto attrs = acc.get_argument<SplitAttrs>(ATTRS);

coord_t num_blocks, in_block_size, out_block_size[MAX_NUM_OUTPUTS];
calc_block_size(
num_blocks, in_block_size, input_grad.shape, attrs.axis.value);
calc_block_size(num_blocks, in_block_size, input_grad.shape, attrs.axis);
for (int i = 0; i < attrs.splits.size(); i++) {
coord_t out_num_blocks;
calc_block_size(
out_num_blocks, out_block_size[i], output_grad.shape, attrs.axis.value);
out_num_blocks, out_block_size[i], output_grad.shape, attrs.axis);
}
float const *output_grad_ptr = output_grad.get_float_ptr();
return profile(backward_kernel,
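Taken together, the split.cc hunks above move the unwrap of the axis into `calc_block_size` itself, so call sites can pass `attrs.axis` directly. A consolidated view of the helper as it reads after this change (`ArrayShape`, `legion_dim_t`, and `coord_t` are existing FlexFlow types, not redefined here):

```cpp
void calc_block_size(coord_t &num_blocks,
                     coord_t &block_size,
                     ArrayShape const &array_shape,
                     ff_dim_t axis) {
  num_blocks = 1;
  block_size = 1;
  for (int d = 0; d < array_shape.num_elements(); d++) {
    // Dimensions up to and including the axis contribute to the block size,
    // the remaining dimensions to the number of blocks.
    if (d <= axis.value.get_value()) {
      block_size *= array_shape.at(legion_dim_t(d));
    } else {
      num_blocks *= array_shape.at(legion_dim_t(d));
    }
  }
}

// Call sites, forward and backward alike:
calc_block_size(num_blocks, in_block_size, input.shape, attrs.axis);
```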
13 changes: 11 additions & 2 deletions lib/local-execution/src/ops/transpose.cc
@@ -39,8 +39,17 @@ OpTaskInvocation init(TransposeAttrs const &attrs) {
static DeviceSpecificDeviceStates
init_task_impl(TaskArgumentAccessor const &acc) {
auto const &attrs = acc.get_argument<TransposeAttrs>(ATTRS);
std::vector<ff_dim_t> perm = inner_to_outer_idxs(attrs.perm);
TransposePerDeviceState per_device_state = init_kernel(perm.size(), perm);
size_t size = attrs.perm.size();

std::vector<ff_dim_t> perm = [&] {
std::vector<ff_dim_t> result;
for (size_t i : range(size)) {
result.push_back(ff_dim_t{nonnegative_int{size - i - 1}});
}
return result;
}();

TransposePerDeviceState per_device_state = init_kernel(size, perm);

return DeviceSpecificDeviceStates{
DeviceSpecific<TransposePerDeviceState>::create(per_device_state)};
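The immediately-invoked lambda above replaces the previous `inner_to_outer_idxs(attrs.perm)` call by building the reversed index sequence inline. Stripped of the `ff_dim_t`/`nonnegative_int` wrappers, what it computes is just the following (self-contained sketch):

```cpp
#include <cstddef>
#include <vector>

// For size == 3 this yields {2, 1, 0}: index i is mapped to size - i - 1.
std::vector<std::size_t> reversed_indices(std::size_t size) {
  std::vector<std::size_t> result;
  for (std::size_t i = 0; i < size; i++) {
    result.push_back(size - i - 1);
  }
  return result;
}
```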