Skip to content

Commit

Permalink
Merge pull request #548 from kroma-network/perf/reduce-redundant-zero…
Browse files Browse the repository at this point in the history
…-initializations

perf: reduce redundant zero initializations
  • Loading branch information
chokobole authored Oct 8, 2024
2 parents ee3ba4d + 90452b4 commit ab62362
Show file tree
Hide file tree
Showing 55 changed files with 350 additions and 112 deletions.
67 changes: 67 additions & 0 deletions tachyon/base/parallelize.h
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,73 @@ auto ParallelizeMap(size_t size, Callable callback,
std::move(callback));
}

// Assigns |value| to every element of |container|. The work is handed to
// |Parallelize|, which invokes the callback once per chunk of |container|;
// |threshold| is forwarded to |Parallelize| unchanged.
template <typename Container>
void ParallelizeFill(Container& container, typename Container::value_type value,
                     std::optional<size_t> threshold = std::nullopt) {
  using T = typename Container::value_type;
  Parallelize(
      container,
      // |value| is captured by reference; it outlives the |Parallelize| call
      // because it is a by-value parameter of this function.
      [&value](absl::Span<T> chunk) {
        std::fill(chunk.begin(), chunk.end(), value);
      },
      threshold);
}

// Resizes |container| to hold |size| elements while keeping the existing
// elements that still fit.
//
// - If the current capacity already covers |size|, this is a plain
//   |container.resize(size)|.
// - Otherwise a new std::vector with |size| elements is created, the old
//   elements are moved into its front through |Parallelize| (|threshold| is
//   forwarded to it), and the new vector is move-assigned to |container|.
//   |Container| therefore has to be assignable from
//   std::vector<typename Container::value_type>.
template <typename Container>
void ParallelizeResize(Container& container, size_t size,
                       std::optional<size_t> threshold = std::nullopt) {
  using T = typename Container::value_type;

  // |>=| rather than |>|: resizing to exactly the current capacity never
  // reallocates, so there is nothing to gain from the allocate-and-move path
  // below in that case.
  if (container.capacity() >= size) {
    container.resize(size);
    return;
  }

  // From here on |container.size() <= container.capacity() < size|, so taking
  // the first |container.size()| elements of |new_container| is in range.
  std::vector<T> new_container(size);
  auto copy_span = absl::MakeSpan(new_container).first(container.size());
  Parallelize(
      copy_span,
      [&container](absl::Span<T> chunk, size_t chunk_offset,
                   size_t chunk_size) {
        // NOTE(review): relies on |chunk_offset| * |chunk_size| being the
        // index of |chunk[0]| within |copy_span| — confirm against
        // |Parallelize|.
        const size_t start = chunk_offset * chunk_size;
        for (size_t i = 0; i < chunk.size(); ++i) {
          chunk[i] = std::move(container[start + i]);
        }
      },
      threshold);
  container = std::move(new_container);
}

// Resizes |container| to hold |size| elements. Existing elements that still
// fit are kept; every newly added element is set to |value|.
//
// - If the current capacity already covers |size|, the container is resized
//   in place and only the newly added tail (if any) is filled with |value|
//   through |Parallelize|.
// - Otherwise a new std::vector with |size| elements is created and populated
//   through |Parallelize|: positions below the old size receive the moved old
//   elements, the remaining positions receive |value|. The new vector is then
//   move-assigned to |container|, so |Container| has to be assignable from
//   std::vector<typename Container::value_type>.
//
// |threshold| is forwarded to |Parallelize| unchanged.
template <typename Container>
void ParallelizeResize(Container& container, size_t size,
                       typename Container::value_type value,
                       std::optional<size_t> threshold = std::nullopt) {
  using T = typename Container::value_type;
  const size_t old_size = container.size();

  // |>=| rather than |>|: resizing to exactly the current capacity never
  // reallocates, so the in-place path is sufficient in that case too.
  if (container.capacity() >= size) {
    container.resize(size);
    // When shrinking (or keeping the size) no element was added. Without this
    // guard |size - old_size| would wrap around as an unsigned value and an
    // out-of-range length would be passed to |last()|.
    if (size <= old_size) return;

    auto init_span = absl::MakeSpan(container).last(size - old_size);
    Parallelize(
        init_span,
        [&value](absl::Span<T> chunk) {
          std::fill(chunk.begin(), chunk.end(), value);
        },
        threshold);
    return;
  }

  // From here on |old_size <= container.capacity() < size|.
  std::vector<T> new_container(size);
  Parallelize(
      new_container,
      [&container, &value, old_size](absl::Span<T> chunk, size_t chunk_offset,
                                     size_t chunk_size) {
        // NOTE(review): relies on |chunk_offset| * |chunk_size| being the
        // index of |chunk[0]| within |new_container| — confirm against
        // |Parallelize|.
        const size_t start = chunk_offset * chunk_size;
        for (size_t i = 0; i < chunk.size(); ++i) {
          const size_t index = start + i;
          if (index < old_size) {
            chunk[i] = std::move(container[index]);
          } else {
            chunk[i] = value;
          }
        }
      },
      threshold);
  container = std::move(new_container);
}

} // namespace tachyon::base

#endif // TACHYON_BASE_PARALLELIZE_H_
2 changes: 1 addition & 1 deletion tachyon/crypto/challenger/multi_field32_conversions.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ BigF Reduce(absl::Span<const SmallF> values) {
using BigInt = typename BigF::BigIntTy;
CHECK_LT(values.size(), BigInt::kLimbNums * 2);

BigInt ret;
BigInt ret(0);
for (size_t i = 0; i < values.size(); i += 2) {
uint32_t value = values[i].value();
if constexpr (SmallF::Config::kUseMontgomery) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,8 @@ class TwoAdicMultiplicativeCoset {
domain_->group_gen_inv();

size_t sz = coset.domain()->size();
// NOTE(batzor): These vectors are initialized below in the parallel loop, so
// it is safe to leave them uninitialized here.
std::vector<F> first_row(sz);
std::vector<F> last_row(sz);
std::vector<F> transition(sz);
Expand Down
4 changes: 4 additions & 0 deletions tachyon/crypto/commitments/kzg/kzg.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,8 @@ class KZG {
#endif

void ResizeBatchCommitments(size_t size) {
// WARN(batzor): When resizing to a larger size, the last values will be
// garbage and should be filled with commitment results.
#if TACHYON_CUDA
if (msm_gpu_) {
gpu_batch_commitments_.resize(size);
Expand All @@ -124,6 +126,8 @@ class KZG {
}

std::vector<Commitment> GetBatchCommitments(BatchCommitmentState& state) {
// NOTE(batzor): Resizing this vector without initialization is safe since
// |BatchNormalize| will overwrite them.
std::vector<Commitment> batch_commitments;
#if TACHYON_CUDA
if (msm_gpu_) {
Expand Down
4 changes: 4 additions & 0 deletions tachyon/crypto/commitments/pedersen/pedersen.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@ class Pedersen final
#endif

void ResizeBatchCommitments() {
// WARN(batzor): When resizing to a larger size, the last values will be
// garbage and should be filled with commitment results.
size_t size = this->batch_commitment_state_.batch_count;
#if TACHYON_CUDA
if (msm_gpu_) {
Expand All @@ -96,6 +98,8 @@ class Pedersen final
}

std::vector<Commitment> GetBatchCommitments() {
// NOTE(batzor): Resizing this vector without initialization is safe since
// |BatchNormalize| will overwrite them.
std::vector<Commitment> batch_commitments;
#if TACHYON_CUDA
if (msm_gpu_) {
Expand Down
4 changes: 3 additions & 1 deletion tachyon/crypto/hashes/sponge/poseidon/poseidon_sponge_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ struct PoseidonSpongeBase : public FieldBasedCryptographicSponge<Derived> {
bytes.insert(bytes.end(), elem_bytes.begin(), elem_bytes.end());
}

bytes.resize(num_bytes);
bytes.resize(num_bytes, F::Zero());
return bytes;
}

Expand Down Expand Up @@ -121,6 +121,8 @@ struct PoseidonSpongeBase : public FieldBasedCryptographicSponge<Derived> {
size_t num_elements) const {
const Derived& derived = static_cast<const Derived&>(*this);

// NOTE(batzor): |SqueezeInternal| will fill all the garbage values, so it
// is safe to have it uninitialized.
std::vector<F> ret(num_elements);
switch (state.mode.type) {
case DuplexSpongeMode::Type::kAbsorbing: {
Expand Down
10 changes: 10 additions & 0 deletions tachyon/math/base/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,11 @@ tachyon_cc_library(
],
)

# Header-only target: exposes const_init.h through |hdrs| and declares no
# other attributes.
tachyon_cc_library(
name = "const_init",
hdrs = ["const_init.h"],
)

tachyon_cc_library(
name = "egcd",
hdrs = ["egcd.h"],
Expand All @@ -88,6 +93,11 @@ tachyon_cc_library(
],
)

# Header-only target: exposes parallelize_threshold.h through |hdrs| and
# declares no other attributes.
tachyon_cc_library(
name = "parallelize_threshold",
hdrs = ["parallelize_threshold.h"],
)

tachyon_cc_library(
name = "rational_field",
hdrs = ["rational_field.h"],
Expand Down
16 changes: 8 additions & 8 deletions tachyon/math/base/arithmetics_results.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ namespace tachyon::math {

template <typename T>
struct AddResult {
T result{};
T carry{};
T result;
T carry{0};

constexpr bool operator==(const AddResult& other) const {
return result == other.result && carry == other.carry;
Expand All @@ -26,8 +26,8 @@ struct AddResult {

template <typename T>
struct SubResult {
T result{};
T borrow{};
T result;
T borrow{0};

constexpr bool operator==(const SubResult& other) const {
return result == other.result && borrow == other.borrow;
Expand All @@ -43,8 +43,8 @@ struct SubResult {

template <typename T>
struct MulResult {
T hi{};
T lo{};
T hi{0};
T lo{0};

constexpr bool operator==(const MulResult& other) const {
return hi == other.hi && lo == other.lo;
Expand All @@ -60,8 +60,8 @@ struct MulResult {

template <typename T>
struct DivResult {
T quotient{};
T remainder{};
T quotient;
T remainder = T::Zero();

constexpr bool operator==(const DivResult& other) const {
return quotient == other.quotient && remainder == other.remainder;
Expand Down
24 changes: 11 additions & 13 deletions tachyon/math/base/big_int.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,7 @@ TACHYON_EXPORT std::string LimbsToHexString(const uint64_t* limbs,
// designed to support a wide range of big integer arithmetic operations.
template <size_t N>
struct BigInt {
uint64_t limbs[N] = {
0,
};
uint64_t limbs[N];
constexpr static size_t kLimbNums = N;
constexpr static size_t kSmallestLimbIdx = SMALLEST_INDEX(N);
constexpr static size_t kBiggestLimbIdx = BIGGEST_INDEX(N);
Expand All @@ -61,16 +59,16 @@ struct BigInt {

constexpr BigInt() = default;
template <typename T, std::enable_if_t<std::is_signed_v<T>>* = nullptr>
constexpr explicit BigInt(T value) {
constexpr explicit BigInt(T value) : limbs{0} {
DCHECK_GE(value, 0);
limbs[kSmallestLimbIdx] = value;
}
template <typename T, std::enable_if_t<std::is_unsigned_v<T>>* = nullptr>
constexpr explicit BigInt(T value) {
constexpr explicit BigInt(T value) : limbs{0} {
limbs[kSmallestLimbIdx] = value;
}
template <typename T, std::enable_if_t<std::is_signed_v<T>>* = nullptr>
constexpr explicit BigInt(std::initializer_list<T> values) {
constexpr explicit BigInt(std::initializer_list<T> values) : limbs{0} {
DCHECK_LE(values.size(), N);
auto it = values.begin();
for (size_t i = 0; i < values.size(); ++i, ++it) {
Expand All @@ -79,7 +77,7 @@ struct BigInt {
}
}
template <typename T, std::enable_if_t<std::is_unsigned_v<T>>* = nullptr>
constexpr explicit BigInt(std::initializer_list<T> values) {
constexpr explicit BigInt(std::initializer_list<T> values) : limbs{0} {
DCHECK_LE(values.size(), N);
auto it = values.begin();
for (size_t i = 0; i < values.size(); ++i, ++it) {
Expand Down Expand Up @@ -117,14 +115,14 @@ struct BigInt {

// Convert a decimal string to a BigInt.
static std::optional<BigInt> FromDecString(std::string_view str) {
BigInt ret;
BigInt ret(0);
if (!internal::StringToLimbs(str, ret.limbs, N)) return std::nullopt;
return ret;
}

// Convert a hexadecimal string to a BigInt.
static std::optional<BigInt> FromHexString(std::string_view str) {
BigInt ret;
BigInt ret(0);
if (!(internal::HexStringToLimbs(str, ret.limbs, N))) return std::nullopt;
return ret;
}
Expand Down Expand Up @@ -255,8 +253,8 @@ struct BigInt {
constexpr BigInt<N2> Extend() const {
static_assert(N2 > N);
BigInt<N2> ret;
for (size_t i = 0; i < N; ++i) {
ret[i] = limbs[i];
for (size_t i = 0; i < N2; ++i) {
ret[i] = i < N ? limbs[i] : 0;
}
return ret;
}
Expand Down Expand Up @@ -707,8 +705,8 @@ struct BigInt {
LOG_IF_NOT_GPU(ERROR) << "Division by zero attempted";
return false;
}
BigInt quotient;
BigInt remainder;
BigInt quotient(0);
BigInt remainder(0);
size_t bits = BitTraits<BigInt>::GetNumBits(*this);
uint64_t carry = 0;
uint64_t& smallest_bit = remainder.limbs[kSmallestLimbIdx];
Expand Down
24 changes: 24 additions & 0 deletions tachyon/math/base/const_init.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#ifndef TACHYON_MATH_BASE_CONST_INIT_H_
#define TACHYON_MATH_BASE_CONST_INIT_H_

namespace tachyon::math {

// Single-enumerator tag enums. Each enum is its own distinct type with exactly
// one value, so the enumerators can be told apart by overload resolution.
// NOTE(review): presumably used to request initialization to a specific
// constant (zero, one, minus one, the inverse of two) — confirm against the
// code that consumes these tags.
enum ZeroInitType { kZeroInit };
enum OneInitType { kOneInit };
enum MinusOneInitType { kMinusOneInit };
enum TwoInvInitType { kTwoInvInit };

}  // namespace tachyon::math

#endif // TACHYON_MATH_BASE_CONST_INIT_H_
15 changes: 15 additions & 0 deletions tachyon/math/base/parallelize_threshold.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#ifndef TACHYON_MATH_BASE_PARALLELIZE_THRESHOLD_H_
#define TACHYON_MATH_BASE_PARALLELIZE_THRESHOLD_H_

namespace tachyon::math {

// Thresholds for deciding whether a loop is parallelized.
struct ParallelizeThreshold {
  // A loop whose size is below the applicable threshold is executed
  // sequentially instead of being parallelized.
  static constexpr int kFieldInit = 1'000'000;
  static constexpr int kFieldSimpleOp = 100'000;
};

}  // namespace tachyon::math

#endif // TACHYON_MATH_BASE_PARALLELIZE_THRESHOLD_H_
2 changes: 1 addition & 1 deletion tachyon/math/base/rational_field.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ class RationalField : public Field<RationalField<F>> {
: numerator_(std::move(numerator)),
denominator_(std::move(denominator)) {}

constexpr static RationalField Zero() { return RationalField(); }
constexpr static RationalField Zero() { return RationalField(F::Zero()); }

constexpr static RationalField One() { return RationalField(F::One()); }

Expand Down
2 changes: 1 addition & 1 deletion tachyon/math/elliptic_curves/bn/generator/generator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ namespace tachyon {

template <size_t N>
std::vector<int8_t> ComputeAteLoopCount(const mpz_class& six_x_plus_2) {
math::BigInt<N> x;
math::BigInt<N> x(0);
math::gmp::CopyLimbs(six_x_plus_2, x.limbs);
return x.ToNAF();
}
Expand Down
2 changes: 1 addition & 1 deletion tachyon/math/finite_fields/fp12.h
Original file line number Diff line number Diff line change
Expand Up @@ -341,7 +341,7 @@ class Fp12 final : public QuadraticExtensionField<Fp12<Config>> {
// |kFrobeniusCoeffs[0]| = q^((P⁰ - 1) / 6)
Config::kFrobeniusCoeffs[0] = FrobeniusCoefficient::One();
#define SET_FROBENIUS_COEFF(d) \
BigInt<d * N> exp##d; \
BigInt<d * N> exp##d(0); \
gmp::CopyLimbs(exp##d##_gmp, exp##d.limbs); \
Config::kFrobeniusCoeffs[d] = BaseFieldConfig::kNonResidue.Pow(exp##d)
// |kFrobeniusCoeffs[1]| = q^(exp₁) = q^((P¹ - 1) / 6)
Expand Down
2 changes: 1 addition & 1 deletion tachyon/math/finite_fields/fp3.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ class Fp3 final : public CubicExtensionField<Fp3<Config>> {
// |kFrobeniusCoeffs[0]| = q^((P⁰ - 1) / 3) = 1
Config::kFrobeniusCoeffs[0] = FrobeniusCoefficient::One();
#define SET_FROBENIUS_COEFF(d) \
BigInt<d * N> exp##d; \
BigInt<d * N> exp##d(0); \
gmp::CopyLimbs(exp##d##_gmp, exp##d.limbs); \
Config::kFrobeniusCoeffs[d] = Config::kNonResidue.Pow(exp##d)

Expand Down
4 changes: 2 additions & 2 deletions tachyon/math/finite_fields/fp4.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ class Fp4<Config, std::enable_if_t<Config::kDegreeOverBaseField == 2>> final
// |kFrobeniusCoeffs[0]| = q^((P⁰ - 1) / 4) = 1
Config::kFrobeniusCoeffs[0] = FrobeniusCoefficient::One();
#define SET_FROBENIUS_COEFF(d) \
BigInt<d * N> exp##d; \
BigInt<d * N> exp##d(0); \
gmp::CopyLimbs(exp##d##_gmp, exp##d.limbs); \
Config::kFrobeniusCoeffs[d] = BaseFieldConfig::kNonResidue.Pow(exp##d)

Expand Down Expand Up @@ -176,7 +176,7 @@ class Fp4<Config, std::enable_if_t<Config::kDegreeOverBaseField == 4>> final
// |kFrobeniusCoeffs[0]| = q^((P⁰ - 1) / 4) = 1
Config::kFrobeniusCoeffs[0] = FrobeniusCoefficient::One();
#define SET_FROBENIUS_COEFF(d) \
BigInt<d * N> exp##d; \
BigInt<d * N> exp##d(0); \
gmp::CopyLimbs(exp##d##_gmp, exp##d.limbs); \
Config::kFrobeniusCoeffs[d] = Config::kNonResidue.Pow(exp##d)

Expand Down
Loading

0 comments on commit ab62362

Please sign in to comment.