Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf: reduce redundant zero initializations #548

Merged
merged 5 commits into from
Oct 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 67 additions & 0 deletions tachyon/base/parallelize.h
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,73 @@ auto ParallelizeMap(size_t size, Callable callback,
std::move(callback));
}

// Assigns |value| to every element of |container|. The work is handed to
// |Parallelize|, which invokes the callback once per chunk of |container|;
// |threshold| is forwarded to |Parallelize| unchanged.
template <typename Container>
void ParallelizeFill(Container& container, typename Container::value_type value,
                     std::optional<size_t> threshold = std::nullopt) {
  using T = typename Container::value_type;

  // |value| is captured by reference: it is a by-value parameter of this
  // function, so it stays alive for the whole |Parallelize| call below.
  const auto fill_chunk = [&value](absl::Span<T> chunk) {
    std::fill(chunk.begin(), chunk.end(), value);
  };
  Parallelize(container, fill_chunk, threshold);
}

template <typename Container>
void ParallelizeResize(Container& container, size_t size,
std::optional<size_t> threshold = std::nullopt) {
if (container.capacity() > size) {
container.resize(size);
} else {
std::vector<typename Container::value_type> new_container(size);
auto copy_span = absl::MakeSpan(new_container).first(container.size());
Parallelize(
copy_span,
[&container](absl::Span<typename Container::value_type> chunk,
size_t chunk_offset, size_t chunk_size) {
size_t start = chunk_offset * chunk_size;
for (size_t i = 0; i < chunk.size(); ++i) {
chunk[i] = std::move(container[start + i]);
}
},
threshold);
container = std::move(new_container);
}
}

// Resizes |container| to |size| elements and sets every newly added element
// to |value|.
//
// - If |size| does not exceed the current size, the container is simply
//   shrunk (or left as is); there is nothing to fill.
// - If the container grows and its capacity is strictly greater than |size|,
//   it is resized in place and only the new tail is filled via |Parallelize|.
// - Otherwise a fresh std::vector of |size| elements is created; existing
//   elements are moved into its front, the remainder is set to |value|, and
//   the new vector is move-assigned to |container|.
//
// |threshold| is forwarded to |Parallelize| unchanged.
template <typename Container>
void ParallelizeResize(Container& container, size_t size,
                       typename Container::value_type value,
                       std::optional<size_t> threshold = std::nullopt) {
  using T = typename Container::value_type;

  const size_t old_size = container.size();
  if (size <= old_size) {
    // Shrinking or no-op. This must be handled before computing
    // |size - old_size| below: that subtraction would wrap around for
    // |size < old_size| and request an out-of-range tail span.
    container.resize(size);
    return;
  }

  if (container.capacity() > size) {
    container.resize(size);
    // Only the freshly appended tail needs to be set to |value|.
    auto init_span = absl::MakeSpan(container).last(size - old_size);
    Parallelize(
        init_span,
        [&value](absl::Span<T> chunk) {
          std::fill(chunk.begin(), chunk.end(), value);
        },
        threshold);
    return;
  }

  std::vector<T> new_container(size);
  Parallelize(
      new_container,
      [&container, &value, old_size](absl::Span<T> chunk, size_t chunk_offset,
                                     size_t chunk_size) {
        // |chunk_offset * chunk_size| is used as the index in
        // |new_container| of |chunk[0]|.
        size_t start = chunk_offset * chunk_size;
        for (size_t i = 0; i < chunk.size(); ++i) {
          // Indices below |old_size| take the existing element; the rest are
          // filled with |value|.
          chunk[i] =
              (start + i) < old_size ? std::move(container[start + i]) : value;
        }
      },
      threshold);
  container = std::move(new_container);
}

} // namespace tachyon::base

#endif // TACHYON_BASE_PARALLELIZE_H_
2 changes: 1 addition & 1 deletion tachyon/crypto/challenger/multi_field32_conversions.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ BigF Reduce(absl::Span<const SmallF> values) {
using BigInt = typename BigF::BigIntTy;
CHECK_LT(values.size(), BigInt::kLimbNums * 2);

BigInt ret;
BigInt ret(0);
for (size_t i = 0; i < values.size(); i += 2) {
uint32_t value = values[i].value();
if constexpr (SmallF::Config::kUseMontgomery) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,8 @@ class TwoAdicMultiplicativeCoset {
domain_->group_gen_inv();

size_t sz = coset.domain()->size();
// NOTE(batzor): These vectors are initialized below in the parallel loop, so
// it is safe to leave them uninitialized here.
std::vector<F> first_row(sz);
std::vector<F> last_row(sz);
std::vector<F> transition(sz);
Expand Down
4 changes: 4 additions & 0 deletions tachyon/crypto/commitments/kzg/kzg.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,8 @@ class KZG {
#endif

void ResizeBatchCommitments(size_t size) {
// WARN(batzor): When resizing to a larger size, the last values will be
// garbage and should be filled with commitment results.
#if TACHYON_CUDA
if (msm_gpu_) {
gpu_batch_commitments_.resize(size);
Expand All @@ -124,6 +126,8 @@ class KZG {
}

std::vector<Commitment> GetBatchCommitments(BatchCommitmentState& state) {
// NOTE(batzor): Resizing this vector without initialization is safe since
// |BatchNormalize| will overwrite them.
std::vector<Commitment> batch_commitments;
#if TACHYON_CUDA
if (msm_gpu_) {
Expand Down
4 changes: 4 additions & 0 deletions tachyon/crypto/commitments/pedersen/pedersen.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@ class Pedersen final
#endif

void ResizeBatchCommitments() {
// WARN(batzor): When resizing to a larger size, the last values will be
// garbage and should be filled with commitment results.
size_t size = this->batch_commitment_state_.batch_count;
#if TACHYON_CUDA
if (msm_gpu_) {
Expand All @@ -96,6 +98,8 @@ class Pedersen final
}

std::vector<Commitment> GetBatchCommitments() {
// NOTE(batzor): Resizing this vector without initialization is safe since
// |BatchNormalize| will overwrite them.
std::vector<Commitment> batch_commitments;
#if TACHYON_CUDA
if (msm_gpu_) {
Expand Down
4 changes: 3 additions & 1 deletion tachyon/crypto/hashes/sponge/poseidon/poseidon_sponge_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ struct PoseidonSpongeBase : public FieldBasedCryptographicSponge<Derived> {
bytes.insert(bytes.end(), elem_bytes.begin(), elem_bytes.end());
}

bytes.resize(num_bytes);
bytes.resize(num_bytes, F::Zero());
return bytes;
}

Expand Down Expand Up @@ -121,6 +121,8 @@ struct PoseidonSpongeBase : public FieldBasedCryptographicSponge<Derived> {
size_t num_elements) const {
const Derived& derived = static_cast<const Derived&>(*this);

// NOTE(batzor): |SqueezeInternal| will fill all the garbage values, so it
// is safe to have it uninitialized.
std::vector<F> ret(num_elements);
switch (state.mode.type) {
case DuplexSpongeMode::Type::kAbsorbing: {
Expand Down
10 changes: 10 additions & 0 deletions tachyon/math/base/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,11 @@ tachyon_cc_library(
],
)

# Library target exposing only the header const_init.h.
tachyon_cc_library(
name = "const_init",
hdrs = ["const_init.h"],
)

tachyon_cc_library(
name = "egcd",
hdrs = ["egcd.h"],
Expand All @@ -88,6 +93,11 @@ tachyon_cc_library(
],
)

# Library target exposing only the header parallelize_threshold.h.
tachyon_cc_library(
name = "parallelize_threshold",
hdrs = ["parallelize_threshold.h"],
)

tachyon_cc_library(
name = "rational_field",
hdrs = ["rational_field.h"],
Expand Down
16 changes: 8 additions & 8 deletions tachyon/math/base/arithmetics_results.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ namespace tachyon::math {

template <typename T>
struct AddResult {
T result{};
T carry{};
T result;
T carry{0};

constexpr bool operator==(const AddResult& other) const {
return result == other.result && carry == other.carry;
Expand All @@ -26,8 +26,8 @@ struct AddResult {

template <typename T>
struct SubResult {
T result{};
T borrow{};
T result;
T borrow{0};

constexpr bool operator==(const SubResult& other) const {
return result == other.result && borrow == other.borrow;
Expand All @@ -43,8 +43,8 @@ struct SubResult {

template <typename T>
struct MulResult {
T hi{};
T lo{};
T hi{0};
T lo{0};

constexpr bool operator==(const MulResult& other) const {
return hi == other.hi && lo == other.lo;
Expand All @@ -60,8 +60,8 @@ struct MulResult {

template <typename T>
struct DivResult {
T quotient{};
T remainder{};
T quotient;
T remainder = T::Zero();

constexpr bool operator==(const DivResult& other) const {
return quotient == other.quotient && remainder == other.remainder;
Expand Down
24 changes: 11 additions & 13 deletions tachyon/math/base/big_int.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,7 @@ TACHYON_EXPORT std::string LimbsToHexString(const uint64_t* limbs,
// designed to support a wide range of big integer arithmetic operations.
template <size_t N>
struct BigInt {
uint64_t limbs[N] = {
0,
};
uint64_t limbs[N];
constexpr static size_t kLimbNums = N;
constexpr static size_t kSmallestLimbIdx = SMALLEST_INDEX(N);
constexpr static size_t kBiggestLimbIdx = BIGGEST_INDEX(N);
Expand All @@ -61,16 +59,16 @@ struct BigInt {

constexpr BigInt() = default;
template <typename T, std::enable_if_t<std::is_signed_v<T>>* = nullptr>
constexpr explicit BigInt(T value) {
constexpr explicit BigInt(T value) : limbs{0} {
DCHECK_GE(value, 0);
limbs[kSmallestLimbIdx] = value;
}
template <typename T, std::enable_if_t<std::is_unsigned_v<T>>* = nullptr>
constexpr explicit BigInt(T value) {
constexpr explicit BigInt(T value) : limbs{0} {
limbs[kSmallestLimbIdx] = value;
}
template <typename T, std::enable_if_t<std::is_signed_v<T>>* = nullptr>
constexpr explicit BigInt(std::initializer_list<T> values) {
constexpr explicit BigInt(std::initializer_list<T> values) : limbs{0} {
DCHECK_LE(values.size(), N);
auto it = values.begin();
for (size_t i = 0; i < values.size(); ++i, ++it) {
Expand All @@ -79,7 +77,7 @@ struct BigInt {
}
}
template <typename T, std::enable_if_t<std::is_unsigned_v<T>>* = nullptr>
constexpr explicit BigInt(std::initializer_list<T> values) {
constexpr explicit BigInt(std::initializer_list<T> values) : limbs{0} {
DCHECK_LE(values.size(), N);
auto it = values.begin();
for (size_t i = 0; i < values.size(); ++i, ++it) {
Expand Down Expand Up @@ -117,14 +115,14 @@ struct BigInt {

// Convert a decimal string to a BigInt.
static std::optional<BigInt> FromDecString(std::string_view str) {
BigInt ret;
BigInt ret(0);
if (!internal::StringToLimbs(str, ret.limbs, N)) return std::nullopt;
return ret;
}

// Convert a hexadecimal string to a BigInt.
static std::optional<BigInt> FromHexString(std::string_view str) {
BigInt ret;
BigInt ret(0);
if (!(internal::HexStringToLimbs(str, ret.limbs, N))) return std::nullopt;
return ret;
}
Expand Down Expand Up @@ -255,8 +253,8 @@ struct BigInt {
constexpr BigInt<N2> Extend() const {
static_assert(N2 > N);
BigInt<N2> ret;
for (size_t i = 0; i < N; ++i) {
ret[i] = limbs[i];
for (size_t i = 0; i < N2; ++i) {
ret[i] = i < N ? limbs[i] : 0;
}
return ret;
}
Expand Down Expand Up @@ -707,8 +705,8 @@ struct BigInt {
LOG_IF_NOT_GPU(ERROR) << "Division by zero attempted";
return false;
}
BigInt quotient;
BigInt remainder;
BigInt quotient(0);
BigInt remainder(0);
size_t bits = BitTraits<BigInt>::GetNumBits(*this);
uint64_t carry = 0;
uint64_t& smallest_bit = remainder.limbs[kSmallestLimbIdx];
Expand Down
24 changes: 24 additions & 0 deletions tachyon/math/base/const_init.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#ifndef TACHYON_MATH_BASE_CONST_INIT_H_
#define TACHYON_MATH_BASE_CONST_INIT_H_

namespace tachyon::math {

// Single-enumerator tag enums. Each type has exactly one value, so the type
// itself carries the meaning.
// NOTE(review): presumably these are passed to constructors to select which
// constant (zero, one, minus one, inverse of two) an object is initialized
// with — confirm against the call sites.

// Tag type whose only value is |kZeroInit|.
enum ZeroInitType { kZeroInit };

// Tag type whose only value is |kOneInit|.
enum OneInitType { kOneInit };

// Tag type whose only value is |kMinusOneInit|.
enum MinusOneInitType { kMinusOneInit };

// Tag type whose only value is |kTwoInvInit|.
enum TwoInvInitType { kTwoInvInit };

} // namespace tachyon::math

#endif // TACHYON_MATH_BASE_CONST_INIT_H_
15 changes: 15 additions & 0 deletions tachyon/math/base/parallelize_threshold.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#ifndef TACHYON_MATH_BASE_PARALLELIZE_THRESHOLD_H_
#define TACHYON_MATH_BASE_PARALLELIZE_THRESHOLD_H_

namespace tachyon::math {

// Loop-size cutoffs for parallelization: a loop whose size is below the
// relevant threshold is executed sequentially rather than in parallel.
struct ParallelizeThreshold {
  // Cutoff named for field initialization loops.
  static constexpr int kFieldInit = 1'000'000;
  // Cutoff named for simple field operation loops.
  static constexpr int kFieldSimpleOp = 100'000;
};

} // namespace tachyon::math

#endif // TACHYON_MATH_BASE_PARALLELIZE_THRESHOLD_H_
2 changes: 1 addition & 1 deletion tachyon/math/base/rational_field.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ class RationalField : public Field<RationalField<F>> {
: numerator_(std::move(numerator)),
denominator_(std::move(denominator)) {}

constexpr static RationalField Zero() { return RationalField(); }
constexpr static RationalField Zero() { return RationalField(F::Zero()); }

constexpr static RationalField One() { return RationalField(F::One()); }

Expand Down
2 changes: 1 addition & 1 deletion tachyon/math/elliptic_curves/bn/generator/generator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ namespace tachyon {

template <size_t N>
std::vector<int8_t> ComputeAteLoopCount(const mpz_class& six_x_plus_2) {
math::BigInt<N> x;
math::BigInt<N> x(0);
math::gmp::CopyLimbs(six_x_plus_2, x.limbs);
return x.ToNAF();
}
Expand Down
2 changes: 1 addition & 1 deletion tachyon/math/finite_fields/fp12.h
Original file line number Diff line number Diff line change
Expand Up @@ -341,7 +341,7 @@ class Fp12 final : public QuadraticExtensionField<Fp12<Config>> {
// |kFrobeniusCoeffs[0]| = q^((P⁰ - 1) / 6)
Config::kFrobeniusCoeffs[0] = FrobeniusCoefficient::One();
#define SET_FROBENIUS_COEFF(d) \
BigInt<d * N> exp##d; \
BigInt<d * N> exp##d(0); \
gmp::CopyLimbs(exp##d##_gmp, exp##d.limbs); \
Config::kFrobeniusCoeffs[d] = BaseFieldConfig::kNonResidue.Pow(exp##d)
// |kFrobeniusCoeffs[1]| = q^(exp₁) = q^((P¹ - 1) / 6)
Expand Down
2 changes: 1 addition & 1 deletion tachyon/math/finite_fields/fp3.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ class Fp3 final : public CubicExtensionField<Fp3<Config>> {
// |kFrobeniusCoeffs[0]| = q^((P⁰ - 1) / 3) = 1
Config::kFrobeniusCoeffs[0] = FrobeniusCoefficient::One();
#define SET_FROBENIUS_COEFF(d) \
BigInt<d * N> exp##d; \
BigInt<d * N> exp##d(0); \
gmp::CopyLimbs(exp##d##_gmp, exp##d.limbs); \
Config::kFrobeniusCoeffs[d] = Config::kNonResidue.Pow(exp##d)

Expand Down
4 changes: 2 additions & 2 deletions tachyon/math/finite_fields/fp4.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ class Fp4<Config, std::enable_if_t<Config::kDegreeOverBaseField == 2>> final
// |kFrobeniusCoeffs[0]| = q^((P⁰ - 1) / 4) = 1
Config::kFrobeniusCoeffs[0] = FrobeniusCoefficient::One();
#define SET_FROBENIUS_COEFF(d) \
BigInt<d * N> exp##d; \
BigInt<d * N> exp##d(0); \
gmp::CopyLimbs(exp##d##_gmp, exp##d.limbs); \
Config::kFrobeniusCoeffs[d] = BaseFieldConfig::kNonResidue.Pow(exp##d)

Expand Down Expand Up @@ -176,7 +176,7 @@ class Fp4<Config, std::enable_if_t<Config::kDegreeOverBaseField == 4>> final
// |kFrobeniusCoeffs[0]| = q^((P⁰ - 1) / 4) = 1
Config::kFrobeniusCoeffs[0] = FrobeniusCoefficient::One();
#define SET_FROBENIUS_COEFF(d) \
BigInt<d * N> exp##d; \
BigInt<d * N> exp##d(0); \
gmp::CopyLimbs(exp##d##_gmp, exp##d.limbs); \
Config::kFrobeniusCoeffs[d] = Config::kNonResidue.Pow(exp##d)

Expand Down
Loading
Loading