Skip to content

Commit

Permalink
Added string labels to benchmark
Browse files Browse the repository at this point in the history
  • Loading branch information
glebashnik committed Nov 11, 2024
1 parent 0c21980 commit 1314577
Show file tree
Hide file tree
Showing 2 changed files with 114 additions and 66 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,6 @@ public static Label getOrCreateLabel(long numeric) {
}

// Negative numeric labels are mapped to string labels.
// They are cached.
var existingLabel = getLabel(numeric);

if (existingLabel != null) {
Expand Down Expand Up @@ -139,8 +138,7 @@ private static Label getLabel(String string) {
var weakReference = byString.get(string);
return weakReference != null ? weakReference.get() : null;
}



private static Label createLabel(String string) {
// Need a lock to avoid creating the same label twice if another thread is creating the same label.
var lock = stripedLock.get(string);
Expand Down
176 changes: 113 additions & 63 deletions vespajlib/src/test/java/com/yahoo/tensor/TensorFunctionBenchmark.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,18 +24,18 @@ public class TensorFunctionBenchmark {
// Shared source of the random cell values used when generating benchmark tensors.
// It is created without a seed, so generated values are not reproducible between runs.
private final static Random random = new Random();

/**
 * Measures the average wall-clock time of one dot product round between a freshly
 * generated query vector and the given model vectors.
 *
 * A warmup pass of max(iterations / 10, 10) rounds and a System.gc() call run before
 * the timed section.
 *
 * NOTE(review): this listing appears to interleave the pre- and post-commit lines of the
 * diff, so several statements occur twice (without/with stringLabels, old/new spacing).
 *
 * @param iterations    number of timed rounds
 * @param vectorSize    number of cells in the generated query vector
 * @param modelVectors  tensors the query vector is evaluated against
 * @param dimensionType dimension type used for the generated query vector
 * @param extraSpace    if true, the query is multiplied by unitVector("j") and every model
 *                      vector by unitVector("k") before measuring
 * @param stringLabels  forwarded to vectors() when generating the query vector
 * @return elapsed milliseconds of the timed section divided by iterations
 */
public double benchmark(int iterations, int vectorSize, List<Tensor> modelVectors, TensorType.Dimension.Type dimensionType,
boolean extraSpace) {
Tensor queryVector = vectors(1, vectorSize, dimensionType).get(0);
boolean extraSpace, boolean stringLabels) {
Tensor queryVector = vectors(1, vectorSize, dimensionType, stringLabels).get(0);
if (extraSpace) {
// Each side is multiplied by a tensor over a different dimension name ("j" vs "k").
queryVector = queryVector.multiply(unitVector("j"));
modelVectors = modelVectors.stream().map(t -> t.multiply(unitVector("k"))).toList();
}
dotProduct(queryVector, modelVectors, Math.max(iterations/10, 10)); // warmup
dotProduct(queryVector, modelVectors, Math.max(iterations / 10, 10)); // warmup
// Presumably requested so that warmup garbage is less likely to disturb the timed run.
System.gc();
long startTime = System.currentTimeMillis();
dotProduct(queryVector, modelVectors, iterations);
long totalTime = System.currentTimeMillis() - startTime;
return (double)totalTime / (double)iterations;
return (double) totalTime / (double) iterations;
}

private Tensor unitVector(String dimension) {
Expand All @@ -45,16 +45,19 @@ private Tensor unitVector(String dimension) {

/**
 * Repeats the single-round dot product the given number of times.
 *
 * NOTE(review): the two loop headers below appear to be the pre- and post-commit forms
 * of the same line in the diff.
 *
 * @param tensor     the tensor passed as first argument to every round
 * @param tensors    the tensors passed as second argument to every round
 * @param iterations number of rounds to run
 * @return the value returned by the last round, or 0 if no round ran
 */
private double dotProduct(Tensor tensor, List<Tensor> tensors, int iterations) {
double result = 0;
for (int i = 0 ; i < iterations; i++)
for (int i = 0; i < iterations; i++)
// Only the most recent round's value is kept; earlier results are overwritten.
result = dotProduct(tensor, tensors);
return result;
}

private double dotProduct(Tensor tensor, List<Tensor> tensors) {
double largest = Double.MIN_VALUE;
TensorFunction<Name> dotProductFunction = new Reduce<>(new Join<>(new ConstantTensor<>(tensor),
new VariableTensor<>("argument"), (a, b) -> a * b),
Reduce.Aggregator.sum).toPrimitive();
TensorFunction<Name> dotProductFunction = new Reduce<>(
new Join<>(new ConstantTensor<>(tensor),
new VariableTensor<>("argument"), (a, b) -> a * b
),
Reduce.Aggregator.sum
).toPrimitive();
MapEvaluationContext<Name> context = new MapEvaluationContext<>();

for (Tensor tensorElement : tensors) { // tensors.size() = 1 for larger tensor
Expand All @@ -67,41 +70,61 @@ private double dotProduct(Tensor tensor, List<Tensor> tensors) {
return largest;
}

/**
 * Generates vectorCount tensors over a single dimension "x", each filled with
 * vectorSize cells holding random values.
 *
 * NOTE(review): this listing appears to interleave the pre- and post-commit lines of the
 * diff (two signatures, and the numeric-label line both before and inside the if/else).
 *
 * @param vectorCount   number of tensors to create
 * @param vectorSize    number of cells per tensor; also passed to vectorType as the size
 * @param dimensionType type of the "x" dimension
 * @param stringLabels  if true, cell labels are the strings "l" + j; otherwise the numeric index j
 * @return the generated tensors, in creation order
 */
private static List<Tensor> vectors(int vectorCount, int vectorSize, TensorType.Dimension.Type dimensionType) {
private static List<Tensor> vectors(int vectorCount, int vectorSize, TensorType.Dimension.Type dimensionType, boolean stringLabels) {
List<Tensor> tensors = new ArrayList<>();
// All generated tensors share one type.
TensorType type = vectorType(new TensorType.Builder(), "x", dimensionType, vectorSize);
for (int i = 0; i < vectorCount; i++) {
Tensor.Builder builder = Tensor.Builder.of(type);
for (int j = 0; j < vectorSize; j++) {
builder.cell().label("x", j).value(random.nextDouble());
if (stringLabels)
builder.cell().label("x", "l" + j).value(random.nextDouble());
else
builder.cell().label("x", j).value(random.nextDouble());
}
tensors.add(builder.build());
}
return tensors;
}

/**
 * Generates a single tensor over dimensions "i" and "x" with vectorCount * vectorSize
 * cells holding random values, and returns it as a one-element list.
 *
 * NOTE(review): this listing appears to interleave the pre- and post-commit lines of the
 * diff (two signatures, two forms of the "i" dimension declaration, two cell-building forms).
 *
 * @param vectorCount   number of distinct labels along "i"
 * @param vectorSize    number of distinct labels along "x"; also passed to vectorType as the size
 * @param dimensionType type of the "x" dimension; "i" gets the same type, except that
 *                      indexedBound is replaced by indexedUnbound for "i"
 * @param stringLabels  if true, labels are the strings "l" + index; otherwise the numeric indexes
 * @return an immutable list containing the one generated tensor
 */
private static List<Tensor> matrix(int vectorCount, int vectorSize, TensorType.Dimension.Type dimensionType) {
private static List<Tensor> matrix(int vectorCount, int vectorSize, TensorType.Dimension.Type dimensionType, boolean stringLabels) {
TensorType.Builder typeBuilder = new TensorType.Builder();
typeBuilder.dimension("i", dimensionType == TensorType.Dimension.Type.indexedBound ? TensorType.Dimension.Type.indexedUnbound : dimensionType);
typeBuilder.dimension(
"i",
dimensionType == TensorType.Dimension.Type.indexedBound ? TensorType.Dimension.Type.indexedUnbound : dimensionType
);
// The returned type is ignored here; the call is made to add "x" to typeBuilder.
vectorType(typeBuilder, "x", dimensionType, vectorSize);
Tensor.Builder builder = Tensor.Builder.of(typeBuilder.build());
for (int i = 0; i < vectorCount; i++) {
for (int j = 0; j < vectorSize; j++) {
builder.cell()
.label("i", i)
.label("x", j)
.value(random.nextDouble());
if (stringLabels) {
builder.cell()
.label("i", "l" + i)
.label("x", "l" + j)
.value(random.nextDouble());
} else {
builder.cell()
.label("i", i)
.label("x", j)
.value(random.nextDouble());
}
}
}
return List.of(builder.build());
}

/**
 * Adds one dimension of the requested kind to the given builder and builds the type.
 *
 * NOTE(review): the case labels appear twice because this listing seems to interleave
 * the single-line (pre-commit) and multi-line (post-commit) forms of the switch.
 *
 * @param builder the type builder to add the dimension to; it is modified by this call
 * @param name    name of the dimension to add
 * @param type    mapped, indexedUnbound or indexedBound
 * @param size    passed to the builder only for indexedBound
 * @return the result of builder.build() after the dimension was added
 * @throws IllegalArgumentException if type is not one of the three handled values
 */
private static TensorType vectorType(TensorType.Builder builder, String name, TensorType.Dimension.Type type, int size) {
switch (type) {
case mapped: builder.mapped(name); break;
case indexedUnbound: builder.indexed(name); break;
case indexedBound: builder.indexed(name, size); break;
default: throw new IllegalArgumentException("Dimension type " + type + " not supported");
case mapped:
builder.mapped(name);
break;
case indexedUnbound:
builder.indexed(name);
break;
case indexedBound:
builder.indexed(name, size);
break;
default:
throw new IllegalArgumentException("Dimension type " + type + " not supported");
}
return builder.build();
}
Expand All @@ -110,56 +133,83 @@ public static void main(String[] args) {
// Entry point: runs each benchmark configuration in sequence and prints the value
// returned by benchmark() (milliseconds per iteration) for each one.
// NOTE(review): this listing appears to interleave the pre-commit single-line calls with
// their post-commit multi-line replacements, so most configurations are shown twice.
double time = 0;
// Important to use size larger than in Label.SMALL_INDEX_LABELS for more comprehensive benchmark
int vectorSize = 2000;

// Todo Add benchmark with string labels
boolean stringLabels = true;
// NOTE(review): stringLabels is not read anywhere in the visible body of main;
// every call below passes a literal true/false instead.

// ---------------- Indexed unbound:

time = new TensorFunctionBenchmark().benchmark(5000, vectorSize, vectors(100, vectorSize, TensorType.Dimension.Type.indexedUnbound), TensorType.Dimension.Type.indexedUnbound, false);
System.out.printf("Indexed unbound vectors, time per join: %1$8.3f ms\n", time);
time = new TensorFunctionBenchmark().benchmark(5000, vectorSize, matrix(100, vectorSize, TensorType.Dimension.Type.indexedUnbound), TensorType.Dimension.Type.indexedUnbound, false);
System.out.printf("Indexed unbound matrix, time per join: %1$8.3f ms\n", time);
time = new TensorFunctionBenchmark().benchmark(
5000, vectorSize, vectors(100, vectorSize, TensorType.Dimension.Type.indexedUnbound, false),
TensorType.Dimension.Type.indexedUnbound, false, false
);
System.out.printf("Indexed unbound vectors, time per join: %1$8.3f ms\n", time);
time = new TensorFunctionBenchmark().benchmark(
5000, vectorSize, matrix(100, vectorSize, TensorType.Dimension.Type.indexedUnbound, false),
TensorType.Dimension.Type.indexedUnbound, false, false
);
System.out.printf("Indexed unbound matrix, time per join: %1$8.3f ms\n", time);

// ---------------- Indexed bound:
time = new TensorFunctionBenchmark().benchmark(5000, vectorSize, vectors(100, vectorSize, TensorType.Dimension.Type.indexedBound), TensorType.Dimension.Type.indexedBound, false);
System.out.printf("Indexed bound vectors, time per join: %1$8.3f ms\n", time);

time = new TensorFunctionBenchmark().benchmark(5000,vectorSize, matrix(100, vectorSize, TensorType.Dimension.Type.indexedBound), TensorType.Dimension.Type.indexedBound, false);
System.out.printf("Indexed bound matrix, time per join: %1$8.3f ms\n", time);
time = new TensorFunctionBenchmark().benchmark(
5000, vectorSize, vectors(100, vectorSize, TensorType.Dimension.Type.indexedBound, false),
TensorType.Dimension.Type.indexedBound, false, false
);
System.out.printf("Indexed bound vectors, time per join: %1$8.3f ms\n", time);

time = new TensorFunctionBenchmark().benchmark(
5000, vectorSize, matrix(100, vectorSize, TensorType.Dimension.Type.indexedBound, false),
TensorType.Dimension.Type.indexedBound, false, false
);
System.out.printf("Indexed bound matrix, time per join: %1$8.3f ms\n", time);

// ---------------- Mapped:
time = new TensorFunctionBenchmark().benchmark(500, vectorSize, vectors(100, vectorSize, TensorType.Dimension.Type.mapped), TensorType.Dimension.Type.mapped, false);
System.out.printf("Mapped vectors, time per join: %1$8.3f ms\n", time);

time = new TensorFunctionBenchmark().benchmark(100, vectorSize, matrix(100, vectorSize, TensorType.Dimension.Type.mapped), TensorType.Dimension.Type.mapped, false);
System.out.printf("Mapped matrix, time per join: %1$8.3f ms\n", time);
time = new TensorFunctionBenchmark().benchmark(
500, vectorSize, vectors(100, vectorSize, TensorType.Dimension.Type.mapped, false),
TensorType.Dimension.Type.mapped, false, false
);
System.out.printf("Mapped vectors, time per join: %1$8.3f ms\n", time);

time = new TensorFunctionBenchmark().benchmark(
100, vectorSize, matrix(100, vectorSize, TensorType.Dimension.Type.mapped, false),
TensorType.Dimension.Type.mapped, false, false
);
System.out.printf("Mapped matrix, time per join: %1$8.3f ms\n", time);

// ---------------- Mapped with string labels:
time = new TensorFunctionBenchmark().benchmark(
500, vectorSize, vectors(100, vectorSize, TensorType.Dimension.Type.mapped, true),
TensorType.Dimension.Type.mapped, false, true
);
System.out.printf("Mapped vectors with string labels, time per join: %1$8.3f ms\n", time);

time = new TensorFunctionBenchmark().benchmark(
100, vectorSize, matrix(100, vectorSize, TensorType.Dimension.Type.mapped, true),
TensorType.Dimension.Type.mapped, false, true
);
System.out.printf("Mapped matrix with string labels, time per join: %1$8.3f ms\n", time);

// ---------------- Indexed (unbound) with extra space (sidesteps current special-case optimizations):
time = new TensorFunctionBenchmark().benchmark(50, vectorSize, vectors(100, vectorSize, TensorType.Dimension.Type.indexedUnbound), TensorType.Dimension.Type.indexedUnbound, true);
System.out.printf("Indexed vectors, x space time per join: %1$8.3f ms\n", time);

time = new TensorFunctionBenchmark().benchmark(50, vectorSize, matrix(100, vectorSize, TensorType.Dimension.Type.indexedUnbound), TensorType.Dimension.Type.indexedUnbound, true);
System.out.printf("Indexed matrix, x space time per join: %1$8.3f ms\n", time);

// ---------------- Mapped with extra space (sidesteps current special-case optimizations):
time = new TensorFunctionBenchmark().benchmark(100, vectorSize, vectors(100, vectorSize, TensorType.Dimension.Type.mapped), TensorType.Dimension.Type.mapped, true);
System.out.printf("Mapped vectors, x space time per join: %1$8.3f ms\n", time);

time = new TensorFunctionBenchmark().benchmark(100, vectorSize, matrix(100, vectorSize, TensorType.Dimension.Type.mapped), TensorType.Dimension.Type.mapped, true);
System.out.printf("Mapped matrix, x space time per join: %1$8.3f ms\n", time);

/* 2.4Ghz Intel Core i9, Macbook Pro 2019
Indexed unbound vectors, time per join: 0,066 ms
Indexed unbound matrix, time per join: 0,108 ms
Indexed bound vectors, time per join: 0,068 ms
Indexed bound matrix, time per join: 0,106 ms
Mapped vectors, time per join: 0,845 ms
Mapped matrix, time per join: 1,779 ms
Indexed vectors, x space time per join: 5,778 ms
Indexed matrix, x space time per join: 3,342 ms
Mapped vectors, x space time per join: 8,184 ms
Mapped matrix, x space time per join: 11,547 ms
*/
time = new TensorFunctionBenchmark().benchmark(
50, vectorSize, vectors(100, vectorSize, TensorType.Dimension.Type.indexedUnbound, false),
TensorType.Dimension.Type.indexedUnbound, true, false
);
System.out.printf("Indexed vectors, x space time per join: %1$8.3f ms\n", time);

time = new TensorFunctionBenchmark().benchmark(
50, vectorSize, matrix(100, vectorSize, TensorType.Dimension.Type.indexedUnbound, false),
TensorType.Dimension.Type.indexedUnbound, true, false
);
System.out.printf("Indexed matrix, x space time per join: %1$8.3f ms\n", time);

// ---------------- Mapped with extra space (sidesteps current special-case optimizations) with string labels:
time = new TensorFunctionBenchmark().benchmark(
100, vectorSize, vectors(100, vectorSize, TensorType.Dimension.Type.mapped, true),
TensorType.Dimension.Type.mapped, true, true
);
System.out.printf("Mapped vectors, x space with string labels time per join: %1$8.3f ms\n", time);

time = new TensorFunctionBenchmark().benchmark(
100, vectorSize, matrix(100, vectorSize, TensorType.Dimension.Type.mapped, true),
TensorType.Dimension.Type.mapped, true, true
);
System.out.printf("Mapped matrix, x space with string labels time per join: %1$8.3f ms\n", time);
}

}

0 comments on commit 1314577

Please sign in to comment.