Skip to content

Commit

Permalink
Draft for garbage collectable label cache
Browse files Browse the repository at this point in the history
  • Loading branch information
glebashnik committed Nov 11, 2024
1 parent 7382e7c commit 0c21980
Show file tree
Hide file tree
Showing 11 changed files with 235 additions and 150 deletions.
5 changes: 3 additions & 2 deletions vespajlib/src/main/java/com/yahoo/tensor/PartialAddress.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
package com.yahoo.tensor;

import com.yahoo.tensor.impl.Label;
import com.yahoo.tensor.impl.LabelCache;

/**
* An address to a subset of a tensor's cells, specifying a label for some, but not necessarily all, of the tensor's
Expand Down Expand Up @@ -98,14 +99,14 @@ public Builder(int size) {

public Builder add(String dimensionName, long label) {
dimensionNames[index] = dimensionName;
labels[index] = Label.of(label);
labels[index] = LabelCache.getOrCreateLabel(label);
index++;
return this;
}

public Builder add(String dimensionName, String label) {
dimensionNames[index] = dimensionName;
labels[index] = Label.of(label);
labels[index] = LabelCache.getOrCreateLabel(label);
index++;
return this;
}
Expand Down
9 changes: 5 additions & 4 deletions vespajlib/src/main/java/com/yahoo/tensor/TensorAddress.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
package com.yahoo.tensor;

import com.yahoo.tensor.impl.Label;
import com.yahoo.tensor.impl.LabelCache;
import com.yahoo.tensor.impl.TensorAddressAny;

import java.util.Arrays;
Expand Down Expand Up @@ -156,7 +157,7 @@ public static class Builder {

private static Label[] createEmptyLabels(int size) {
var labels = new Label[size];
Arrays.fill(labels, Label.INVALID_INDEX_LABEL);
Arrays.fill(labels, LabelCache.INVALID_INDEX_LABEL);
return labels;
}

Expand Down Expand Up @@ -194,7 +195,7 @@ public Builder add(String dimension, String label) {
int labelIndex = type.indexOfDimensionAsInt(dimension);
if ( labelIndex < 0)
throw new IllegalArgumentException(type + " does not contain dimension '" + dimension + "'");
labels[labelIndex] = Label.of(label);
labels[labelIndex] = LabelCache.getOrCreateLabel(label);
return this;
}

Expand All @@ -208,7 +209,7 @@ public Builder add(String dimension, long label) {
int labelIndex = type.indexOfDimensionAsInt(dimension);
if ( labelIndex < 0)
throw new IllegalArgumentException(type + " does not contain dimension '" + dimension + "'");
labels[labelIndex] = Label.of(label);
labels[labelIndex] = LabelCache.getOrCreateLabel(label);
return this;
}

Expand All @@ -222,7 +223,7 @@ public Builder copy() {

void validate() {
for (int i = 0; i < labels.length; i++)
if (labels[i] == Label.INVALID_INDEX_LABEL)
if (labels[i] == LabelCache.INVALID_INDEX_LABEL)
throw new IllegalArgumentException("Missing a label for dimension '" +
type.dimensions().get(i).name() + "' for " + type);
}
Expand Down
128 changes: 15 additions & 113 deletions vespajlib/src/main/java/com/yahoo/tensor/impl/Label.java
Original file line number Diff line number Diff line change
@@ -1,27 +1,25 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.tensor.impl;

import com.google.common.collect.MapMaker;

import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.atomic.AtomicLong;
import com.yahoo.tensor.Tensor;

/**
* A label is a value of a mapped dimension of a tensor.
* This class provides a mapping of labels to numbers which allow for more efficient computation with
* mapped tensor dimensions.
*
* @author baldersheim
* A label for a tensor dimension.
* It works for both mapped dimensions with string labels and indexed dimensions with numeric labels.
* For mapped dimensions, a negative numeric label is assigned by LabelCache.
* For indexed dimensions, the index itself is used as a non-negative numeric label.
* Tensor operations rely on the numeric label for performance.
*
* @author glebashnik
*/
public class Label {
private final long numeric;
private String string = null;

private Label(long numeric) {
Label(long numeric) {
this.numeric = numeric;
}

private Label(long numeric, String string) {
Label(long numeric, String string) {
this.numeric = numeric;
this.string = string;
}
Expand All @@ -31,8 +29,12 @@ public long toNumeric() {
}

public String toString() {
if (numeric == Tensor.invalidIndex) {
return null;
}
// String labels for indexed dimensions are created on demand to reduce memory usage.
if (string == null) {
string = String.valueOf(numeric);
return String.valueOf(numeric);
}

return string;
Expand All @@ -50,104 +52,4 @@ public boolean equals(Object o) {
public int hashCode() {
return Long.hashCode(numeric);
}

public static final Label INVALID_INDEX_LABEL = new Label(-1, null);
public static final Label[] SMALL_INDEX_LABELS = creatSmallIndexLabels(1000);

private static Label[] creatSmallIndexLabels(int count) {
var labels = new Label[count];

for (var i = 0; i < count; i++) {
labels[i] = new Label(i, String.valueOf(i));
}

return labels;
}

private static final ConcurrentMap<String, Label> byString = new MapMaker()
.concurrencyLevel(1).weakValues().makeMap();

private static final ConcurrentMap<Long, Label> byNumeric = new MapMaker()
.concurrencyLevel(1).weakValues().makeMap();

private static final AtomicLong idCounter = new AtomicLong(-2);

private static Label add(long numeric, String string) {
var newLabel = new Label(numeric, string);
var existingLabel = byString.putIfAbsent(string, newLabel);

if (existingLabel != null) {
return existingLabel;
}

byNumeric.put(newLabel.numeric, newLabel);
return newLabel;
}

private static boolean validNumericIndex(String s) {
if (s.isEmpty() || ((s.length() > 1) && (s.charAt(0) == '0'))) return false;
for (int i = 0; i < s.length(); i++) {
char c = s.charAt(i);
if ((c < '0') || (c > '9')) return false;
}
return true;
}

public static Label of(String string) {
if (string == null) {
return INVALID_INDEX_LABEL;
}

// Index labels are not cached.
// Instead, they are pre-computed for small values or created on demand.
if (validNumericIndex(string)) {
try {
var numeric = Long.parseLong(string, 10);

if (numeric < SMALL_INDEX_LABELS.length) {
return SMALL_INDEX_LABELS[(int) numeric];
}

return new Label(numeric, string);
} catch(NumberFormatException e){
// Continue with cached labels
}
}

// Non-index labels are cached.
var existingLabel = byString.get(string);

if (existingLabel != null) {
return existingLabel;
}

var numeric = idCounter.getAndDecrement();
return add(numeric, string);
}

public static Label of(long numeric) {
// Non-negative numeric labels are indexes.
// They are not cached, but rather pre-computed for small values or created on demand.
if (numeric >= 0) {
if (numeric < SMALL_INDEX_LABELS.length) {
return SMALL_INDEX_LABELS[(int) numeric];
}

return new Label(numeric);
}

if (numeric == INVALID_INDEX_LABEL.numeric) {
return INVALID_INDEX_LABEL;
}

// Negative numeric labels are mapped to string labels.
// They are cached.
var existingLabel = byNumeric.get(numeric);

if (existingLabel != null) {
return existingLabel;
}

throw new IllegalArgumentException("No negative numeric label " + numeric);
}
}
Loading

0 comments on commit 0c21980

Please sign in to comment.