Skip to content

Commit

Permalink
Speed up MappingStats Computation on Coordinating Node (elastic#82830)
Browse files Browse the repository at this point in the history
We can exploit the mapping deduplication logic to save deserializing the
same mapping repeatedly here. This should fix extremly long running
computations when the cache needs to be refreshed for these stats
in the common case of many duplicate mappings in a cluster.
In a follow-up we can probably do the same for `AnalysisStats` as well.
  • Loading branch information
original-brownbear authored Jan 26, 2022
1 parent 3157d1c commit f2cb910
Show file tree
Hide file tree
Showing 8 changed files with 308 additions and 158 deletions.
5 changes: 5 additions & 0 deletions docs/changelog/82830.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 82830
summary: Speed up `MappingStats` Computation on Coordinating Node
area: Stats
type: enhancement
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ExecutionException;
Expand Down Expand Up @@ -276,7 +277,7 @@ public void testFieldTypes() {
}""").get();
response = client().admin().cluster().prepareClusterStats().get();
assertThat(response.getIndicesStats().getMappings().getFieldTypeStats().size(), equalTo(3));
Set<FieldStats> stats = response.getIndicesStats().getMappings().getFieldTypeStats();
List<FieldStats> stats = response.getIndicesStats().getMappings().getFieldTypeStats();
for (FieldStats stat : stats) {
if (stat.getName().equals("integer")) {
assertThat(stat.getCount(), greaterThanOrEqualTo(1));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.IdentityHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
Expand All @@ -51,30 +52,14 @@ public static AnalysisStats of(Metadata metadata, Runnable ensureNotCancelled) {
final Map<String, IndexFeatureStats> usedBuiltInTokenFilters = new HashMap<>();
final Map<String, IndexFeatureStats> usedBuiltInAnalyzers = new HashMap<>();

final Map<MappingMetadata, Integer> mappingCounts = new IdentityHashMap<>(metadata.getMappingsByHash().size());
for (IndexMetadata indexMetadata : metadata) {
ensureNotCancelled.run();
if (indexMetadata.isSystem()) {
// Don't include system indices in statistics about analysis,
// we care about the user's indices.
continue;
}
Set<String> indexAnalyzers = new HashSet<>();
MappingMetadata mappingMetadata = indexMetadata.mapping();
if (mappingMetadata != null) {
MappingVisitor.visitMapping(mappingMetadata.getSourceAsMap(), (field, fieldMapping) -> {
for (String key : new String[] { "analyzer", "search_analyzer", "search_quote_analyzer" }) {
Object analyzerO = fieldMapping.get(key);
if (analyzerO != null) {
final String analyzer = analyzerO.toString();
IndexFeatureStats stats = usedBuiltInAnalyzers.computeIfAbsent(analyzer, IndexFeatureStats::new);
stats.count++;
if (indexAnalyzers.add(analyzer)) {
stats.indexCount++;
}
}
}
});
}

Set<String> indexCharFilters = new HashSet<>();
Set<String> indexTokenizers = new HashSet<>();
Expand Down Expand Up @@ -133,7 +118,27 @@ public static AnalysisStats of(Metadata metadata, Runnable ensureNotCancelled) {
Map<String, Settings> tokenFilterSettings = indexSettings.getGroups("index.analysis.filter");
usedBuiltInTokenFilters.keySet().removeAll(tokenFilterSettings.keySet());
aggregateAnalysisTypes(tokenFilterSettings.values(), usedTokenFilterTypes, indexTokenFilterTypes);
countMapping(mappingCounts, indexMetadata);
}
for (Map.Entry<MappingMetadata, Integer> mappingAndCount : mappingCounts.entrySet()) {
ensureNotCancelled.run();
Set<String> indexAnalyzers = new HashSet<>();
final int count = mappingAndCount.getValue();
MappingVisitor.visitMapping(mappingAndCount.getKey().getSourceAsMap(), (field, fieldMapping) -> {
for (String key : new String[] { "analyzer", "search_analyzer", "search_quote_analyzer" }) {
Object analyzerO = fieldMapping.get(key);
if (analyzerO != null) {
final String analyzer = analyzerO.toString();
IndexFeatureStats stats = usedBuiltInAnalyzers.computeIfAbsent(analyzer, IndexFeatureStats::new);
stats.count += count;
if (indexAnalyzers.add(analyzer)) {
stats.indexCount += count;
}
}
}
});
}

return new AnalysisStats(
usedCharFilterTypes.values(),
usedTokenizerTypes.values(),
Expand All @@ -146,6 +151,14 @@ public static AnalysisStats of(Metadata metadata, Runnable ensureNotCancelled) {
);
}

public static void countMapping(Map<MappingMetadata, Integer> mappingCounts, IndexMetadata indexMetadata) {
final MappingMetadata mappingMetadata = indexMetadata.mapping();
if (mappingMetadata == null) {
return;
}
mappingCounts.compute(mappingMetadata, (k, count) -> count == null ? 1 : count + 1);
}

private static void aggregateAnalysisTypes(
Collection<Settings> settings,
Map<String, IndexFeatureStats> stats,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,14 +68,14 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
return builder;
}

void update(int chars, long lines, int sourceUsages, int docUsages) {
void update(int chars, long lines, int sourceUsages, int docUsages, int count) {
this.maxChars = Math.max(this.maxChars, chars);
this.totalChars += chars;
this.totalChars += (long) chars * count;
this.maxLines = Math.max(this.maxLines, lines);
this.totalLines += lines;
this.totalSourceUsages += sourceUsages;
this.totalLines += lines * count;
this.totalSourceUsages += (long) sourceUsages * count;
this.maxSourceUsages = Math.max(this.maxSourceUsages, sourceUsages);
this.totalDocUsages += docUsages;
this.totalDocUsages += (long) docUsages * count;
this.maxDocUsages = Math.max(this.maxDocUsages, docUsages);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.IdentityHashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
Expand All @@ -48,85 +48,87 @@ public static MappingStats of(Metadata metadata, Runnable ensureNotCancelled) {
Map<String, FieldStats> fieldTypes = new HashMap<>();
Set<String> concreteFieldNames = new HashSet<>();
Map<String, RuntimeFieldStats> runtimeFieldTypes = new HashMap<>();
final Map<MappingMetadata, Integer> mappingCounts = new IdentityHashMap<>(metadata.getMappingsByHash().size());
for (IndexMetadata indexMetadata : metadata) {
ensureNotCancelled.run();
if (indexMetadata.isSystem()) {
// Don't include system indices in statistics about mappings,
// we care about the user's indices.
continue;
}
AnalysisStats.countMapping(mappingCounts, indexMetadata);
}
for (Map.Entry<MappingMetadata, Integer> mappingAndCount : mappingCounts.entrySet()) {
ensureNotCancelled.run();
Set<String> indexFieldTypes = new HashSet<>();
Set<String> indexRuntimeFieldTypes = new HashSet<>();
MappingMetadata mappingMetadata = indexMetadata.mapping();
if (mappingMetadata != null) {
final Map<String, Object> map = mappingMetadata.getSourceAsMap();
MappingVisitor.visitMapping(map, (field, fieldMapping) -> {
concreteFieldNames.add(field);
String type = null;
Object typeO = fieldMapping.get("type");
if (typeO != null) {
type = typeO.toString();
} else if (fieldMapping.containsKey("properties")) {
type = "object";
}
if (type != null) {
FieldStats stats = fieldTypes.computeIfAbsent(type, FieldStats::new);
stats.count++;
if (indexFieldTypes.add(type)) {
stats.indexCount++;
}
Object scriptObject = fieldMapping.get("script");
if (scriptObject instanceof Map<?, ?> script) {
Object sourceObject = script.get("source");
stats.scriptCount++;
updateScriptParams(sourceObject, stats.fieldScriptStats);
Object langObject = script.get("lang");
if (langObject != null) {
stats.scriptLangs.add(langObject.toString());
}
}
}
});

MappingVisitor.visitRuntimeMapping(map, (field, fieldMapping) -> {
Object typeObject = fieldMapping.get("type");
if (typeObject == null) {
return;
}
String type = typeObject.toString();
RuntimeFieldStats stats = runtimeFieldTypes.computeIfAbsent(type, RuntimeFieldStats::new);
stats.count++;
if (indexRuntimeFieldTypes.add(type)) {
stats.indexCount++;
}
if (concreteFieldNames.contains(field)) {
stats.shadowedCount++;
final int count = mappingAndCount.getValue();
final Map<String, Object> map = mappingAndCount.getKey().getSourceAsMap();
MappingVisitor.visitMapping(map, (field, fieldMapping) -> {
concreteFieldNames.add(field);
String type = null;
Object typeO = fieldMapping.get("type");
if (typeO != null) {
type = typeO.toString();
} else if (fieldMapping.containsKey("properties")) {
type = "object";
}
if (type != null) {
FieldStats stats = fieldTypes.computeIfAbsent(type, FieldStats::new);
stats.count += count;
if (indexFieldTypes.add(type)) {
stats.indexCount += count;
}
Object scriptObject = fieldMapping.get("script");
if (scriptObject == null) {
stats.scriptLessCount++;
} else if (scriptObject instanceof Map<?, ?> script) {
if (scriptObject instanceof Map<?, ?> script) {
Object sourceObject = script.get("source");
updateScriptParams(sourceObject, stats.fieldScriptStats);
stats.scriptCount += count;
updateScriptParams(sourceObject, stats.fieldScriptStats, count);
Object langObject = script.get("lang");
if (langObject != null) {
stats.scriptLangs.add(langObject.toString());
}
}
});
}
}
});

MappingVisitor.visitRuntimeMapping(map, (field, fieldMapping) -> {
Object typeObject = fieldMapping.get("type");
if (typeObject == null) {
return;
}
String type = typeObject.toString();
RuntimeFieldStats stats = runtimeFieldTypes.computeIfAbsent(type, RuntimeFieldStats::new);
stats.count += count;
if (indexRuntimeFieldTypes.add(type)) {
stats.indexCount += count;
}
if (concreteFieldNames.contains(field)) {
stats.shadowedCount += count;
}
Object scriptObject = fieldMapping.get("script");
if (scriptObject == null) {
stats.scriptLessCount += count;
} else if (scriptObject instanceof Map<?, ?> script) {
Object sourceObject = script.get("source");
updateScriptParams(sourceObject, stats.fieldScriptStats, count);
Object langObject = script.get("lang");
if (langObject != null) {
stats.scriptLangs.add(langObject.toString());
}
}
});
}
return new MappingStats(fieldTypes.values(), runtimeFieldTypes.values());
}

private static void updateScriptParams(Object scriptSourceObject, FieldScriptStats scriptStats) {
private static void updateScriptParams(Object scriptSourceObject, FieldScriptStats scriptStats, int multiplier) {
if (scriptSourceObject != null) {
String scriptSource = scriptSourceObject.toString();
int chars = scriptSource.length();
long lines = scriptSource.lines().count();
int docUsages = countOccurrences(scriptSource, DOC_PATTERN);
int sourceUsages = countOccurrences(scriptSource, SOURCE_PATTERN);
scriptStats.update(chars, lines, sourceUsages, docUsages);
scriptStats.update(chars, lines, sourceUsages, docUsages, multiplier);
}
}

Expand All @@ -139,21 +141,21 @@ private static int countOccurrences(String script, Pattern pattern) {
return occurrences;
}

private final Set<FieldStats> fieldTypeStats;
private final Set<RuntimeFieldStats> runtimeFieldStats;
private final List<FieldStats> fieldTypeStats;
private final List<RuntimeFieldStats> runtimeFieldStats;

MappingStats(Collection<FieldStats> fieldTypeStats, Collection<RuntimeFieldStats> runtimeFieldStats) {
List<FieldStats> stats = new ArrayList<>(fieldTypeStats);
stats.sort(Comparator.comparing(IndexFeatureStats::getName));
this.fieldTypeStats = Collections.unmodifiableSet(new LinkedHashSet<>(stats));
this.fieldTypeStats = Collections.unmodifiableList(stats);
List<RuntimeFieldStats> runtimeStats = new ArrayList<>(runtimeFieldStats);
runtimeStats.sort(Comparator.comparing(RuntimeFieldStats::type));
this.runtimeFieldStats = Collections.unmodifiableSet(new LinkedHashSet<>(runtimeStats));
this.runtimeFieldStats = Collections.unmodifiableList(runtimeStats);
}

MappingStats(StreamInput in) throws IOException {
fieldTypeStats = Collections.unmodifiableSet(new LinkedHashSet<>(in.readList(FieldStats::new)));
runtimeFieldStats = Collections.unmodifiableSet(new LinkedHashSet<>(in.readList(RuntimeFieldStats::new)));
fieldTypeStats = Collections.unmodifiableList(in.readList(FieldStats::new));
runtimeFieldStats = Collections.unmodifiableList(in.readList(RuntimeFieldStats::new));
}

@Override
Expand All @@ -165,14 +167,14 @@ public void writeTo(StreamOutput out) throws IOException {
/**
* Return stats about field types.
*/
public Set<FieldStats> getFieldTypeStats() {
public List<FieldStats> getFieldTypeStats() {
return fieldTypeStats;
}

/**
* Return stats about runtime field types.
*/
public Set<RuntimeFieldStats> getRuntimeFieldStats() {
public List<RuntimeFieldStats> getRuntimeFieldStats() {
return runtimeFieldStats;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -949,7 +949,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
return builder;
}

Map<String, MappingMetadata> getMappingsByHash() {
public Map<String, MappingMetadata> getMappingsByHash() {
return mappingsByHash;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -232,13 +232,39 @@ public void testAccountsRegularIndices() {
.put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 4)
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1)
.build();
IndexMetadata.Builder indexMetadata = new IndexMetadata.Builder("foo").settings(settings).putMapping(mapping);
Metadata metadata = new Metadata.Builder().put(indexMetadata).build();
AnalysisStats analysisStats = AnalysisStats.of(metadata, () -> {});
IndexFeatureStats expectedStats = new IndexFeatureStats("german");
expectedStats.count = 1;
expectedStats.indexCount = 1;
assertEquals(Collections.singleton(expectedStats), analysisStats.getUsedBuiltInAnalyzers());
Metadata metadata = new Metadata.Builder().put(new IndexMetadata.Builder("foo").settings(settings).putMapping(mapping)).build();
{
AnalysisStats analysisStats = AnalysisStats.of(metadata, () -> {});
IndexFeatureStats expectedStats = new IndexFeatureStats("german");
expectedStats.count = 1;
expectedStats.indexCount = 1;
assertEquals(Collections.singleton(expectedStats), analysisStats.getUsedBuiltInAnalyzers());
}

Metadata metadata2 = Metadata.builder(metadata)
.put(new IndexMetadata.Builder("bar").settings(settings).putMapping(mapping))
.build();
{
AnalysisStats analysisStats = AnalysisStats.of(metadata2, () -> {});
IndexFeatureStats expectedStats = new IndexFeatureStats("german");
expectedStats.count = 2;
expectedStats.indexCount = 2;
assertEquals(Collections.singleton(expectedStats), analysisStats.getUsedBuiltInAnalyzers());
}

Metadata metadata3 = Metadata.builder(metadata2).put(new IndexMetadata.Builder("baz").settings(settings).putMapping("""
{"properties":{"bar1":{"type":"text","analyzer":"french"},
"bar2":{"type":"text","analyzer":"french"},"bar3":{"type":"text","analyzer":"french"}}}""")).build();
{
AnalysisStats analysisStats = AnalysisStats.of(metadata3, () -> {});
IndexFeatureStats expectedStatsGerman = new IndexFeatureStats("german");
expectedStatsGerman.count = 2;
expectedStatsGerman.indexCount = 2;
IndexFeatureStats expectedStatsFrench = new IndexFeatureStats("french");
expectedStatsFrench.count = 3;
expectedStatsFrench.indexCount = 1;
assertEquals(Set.of(expectedStatsGerman, expectedStatsFrench), analysisStats.getUsedBuiltInAnalyzers());
}
}

public void testIgnoreSystemIndices() {
Expand Down
Loading

0 comments on commit f2cb910

Please sign in to comment.