-
Notifications
You must be signed in to change notification settings - Fork 33
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes for adding default dimensions in CWSink. #209
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,7 +13,6 @@ | |
import com.amazonaws.services.cloudwatch.model.PutMetricDataResult; | ||
import com.amazonaws.services.cloudwatch.model.StandardUnit; | ||
import com.amazonaws.services.cloudwatch.model.StatisticSet; | ||
import com.amazonaws.util.StringUtils; | ||
import com.codahale.metrics.Clock; | ||
import com.codahale.metrics.Counter; | ||
import com.codahale.metrics.Counting; | ||
|
@@ -36,6 +35,7 @@ | |
import java.util.ArrayList; | ||
import java.util.Collection; | ||
import java.util.Date; | ||
import java.util.HashSet; | ||
import java.util.LinkedHashSet; | ||
import java.util.List; | ||
import java.util.Map; | ||
|
@@ -47,6 +47,8 @@ | |
import java.util.stream.Collectors; | ||
import java.util.stream.LongStream; | ||
import java.util.stream.Stream; | ||
import org.apache.commons.lang.StringUtils; | ||
import org.apache.commons.lang3.tuple.Pair; | ||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
|
||
|
@@ -83,6 +85,16 @@ public class DimensionedCloudWatchReporter extends ScheduledReporter { | |
// Visible for testing | ||
public static final String DIMENSION_SNAPSHOT_STD_DEV = "snapshot-std-dev"; | ||
|
||
public static final String DIMENSION_JOB_ID = "jobId"; | ||
|
||
public static final String DIMENSION_APPLICATION_ID = "applicationId"; | ||
|
||
public static final String DIMENSION_DOMAIN_ID = "domainId"; | ||
|
||
public static final String DIMENSION_INSTANCE_ROLE = "instanceRole"; | ||
|
||
public static final String UNKNOWN = "unknown"; | ||
|
||
/** | ||
* Amazon CloudWatch rejects values that are either too small or too large. | ||
* Values must be in the range of 8.515920e-109 to 1.174271e+108 (Base 10) or 2e-360 to 2e360 (Base 2). | ||
|
@@ -339,20 +351,58 @@ private void stageMetricDatum(final boolean metricConfigured, | |
if (metricConfigured && (builder.withZeroValuesSubmission || metricValue > 0)) { | ||
final DimensionedName dimensionedName = DimensionedName.decode(metricName); | ||
final Set<Dimension> dimensions = new LinkedHashSet<>(builder.globalDimensions); | ||
dimensions.addAll(dimensionedName.getDimensions()); | ||
MetricInfo metricInfo = getMetricInfo(dimensionedName); | ||
dimensions.addAll(metricInfo.getDimensions()); | ||
if (shouldAppendDropwizardTypeDimension) { | ||
dimensions.add(new Dimension().withName(DIMENSION_NAME_TYPE).withValue(dimensionValue)); | ||
} | ||
|
||
metricData.add(new MetricDatum() | ||
.withTimestamp(new Date(builder.clock.getTime())) | ||
.withValue(cleanMetricValue(metricValue)) | ||
.withMetricName(dimensionedName.getName()) | ||
.withMetricName(metricInfo.getMetricName()) | ||
.withDimensions(dimensions) | ||
.withUnit(standardUnit)); | ||
} | ||
} | ||
|
||
private MetricInfo getMetricInfo(DimensionedName dimensionedName) { | ||
final String jobId = System.getenv().getOrDefault("SERVERLESS_EMR_JOB_ID", UNKNOWN); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what is the cons to add more dimensions in CW? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @vamsi-amazon @penghuo can we assume "SERVERLESS_EMR_JOB_ID" is isolated at job level? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @penghuo Didn't get your question? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We can add more dimensions in the DimensionedName object while publishing the metric. |
||
final String applicationId = System.getenv().getOrDefault("SERVERLESS_EMR_VIRTUAL_CLUSTER_ID", UNKNOWN); | ||
final String domainId = System.getenv().getOrDefault("FLINT_CLUSTER_NAME", UNKNOWN); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also shall we include this in skip index mapping? https://github.com/opensearch-project/sql/blob/efb159a8cb0560fbde996cdf7c72ce82deb15681/spark/src/test/resources/flint-index-mappings/flint_mys3_default_http_logs_skipping_index.json#L20 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @noCharger Didn't get you. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @vamsi-amazon do we persist cluster name in index meta field? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not sure. I guess no. What are you trying to get to? |
||
final Dimension jobDimension = new Dimension().withName(DIMENSION_JOB_ID).withValue(jobId); | ||
final Dimension applicationDimension = new Dimension().withName(DIMENSION_APPLICATION_ID).withValue(applicationId); | ||
final Dimension domainIdDimension = new Dimension().withName(DIMENSION_DOMAIN_ID).withValue(domainId); | ||
Dimension instanceRoleDimension = new Dimension().withName(DIMENSION_INSTANCE_ROLE).withValue(UNKNOWN); | ||
String metricName = dimensionedName.getName(); | ||
String[] parts = metricName.split("\\."); | ||
if (doesNameConsistsOfMetricNameSpace(parts)) { | ||
metricName = Stream.of(parts).skip(2).collect(Collectors.joining(".")); | ||
//For executors only id is added to the metric name, that's why the numeric check. | ||
//If it is not numeric then the instance is driver. | ||
if (StringUtils.isNumeric(parts[1])) { | ||
instanceRoleDimension = new Dimension().withName(DIMENSION_INSTANCE_ROLE).withValue("executor" + parts[1]); | ||
} | ||
else { | ||
instanceRoleDimension = new Dimension().withName(DIMENSION_INSTANCE_ROLE).withValue(parts[1]); | ||
} | ||
} | ||
Set<Dimension> dimensions = new HashSet<>(); | ||
dimensions.add(jobDimension); | ||
dimensions.add(applicationDimension); | ||
dimensions.add(instanceRoleDimension); | ||
dimensions.add(domainIdDimension); | ||
dimensions.addAll(dimensionedName.getDimensions()); | ||
return new MetricInfo(metricName, dimensions); | ||
} | ||
|
||
// This tries to replicate the logic here: https://github.com/apache/spark/blob/master/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala#L137 | ||
// Since we don't have access to Spark Configuration here: we are relying on the presence of executorId as part of the metricName. | ||
private boolean doesNameConsistsOfMetricNameSpace(String[] metricNameParts) { | ||
return metricNameParts.length >= 3 | ||
&& (metricNameParts[1].equals("driver") || StringUtils.isNumeric(metricNameParts[1])); | ||
} | ||
|
||
private void stageMetricDatumWithConvertedSnapshot(final boolean metricConfigured, | ||
final String metricName, | ||
final Snapshot snapshot, | ||
|
@@ -478,6 +528,25 @@ public String getDesc() { | |
} | ||
} | ||
|
||
public static class MetricInfo { | ||
private String metricName; | ||
private Set<Dimension> dimensions; | ||
|
||
public MetricInfo(String metricName, Set<Dimension> dimensions) { | ||
this.metricName = metricName; | ||
this.dimensions = dimensions; | ||
} | ||
|
||
public String getMetricName() { | ||
return metricName; | ||
} | ||
|
||
public Set<Dimension> getDimensions() { | ||
return dimensions; | ||
} | ||
} | ||
|
||
|
||
public static class Builder { | ||
|
||
private final String namespace; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
although it's good to provide insights, DIMENSION_JOB_ID / DIMENSION_APPLICATION_ID / DIMENSION_DOMAIN_ID / DIMENSION_INSTANCE_ROLE are actually high cardinality dimensions which considered as seperate metrics. It will bring extra cost setting them as default dimensions. Ref: https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/cloudwatch_billing.html