Skip to content

Commit

Permalink
Add interactive job metrics (#240)
Browse files Browse the repository at this point in the history
* Add repl metrics

Signed-off-by: Louis Chu <[email protected]>

* Fix style after rebase main

Signed-off-by: Louis Chu <[email protected]>

* Rename vars

Signed-off-by: Louis Chu <[email protected]>

---------

Signed-off-by: Louis Chu <[email protected]>
  • Loading branch information
noCharger authored Feb 16, 2024
1 parent 9c15194 commit 9c34a1d
Show file tree
Hide file tree
Showing 6 changed files with 213 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,49 @@ public class MetricConstants {
* Similar to OS_READ_METRIC_PREFIX, this constant is used for categorizing and identifying metrics that pertain to write operations.
*/
public static final String OS_WRITE_OP_METRIC_PREFIX = "opensearch.write";

/**
* Metric name for counting the errors encountered with Amazon S3 operations.
*/
public static final String S3_ERR_CNT_METRIC = "s3.error.count";

/**
* Metric name for counting the number of sessions currently running.
*/
public static final String REPL_RUNNING_METRIC = "session.running.count";

/**
* Metric name for counting the number of sessions that have failed.
*/
public static final String REPL_FAILED_METRIC = "session.failed.count";

/**
* Metric name for counting the number of sessions that have successfully completed.
*/
public static final String REPL_SUCCESS_METRIC = "session.success.count";

/**
* Metric name for tracking the processing time of sessions.
*/
public static final String REPL_PROCESSING_TIME_METRIC = "session.processingTime";

/**
* Metric name for counting the number of statements currently running.
*/
public static final String STATEMENT_RUNNING_METRIC = "statement.running.count";

/**
* Metric name for counting the number of statements that have failed.
*/
public static final String STATEMENT_FAILED_METRIC = "statement.failed.count";

/**
* Metric name for counting the number of statements that have successfully completed.
*/
public static final String STATEMENT_SUCCESS_METRIC = "statement.success.count";

/**
* Metric name for tracking the processing time of statements.
*/
public static final String STATEMENT_PROCESSING_TIME_METRIC = "STATEMENT.processingTime";
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
package org.opensearch.flint.core.metrics;

import com.codahale.metrics.Counter;
import com.codahale.metrics.Timer;
import org.apache.spark.SparkEnv;
import org.apache.spark.metrics.source.FlintMetricSource;
import org.apache.spark.metrics.source.Source;
Expand Down Expand Up @@ -38,6 +39,47 @@ public static void incrementCounter(String metricName) {
}
}

/**
* Decrements the value of the specified metric counter by one, if the counter exists and its current count is greater than zero.
*
* @param metricName The name of the metric counter to be decremented.
*/
public static void decrementCounter(String metricName) {
Counter counter = getOrCreateCounter(metricName);
if (counter != null && counter.getCount() > 0) {
counter.dec();
}
}

/**
* Retrieves a {@link Timer.Context} for the specified metric name, creating a new timer if one does not already exist.
* This context can be used to measure the duration of a particular operation or event.
*
* @param metricName The name of the metric timer to retrieve the context for.
* @return A {@link Timer.Context} instance for timing operations, or {@code null} if the timer could not be created or retrieved.
*/
public static Timer.Context getTimerContext(String metricName) {
Timer timer = getOrCreateTimer(metricName);
if (timer != null) {
return timer.time();
}
return null;
}

/**
* Stops the timer associated with the given {@link Timer.Context}, effectively recording the elapsed time since the timer was started
* and returning the duration. If the context is {@code null}, this method does nothing and returns {@code null}.
*
* @param context The {@link Timer.Context} to stop. May be {@code null}, in which case this method has no effect and returns {@code null}.
* @return The elapsed time in nanoseconds since the timer was started, or {@code null} if the context was {@code null}.
*/
public static Long stopTimer(Timer.Context context) {
if (context != null) {
return context.stop();
}
return null;
}

// Retrieves or creates a new counter for the given metric name
private static Counter getOrCreateCounter(String metricName) {
SparkEnv sparkEnv = SparkEnv.get();
Expand All @@ -54,6 +96,22 @@ private static Counter getOrCreateCounter(String metricName) {
return counter;
}

// Retrieves or creates a new Timer for the given metric name
private static Timer getOrCreateTimer(String metricName) {
SparkEnv sparkEnv = SparkEnv.get();
if (sparkEnv == null) {
LOG.warning("Spark environment not available, cannot instrument metric: " + metricName);
return null;
}

FlintMetricSource flintMetricSource = getOrInitFlintMetricSource(sparkEnv);
Timer timer = flintMetricSource.metricRegistry().getTimers().get(metricName);
if (timer == null) {
timer = flintMetricSource.metricRegistry().timer(metricName);
}
return timer;
}

// Gets or initializes the FlintMetricSource
private static FlintMetricSource getOrInitFlintMetricSource(SparkEnv sparkEnv) {
Seq<Source> metricSourceSeq = sparkEnv.metricsSystem().getSourcesByName(FlintMetricSource.FLINT_METRIC_SOURCE_NAME());
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,17 @@
package org.opensearch.flint.core.metrics;

import com.codahale.metrics.Counter;
import com.codahale.metrics.Timer;
import org.apache.spark.SparkEnv;
import org.apache.spark.metrics.source.FlintMetricSource;
import org.junit.Test;
import org.junit.jupiter.api.Assertions;
import org.mockito.MockedStatic;
import org.mockito.Mockito;

import java.util.concurrent.TimeUnit;

import static org.junit.Assert.assertEquals;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.Mockito.RETURNS_DEEP_STUBS;
import static org.mockito.Mockito.mock;
Expand All @@ -18,7 +23,34 @@
public class MetricsUtilTest {

@Test
public void incOpenSearchMetric() {
public void testIncrementDecrementCounter() {
try (MockedStatic<SparkEnv> sparkEnvMock = mockStatic(SparkEnv.class)) {
// Mock SparkEnv
SparkEnv sparkEnv = mock(SparkEnv.class, RETURNS_DEEP_STUBS);
sparkEnvMock.when(SparkEnv::get).thenReturn(sparkEnv);

// Mock FlintMetricSource
FlintMetricSource flintMetricSource = Mockito.spy(new FlintMetricSource());
when(sparkEnv.metricsSystem().getSourcesByName(FlintMetricSource.FLINT_METRIC_SOURCE_NAME()).head())
.thenReturn(flintMetricSource);

// Test the methods
String testMetric = "testPrefix.2xx.count";
MetricsUtil.incrementCounter(testMetric);
MetricsUtil.incrementCounter(testMetric);
MetricsUtil.decrementCounter(testMetric);

// Verify interactions
verify(sparkEnv.metricsSystem(), times(0)).registerSource(any());
verify(flintMetricSource, times(4)).metricRegistry();
Counter counter = flintMetricSource.metricRegistry().getCounters().get(testMetric);
Assertions.assertNotNull(counter);
Assertions.assertEquals(counter.getCount(), 1);
}
}

@Test
public void testStartStopTimer() {
try (MockedStatic<SparkEnv> sparkEnvMock = mockStatic(SparkEnv.class)) {
// Mock SparkEnv
SparkEnv sparkEnv = mock(SparkEnv.class, RETURNS_DEEP_STUBS);
Expand All @@ -29,14 +61,21 @@ public void incOpenSearchMetric() {
when(sparkEnv.metricsSystem().getSourcesByName(FlintMetricSource.FLINT_METRIC_SOURCE_NAME()).head())
.thenReturn(flintMetricSource);

// Test the method
MetricsUtil.incrementCounter("testPrefix.2xx.count");
// Test the methods
String testMetric = "testPrefix.processingTime";
Timer.Context context = MetricsUtil.getTimerContext(testMetric);
TimeUnit.MILLISECONDS.sleep(500);
MetricsUtil.stopTimer(context);

// Verify interactions
verify(sparkEnv.metricsSystem(), times(0)).registerSource(any());
verify(flintMetricSource, times(2)).metricRegistry();
Assertions.assertNotNull(
flintMetricSource.metricRegistry().getCounters().get("testPrefix.2xx.count"));
Timer timer = flintMetricSource.metricRegistry().getTimers().get(testMetric);
Assertions.assertNotNull(timer);
Assertions.assertEquals(timer.getCount(), 1L);
assertEquals(1.9, timer.getMeanRate(), 0.1);
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ import scala.concurrent.duration.{Duration, MINUTES}
import com.amazonaws.services.s3.model.AmazonS3Exception
import org.opensearch.flint.core.FlintClient
import org.opensearch.flint.core.metadata.FlintMetadata
import org.opensearch.flint.core.metrics.MetricConstants
import org.opensearch.flint.core.metrics.MetricsUtil.incrementCounter
import play.api.libs.json.{JsArray, JsBoolean, JsObject, Json, JsString, JsValue}

import org.apache.spark.{SparkConf, SparkException}
Expand Down Expand Up @@ -401,6 +403,7 @@ trait FlintJobExecutor {
case r: ParseException =>
handleQueryException(r, "Syntax error", spark, dataSource, query, queryId, sessionId)
case r: AmazonS3Exception =>
incrementCounter(MetricConstants.S3_ERR_CNT_METRIC)
handleQueryException(
r,
"Fail to read data from S3. Cause",
Expand Down
Loading

0 comments on commit 9c34a1d

Please sign in to comment.