Skip to content

Commit

Permalink
Add info in GPU Counter failures, misc fixes to validation (#1261)
Browse files Browse the repository at this point in the history
* Add fix for generic validator
* Added more detailed error messages for counter checks
  • Loading branch information
Tanat Boozayaangool authored Jan 9, 2023
1 parent 1a47ef1 commit cf6b6b5
Show file tree
Hide file tree
Showing 4 changed files with 72 additions and 27 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@
import com.google.gapid.models.Devices.DeviceValidationResult.ValidatorType;
import com.google.gapid.models.Models;
import com.google.gapid.proto.device.Device;
import com.google.gapid.proto.service.Service;
import com.google.gapid.rpc.SingleInFlight;
import com.google.gapid.util.Logging;
import com.google.gapid.util.OS;
Expand Down Expand Up @@ -219,9 +218,9 @@ private void displayResult(DeviceValidationResult result) {
}

// Separate error text from help text as a workaround to enable proper text wrapping.
errorMessageGroup =
withLayoutData(
createGroup(this, ""), withSpans(new GridData(SWT.FILL, SWT.TOP, true, false), 3, 1));
GridData gridData = new GridData(SWT.FILL, SWT.TOP, true, false);
gridData.heightHint = 60;
errorMessageGroup = withLayoutData(createGroup(this, ""), withSpans(gridData, 3, 1));
Text errText =
withLayoutData(
createTextbox(errorMessageGroup, SWT.READ_ONLY | SWT.WRAP, result.toString()),
Expand Down
2 changes: 1 addition & 1 deletion gapis/trace/android/generic/validate.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ func NewGenericValidator(device bind.Device) *GenericValidator {
}

func counterChecker() validate.Checker {
return validate.And(validate.IsNumber, validate.CheckLargerThanZero())
return validate.And(validate.IsNumber, validate.Not(validate.CheckAllEqualTo(0)))
}

func (v *GenericValidator) Validate(ctx context.Context, processor *perfetto.Processor) error {
Expand Down
1 change: 1 addition & 0 deletions gapis/trace/android/trace.go
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,7 @@ func (t *androidTracer) Validate(ctx context.Context, enableLocalFiles bool) (*s
return
}
log.I(ctx, "Writing trace size %v bytes to %v", numWritten, file.Name())
res.TracePath = file.Name()
})
if traceLoadingErr != nil {
return nil, traceLoadingErr
Expand Down
89 changes: 67 additions & 22 deletions gapis/trace/android/validate/validate.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ import (
"context"
"fmt"
"math"
"sort"
"strings"

"github.com/google/gapid/core/log"
"github.com/google/gapid/gapis/perfetto"
Expand All @@ -26,7 +28,7 @@ import (
)

const (
counterIDQuery = "select id from gpu_counter_track where name = '%v'"
counterIdQuery = "select id from gpu_counter_track where name = '%v'"
counterValuesQuery = "" +
"select value from counter " +
"where track_id = %v order by ts " +
Expand Down Expand Up @@ -159,40 +161,50 @@ func CheckAverageApproximateTo(num, margin float64) Checker {
}

// ValidateGpuCounters queries for the GPU counter from the trace and
// validates the value based on associated the requirement,
// validates the value based on the associated check,
// up to the amount specified in passThreshold.
func ValidateGpuCounters(ctx context.Context, processor *perfetto.Processor, counters []GpuCounter, passThreshold int) error {
passCount := 0
var failedCounters []GpuCounter
var missingCounters []GpuCounter
counters = trimDuplicates(counters)
if passThreshold > len(counters) {
log.E(ctx, "Received passThreshold (%v) higher than amount of counters (%v)", passThreshold, len(counters))
passThreshold = len(counters)
}

for _, counter := range counters {
queryResult, err := processor.Query(fmt.Sprintf(counterIDQuery, counter.Name))
// Converts counter name to track table ID.
counterIdResult, err := processor.Query(fmt.Sprintf(counterIdQuery, counter.Name))
if err != nil {
return log.Errf(ctx, err, "Failed to query with %v", fmt.Sprintf(counterIDQuery, counter.Name))
}
if len(queryResult.GetColumns()) != 1 {
return log.Errf(ctx, err, "Expected one result with query: %v", fmt.Sprintf(counterIDQuery, counter.Name))
return log.Errf(ctx, err, "Failed to query with %v", fmt.Sprintf(counterIdQuery, counter.Name))
}
var counterID int64
for _, column := range queryResult.GetColumns() {
longValues := column.GetLongValues()
if len(longValues) != 1 {
// This tends to happen when the device fails to report GPU counter values.
return log.Errf(ctx, nil, "Expected one result for %v but got %v", counter, len(longValues))
}
counterID = longValues[0]
break

// Ensure that there's one track table ID for the current counter.
counterIdValues := counterIdResult.GetColumns()[0].GetLongValues()
if len(counterIdValues) == 0 {
log.E(ctx, "Trace does not contain expected counter %v", counter)
missingCounters = append(missingCounters, counter)
continue
} else if len(counterIdValues) != 1 {
// We should never run into situation where there are more than 1 results.
return log.Errf(ctx, nil, "Found unexpected %v results for counter %v", len(counterIdValues), counter)
}
queryResult, err = processor.Query(fmt.Sprintf(counterValuesQuery, counterID, sampleCounter))
trackTableId := counterIdValues[0]

valueQueryResult, err := processor.Query(fmt.Sprintf(counterValuesQuery, trackTableId, sampleCounter))
if err != nil {
return log.Errf(ctx, err, "Failed to query with %v for counter %v", fmt.Sprintf(counterValuesQuery, counterID), counter)
return log.Errf(ctx, err, "Failed to query with %v for counter %v", fmt.Sprintf(counterValuesQuery, trackTableId), counter)
}

// Query exactly #sampleCounter samples, fail if not enough samples
if queryResult.GetNumRecords() != sampleCounter {
return log.Errf(ctx, nil, "Number of samples is incorrect for counter: %v %v", counter, queryResult.GetNumRecords())
if valueQueryResult.GetNumRecords() != sampleCounter {
log.E(ctx, "Expected %v number of samples for counter %v but got: %v", sampleCounter, counter, valueQueryResult.GetNumRecords())
failedCounters = append(failedCounters, counter)
continue
}

if counter.Check(queryResult.GetColumns()[0], queryResult.GetColumnDescriptors()[0].GetType()) {
if counter.Check(valueQueryResult.GetColumns()[0], valueQueryResult.GetColumnDescriptors()[0].GetType()) {
passCount++
if passCount >= passThreshold {
return nil
Expand All @@ -201,7 +213,40 @@ func ValidateGpuCounters(ctx context.Context, processor *perfetto.Processor, cou
failedCounters = append(failedCounters, counter)
}
}
return log.Errf(ctx, nil, "Check failed for counters: %v", failedCounters)

var sb strings.Builder
sb.WriteString(fmt.Sprintf("Passed %v checks out of %v, expected to pass %v check(s). ", passCount, len(counters), passThreshold))

if len(missingCounters) > 0 {
sb.WriteString(fmt.Sprintf("Missing %v counter(s): %v\n", len(missingCounters), missingCounters))
}
if len(failedCounters) > 0 {
sb.WriteString(fmt.Sprintf("Failed check for %v counter(s): %v", len(failedCounters), failedCounters))
}

return log.Errf(ctx, nil, sb.String())
}

// trimDuplicates removes duplicates from the GPU counter array.
func trimDuplicates(counters []GpuCounter) []GpuCounter {
duplicateMap := make(map[int]GpuCounter)
res := make([]GpuCounter, 0)
for _, counter := range counters {
duplicateMap[int(counter.Id)] = counter
}

// Sort the ids to retrieve from the map.
ids := make([]int, 0, len(res))
for id := range duplicateMap {
ids = append(ids, id)
}
sort.Ints(ids)

for _, id := range ids {
res = append(res, duplicateMap[id])
}

return res
}

// GetTrackIDs returns all track ids from gpu_track with the given scope.
Expand Down

0 comments on commit cf6b6b5

Please sign in to comment.