diff --git a/examples/nvperf.jl b/examples/nvperf.jl
new file mode 100644
index 0000000000..8b4e8ce78b
--- /dev/null
+++ b/examples/nvperf.jl
@@ -0,0 +1,15 @@
+using CUDA
+
+# Initialize NVPERF and the CUPTI profiler before querying either of them.
+NVPERF.initialize()
+CUPTI.initialize_profiler()
+
+# Inputs for the evaluator: a counter-availability image and a chip name.
+avail = CUPTI.counter_availability()
+chip = first(NVPERF.supported_chips())
+
+evaluator = NVPERF.CUDAMetricsEvaluator(chip, avail)
+
+NVPERF.list_metrics(evaluator)
+
+@show NVPERF.MetricEvalRequest(evaluator, "dram__bytes.sum.per_second")
\ No newline at end of file
diff --git a/lib/cupti/wrappers.jl b/lib/cupti/wrappers.jl
index b2456f83ba..009f54fe82 100644
--- a/lib/cupti/wrappers.jl
+++ b/lib/cupti/wrappers.jl
@@ -4,8 +4,48 @@ function version()
     VersionNumber(version_ref[])
 end
 
-# function counter_availabiltiy(ctx = context())
-#     params = Ref(CUpti_Profiler_GetCounterAvailability_Params(
-#         CUpti_Profiler_GetCounterAvailability_Params_STRUCT_SIZE,
-#         C_NULL, ctx, 0
-#     ))
+    # params = Ref(CUpti_Profiler_CounterDataImage_CalculateSize_Params(
+    #     CUpti_Profiler_CounterDataImage_CalculateSize_Params_STRUCT_SIZE,
+    #     C_NULL, 0, C_NULL, 0))
+    # cuptiProfilerCounterDataImageCalculateSize(params)
+
+
+"""
+    initialize_profiler()
+
+Call `cuptiProfilerInitialize` with a freshly constructed parameter struct.
+"""
+function initialize_profiler()
+    params = Ref(CUpti_Profiler_Initialize_Params(
+        CUpti_Profiler_Initialize_Params_STRUCT_SIZE,
+        C_NULL))
+    cuptiProfilerInitialize(params)
+end
+
+"""
+    counter_availability(ctx = context())
+
+Return the counter-availability image for `ctx` as a `Vector{UInt8}`.
+
+`cuptiProfilerGetCounterAvailability` is called twice: first with a null buffer to
+learn the image size, then with a buffer of that size to have it filled in.
+"""
+function counter_availability(ctx = context())
+    # 1. Query size
+    params = Ref(CUpti_Profiler_GetCounterAvailability_Params(
+        CUpti_Profiler_GetCounterAvailability_Params_STRUCT_SIZE,
+        C_NULL, ctx.handle, 0, C_NULL))
+    cuptiProfilerGetCounterAvailability(params)
+
+    sz = params[].counterAvailabilityImageSize
+    buffer = Vector{UInt8}(undef, sz)
+
+    # 2. Fill the buffer; it must stay rooted while its raw pointer is in `params`.
+    GC.@preserve buffer begin
+        params = Ref(CUpti_Profiler_GetCounterAvailability_Params(
+            CUpti_Profiler_GetCounterAvailability_Params_STRUCT_SIZE,
+            C_NULL, ctx.handle, sz, pointer(buffer)))
+        cuptiProfilerGetCounterAvailability(params)
+    end
+    return buffer
+end
diff --git a/lib/nvperf/wrappers.jl b/lib/nvperf/wrappers.jl
index 46c32a185f..d2ef392fa6 100644
--- a/lib/nvperf/wrappers.jl
+++ b/lib/nvperf/wrappers.jl
@@ -16,17 +16,180 @@ function supported_chips()
     return names
 end
 
-function scratch_buffer(chipName)
-    GC.@preserve chipName begin
+"""
+    scratch_buffer(chipName, counter_availability)
+
+Return an uninitialized `Vector{UInt8}` whose length is the scratch-buffer size that
+`NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize` reports for `chipName` and
+the `counter_availability` image.
+"""
+function scratch_buffer(chipName, counter_availability)
+    GC.@preserve chipName counter_availability begin
         params = Ref(NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize_Params(
             NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize_Params_STRUCT_SIZE,
-            C_NULL, pointer(chipName), C_NULL, 0
+            C_NULL, pointer(chipName), pointer(counter_availability), 0
         ))
         NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize(params)
-        params[].scrachBufferSize
+        sz = params[].scratchBufferSize
     end
+    return Vector{UInt8}(undef, sz)
 end
 
-# function list_metrics(chipName)
+abstract type MetricsEvaluator end
 
-# end
\ No newline at end of file
+"""
+    CUDAMetricsEvaluator(chip, availability)
+
+Handle to the evaluator created by `NVPW_CUDA_MetricsEvaluator_Initialize`.
+
+`scratch`, `availability` and `chip` are stored next to the handle, which keeps them
+reachable for as long as the evaluator is. A finalizer calls `destroy`.
+"""
+mutable struct CUDAMetricsEvaluator <: MetricsEvaluator
+    handle::Ptr{NVPW_MetricsEvaluator}
+    scratch::Vector{UInt8}
+    availability::Vector{UInt8}
+    chip::String
+
+    function CUDAMetricsEvaluator(chip, availability)
+        scratch = scratch_buffer(chip, availability)
+
+        GC.@preserve chip availability scratch begin
+            params = Ref(NVPW_CUDA_MetricsEvaluator_Initialize_Params(
+                NVPW_CUDA_MetricsEvaluator_Initialize_Params_STRUCT_SIZE,
+                C_NULL, pointer(scratch), length(scratch), pointer(chip),
+                pointer(availability), C_NULL, 0, C_NULL))
+
+            NVPW_CUDA_MetricsEvaluator_Initialize(params)
+            this = new(params[].pMetricsEvaluator, scratch, availability, chip)
+        end
+        finalizer(destroy, this)
+        return this
+    end
+end
+Base.unsafe_convert(::Type{Ptr{NVPW_MetricsEvaluator}}, me::CUDAMetricsEvaluator) = me.handle
+
+
+"""
+    destroy(me::MetricsEvaluator)
+
+Pass the native handle of `me` to `NVPW_MetricsEvaluator_Destroy`; returns `nothing`.
+"""
+function destroy(me::MetricsEvaluator)
+    GC.@preserve me begin
+        params = Ref(NVPW_MetricsEvaluator_Destroy_Params(
+            NVPW_MetricsEvaluator_Destroy_Params_STRUCT_SIZE,
+            C_NULL, Base.unsafe_convert(Ptr{NVPW_MetricsEvaluator}, me)
+        ))
+        NVPW_MetricsEvaluator_Destroy(params)
+    end
+    return nothing
+end
+
+"""
+    MetricsIterator(me, type)
+
+Iterator over the metric names that the evaluator `me` reports for `type`.
+
+`names` and `indices` are raw pointers filled in by `NVPW_MetricsEvaluator_GetMetricNames`;
+`me` is stored alongside them, so the evaluator stays reachable while the iterator is.
+"""
+struct MetricsIterator
+    me::MetricsEvaluator
+    type::NVPW_MetricType
+    names::Ptr{Cchar}
+    indices::Ptr{Csize_t}
+    numMetrics::Csize_t
+
+    function MetricsIterator(me, type)
+        GC.@preserve me begin
+            params = Ref(NVPW_MetricsEvaluator_GetMetricNames_Params(
+                NVPW_MetricsEvaluator_GetMetricNames_Params_STRUCT_SIZE,
+                C_NULL, Base.unsafe_convert(Ptr{NVPW_MetricsEvaluator}, me), type, C_NULL, C_NULL, 0))
+            NVPW_MetricsEvaluator_GetMetricNames(params)
+
+            names = Ptr{Cchar}(params[].pMetricNames)
+            indices = params[].pMetricNameBeginIndices
+
+            return new(me, type, names, indices, params[].numMetrics)
+        end
+    end
+end
+
+# `length` conventionally returns an `Int`; `numMetrics` is stored as a `Csize_t`.
+Base.length(metrics::MetricsIterator) = Int(metrics.numMetrics)
+# Defined on the type, as the iteration interface expects; instances use Base's fallback.
+Base.eltype(::Type{MetricsIterator}) = String
+
+function Base.iterate(metrics::MetricsIterator, state=1)
+    if state <= metrics.numMetrics
+        # `indices[state]` is the byte offset of this metric's C string within `names`.
+        name = unsafe_string(metrics.names + unsafe_load(metrics.indices, state))
+        return (name, state+1)
+    else
+        return nothing
+    end
+end
+
+"""
+    list_metrics(me::MetricsEvaluator)
+
+Print, via `@show`, every metric name of every `NVPW_MetricType` known to `me`.
+"""
+function list_metrics(me::MetricsEvaluator)
+    for i in 0:(NVPW_METRIC_TYPE__COUNT-1)
+        type = NVPW_MetricType(i)
+
+        for metric in MetricsIterator(me, type)
+            @show metric
+        end
+    end
+end
+
+"""
+    submetrics(me::MetricsEvaluator, type)
+
+Return the submetrics that `me` reports as supported for `type`.
+
+The result is an `unsafe_wrap` view of library-returned memory, not a copy.
+"""
+function submetrics(me::MetricsEvaluator, type)
+    GC.@preserve me begin
+        params = Ref(NVPW_MetricsEvaluator_GetSupportedSubmetrics_Params(
+            NVPW_MetricsEvaluator_GetSupportedSubmetrics_Params_STRUCT_SIZE,
+            C_NULL, Base.unsafe_convert(Ptr{NVPW_MetricsEvaluator}, me), type, C_NULL, 0))
+        NVPW_MetricsEvaluator_GetSupportedSubmetrics(params)
+        unsafe_wrap(Array, params[].pSupportedSubmetrics, params[].numSupportedSubmetrics)
+    end
+end
+
+# TODO rollup to string
+# TODO submetric to string
+
+# function submetric(m)
+#     if m == NVPW_SUBMETRIC_PEAK_SUSTAINED
+#         return ".peak_sustained"
+#     elseif
+
+# MetricTypeAndIndex
+
+"""
+    MetricEvalRequest(me::MetricsEvaluator, name)
+
+Wrap the `NVPW_MetricEvalRequest` that the evaluator `me` fills in for metric `name`.
+"""
+struct MetricEvalRequest
+    data::NVPW_MetricEvalRequest
+
+    function MetricEvalRequest(me::MetricsEvaluator, name)
+        eval_request = Ref{NVPW_MetricEvalRequest}()
+        GC.@preserve me name eval_request begin
+            params = Ref(NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest_Params(
+                NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest_Params_STRUCT_SIZE,
+                C_NULL, Base.unsafe_convert(Ptr{NVPW_MetricsEvaluator}, me), pointer(name),
+                Base.unsafe_convert(Ptr{NVPW_MetricEvalRequest}, eval_request), NVPW_MetricEvalRequest_STRUCT_SIZE))
+            NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest(params)
+            return new(eval_request[])
+        end
+    end
+end