diff --git a/docs/ProfilingDevelopment.md b/docs/ProfilingDevelopment.md
index 309bac05f5..26fa6cd66c 100644
--- a/docs/ProfilingDevelopment.md
+++ b/docs/ProfilingDevelopment.md
@@ -209,9 +209,9 @@ from the VM callbacks and also messing with cpu/wall-time accounting for threads
 
 The `ThreadContext` collector exposes three APIs for GVL profiling:
 
-* `thread_context_collector_on_gvl_waiting(VALUE thread)`
-* `thread_context_collector_on_gvl_running(VALUE thread)`
-* `thread_context_collector_sample_after_gvl_running(VALUE self_instance)`
+* `thread_context_collector_on_gvl_waiting`
+* `thread_context_collector_on_gvl_running`
+* `thread_context_collector_sample_after_gvl_running`
 
 The intuition here is that:
 
diff --git a/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c b/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c
index da5e286bd7..7dbecfde7b 100644
--- a/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c
+++ b/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c
@@ -182,6 +182,10 @@ struct cpu_and_wall_time_worker_state {
     unsigned int after_gvl_running;
     // How many times we skipped the after_gvl_running sampling
     unsigned int gvl_dont_sample;
+    // Min/max/total wall-time spent on gvl sampling
+    uint64_t gvl_sampling_time_ns_min;
+    uint64_t gvl_sampling_time_ns_max;
+    uint64_t gvl_sampling_time_ns_total;
   } stats;
 };
 
@@ -1045,8 +1049,12 @@ static VALUE _native_stats(DDTRACE_UNUSED VALUE self, VALUE instance) {
     ID2SYM(rb_intern("allocations_during_sample")),         /* => */ state->allocation_profiling_enabled ? UINT2NUM(state->stats.allocations_during_sample) : Qnil,
 
     // GVL profiling stats
-    ID2SYM(rb_intern("after_gvl_running")), /* => */ UINT2NUM(state->stats.after_gvl_running),
-    ID2SYM(rb_intern("gvl_dont_sample")),   /* => */ UINT2NUM(state->stats.gvl_dont_sample),
+    ID2SYM(rb_intern("after_gvl_running")),          /* => */ UINT2NUM(state->stats.after_gvl_running),
+    ID2SYM(rb_intern("gvl_dont_sample")),            /* => */ UINT2NUM(state->stats.gvl_dont_sample),
+    ID2SYM(rb_intern("gvl_sampling_time_ns_min")),   /* => */ RUBY_NUM_OR_NIL(state->stats.gvl_sampling_time_ns_min, != UINT64_MAX, ULL2NUM),
+    ID2SYM(rb_intern("gvl_sampling_time_ns_max")),   /* => */ RUBY_NUM_OR_NIL(state->stats.gvl_sampling_time_ns_max, > 0, ULL2NUM),
+    ID2SYM(rb_intern("gvl_sampling_time_ns_total")), /* => */ RUBY_NUM_OR_NIL(state->stats.gvl_sampling_time_ns_total, > 0, ULL2NUM),
+    ID2SYM(rb_intern("gvl_sampling_time_ns_avg")),   /* => */ RUBY_AVG_OR_NIL(state->stats.gvl_sampling_time_ns_total, state->stats.after_gvl_running),
   };
   for (long unsigned int i = 0; i < VALUE_COUNT(arguments); i += 2) rb_hash_aset(stats_as_hash, arguments[i], arguments[i+1]);
   return stats_as_hash;
@@ -1084,8 +1092,10 @@ static void reset_stats_not_thread_safe(struct cpu_and_wall_time_worker_state *s
   //       Given the expected infrequency of resetting (~once per 60s profile) and the auxiliary/non-critical nature of these stats
   //       this momentary loss of accuracy is deemed acceptable to keep overhead to a minimum.
   state->stats = (struct stats) {
-    .cpu_sampling_time_ns_min = UINT64_MAX, // Since we always take the min between existing and latest sample
-    .allocation_sampling_time_ns_min = UINT64_MAX, // Since we always take the min between existing and latest sample
+    // All these values are initialized to their highest value possible since we always take the min between existing and latest sample
+    .cpu_sampling_time_ns_min        = UINT64_MAX,
+    .allocation_sampling_time_ns_min = UINT64_MAX,
+    .gvl_sampling_time_ns_min        = UINT64_MAX,
   };
 }
 
@@ -1369,6 +1379,16 @@ static VALUE _native_resume_signals(DDTRACE_UNUSED VALUE self) {
 
     long wall_time_ns_before_sample = monotonic_wall_time_now_ns(RAISE_ON_FAILURE);
     thread_context_collector_sample_after_gvl_running(state->thread_context_collector_instance, rb_thread_current(), wall_time_ns_before_sample);
+    long wall_time_ns_after_sample = monotonic_wall_time_now_ns(RAISE_ON_FAILURE);
+
+    long delta_ns = wall_time_ns_after_sample - wall_time_ns_before_sample;
+
+    // Guard against wall-time going backwards, see https://github.com/DataDog/dd-trace-rb/pull/2336 for discussion.
+    uint64_t sampling_time_ns = delta_ns < 0 ? 0 : delta_ns;
+
+    state->stats.gvl_sampling_time_ns_min = uint64_min_of(sampling_time_ns, state->stats.gvl_sampling_time_ns_min);
+    state->stats.gvl_sampling_time_ns_max = uint64_max_of(sampling_time_ns, state->stats.gvl_sampling_time_ns_max);
+    state->stats.gvl_sampling_time_ns_total += sampling_time_ns;
 
     state->stats.after_gvl_running++;
 
diff --git a/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb b/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb
index 43045da98a..3626af65b7 100644
--- a/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb
+++ b/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb
@@ -507,7 +507,15 @@
           expect(waiting_for_gvl_time).to be_within(5).percent_of(total_time), \
             "Expected waiting_for_gvl_time to be close to total_time, debug_failures: #{debug_failures}"
 
-          expect(cpu_and_wall_time_worker.stats.fetch(:after_gvl_running)).to be > 0
+          expect(cpu_and_wall_time_worker.stats).to match(
+            hash_including(
+              after_gvl_running: be > 0,
+              gvl_sampling_time_ns_min: be > 0,
+              gvl_sampling_time_ns_max: be > 0,
+              gvl_sampling_time_ns_total: be > 0,
+              gvl_sampling_time_ns_avg: be > 0,
+            )
+          )
         end
 
         context "when 'Waiting for GVL' periods are below waiting_for_gvl_threshold_ns" do
@@ -530,9 +538,21 @@
 
             cpu_and_wall_time_worker.stop
 
-            expect(cpu_and_wall_time_worker.stats.fetch(:after_gvl_running)).to be 0
+            # Note: There may still be "Waiting for GVL" samples in the output, but these samples will come from the
+            # periodic cpu/wall-sampling, not samples directly triggered by the end of a "Waiting for GVL" period.
+
             expect(cpu_and_wall_time_worker.stats.fetch(:gvl_dont_sample))
               .to be > 100 # Arbitrary, on my machine I see 250k on a run
+
+            expect(cpu_and_wall_time_worker.stats).to match(
+              hash_including(
+                after_gvl_running: 0,
+                gvl_sampling_time_ns_min: nil,
+                gvl_sampling_time_ns_max: nil,
+                gvl_sampling_time_ns_total: nil,
+                gvl_sampling_time_ns_avg: nil,
+              )
+            )
           end
         end
       end
@@ -1164,6 +1184,10 @@
           allocations_during_sample: nil,
           after_gvl_running: 0,
           gvl_dont_sample: 0,
+          gvl_sampling_time_ns_min: nil,
+          gvl_sampling_time_ns_max: nil,
+          gvl_sampling_time_ns_total: nil,
+          gvl_sampling_time_ns_avg: nil,
         }
       )
     end