Skip to content

Measuring Performance

Siddhartha Kasivajhula edited this page Aug 1, 2023 · 3 revisions

See the Performance Loop for daily workflows related to measuring and iterating on performance optimization.

Regression Report

You can also generate a performance regression report (currently only on the compiler branch in #74) on any aspect of the language as follows:

  1. Check out the reference version of the code.
  2. cd qi-sdk/profile/<benchmark_folder>, where <benchmark_folder> is one of: local (benchmarks of individual forms), nonlocal (tasks exercising many parts of the language), or loading (module load times).
  3. ./report.rkt -f json > /path/to/before.json
  4. Check out the version of the code that you'd like to measure relative to that reference.
  5. ./report.rkt -r /path/to/before.json

TODO: Add Makefile targets for this.

Statistical Benchmarking

Here's a recipe from Ben that generates data and charts like this:

#lang racket

(require plot file/convertible math/statistics pict)

;; write-pict-to-svg : convertible? path-string? -> exact-nonnegative-integer?
;; Converts `p` to SVG bytes (via `convert` from file/convertible) and writes
;; them to the file `f`, returning the result of `write-bytes`.
;;
;; An existing file at `f` is overwritten. Without `#:exists`,
;; `with-output-to-file` raises an error when the file already exists, which
;; made every run after the first one fail at the final "save the chart" step.
(define (write-pict-to-svg p f)
  (with-output-to-file f
    #:exists 'truncate/replace
    (thunk
      (write-bytes (convert p 'svg-bytes)))))

;; not-o-any : any/c ... -> boolean?
;; Benchmark subject, "not ∘ any" formulation: returns #t when no argument is
;; truthy (including when called with no arguments), #f otherwise.
;; `for/or` stops at the first truthy argument.
(define (not-o-any . args)
  (not (for/or ([arg (in-list args)])
         arg)))

;; for/and-over-not : any/c ... -> boolean?
;; Benchmark subject, "all ∘ not" formulation: returns #t when every argument
;; is #f (including when called with no arguments), #f otherwise.
;; `for/and` stops at the first argument whose negation is #f.
(define (for/and-over-not . args)
  (for/and ([arg (in-list args)])
    (not arg)))

;; race-threads1 : any/c ... -> boolean?
;; Runs `not-o-any` and `for/and-over-not` on `args` in two threads (the
;; `not-o-any` thread is started first) and returns whichever result is
;; delivered first. Both threads are killed before returning.
;;
;; Results are delivered over a channel created for this call rather than
;; through the calling thread's mailbox. With the mailbox, a losing thread
;; that finished before being killed left its message queued, so the *next*
;; race would return that stale value immediately -- corrupting both the
;; result and the measured time. A channel put only completes when it is
;; received, so the loser simply blocks until it is killed.
(define (race-threads1 . args)
  (define result-ch (make-channel))
  (define t1 (thread (thunk (channel-put result-ch (apply not-o-any args)))))
  (define t2 (thread (thunk (channel-put result-ch (apply for/and-over-not args)))))
  (begin0 (channel-get result-ch)
    (kill-thread t1)
    (kill-thread t2)))

;; race-threads2 : any/c ... -> boolean?
;; Same race as `race-threads1` but with the opposite start order: the
;; `for/and-over-not` thread is started before the `not-o-any` thread.
;; Returns whichever result is delivered first; both threads are killed
;; before returning.
;;
;; Results are delivered over a channel created for this call rather than
;; through the calling thread's mailbox. With the mailbox, a losing thread
;; that finished before being killed left its message queued, so the *next*
;; race would return that stale value immediately -- corrupting both the
;; result and the measured time. A channel put only completes when it is
;; received, so the loser simply blocks until it is killed.
(define (race-threads2 . args)
  (define result-ch (make-channel))
  (define t2 (thread (thunk (channel-put result-ch (apply for/and-over-not args)))))
  (define t1 (thread (thunk (channel-put result-ch (apply not-o-any args)))))
  (begin0 (channel-get result-ch)
    (kill-thread t1)
    (kill-thread t2)))

;; generate : exact-nonnegative-integer? real? -> (listof (or/c #f real?))
;; Builds a list of `n` entries. Each entry is a fresh `(random)` draw when
;; that draw is at least `freq`, and #f otherwise -- so a larger `freq`
;; yields more #f entries.
(define (generate n freq)
  (define (sample _index)
    (let ([draw (random)])
      (if (>= draw freq) draw #f)))
  (build-list n sample))

;; Input sizes to benchmark: 10^5, 10^6 and 10^7 arguments.
(define ns
  (for/list ([exponent (in-range 5 8)])
    (expt 10 exponent)))
;; Thresholds passed to `generate` as its `freq` argument.
(define freqs '(0.0 0.3 0.7 1.0))
;; The implementations being compared.
(define fs
  (list not-o-any
        for/and-over-not
        race-threads1
        race-threads2))

;; One benchmark configuration plus (once it has been run) its timings.
(struct experiment
  (f       ; procedure under test
   n       ; number of arguments generated for it
   freq    ; threshold handed to `generate`
   stats)  ; #f until run, then a `stats` instance
  #:transparent)

;; Summary of the timings collected for one experiment.
(struct stats
  (min mean max stddev)
  #:transparent)

;; Every (n, freq, f) combination as a not-yet-run experiment. `n` varies
;; slowest and `f` fastest.
(define experiments
  (for*/list ([n (in-list ns)]
              [freq (in-list freqs)]
              [f (in-list fs)])
    (experiment f n freq #f)))

;; How many timed runs are collected per experiment.
(define n-trials 5)

;; run-experiment : experiment? -> experiment?
;; Accepts only an experiment whose stats field is #f. Runs `n-trials` timed
;; applications of its procedure, each on a freshly generated argument list,
;; and returns a copy of the experiment with the stats field filled in from
;; the real (wall-clock) times reported by `time-apply`.
;; The argument list is built before `time-apply` is entered, so generation
;; is not part of the timed call.
(define/match (run-experiment _e)
  [((experiment f n freq #f))
   (define (time-one-trial)
     (let-values ([(_results _cpu-ms real-ms _gc-ms)
                   (time-apply f (generate n freq))])
       real-ms))
   (define samples
     (for/list ([_trial (in-range n-trials)])
       (time-one-trial)))
   (experiment f n freq (make-stats samples))])

;; make-stats : (listof real?) -> stats?
;; Summarises a list of timings as a `stats` value holding their minimum,
;; mean, maximum and standard deviation. `mean` and `stddev/mean` are used
;; with their default arguments.
(define (make-stats times)
  (let* ([average (mean times)]
         [spread (stddev/mean average times)]
         [fastest (apply min times)]
         [slowest (apply max times)])
    (stats fastest average slowest spread)))

;; experiments->renderers : (listof experiment?) real? real? -> list?
;; Builds the plot renderers for one series of already-run experiments.
;;   es    -- the experiments; the series label is the `object-name` of the
;;            first one's procedure
;;   skip  -- passed to `discrete-histogram` as #:skip and used as the
;;            horizontal distance between this series' bars
;;   x-min -- passed to `discrete-histogram` as #:x-min; also selects the
;;            bar colour (x-min + 1)
;; Returns a two-element list: the histogram of mean times (one bar per
;; experiment, ordered by n) and a list holding the error bars built from
;; each experiment's mean and stddev.
(define (experiments->renderers es skip x-min)
  (define series-name (object-name (experiment-f (first es))))
  (define sorted-es (sort es < #:key experiment-n))
  ;; Horizontal centre of the idx-th bar of this series.
  (define (bar-center idx)
    (+ 0.5 x-min (* idx skip)))
  (define bars
    (discrete-histogram
      (for/list ([e (in-list sorted-es)])
        (match e
          [(experiment _ size _ (stats _ avg _ _))
           (vector (format "n = ~a" size) avg)]))
      #:skip skip
      #:x-min x-min
      #:label (~a series-name)
      #:color (add1 x-min)))
  (define spread
    (error-bars
      (for/list ([e (in-list sorted-es)]
                 [idx (in-naturals)])
        (match (experiment-stats e)
          [(stats _ avg _ sd)
           (vector (bar-center idx) avg sd)]))))
  (define overlays
    (list
      spread
      ;; Disabled: min/max point labels for each bar.
      #;
      (for/list ([e (in-list sorted-es)]
                 [idx (in-naturals)])
        (match (experiment-stats e)
          [(stats lo _ hi _)
           (list (point-label (vector (bar-center idx) lo) "min")
                 (point-label (vector (bar-center idx) hi) "max"))]))))
  (list bars overlays))

;; Entry point: runs every experiment, then draws one log-scale chart per
;; freq value (stacked vertically, ordered by freq), shows the result and
;; saves it as bench.svg.
(module+ main
  ;; Fixed seed so the generated inputs are the same on every run.
  (random-seed 0)
  (displayln "running experiments")
  (define results
    (time (for/list ([e (in-list experiments)])
            (run-experiment e))))
  (displayln "plotting")

  ;; A group is a list of series (one per procedure) sharing one freq value.
  (define (group-freq group)
    (experiment-freq (caar group)))
  (define groups
    (sort (for/list ([same-freq (in-list (group-by experiment-freq results))])
            (group-by experiment-f same-freq))
          <
          #:key group-freq))

  ;; Each series' bars are spaced by the number of procedures.
  (define skip (length fs))

  ;; Chart for a single freq value.
  (define (plot-group group)
    (parameterize ([plot-y-transform (axis-transform-bound log-transform 0.01 +inf.0)]
                   [plot-y-ticks (log-ticks)])
      (plot-pict
        (for/list ([exps (in-list group)]
                   [series-index (in-naturals)])
          (experiments->renderers exps skip series-index))
        #:title (format "Frequency: ~a" (group-freq group))
        #:width (* 4 (plot-width))
        #:y-min 0.001
        #:x-label "n"
        #:y-label "t")))

  (define p (apply vc-append (map plot-group groups)))
  (show-pict p)
  (write-pict-to-svg p "bench.svg"))

TODO: Incorporate this into the SDK's generic benchmarking facilities at some point.

Clone this wiki locally