scan

#! /usr/bin/env python3

import argparse
import sys
import os

from options import OptionParser
from multirun import *

def parseArguments():
  """Build the command line parser for the scan and parse sys.argv.

  Starts from the shared OptionParser, changes the default of its 'csv'
  option to 'scan.csv', and registers the options that are specific to
  the scan (--steps, --min-step, --max-step, --events-extra-per-thread
  and --events-limit).

  Returns the result of OptionParser.parse() applied to the command line
  arguments (sys.argv without the program name).

  Raises ValueError if the underlying parser has no action whose
  destination is 'csv'.
  """
  option_parser = OptionParser()
  argparser = option_parser.parser

  # Change the default to use a CSV file: look up the action that stores
  # into 'csv' and patch both its default value and its help message
  actions = argparser._actions
  destinations = [action.dest for action in actions]
  csv_action = actions[destinations.index('csv')]
  csv_action.default = 'scan.csv'
  csv_action.help = csv_action.help.replace('None', 'scan.csv')

  # Options specific to scan
  argparser.add_argument(
    '--steps',
    nargs = '+',
    type = int,
    default = None,
    help = 'list, or None to make a linear scan from min_step to max_step [default: None]')

  argparser.add_argument(
    '--min-step',
    dest = 'min_step',
    type = int,
    default = 1,
    help = 'minimum number of cores to scan [default: 1]')

  argparser.add_argument(
    '--max-step',
    dest = 'max_step',
    type = int,
    default = None,
    help = 'maximum number of cores to scan. Use None to guess based on the number of available cores (or threads) and concurrent jobs [default: None]')

  argparser.add_argument(
    '--events-extra-per-thread',
    dest = 'events_extra_per_thread',
    type = int,
    default = 0,
    help = 'increase by this amount the number of events per thread [default: 0]')

  argparser.add_argument(
    '--events-limit',
    dest = 'events_limit',
    type = int,
    default = 0,
    help = 'if not 0, limit the total number of events to be processed [default: 0]')

  # skip the program name and parse the remaining arguments
  arguments = sys.argv[1:]
  return option_parser.parse(arguments)


def printJobInfo(opts, steps):
  """Print a summary of the configuration selected for the scan.

  opts  -- object exposing the attributes config, run_io_benchmark, csv,
           logdir, events_limit, jobs, repeats, threads and streams
  steps -- the steps that will be scanned, printed as-is

  The summary is written to the standard output; nothing is returned.
  """
  template = """
Selected Configuration:
  - config:       {}
  - steps:        {}
  - IO benchmark: {}
  - csv file:     {}
  - logdir:       {}
  - events_limit: {}
  - jobs:         {}
  - repeat:       {}
  - threads:      {}
  - streams:      {}
"""
  # values listed in the same order as the placeholders in the template
  values = (
    opts.config,
    steps,
    opts.run_io_benchmark,
    opts.csv,
    opts.logdir,
    opts.events_limit,
    opts.jobs,
    opts.repeats,
    opts.threads,
    opts.streams,
  )
  summary = template.format(*values)
  print(summary)


if __name__ == "__main__":
  # Script entry point: run the configuration given on the command line once
  # per "step", deriving the number of threads, streams and jobs of each run
  # from the step value unless they are fixed explicitly by the user.

  if 'CMSSW_BASE' not in os.environ:
    print('Error: the CMS environment is not set up, please run "cmsenv" or "eval `scram runtime -sh`".')
    sys.exit(1)

  opts = parseArguments()

  process = parseProcess(opts.config)

  # open the CSV output (line buffered), if requested; it is shared by all the
  # steps and closed in the "finally" clause below, even if a step fails
  csv_file = open(opts.csv, 'wt', buffering=1, encoding='utf-8') if opts.csv else None

  try:
    # common options for multirun
    options = {
      'verbose'             : opts.verbose,
      'plumbing'            : opts.plumbing,
      'warmup'              : opts.warmup,
      'events'              : opts.events,
      'resolution'          : opts.event_resolution,
      'skipevents'          : opts.event_skip,
      'repeats'             : opts.repeats,
      'wait'                : opts.wait,
      'jobs'                : opts.jobs,
      'threads'             : opts.threads,
      'streams'             : opts.streams,
      'gpus_per_job'        : opts.gpus_per_job,
      'allow_hyperthreading': opts.allow_hyperthreading,
      'set_numa_affinity'   : opts.numa_affinity,
      'set_cpu_affinity'    : opts.cpu_affinity,
      'set_gpu_affinity'    : opts.gpu_affinity,
      'slots'               : opts.slots,
      'executable'          : opts.executable,
      'data'                : csv_file,
      'header'              : opts.csvheader,
      # an empty logdir is normalised to None, meaning "no logs"
      'logdir'              : opts.logdir if opts.logdir else None,
      'tmpdir'              : opts.tmpdir,
      'keep'                : opts.keep,
      'automerge'           : opts.automerge,
      'autodelete'          : opts.autodelete,
      'autodelete_delay'    : opts.autodelete_delay,
    }

    # print a system overview
    info()

    # check the available cpus: count the hardware threads if hyperthreading is
    # allowed, or the physical processors otherwise
    cpus = get_cpu_info()
    if options['allow_hyperthreading']:
      count = sum(len(cpu.hardware_threads) for cpu in cpus.values())
    else:
      count = sum(len(cpu.physical_processors) for cpu in cpus.values())

    # if no maximum step was given, guess it from the number of available cpus
    # and the number of concurrent jobs
    max_step = opts.max_step
    if max_step is None:
      if options['jobs'] is None:
        max_step = count
      else:
        max_step = count // options['jobs']

    # use the explicit list of steps, or a linear scan
    steps = opts.steps
    if not opts.steps:
      steps = list(range(opts.min_step, max_step + 1))

    # prepare a trimmed down configuration for benchmarking only reading the input data
    if opts.run_io_benchmark:
      io_process = copy.deepcopy(process)
      io_process.hltGetRaw = cms.EDAnalyzer("HLTGetRaw", RawDataCollection = cms.InputTag("rawDataCollector"))
      io_process.path = cms.Path(io_process.hltGetRaw)
      io_process.schedule = cms.Schedule(io_process.path)
      if 'PrescaleService' in io_process.__dict__:
        del io_process.PrescaleService

    printJobInfo(opts, steps)

    # make a copy of the options to be updated during the scan
    step_opt = dict(options)

    # do not wait before the first step
    wait = False

    for step in steps:
      # update the options for each step: values set explicitly by the user are
      # kept, the others are derived from the step; the number of jobs is the
      # number of cpus divided by the step, rounded up
      step_opt['threads'] = options['threads'] if options['threads'] is not None else step
      step_opt['streams'] = options['streams'] if options['streams'] is not None else step
      step_opt['jobs']    = options['jobs']    if options['jobs']    is not None else (count + step - 1) // step

      # if the logs are enabled, use a separate directory for each step
      if options['logdir'] is not None:
        step_opt['logdir'] = options['logdir'] + '/' + 'scan_%d' % step

      # update the number of events to process based on the number of threads
      if opts.events_extra_per_thread > 0:
        step_opt['events'] = options['events'] + opts.events_extra_per_thread * step_opt['threads']

      # if requested, cap the number of events to be processed
      if opts.events_limit > 0:
        step_opt['events'] = min(step_opt['events'], opts.events_limit)

      # benchmark reading the input data, without logs, kept files or CSV output
      if opts.run_io_benchmark:
        print('Benchmarking only I/O')
        io_options = dict(step_opt, logdir = None, keep = [], data = None)
        multiCmsRun(io_process, **io_options)
        print()
        # schedule a wait after the I/O benchmark
        if opts.wait > 0:
          wait = True

      # wait the required number of seconds after the I/O benchmark and between each iteration
      if wait:
        time.sleep(opts.wait)

      # run
      multiCmsRun(process, **step_opt)
      # schedule a wait between each iteration
      if opts.wait > 0:
        wait = True
      # if the input files do not depend on the job configuration, warm up only once
      if opts.events_extra_per_thread == 0:
        step_opt['warmup'] = False

      # print the CSV header only once
      step_opt['header'] = False

  finally:
    # close the CSV file, whether the scan completed or was interrupted
    if csv_file is not None:
      csv_file.close()