diff --git a/util/plaidbench/extract-plaidbench-data.py b/util/plaidbench/extract-plaidbench-data.py index 75b193b7..7627241b 100644 --- a/util/plaidbench/extract-plaidbench-data.py +++ b/util/plaidbench/extract-plaidbench-data.py @@ -39,9 +39,9 @@ ... ''' +import argparse # Used to parse commandline arguments import os # Used for scanning directories, getting paths, and checking files. import xlwt # Used to create excel spreadsheets. -import argparse # Used to parse commandline arguments # Contains all of the stats benchStats = {} @@ -388,7 +388,7 @@ def main(args): if __name__ == '__main__': parser = argparse.ArgumentParser(description='Script to extract plaidbench data.', - formatter_class=argparse.ArgumentDefaultsHelpFormatter) + formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument('--verbose', '-v', action='store_true', default=False, diff --git a/util/plaidbench/extract-shoc-data.py b/util/plaidbench/extract-shoc-data.py new file mode 100644 index 00000000..5757bc52 --- /dev/null +++ b/util/plaidbench/extract-shoc-data.py @@ -0,0 +1,869 @@ +#!/usr/bin/python3 +''' +Extract execution-time scores from SHOC runs. Will look for the results of each +individual benchmark in the "Logs" directory. +Ex: optsched-run-01/Logs/dev0_FFT.log + +Since SHOC doesn't allow you to select only a subset of benchmarks to run, you +can use this command to run the level1 benchmarks we are interested in: +cd $SHOC/build +mkdir optsched-01 && cd optsched-01 +mkdir shoc-fft && cd $_ && ../../bin/shocdriver -opencl -benchmark FFT -s 4 && cd .. && \ +mkdir shoc-gemm && cd $_ && ../../bin/shocdriver -opencl -benchmark GEMM -s 4 && cd .. && \ +mkdir shoc-md && cd $_ && ../../bin/shocdriver -opencl -benchmark MD -s 4 && cd .. && \ +mkdir shoc-sort && cd $_ && ../../bin/shocdriver -opencl -benchmark Sort -s 4 && cd .. && \ +mkdir shoc-spmv && cd $_ && ../../bin/shocdriver -opencl -benchmark Spmv -s 4 && cd .. && \ +mkdir shoc-stencil2d && cd $_ && ../../bin/shocdriver -opencl -benchmark Stencil2D -s 4 && cd .. + +Then you must copy the logs and err files over to the Logs directory. 
+mkdir Logs +cp shoc-*/Logs/*.err Logs +cp shoc-*/Logs/*.log Logs + +Feed the result into this script +./extract-shoc-data.py optsched-run-01/ +''' + +import argparse +import os +import re +import statistics +from openpyxl import Workbook +from openpyxl.styles import Font + +RE_BENCHMARK_NAME = re.compile('Running benchmark (.*)') +RE_BENCHMARK_RESULTS = re.compile('result for (.*):(\s)*(.*) (.*)') + + +# Name of the results.txt file +filenameShoc = 'shoc.log' + +displayOrder = { + 'FFT': ['fft_sp', 'ifft_sp', 'fft_dp', 'ifft_dp'], + 'GEMM': ['sgemm_n', 'sgemm_t', 'dgemm_n', 'dgemm_t'], + 'MD': ['md_sp_flops', 'md_sp_bw', 'md_dp_flops', 'md_dp_bw'], + 'Sort': ['sort'], + 'Spmv': [ + 'spmv_csr_scalar_sp', + 'spmv_csr_scalar_dp', + 'spmv_csr_scalar_pad_sp', + 'spmv_csr_scalar_pad_dp', + 'spmv_csr_vector_sp', + 'spmv_csr_vector_dp', + 'spmv_csr_vector_pad_sp', + 'spmv_csr_vector_pad_dp', + 'spmv_ellpackr_sp', + 'spmv_ellpackr_dp' + ], + 'Stencil2D': ['stencil', 'stencil_dp'], + # 'S3D': ['s3d', 's3d_dp'], +} + +benchmarks = [ + 'BusSpeedDownload', + 'BusSpeedReadback', + 'MaxFlops', + 'DeviceMemory', + 'KernelCompile', + 'QueueDelay', + 'BFS', + 'FFT', + 'GEMM', + 'MD', + 'MD5Hash', + 'Reduction', + 'Scan', + 'Sort', + 'Spmv', + 'Stencil2D', + 'Triad', + 'S3D' +] + +dataFormat = [ + 'test', + 'atts', + 'units', + 'median', + 'mean', + 'stddev', + 'min', + 'max', + 'trial0', + 'trial1', + 'trial2', + 'trial3', + 'trial4', + 'trial5', + 'trial6', + 'trial7', + 'trial8', + 'trial9', +] + +queueDelayDataFormat = [ + 'test', + 'atts', + 'units', + 'median', + 'mean', + 'stddev', + 'min', + 'max', + 'trial0', + 'trial1', + 'trial2', +] + + +def parseStats(inputFolder): + # Get the path to the logs folder + currentPath = os.path.join(inputFolder, 'Logs') + stats = { + # 'BusSpeedDownload' : processBusSpeedDownload(currentPath), + # 'BusSpeedReadback' : processBusSpeedReadback(currentPath), + # 'MaxFlops' : processMaxFlops(currentPath), + # 'DeviceMemory' : processDeviceMemory(currentPath), + # 'KernelCompile' : processKernelCompile(currentPath), + # 'QueueDelay' : processQueueDelay(currentPath), + # 'BFS' : processBFS(currentPath), + 'FFT': processFTT(currentPath), + 'GEMM': processGEMM(currentPath), + 'MD': processMD(currentPath), + # 'MD5Hash' : processMD5Hash(currentPath), + # 'Reduction' : processReduction(currentPath), + # 'Scan' : processScan(currentPath), + 'Sort': processSort(currentPath), + 'Spmv': processSpmv(currentPath), + 'Stencil2D': processStencil2D(currentPath), + # 'Triad' : processTriad(currentPath), + # 'S3D' : processS3D(currentPath) + } + return stats + + +def printStats(stats): + for benchmark in stats: + print('{}'.format(benchmark)) + for test in stats[benchmark]: + if '_pcie' in test: + continue + print(' {}'.format(test)) + print(' {}'.format(stats[benchmark][test])) + + +def createSpreadsheet(inputFolder, output, stats): + if 'xls' not in output[-4:]: + output += '.xlsx' + + # Create new excel worksheet + wb = Workbook() + + # Grab the active worksheet + ws = wb.active + + # Insert column titles + ws['A1'] = 'Benchmarks' + ws['A1'].font = Font(bold=True) + ws['C1'] = inputFolder + ws['C1'].font = Font(bold=True) + ws['C2'] = 'Score' + ws['D2'] = 'Units' + ws['E2'] = 'RV' + + col = 'A' + row = 3 + for benchmark in displayOrder: + ws[col+str(row)] = benchmark + + for test in displayOrder[benchmark]: + ws[chr(ord(col)+1)+str(row)] = test + + scores = [float(stats[benchmark][test]['trial9']), + float(stats[benchmark][test]['trial8']), + float(stats[benchmark][test]['trial7']), 
+ float(stats[benchmark][test]['trial6']), + float(stats[benchmark][test]['trial5']), + ] + + median = statistics.median(scores) + randomVar = (max(scores) - min(scores)) / min(scores) + + ws[chr(ord(col)+2)+str(row)] = median + ws[chr(ord(col)+3)+str(row)] = stats[benchmark][test]['units'] + ws[chr(ord(col)+4)+str(row)] = randomVar + row += 1 + + wb.save(output) + +# Input: Logs folder +# Output: One dictionarary + + +def processBusSpeedDownload(folder): + stats = {} + filename = 'dev0_BusSpeedDownload.log' + filepath = os.path.join(folder, filename) + if os.path.exists(filepath): + with open(filepath) as f: + for line in f: + # Process the line as a space delimited list + curLine = line.split() + if len(curLine) > 2: + # The line we want to check is usually the line with the + # biggest size. + if curLine[1] == '524288kB' and \ + curLine[0] == 'DownloadSpeed': + # Insert the stats into a dictionary with its + # coressponding data format name as the key. + stats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + else: + print('Cannot find log file {}'.format(filename)) + + return {'bspeed_download': stats} + +# Input: Logs folder +# Output: One dictionarary, two entries: +# bspeed_download (type dict) +# bspeed_readback (type dict) + + +def processBusSpeedReadback(folder): + stats = {} + filename = 'dev0_BusSpeedReadback.log' + filepath = os.path.join(folder, filename) + if os.path.exists(filepath): + with open(filepath) as f: + for line in f: + # Process the line as a space delimited list + curLine = line.split() + if len(curLine) > 2: + # The line we want to check is usually the line with the + # biggest size. + if curLine[1] == '524288kB' and curLine[0] == 'ReadbackSpeed': + # Insert the stats into a dictionary with its coressponding + # data format name as the key. + stats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + else: + print('Cannot find log file {}'.format(filename)) + + return {'bspeed_readback': stats} + + +def processMaxFlops(folder): + bestSP = {} + bestDP = {} + filename = 'dev0_MaxFlops.log' + filepath = os.path.join(folder, filename) + if os.path.exists(filepath): + with open(filepath) as f: + # Track current state on whether to process data. + dataStart = False + maxFlopsSP = 0.0 + maxFlopsDP = 0.0 + curTest = '' + + for line in f: + curLine = line.split() + # There is a blank newline which indicates the end. + # Check if we reached that newline. + # Stop processing data if we reached end. + if len(curLine) == 0: + dataStart = False + + # We are in the middle of the data block. + # Process data. 
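+                # Map the positional row onto the dataFormat column names
+                # (test, atts, units, median, mean, ...) so each value can be
+                # looked up by name below.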
+ elif dataStart: + curStat = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + curTest = curStat['test'].split('-')[-1] + + if curTest == 'SP': + if float(curStat['median']) > maxFlopsSP: + maxFlopsSP = float(curStat['max']) + bestSP = curStat + elif curTest == 'DP': + if float(curStat['median']) > maxFlopsDP: + maxFlopsDP = float(curStat['max']) + bestDP = curStat + + # Current line reached where the data is located + # Start processing data + elif curLine == dataFormat: + dataStart = True + else: + print('Cannot find log file {}'.format(filename)) + + return {'maxspflops': bestSP, 'maxdpflops': bestDP} + + +def processDeviceMemory(folder): + stats = {} + filename = 'dev0_DeviceMemory.log' + max_readGlobalMemoryCoalesced = -1.0 + max_readGlobalMemoryUnit = -1.0 + max_writeGlobalMemoryCoalesced = -1.0 + max_writeGlobalMemoryUnit = -1.0 + max_readLocalMemory = -1.0 + max_writeLocalMemory = -1.0 + max_TextureRepeatedRandomAccess = -1.0 + filepath = os.path.join(folder, filename) + if os.path.exists(filepath): + with open(filepath) as f: + for line in f: + curLine = line.split() + if curLine: + if curLine[0] == 'readGlobalMemoryCoalesced': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + if float(curStats['median']) > max_readGlobalMemoryCoalesced: + max_readGlobalMemoryCoalesced = float( + curStats['median']) + stats['gmem_readbw'] = curStats + elif curLine[0] == 'readGlobalMemoryUnit': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + if float(curStats['median']) > max_readGlobalMemoryUnit: + max_readGlobalMemoryUnit = float( + curStats['median']) + stats['gmem_readbw_strided'] = curStats + elif curLine[0] == 'writeGlobalMemoryCoalesced': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + if float(curStats['median']) > max_writeGlobalMemoryCoalesced: + max_writeGlobalMemoryCoalesced = float( + curStats['median']) + stats['gmem_writebw'] = curStats + elif curLine[0] == 'writeGlobalMemoryUnit': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + if float(curStats['median']) > max_writeGlobalMemoryUnit: + max_writeGlobalMemoryUnit = float( + curStats['median']) + stats['gmem_writebw_strided'] = curStats + elif curLine[0] == 'readLocalMemory': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + if float(curStats['median']) > max_readLocalMemory: + max_readLocalMemory = float(curStats['median']) + stats['lmem_readbw'] = curStats + elif curLine[0] == 'writeLocalMemory': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + if float(curStats['median']) > max_writeLocalMemory: + max_writeLocalMemory = float(curStats['median']) + stats['lmem_writebw'] = curStats + elif curLine[0] == 'TextureRepeatedRandomAccess': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + if float(curStats['median']) > max_TextureRepeatedRandomAccess: + max_TextureRepeatedRandomAccess = float( + curStats['median']) + stats['tex_readbw'] = curStats + return stats + + +def processKernelCompile(folder): + stats = {} + minCompileTime = 9999999.0 + filename = 'dev0_KernelCompile.log' + filepath = os.path.join(folder, filename) + if os.path.exists(filepath): + with open(filepath) as f: + for line in f: + curLine = line.split() + if curLine: + if curLine[0] == 'BuildProgram': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + if float(curStats['median']) < minCompileTime: + minCompileTime = 
float(curStats['median']) + stats = curStats + else: + print('Cannot find log file {}'.format(filename)) + + return {'ocl_kernel': stats} + + +def processQueueDelay(folder): + stats = {} + minDelay = 9999999.0 + filename = 'dev0_QueueDelay.log' + filepath = os.path.join(folder, filename) + if os.path.exists(filepath): + with open(filepath) as f: + for line in f: + curLine = line.split() + if curLine: + if curLine[0] == 'SSDelay': + curStats = {queueDelayDataFormat[i]: curLine[i] for i in range( + len(queueDelayDataFormat))} + if float(curStats['median']) < minDelay: + minDelay = float(curStats['median']) + stats = curStats + else: + print('Cannot find log file {}'.format(filename)) + + return {'ocl_queue': stats} + + +def processBFS(folder): + stats = {} + filename = 'dev0_BFS.log' + filepath = os.path.join(folder, filename) + if os.path.exists(filepath): + with open(filepath) as f: + for line in f: + curLine = line.split() + if curLine: + if curLine[0] == 'BFS': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + stats['bfs'] = curStats + elif curLine[0] == 'BFS_PCIe': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + stats['bfs_pcie'] = curStats + elif curLine[0] == 'BFS_teps': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + stats['bfs_teps'] = curStats + + return stats + + +def processFTT(folder): + stats = {} + filename = 'dev0_FFT.log' + filepath = os.path.join(folder, filename) + if os.path.exists(filepath): + with open(filepath) as f: + for line in f: + curLine = line.split() + if curLine: + if curLine[0] == 'SP-FFT': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + stats['fft_sp'] = curStats + elif curLine[0] == 'SP-FFT_PCIe': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + stats['fft_sp_pcie'] = curStats + elif curLine[0] == 'SP-FFT-INV': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + stats['ifft_sp'] = curStats + elif curLine[0] == 'SP-FFT-INV_PCIe': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + stats['ifft_sp_pcie'] = curStats + elif curLine[0] == 'DP-FFT': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + stats['fft_dp'] = curStats + elif curLine[0] == 'DP-FFT_PCIe': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + stats['fft_dp_pcie'] = curStats + elif curLine[0] == 'DP-FFT-INV': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + stats['ifft_dp'] = curStats + elif curLine[0] == 'DP-FFT-INV_PCIe': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + stats['ifft_dp_pcie'] = curStats + + return stats + + +def processGEMM(folder): + stats = {} + filename = 'dev0_GEMM.log' + filepath = os.path.join(folder, filename) + if os.path.exists(filepath): + with open(filepath) as f: + for line in f: + curLine = line.split() + if curLine: + if curLine[0] == 'SGEMM-N': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + stats['sgemm_n'] = curStats + elif curLine[0] == 'SGEMM-T': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + stats['sgemm_t'] = curStats + elif curLine[0] == 'SGEMM-N_PCIe': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + stats['sgemm_n_pcie'] = curStats + elif curLine[0] == 'SGEMM-T_PCIe': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + stats['sgemm_t_pcie'] = curStats 
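+                    # Double-precision (DGEMM) results and their PCIe-inclusive
+                    # variants are parsed the same way as the SGEMM entries above.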
+ elif curLine[0] == 'DGEMM-N': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + stats['dgemm_n'] = curStats + elif curLine[0] == 'DGEMM-T': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + stats['dgemm_t'] = curStats + elif curLine[0] == 'DGEMM-N_PCIe': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + stats['dgemm_n_pcie'] = curStats + elif curLine[0] == 'DGEMM-T_PCIe': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + stats['dgemm_t_pcie'] = curStats + + return stats + + +def processMD(folder): + stats = {} + filename = 'dev0_MD.log' + filepath = os.path.join(folder, filename) + if os.path.exists(filepath): + with open(filepath) as f: + for line in f: + curLine = line.split() + if curLine: + if curLine[0] == 'MD-LJ': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + stats['md_sp_flops'] = curStats + elif curLine[0] == 'MD-LJ-Bandwidth': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + stats['md_sp_bw'] = curStats + elif curLine[0] == 'MD-LJ_PCIe': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + stats['md_sp_flops_pcie'] = curStats + elif curLine[0] == 'MD-LJ-Bandwidth_PCIe': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + stats['md_sp_bw_pcie'] = curStats + elif curLine[0] == 'MD-LJ-DP': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + stats['md_dp_flops'] = curStats + elif curLine[0] == 'MD-LJ-DP-Bandwidth': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + stats['md_dp_bw'] = curStats + elif curLine[0] == 'MD-LJ-DP_PCIe': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + stats['md_dp_flops_pcie'] = curStats + elif curLine[0] == 'MD-LJ-DP-Bandwidth_PCIe': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + stats['md_dp_bw_pcie'] = curStats + + return stats + + +def processMD5Hash(folder): + stats = {} + filename = 'dev0_MD5Hash.log' + filepath = os.path.join(folder, filename) + if os.path.exists(filepath): + with open(filepath) as f: + for line in f: + curLine = line.split() + if curLine: + if curLine[0] == 'MD5Hash': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + stats['md5hash'] = curStats + + return stats + + +def processReduction(folder): + stats = {} + filename = 'dev0_Reduction.log' + filepath = os.path.join(folder, filename) + if os.path.exists(filepath): + with open(filepath) as f: + for line in f: + curLine = line.split() + if curLine: + if curLine[0] == 'Reduction': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + stats['reduction'] = curStats + elif curLine[0] == 'Reduction_PCIe': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + stats['reduction_pcie'] = curStats + elif curLine[0] == 'Reduction-DP': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + stats['reduction_dp'] = curStats + elif curLine[0] == 'Reduction-DP_PCIe': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + stats['reduction_dp_pcie'] = curStats + + return stats + + +def processScan(folder): + stats = {} + filename = 'dev0_Scan.log' + filepath = os.path.join(folder, filename) + if os.path.exists(filepath): + with open(filepath) as f: + for line in f: + curLine = line.split() + if curLine: + if curLine[0] == 'Scan': + curStats = {dataFormat[i]: 
curLine[i] + for i in range(len(dataFormat))} + stats['scan'] = curStats + elif curLine[0] == 'Scan_PCIe': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + stats['scan_pcie'] = curStats + elif curLine[0] == 'Scan-DP': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + stats['scan_dp'] = curStats + elif curLine[0] == 'Scan-DP_PCIe': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + stats['scan_dp_pcie'] = curStats + + return stats + + +def processSort(folder): + stats = {} + filename = 'dev0_Sort.log' + filepath = os.path.join(folder, filename) + if os.path.exists(filepath): + with open(filepath) as f: + for line in f: + curLine = line.split() + if curLine: + if curLine[0] == 'Sort-Rate': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + stats['sort'] = curStats + elif curLine[0] == 'Sort-Rate_PCIe': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + stats['sort_pcie'] = curStats + + return stats + + +def processSpmv(folder): + stats = {} + filename = 'dev0_Spmv.log' + filepath = os.path.join(folder, filename) + if os.path.exists(filepath): + with open(filepath) as f: + for line in f: + curLine = line.split() + if curLine: + if curLine[0] == 'CSR-Scalar-SP': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + stats['spmv_csr_scalar_sp'] = curStats + elif curLine[0] == 'CSR-Scalar-SP_PCIe': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + stats['spmv_csr_scalar_sp_pcie'] = curStats + elif curLine[0] == 'CSR-Scalar-DP': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + stats['spmv_csr_scalar_dp'] = curStats + elif curLine[0] == 'CSR-Scalar-DP_PCIe': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + stats['spmv_csr_scalar_dp_pcie'] = curStats + elif curLine[0] == 'Padded_CSR-Scalar-SP': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + stats['spmv_csr_scalar_pad_sp'] = curStats + elif curLine[0] == 'Padded_CSR-Scalar-SP_PCIe': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + stats['spmv_csr_scalar_pad_sp_pcie'] = curStats + elif curLine[0] == 'Padded_CSR-Scalar-DP': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + stats['spmv_csr_scalar_pad_dp'] = curStats + elif curLine[0] == 'Padded_CSR-Scalar-DP_PCIe': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + stats['spmv_csr_scalar_pad_dp_pcie'] = curStats + elif curLine[0] == 'CSR-Vector-SP': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + stats['spmv_csr_vector_sp'] = curStats + elif curLine[0] == 'CSR-Vector-SP_PCIe': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + stats['spmv_csr_vector_sp_pcie'] = curStats + elif curLine[0] == 'CSR-Vector-DP': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + stats['spmv_csr_vector_dp'] = curStats + elif curLine[0] == 'CSR-Vector-DP_PCIe': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + stats['spmv_csr_vector_dp_pcie'] = curStats + elif curLine[0] == 'Padded_CSR-Vector-SP': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + stats['spmv_csr_vector_pad_sp'] = curStats + elif curLine[0] == 'Padded_CSR-Vector-SP_PCIe': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + stats['spmv_csr_vector_pad_sp_pcie'] = 
curStats + elif curLine[0] == 'Padded_CSR-Vector-DP': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + stats['spmv_csr_vector_pad_dp'] = curStats + elif curLine[0] == 'Padded_CSR-Vector-DP_PCIe': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + stats['spmv_csr_vector_pad_dp_pcie'] = curStats + elif curLine[0] == 'ELLPACKR-SP': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + stats['spmv_ellpackr_sp'] = curStats + elif curLine[0] == 'ELLPACKR-DP': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + stats['spmv_ellpackr_dp'] = curStats + + return stats + + +def processStencil2D(folder): + stats = {} + filename = 'dev0_Stencil2D.log' + filepath = os.path.join(folder, filename) + if os.path.exists(filepath): + with open(filepath) as f: + for line in f: + curLine = line.split() + if curLine: + if curLine[0] == 'SP_Sten2D': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + stats['stencil'] = curStats + elif curLine[0] == 'DP_Sten2D': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + stats['stencil_dp'] = curStats + + return stats + + +def processTriad(folder): + stats = {} + max = -1.0 + filename = 'dev0_Triad.log' + filepath = os.path.join(folder, filename) + if os.path.exists(filepath): + with open(filepath) as f: + for line in f: + curLine = line.split() + if curLine: + if curLine[0] == 'TriadBdwth': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + if float(curStats['median']) > max: + max = float(curStats['median']) + stats = curStats + else: + print('Cannot find log file {}'.format(filename)) + + return {'triad_bw': stats} + + +def processS3D(folder): + stats = {} + filename = 'dev0_S3D.log' + filepath = os.path.join(folder, filename) + if os.path.exists(filepath): + with open(filepath) as f: + for line in f: + curLine = line.split() + if curLine: + if curLine[0] == 'S3D-SP': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + stats['s3d'] = curStats + elif curLine[0] == 'S3D-SP_PCIe': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + stats['s3d_pcie'] = curStats + elif curLine[0] == 'S3D-DP': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + stats['s3d_dp'] = curStats + elif curLine[0] == 'S3D-DP_PCIe': + curStats = {dataFormat[i]: curLine[i] + for i in range(len(dataFormat))} + stats['s3d_dp_pcie'] = curStats + + return stats + + +def main(args): + # Start stats collection + stats = parseStats(args.inputFolder) + + if args.verbose: + printStats(stats) + + output = args.output + if output == '': + output = os.path.dirname(args.inputFolder) + + if not args.disable: + createSpreadsheet(args.inputFolder, output, stats) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Script to extract shoc execution-time data.', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + + parser.add_argument(dest='inputFolder', + help='The path to a benchmark directory') + + parser.add_argument('--verbose', '-v', + action='store_true', default=False, + dest='verbose', + help='Print stats to terminal') + + parser.add_argument('--output', '-o', + default='', + dest='output', + help='Output results spreadsheet filepath containing only the median and variance') + + parser.add_argument('--all', '-a', + action='store_true', default=False, + dest='printAllRuns', + help='Write all runs statistics instead of only the 
median') + + parser.add_argument('--disable', '-d', + action='store_true', default=False, + dest='disable', + help='Disable spreadsheet output.') + + args = parser.parse_args() + + main(args) diff --git a/util/plaidbench/get-benchmarks-stats.py b/util/plaidbench/get-benchmarks-stats.py index 55120eec..725106d9 100644 --- a/util/plaidbench/get-benchmarks-stats.py +++ b/util/plaidbench/get-benchmarks-stats.py @@ -6,7 +6,7 @@ about the benchmarks from the log files generated by the run-plaidbench.sh script. Author: Vang Thao -Last Update: December 30, 2019 +Last Update: September 2020 ********************************************************************************** OUTPUT: @@ -34,129 +34,145 @@ ... ''' -import os # Used for scanning directories, getting paths, and checking files. -import re # Used for parsing log file +import argparse +import logging +import os +import sys from openpyxl import Workbook from openpyxl.styles import Font -import argparse -RE_DAG_INFO = re.compile(r'Processing DAG (.*) with (\d+) insts and max latency (\d+)') -RE_PASS_NUM = re.compile(r'End of (.*) pass through') - -# Contains all of the stats -benchStats = {} -# Contain cumulative stats for the run -cumulativeStats = {} - -# List of benchmark names -benchmarks = [ - 'densenet121', - 'densenet169', - 'densenet201', - 'inception_resnet_v2', - 'inception_v3', - 'mobilenet', - 'nasnet_large', - 'nasnet_mobile', - 'resnet50', - 'vgg16', - 'vgg19', - 'xception', - 'imdb_lstm', -] - -def parseStats(inputFolder): +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from readlogs import * + +def parseStats(filePaths): + """ + Parse and process the logs from the selected benchmark runs then return the statistics in a dictionary. + + Parameters: + inputFolder -- A string containing the name of a directory with plaidbench or SHOC results. + + Returns: + dictionary: A dictionary containing the statistics for the benchmark. + The key is the name of the files which are mapped to another dictionary containing the keys + regions, kernels, insts, and maxRegionSize which are mapped to integer values. + """ + # Get logger + logger = logging.getLogger('parseStats') + # Overall stats for the benchmark suite - cumulativeStats['numOfBenchmarks'] = 0 - cumulativeStats['totalKernels'] = 0 - cumulativeStats['totalInsts'] = 0 - cumulativeStats['totalRegions'] = 0 - cumulativeStats['maxRegionSize'] = -1 - cumulativeStats['maxRegionSize'] = -1 - cumulativeStats['averageRegionSize'] = 0 + stats = {} # Begin stats collection for this run - for bench in benchmarks: - # Get the path to the log file - currentPath = os.path.join(inputFolder, bench) - currentLogFile = os.path.join(currentPath, bench + '.log') - - stats = {} - stats['kernels'] = [] - stats['regions'] = 0 - stats['inst'] = 0 - stats['average'] = 0 - stats['maxRegionSize'] = 0 - + for bench in filePaths: # first check if log file exists. - if (os.path.exists(currentLogFile)): - cumulativeStats['numOfBenchmarks'] += 1 + if os.path.exists(filePaths[bench]): # Open log file if it exists. 
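+            # filePaths maps each benchmark name to the path of its log file.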
- with open(currentLogFile) as file: + with open(filePaths[bench]) as file: + # Initialize current benchmark statistics + curStats = {} + curStats['kernels'] = 0 + curStats['regions'] = 0 + curStats['insts'] = 0 + curStats['average'] = -1 + curStats['maxRegionSize'] = -1 + # Read the whole log file # and split the scheduling # regions into a list log = file.read() - blocks = log.split('********** Opt Scheduling **********')[1:] + blocks = split_blocks(log) # Iterate over each scheduling region for block in blocks: - # Ignore second pass since it should - # have the same stats as first - getPassNum = RE_PASS_NUM.search(block) - if (getPassNum): - passNum = getPassNum.group(1) - if passNum == 'second': - continue + events = keep_only_first_event(parse_events(block)) + + # Ignore second pass since it should + # have the same stats as first + if ('PassFinished' in events.keys() and events['PassFinished']['num'] == 2): + continue # Get DAG stats - dagStats = RE_DAG_INFO.search(block) - dagName = dagStats.group(1) - inst = dagStats.group(2) + dagName = events['ProcessDag']['name'] + inst = events['ProcessDag']['num_instructions'] # Split kernel name from its region number names = dagName.split(':') - kernelName = names[0] + regionNum = names[1] + + # Region 0 indicates a new kernel + if regionNum == '0': + curStats['kernels'] += 1 - # Add new kernels to list of kernels - if (kernelName not in stats['kernels']): - stats['kernels'].append(kernelName) + curStats['insts'] += inst + curStats['regions'] += 1 + if inst > curStats['maxRegionSize']: + curStats['maxRegionSize'] = inst - stats['inst'] += int(inst) - stats['regions'] += 1 - if (int(inst) > stats['maxRegionSize']): - stats['maxRegionSize'] = int(inst) + if curStats['regions'] != 0: + curStats['average'] = curStats['insts'] / \ + curStats['regions'] - stats['average'] = stats['inst']/float(stats['regions']) + # Save stats for this benchmark + stats[bench] = curStats # If the file doesn't exist, output error log. else: - print('Cannot find log file for benchmark {}.'.format(bench)) - - # Save stats for this benchmark inside benchStats - benchStats[bench] = stats - - # Record overall stat - cumulativeStats['totalKernels'] += len(stats['kernels']) - cumulativeStats['totalInsts'] += stats['inst'] - cumulativeStats['totalRegions'] += stats['regions'] - if (cumulativeStats['maxRegionSize'] < stats['maxRegionSize']): - cumulativeStats['maxRegionSize'] = stats['maxRegionSize'] - - cumulativeStats['averageRegionSize'] = cumulativeStats['totalInsts']/float(cumulativeStats['totalRegions']) - -def printStats(): - print('Benchmark Stats') - print(' Total number of benchmarks: {}'.format(cumulativeStats['numOfBenchmarks'])) - print(' Total kernels: {}'.format(cumulativeStats['totalKernels'])) - print(' Total regions: {}'.format(cumulativeStats['totalRegions'])) - print(' Total instructions: {}'.format(cumulativeStats['totalInsts'])) - print(' Average Region Size {:0.1f}'.format(cumulativeStats['averageRegionSize'])) - print(' Max region size: {}'.format(cumulativeStats['maxRegionSize'])) - -def createSpreadsheets(output): - if 'xls' not in output[-4:]: - output += '.xlsx' + logger.warning( + 'Cannot find log file for benchmark {}.'.format(bench)) + + return stats + + +def printStats(stats): + ''' + Prints the recorded statistics in a human readable format. 
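+    Also accumulates and prints suite-wide totals (kernels, regions,
+    instructions, average region size) after the per-benchmark statistics.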
+ + Parameters: + stats -- A dictionary mapping a benchmark to its statistics + ''' + # Overall statistics variables + totalKernels = 0 + totalRegions = 0 + totalInsts = 0 + totalAverageRegionSize = -1 + + # Statistics for individual benchmarks + for bench in stats: + print('Benchmark: {}'.format(bench)) + print(' Kernels : {}'.format(stats[bench]['kernels'])) + totalKernels += stats[bench]['kernels'] + print(' Scheduling Regions : {}'.format(stats[bench]['regions'])) + totalRegions += stats[bench]['regions'] + print(' Instructions : {}'.format(stats[bench]['insts'])) + totalInsts += stats[bench]['insts'] + averageRegionSize = -1 + if stats[bench]['regions'] != 0: + averageRegionSize = stats[bench]['insts'] / stats[bench]['regions'] + print(' Average Region Size : {:.2f}'.format(averageRegionSize)) + + # Statistics for the overall benchmark suite + print('Overall Statistics') + print(' Kernels : {}'.format(totalKernels)) + print(' Scheduling Regions : {}'.format(totalRegions)) + print(' Instructions : {}'.format(totalInsts)) + if totalRegions != 0: + totalAverageRegionSize = totalInsts / totalRegions + print(' Average Region Size : {:.2f}'.format(totalAverageRegionSize)) + + +def createSpreadsheets(stats, outputFile): + ''' + Write the dictionary returned from parseStats to an excel spreadsheet. + Also calculate and print the average region size and the overall statistics. + + Parameters: + stats -- A dictionary containing the statistics for the benchmarks. + outputFile -- A string containing the desired name for the output spreadsheet. + ''' + # Check if the user specified an extension with xls or xlsx + # If not then add the extension to the string + if 'xls' not in outputFile[-4:]: + outputFile += '.xlsx' # Create new excel worksheet wb = Workbook() @@ -169,54 +185,91 @@ def createSpreadsheets(output): ws['A1'].font = Font(bold=True) row = 3 - for bench in benchmarks: + + totalKernels = 0 + totalRegions = 0 + totalInsts = 0 + maxRegionSize = -1 + totalAverageRegionSize = -1 + totalNumberOfBenchmarks = 0 + + # Handle writing to spreadsheet for each benchmarks + for bench in stats: + totalNumberOfBenchmarks += 1 + + averageRegionSize = -1 + ws['A' + str(row)] = bench - row += 1 + ws['B' + str(row)] = 'Total number of kernels' + ws['C' + str(row)] = stats[bench]['kernels'] + totalKernels += stats[bench]['kernels'] - ws['A' + str(row)] = 'Total' - ws['A' + str(row)].font = Font(bold=True) + ws['B' + str(row+1)] = 'Total number of scheduling regions' + ws['C' + str(row+1)] = stats[bench]['regions'] + totalRegions += stats[bench]['regions'] - # Stats entry - col = 'B' - row = 1 - ws[col + str(row)] = 'Benchmark Stats' - row = 2 - ws[col+str(row)] = 'Kernels' - ws[chr(ord(col)+1)+str(row)] = 'Scheduling Regions' - ws[chr(ord(col)+2)+str(row)] = 'Instructions' - ws[chr(ord(col)+3)+str(row)] = 'Average Region Size' - ws[chr(ord(col)+4)+str(row)] = 'Max Region Size' + ws['B' + str(row+2)] = 'Total number of instructions' + ws['C' + str(row+2)] = stats[bench]['insts'] + totalInsts += stats[bench]['insts'] + + if stats[bench]['regions'] != 0: + averageRegionSize = stats[bench]['insts'] / stats[bench]['regions'] + ws['B' + str(row+3)] = 'Avg. 
Region Size' + ws['C' + str(row+3)] = averageRegionSize + + ws['B' + str(row+4)] = 'Max Region Size' + ws['C' + str(row+4)] = stats[bench]['maxRegionSize'] + if maxRegionSize < stats[bench]['maxRegionSize']: + maxRegionSize = stats[bench]['maxRegionSize'] + + row += 5 + + # Write the overall statistics + ws['A' + str(row)] = 'Overall Stats' + ws['A' + str(row)].font = Font(bold=True) + ws['B' + str(row)] = 'Total number of benchmarks' + ws['C' + str(row)] = totalNumberOfBenchmarks + ws['B' + str(row+1)] = 'Total number of kernels' + ws['C' + str(row+1)] = totalKernels + ws['B' + str(row+2)] = 'Total number of scheduling regions' + ws['C' + str(row+2)] = totalRegions + ws['B' + str(row+3)] = 'Total number of instructions' + ws['C' + str(row+3)] = totalInsts + if totalRegions != 0: + totalAverageRegionSize = totalInsts / totalRegions + ws['B' + str(row+4)] = 'Avg. Region Size' + ws['C' + str(row+4)] = totalAverageRegionSize + ws['B' + str(row+5)] = 'Max Region Size' + ws['C' + str(row+5)] = maxRegionSize + + wb.save(outputFile) - row = 3 - for bench in benchmarks: - ws[col+str(row)] = len(benchStats[bench]['kernels']) - ws[chr(ord(col)+1)+str(row)] = benchStats[bench]['regions'] - ws[chr(ord(col)+2)+str(row)] = benchStats[bench]['inst'] - ws[chr(ord(col)+3)+str(row)] = benchStats[bench]['average'] - ws[chr(ord(col)+4)+str(row)] = benchStats[bench]['maxRegionSize'] - row += 1 - - ws[col+str(row)] = cumulativeStats['totalKernels'] - ws[chr(ord(col)+1)+str(row)] = cumulativeStats['totalRegions'] - ws[chr(ord(col)+2)+str(row)] = cumulativeStats['totalInsts'] - ws[chr(ord(col)+3)+str(row)] = cumulativeStats['averageRegionSize'] - ws[chr(ord(col)+4)+str(row)] = cumulativeStats['maxRegionSize'] - - wb.save(output) def main(args): + if args.verbose: + logging.basicConfig(level=logging.DEBUG) + + # Get filepaths for the selected benchmark suite + filePaths = get_bench_log_paths(args.inputFolder, args.benchmark) + # Start stats collection - parseStats(args.inputFolder) + stats = parseStats(filePaths) + # Print stats if enabled if args.verbose: - printStats() + printStats(stats) + # Create spreadsheet if not args.disable: - createSpreadsheets(args.output) + createSpreadsheets(stats, args.output) + if __name__ == '__main__': - parser = argparse.ArgumentParser(description='Script to extract benchmarks stats', \ - formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser = argparse.ArgumentParser(description='Script to extract benchmarks stats', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + + parser.add_argument(dest='inputFolder', + help='The path to a benchmark directory') parser.add_argument('--verbose', '-v', action='store_true', default=False, @@ -233,10 +286,11 @@ def main(args): dest='disable', help='Disable spreadsheet output.') - parser.add_argument('--input', '-i', - default='.', - dest='inputFolder', - help='The path to scan for benchmark directories') + parser.add_argument('--benchmark', '-b', + default='plaid', + choices=['plaid', 'shoc'], + dest='benchmark', + help='Select the benchmarking suite to parse for.') args = parser.parse_args() diff --git a/util/plaidbench/get-occupancy.py b/util/plaidbench/get-occupancy.py index 718e710e..3852e62c 100644 --- a/util/plaidbench/get-occupancy.py +++ b/util/plaidbench/get-occupancy.py @@ -1,167 +1,104 @@ #!/usr/bin/python3 ''' ********************************************************************************** -Description: Extract occupancy stats from plaidbench runs. +Description: Extract occupancy stats from a plaidbench run. 
Author: Vang Thao Created: December 30, 2019 -Last Update: December 30, 2019 +Last Update: September 2020 ********************************************************************************** OUTPUT: - This script takes in data from plaidbench runs and output a spreadsheet - containing the average occupancy for each benchmark and the overall - average occupancy. + This script takes in data from a plaidbench run and output a spreadsheet + containing the average occupancy for each benchmark. Spreadsheet 1: occupancy.xlsx Requirements: - python3 - pip3 - openpyxl (sreadsheet module, installed using pip3) - - patch to print out occupancy + - patch for LLVM to print out occupancy HOW TO USE: 1.) Run a plaidbench benchmarks with run-plaidbench.sh to generate a directory containing the results for the run. - 2.) Move the directory into a separate folder containing only the - directories generated by the script. - 3.) Pass the path to the folder as an input to this script with - the -i option. + 2.) Pass the path to the folder as an input to this script Example: - ./get-occupancy.py -i /home/tom/plaidbench-runs - - where plaidbench-runs/ contains - plaidbench-optsched-01/ - plaidbench-optsched-02/ - ... - plaidbench-amd-01/ - ... + ./get-occupancy.py /home/tom/plaidbench-run-01 ''' +import argparse +import logging import os -import re +import sys from openpyxl import Workbook from openpyxl.styles import Font -import argparse -RE_OCCUPANCY = re.compile('Final occupancy for function (.*):(\d+)') - -# Contains all of the stats -benchStats = {} - -# List of benchmark names -benchmarks = [ - 'densenet121', - 'densenet169', - 'densenet201', - 'inception_resnet_v2', - 'inception_v3', - 'mobilenet', - 'nasnet_large', - 'nasnet_mobile', - 'resnet50', - 'vgg16', - 'vgg19', - 'xception', - 'imdb_lstm', -] - -# Ignore these functions -# They are outputted before scheduling -ignore = [ - 'copyBufferRect', - 'copyBufferRectAligned', - 'copyBuffer', - 'copyBufferAligned', - 'fillBuffer', - 'copyBufferToImage', - 'copyImageToBuffer', - 'copyImage', - 'copyImage1DA', - 'fillImage', - 'scheduler' -] - -def parseStats(inputFolder, ignoreFolders): - scanDirPath = os.path.abspath(inputFolder) - - # Get name of all directories in the specified folder - subfolders = [f.name for f in os.scandir(path=scanDirPath) if f.is_dir() ] - - # For each folder - for folderName in subfolders: - if folderName in ignoreFolders: - continue - name = folderName.split('-') - - # Get the run number from the end - # of the folder name - runNumber = name[-1] - - # Get the name of the run - # and exclude the run number - nameOfRun = '-'.join(name[:-1]) - - # Create an entry in the stats for the - # name of the run - if (nameOfRun not in benchStats): - benchStats[nameOfRun] = {} - - for bench in benchmarks: - # Get the path to the log file - currentPath = os.path.join(inputFolder, folderName) - currentPath = os.path.join(currentPath, bench) - currentLogFile = os.path.join(currentPath, bench + '.log') - stats = {} - stats['average'] = 0.0 - stats['total'] = 0.0 - stats['numKernel'] = 0 - - # First check if log file exists. - if (os.path.exists(currentLogFile)): - # Open log file if it exists. 
- with open(currentLogFile) as file: - for line in file: - # Match the line that contain occupancy stats - getOccupancyStats = RE_OCCUPANCY.match(line) - if (getOccupancyStats): - # Get the kernel name - kernelName = getOccupancyStats.group(1) - - # Ignore these function - if (kernelName in ignore): - continue - - # Get occupancy - occupancy = int(getOccupancyStats.group(2)) - - # Used for averaging - stats['total'] += occupancy - stats['numKernel'] += 1 - else: - print('Cannot find log file for {} run {} benchmark {}.'.format(nameOfRun, runNumber, bench)) - - if stats['numKernel'] != 0: - stats['average'] = stats['total'] / stats['numKernel'] - - # Save stats - benchStats[nameOfRun][bench] = stats - -def printStats(): - for nameOfRun in benchStats: - print('{}'.format(nameOfRun)) - total = 0.0 - kernel = 0 - for bench in benchStats[nameOfRun]: - print(' {} : {:.2f}'.format(bench, benchStats[nameOfRun][bench]['average'])) - total += benchStats[nameOfRun][bench]['total'] - kernel += benchStats[nameOfRun][bench]['numKernel'] - if kernel != 0: - print(' Average: {:.2f}'.format(total/kernel)) - -def createSpreadsheets(output): - if 'xls' not in output[-4:]: - output += '.xlsx' +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from readlogs import * + +def parseStats(filePaths): + # Get logger + logger = logging.getLogger('parseStats') + + # Overall stats for the benchmark suite + stats = {} + + for bench in filePaths: + curStats = {} + curStats['average'] = -1.0 + curStats['total'] = 0.0 + curStats['numRegions'] = 0 + curStats['maxLength'] = -1 + curStats['numKernel'] = 0 + + # First check if log file exists. + if os.path.exists(filePaths[bench]): + # Open log file if it exists. + with open(filePaths[bench]) as file: + for line in file: + # Match the line that contain occupancy stats + getOccupancyStats = OPT_RE_OCCUPANCY.match(line) + if (getOccupancyStats): + # Get the kernel name + kernelName = getOccupancyStats.group(1) + + # Ignore these function + if (kernelName in OPT_IGNORE): + continue + + # Get occupancy + occupancy = int(getOccupancyStats.group(2)) + + # Used for averaging + curStats['total'] += occupancy + curStats['numKernel'] += 1 + if curStats['numKernel'] != 0: + curStats['average'] = curStats['total'] / \ + curStats['numKernel'] + else: + print('Cannot find log file {}.'.format(filePaths[bench])) + + # Save stats + stats[bench] = curStats + return stats + + +def printStats(stats): + total = 0.0 + kernel = 0 + for bench in stats: + print(' {} : {:.2f}'.format( + bench, stats[bench]['average'])) + total += stats[bench]['total'] + kernel += stats[bench]['numKernel'] + if kernel != 0: + print(' Average: {:.2f}'.format(total/kernel)) + + +def createSpreadsheets(stats, outputFile): + if 'xls' not in outputFile[-4:]: + outputFile += '.xlsx' # Create new excel worksheet wb = Workbook() @@ -173,66 +110,67 @@ def createSpreadsheets(output): ws['A1'] = 'Benchmarks' ws['A1'].font = Font(bold=True) - row = 3 - for bench in benchmarks: + # Stats entry + col = 'B' + row = 1 + ws[col+str(row)] = 'Occupancy' + row = 2 + + total = 0.0 + kernel = 0 + for bench in stats: ws['A' + str(row)] = bench + ws[col+str(row)] = stats[bench]['average'] + total += stats[bench]['total'] + kernel += stats[bench]['numKernel'] row += 1 ws['A' + str(row)] = 'Average' ws['A' + str(row)].font = Font(bold=True) + ws[col+str(row)] = total/kernel + + wb.save(outputFile) - # Stats entry - col = 'B' - for nameOfRun in benchStats: - row = 1 - ws[col + str(row)] = nameOfRun - row = 2 - 
ws[col+str(row)] = 'Occupancy' - row = 3 - - total = 0.0 - kernel = 0 - for bench in benchmarks: - ws[col+str(row)] = benchStats[nameOfRun][bench]['average'] - total += benchStats[nameOfRun][bench]['total'] - kernel += benchStats[nameOfRun][bench]['numKernel'] - row += 1 - ws[col+str(row)] = total/kernel - - # Convert column char to ASCII value - # then increment it and convert - # back into char. Used to go to next - # column for next test run. - col = chr(ord(col)+1) - - wb.save(output) def main(args): - # Parse folders to ignore into a list - ignoreFolders = args.ignoreFolders.split(',') + if args.verbose: + logging.basicConfig(level=logging.DEBUG) + + # Get filepaths for the selected benchmark suite + filePaths = get_bench_log_paths(args.inputFolder, args.benchmark) # Start stats collection - parseStats(args.inputFolder, ignoreFolders) + stats = parseStats(filePaths) + # Print stats if enabled if args.verbose: - printStats() + printStats(stats) + # Create spreadsheet if not args.disable: - createSpreadsheets(args.output) + filename = '' + if args.output is None: + filename = os.path.dirname('occupancy-' + args.inputFolder) + else: + filename = args.output + + createSpreadsheets(stats, filename) + if __name__ == '__main__': parser = argparse.ArgumentParser(description='Script to extract occupancy data. \ - Requires patch to print occupancy.', \ - formatter_class=argparse.ArgumentDefaultsHelpFormatter) + Requires patch to print occupancy.', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + + parser.add_argument(dest='inputFolder', + help='The path to a benchmark directory') parser.add_argument('--verbose', '-v', action='store_true', default=False, dest='verbose', help='Print average occupancy to terminal') - parser.add_argument('--output', '-o', - default='occupancy', - dest='output', + parser.add_argument('--output', '-o', dest='output', help='Output spreadsheet filepath') parser.add_argument('--disable', '-d', @@ -240,16 +178,11 @@ def main(args): dest='disable', help='Disable spreadsheet output.') - parser.add_argument('--input', '-i', - default='.', - dest='inputFolder', - help='The path to scan for benchmark directories') - - parser.add_argument('--ignore', - type=str, - default='', - dest='ignoreFolders', - help='List of folders to ignore separated by semi-colon') + parser.add_argument('--benchmark', '-b', + default='plaid', + choices=['plaid', 'shoc'], + dest='benchmark', + help='Select the benchmarking suite to parse for.') args = parser.parse_args() diff --git a/util/plaidbench/get-optsched-stats.py b/util/plaidbench/get-optsched-stats.py index e2eeb868..881ec3ed 100644 --- a/util/plaidbench/get-optsched-stats.py +++ b/util/plaidbench/get-optsched-stats.py @@ -2,30 +2,28 @@ ''' ********************************************************************************** Description: This script is meant to be used with the OptSched scheduler and - the run-plaidbench.sh script. This script will extract stats - about how our OptSched scheduler is doing from the log files - generated from the run-plaidbench.sh script. -Author: Vang Thao -Last Update: December 30, 2019 + the run-plaidbench.sh script or with test results from shoc. + This script will extract stats about how our OptSched scheduler + is doing from the log files generated plaidml or shoc. +Author: Vang Thao +Last Update: September 2020 ********************************************************************************** OUTPUT: - This script takes in data from plaidbench runs and output a single spreadsheet. 
- Spreadsheet 1: optsched-stats.xlsx + This script takes in data from plaidbench or shoc runs and output a single + spreadsheet. + Spreadsheet 1: optsched-stats.csv Requirements: - python3 - - pip3 - - openpyxl (sreadsheet module, installed using pip3) HOW TO USE: 1.) Run a plaidbench benchmarks with run-plaidbench.sh to generate a directory containing the results for the run. - 2.) Pass the path to the directory as an input to this script with - the -i option. + 2.) Pass the path to the directory as an input to this script Example: - ./get-optsched-stats.py -i /home/tom/plaidbench-optsched-01/ + ./get-optsched-stats.py /home/tom/plaidbench-optsched-01/ where plaidbench-optsched-01/ contains densenet121 @@ -34,303 +32,294 @@ ... ''' -import os # Used for scanning directories, getting paths, and checking files. -import re -from openpyxl import Workbook -from openpyxl.styles import Font -from openpyxl.styles import Alignment import argparse +import csv +import logging +import os +import sys + +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from readlogs import * -REGEX_DAG_INFO = re.compile(r'Processing DAG (.*) with (\d+) insts and max latency (\d+)') -REGEX_LIST_OPTIMAL = re.compile(r'list schedule (.?)* is optimal') -REGEX_COST_IMPROV = re.compile(r'cost imp=(\d+).') -REGEX_OPTIMAL = re.compile(r'The schedule is optimal') -REGEX_PASS_NUM = re.compile(r'End of (.*) pass through') - -# Contains all of the stats -benchStats = {} -passStats = {} -passes = ['first', 'second', 'third'] - -# List of benchmark names -benchmarks = [ - 'densenet121', - 'densenet169', - 'densenet201', - 'inception_resnet_v2', - 'inception_v3', - 'mobilenet', - 'nasnet_large', - 'nasnet_mobile', - 'resnet50', - 'vgg16', - 'vgg19', - 'xception', - 'imdb_lstm', -] # List of stats that can be initialized to 0 statsProcessed = [ 'TotalProcessed', + 'SchedRevert', 'EnumCnt', 'OptImpr', 'OptNotImpr', 'TimeoutImpr', 'TimeoutNotImpr', 'TimeoutCnt', - 'TotalInstr' + 'TotalInstr', + 'TimeoutInstrToEnum', ] -def initializePassStats(dictToInitialize): +def getNewStatsDict(): + ''' + Return a dict and initialize basic stats used by this script + ''' + stats = {} for stat in statsProcessed: - dictToInitialize[stat] = 0 - dictToInitialize['AverageSizeToEnum'] = -1.0 - dictToInitialize['LargestOptimalRegion'] = -1 - dictToInitialize['LargestImprovedRegion'] = -1 + stats[stat] = 0 + stats['AverageSizeToEnum'] = -1.0 + stats['LargestOptimalRegion'] = -1 + stats['LargestImprovedRegion'] = -1 + + return stats + + +def parseStats(filePaths): + # Get logger + logger = logging.getLogger('parseStats') -def parseStats(inputFolder): # Initialize pass stats collection variables - for x in passes: - passStats[x] = {} - initializePassStats(passStats[x]) + stats = {} # Begin stats collection for this run - for bench in benchmarks: - # Get the path to the log file - currentPath = os.path.join(inputFolder, bench) - currentLogFile = os.path.join(currentPath, bench + '.log') - - # First check if log file exists. - if os.path.exists(currentLogFile): - benchStats[bench] = {} + for bench in filePaths: + logger.debug('Verifying file {} exists'.format(filePaths[bench])) + # first check if log file exists. + if os.path.exists(filePaths[bench]): # Open log file if it exists. - with open(currentLogFile) as file: + with open(filePaths[bench]) as file: + logger.debug( + 'File found! 
Processing file {}'.format(filePaths[bench])) + # Contain the stats for this benchmark - stats = {} - for x in passes: - stats[x] = {} - initializePassStats(stats[x]) + curStats = {} log = file.read() - blocks = log.split('********** Opt Scheduling **********')[1:] + blocks = split_blocks(log) for block in blocks: + events = keep_only_first_event(parse_events(block)) # Get pass num, if none is found then # use third as default. - getPass = REGEX_PASS_NUM.search(block) - if getPass: - passNum = getPass.group(1) + if 'PassFinished' in events.keys(): + passNum = events['PassFinished']['num'] + if passNum not in curStats.keys(): + curStats[passNum] = getNewStatsDict() else: - passNum = "third" + passNum = 0 + if passNum not in curStats.keys(): + curStats[passNum] = getNewStatsDict() + + curStats[passNum]['TotalProcessed'] += 1 - stats[passNum]['TotalProcessed'] += 1 + if OPT_RE_REVERT_SCHED.search(block): + curStats[passNum]['SchedRevert'] += 1 + continue # If our enumerator was called then # record stats for it. - if 'Enumerating' in block: - stats[passNum]['EnumCnt'] += 1 - # Get cost - searchCost = REGEX_COST_IMPROV.search(block) - cost = int(searchCost.group(1)) - - # Get DAG stats - dagInfo = REGEX_DAG_INFO.search(block) - numOfInstr = int(dagInfo.group(2)) - stats[passNum]['TotalInstr'] += numOfInstr - - if REGEX_OPTIMAL.search(block): - # Optimal and improved - if cost > 0: - stats[passNum]['OptImpr'] += 1 - if (numOfInstr > stats[passNum]['LargestImprovedRegion']): - stats[passNum]['LargestImprovedRegion'] = numOfInstr - # Optimal but not improved - elif cost == 0: - stats[passNum]['OptNotImpr'] += 1 - if (numOfInstr > stats[passNum]['LargestOptimalRegion']): - stats[passNum]['LargestOptimalRegion'] = numOfInstr - elif 'timedout' in block: + if 'Enumerating' in events.keys(): + curStats[passNum]['EnumCnt'] += 1 + numOfInstr = events['ProcessDag']['num_instructions'] + curStats[passNum]['TotalInstr'] += numOfInstr + + if 'DagTimedOut' in events.keys(): + cost = events['DagTimedOut']['cost_improvement'] # Timeout and improved if cost > 0: - stats[passNum]['TimeoutImpr'] += 1 - if numOfInstr > stats[passNum]['LargestImprovedRegion']: - stats[passNum]['LargestImprovedRegion'] = numOfInstr + curStats[passNum]['TimeoutImpr'] += 1 + if numOfInstr > curStats[passNum]['LargestImprovedRegion']: + curStats[passNum]['LargestImprovedRegion'] = numOfInstr # Timeout but not improved elif cost == 0: - stats[passNum]['TimeoutNotImpr'] += 1 - stats[passNum]['TimeoutCnt'] += 1 - + curStats[passNum]['TimeoutNotImpr'] += 1 + # Negative Cost! Raise error + else: + raise AssertionError("Found negative cost for the block:\n" + block) + curStats[passNum]['TimeoutCnt'] += 1 + curStats[passNum]['TimeoutInstrToEnum'] += numOfInstr + elif 'DagSolvedOptimally' in events.keys(): + cost = events['DagSolvedOptimally']['cost_improvement'] + # Optimal and improved + if cost > 0: + curStats[passNum]['OptImpr'] += 1 + if numOfInstr > curStats[passNum]['LargestImprovedRegion']: + curStats[passNum]['LargestImprovedRegion'] = numOfInstr + # Optimal but not improved + elif cost == 0: + curStats[passNum]['OptNotImpr'] += 1 + # Negative Cost! 
Raise error + else: + raise AssertionError("Found negative cost for the block:\n" + block) + if numOfInstr > curStats[passNum]['LargestOptimalRegion']: + curStats[passNum]['LargestOptimalRegion'] = numOfInstr + else: + raise AssertionError("Couldn't find improvement cost for the block:\n" + block) + + for passNum in curStats: + if curStats[passNum]['EnumCnt'] != 0: + curStats[passNum]['AverageSizeToEnum'] = curStats[passNum]['TotalInstr'] / \ + curStats[passNum]['EnumCnt'] + + stats[bench] = curStats # If the file doesn't exist, output error log. else: print('Cannot find log file for benchmark {}.'.format(bench)) - for passNum in passes: - for stat in statsProcessed: - passStats[passNum][stat] += stats[passNum][stat] - if stats[passNum]['EnumCnt'] != 0: - stats[passNum]['AverageSizeToEnum'] = float(stats[passNum]['TotalInstr'])/stats[passNum]['EnumCnt'] - if passStats[passNum]['LargestOptimalRegion'] < stats[passNum]['LargestOptimalRegion']: - passStats[passNum]['LargestOptimalRegion'] = stats[passNum]['LargestOptimalRegion'] - if passStats[passNum]['LargestImprovedRegion'] < stats[passNum]['LargestImprovedRegion']: - passStats[passNum]['LargestImprovedRegion'] = stats[passNum]['LargestImprovedRegion'] - - benchStats[bench] = stats - - for passNum in passes: - if passStats[passNum]['EnumCnt'] != 0: - passStats[passNum]['AverageSizeToEnum'] = float(passStats[passNum]['TotalInstr'])/passStats[passNum]['EnumCnt'] - -def printStats(): - for passNum in passes: - if passStats[passNum]['TotalProcessed'] == 0: - continue - - # Third pass is just a default if the two-pass - # implementation wasn't used and not an actual - # 3 pass implementation. - if passNum == 'third': - print('first') - else: - print(passNum) - - for stat in passStats[passNum]: - print(' {} : {}'.format(stat, passStats[passNum][stat])) - -def writeBenchmarkNames(ws, row): - for bench in benchmarks: - ws['A' + str(row)] = bench - row += 1 - ws['A' + str(row)] = 'Overall' - ws['A' + str(row)].font = Font(bold=True) - -def createSpreadsheets(output): - if 'xls' not in output[-4:]: - output += '.xlsx' - - # Create new excel worksheet - wb = Workbook() - - # Grab the active worksheet - ws = wb.active - - # Insert column titles - ws['A1'] = 'Benchmarks' - ws['A1'].font = Font(bold=True) - col = 'B' - row = 1 - ws[col + str(row)] = 'Benchmark Stats' - row = 2 - ws[col+str(row)] = 'Regions processed' - ws[chr(ord(col)+1)+str(row)] = 'Passed to B&B' - ws[chr(ord(col)+2)+str(row)] = 'Optimal and improved' - ws[chr(ord(col)+3)+str(row)] = 'Optimal and not improved' - ws[chr(ord(col)+4)+str(row)] = 'Timed out and improved' - ws[chr(ord(col)+5)+str(row)] = 'Timed out and not improved' - ws[chr(ord(col)+6)+str(row)] = 'Avg. Region size passed to B&B' - ws[chr(ord(col)+7)+str(row)] = 'Largest optimal region' - ws[chr(ord(col)+8)+str(row)] = 'Largest improved region' - - # Stats entry - row = 3 - for passNum in passes: - # Skip pass if there is no data. - if passStats[passNum]['TotalProcessed'] == 0: - continue - - # Identify each pass if data set is from 2-pass - # scheduler. 
- if not passNum == 'third': - ws['A'+str(row-1)] = passNum.capitalize() + ' Pass' - ws['A'+str(row-1)].font = Font(bold=True) - - writeBenchmarkNames(ws, row) - - # Write individual benchmark stats - for bench in benchmarks: - ws[col+str(row)] = benchStats[bench][passNum]['TotalProcessed'] - ws[col+str(row)].alignment = Alignment(horizontal='right') - if benchStats[bench][passNum]['EnumCnt'] != 0: - enumCntPcnt = float(benchStats[bench][passNum]['EnumCnt']) / benchStats[bench][passNum]['TotalProcessed'] * 100.0 - ws[chr(ord(col)+1)+str(row)] = str(benchStats[bench][passNum]['EnumCnt']) + ' ({:.2f}%)'.format(enumCntPcnt) - ws[chr(ord(col)+1)+str(row)].alignment = Alignment(horizontal='right') - - enumCntPcnt = float(benchStats[bench][passNum]['OptImpr']) / benchStats[bench][passNum]['EnumCnt'] * 100.0 - ws[chr(ord(col)+2)+str(row)] = str(benchStats[bench][passNum]['OptImpr']) + ' ({:.2f}%)'.format(enumCntPcnt) - ws[chr(ord(col)+2)+str(row)].alignment = Alignment(horizontal='right') - - enumCntPcnt = float(benchStats[bench][passNum]['OptNotImpr']) / benchStats[bench][passNum]['EnumCnt'] * 100.0 - ws[chr(ord(col)+3)+str(row)] = str(benchStats[bench][passNum]['OptNotImpr']) + ' ({:.2f}%)'.format(enumCntPcnt) - ws[chr(ord(col)+3)+str(row)].alignment = Alignment(horizontal='right') - - enumCntPcnt = float(benchStats[bench][passNum]['TimeoutImpr']) / benchStats[bench][passNum]['EnumCnt'] * 100.0 - ws[chr(ord(col)+4)+str(row)] = str(benchStats[bench][passNum]['TimeoutImpr']) + ' ({:.2f}%)'.format(enumCntPcnt) - ws[chr(ord(col)+4)+str(row)].alignment = Alignment(horizontal='right') - - enumCntPcnt = float(benchStats[bench][passNum]['TimeoutNotImpr']) / benchStats[bench][passNum]['EnumCnt'] * 100.0 - ws[chr(ord(col)+5)+str(row)] = str(benchStats[bench][passNum]['TimeoutNotImpr']) + ' ({:.2f}%)'.format(enumCntPcnt) - ws[chr(ord(col)+5)+str(row)].alignment = Alignment(horizontal='right') - - ws[chr(ord(col)+6)+str(row)] = benchStats[bench][passNum]['AverageSizeToEnum'] - ws[chr(ord(col)+6)+str(row)].alignment = Alignment(horizontal='right') - - ws[chr(ord(col)+7)+str(row)] = benchStats[bench][passNum]['LargestOptimalRegion'] - ws[chr(ord(col)+7)+str(row)].alignment = Alignment(horizontal='right') - - ws[chr(ord(col)+8)+str(row)] = benchStats[bench][passNum]['LargestImprovedRegion'] - ws[chr(ord(col)+8)+str(row)].alignment = Alignment(horizontal='right') - - row += 1 - - # Write overall stats - ws[col+str(row)] = passStats[passNum]['TotalProcessed'] - enumCntPcnt = float(passStats[passNum]['EnumCnt']) / passStats[passNum]['TotalProcessed'] * 100.0 - ws[chr(ord(col)+1)+str(row)] = str(passStats[passNum]['EnumCnt']) + ' ({:.2f}%)'.format(enumCntPcnt) - ws[chr(ord(col)+1)+str(row)].alignment = Alignment(horizontal='right') - - if passStats[passNum]['EnumCnt'] != 0: - enumCntPcnt = float(passStats[passNum]['OptImpr']) / passStats[passNum]['EnumCnt'] * 100.0 - ws[chr(ord(col)+2)+str(row)] = str(passStats[passNum]['OptImpr']) + ' ({:.2f}%)'.format(enumCntPcnt) - ws[chr(ord(col)+2)+str(row)].alignment = Alignment(horizontal='right') - - enumCntPcnt = float(passStats[passNum]['OptNotImpr']) / passStats[passNum]['EnumCnt'] * 100.0 - ws[chr(ord(col)+3)+str(row)] = str(passStats[passNum]['OptNotImpr']) + ' ({:.2f}%)'.format(enumCntPcnt) - ws[chr(ord(col)+3)+str(row)].alignment = Alignment(horizontal='right') - - enumCntPcnt = float(passStats[passNum]['TimeoutImpr']) / passStats[passNum]['EnumCnt'] * 100.0 - ws[chr(ord(col)+4)+str(row)] = str(passStats[passNum]['TimeoutImpr']) + ' ({:.2f}%)'.format(enumCntPcnt) - 
ws[chr(ord(col)+4)+str(row)].alignment = Alignment(horizontal='right') - - enumCntPcnt = float(passStats[passNum]['TimeoutNotImpr']) / passStats[passNum]['EnumCnt'] * 100.0 - ws[chr(ord(col)+5)+str(row)] = str(passStats[passNum]['TimeoutNotImpr']) + ' ({:.2f}%)'.format(enumCntPcnt) - ws[chr(ord(col)+5)+str(row)].alignment = Alignment(horizontal='right') - - ws[chr(ord(col)+6)+str(row)] = passStats[passNum]['AverageSizeToEnum'] - ws[chr(ord(col)+6)+str(row)].alignment = Alignment(horizontal='right') - - ws[chr(ord(col)+7)+str(row)] = passStats[passNum]['LargestOptimalRegion'] - ws[chr(ord(col)+7)+str(row)].alignment = Alignment(horizontal='right') - - ws[chr(ord(col)+8)+str(row)] = passStats[passNum]['LargestImprovedRegion'] - ws[chr(ord(col)+8)+str(row)].alignment = Alignment(horizontal='right') - - # Prepare to write for next pass if there is any. - row += 3 - - wb.save(output) + return stats + + +def printStats(stats): + for bench in stats: + print('Printing stats for {}'.format(bench)) + for passNum in stats[bench]: + print(' Pass No. {}'.format(passNum)) + for stat in stats[bench][passNum]: + print(' {}: {}'.format(stat, stats[bench][passNum][stat])) + + +def separatePasses(stats): + ''' + Change mapping of stats from 'dict[bench --> passNum]' to 'dict[passNum --> bench]' + ''' + passStats = {} + for bench in stats: + for passNum in stats[bench]: + if passNum not in passStats.keys(): + passStats[passNum] = {} + passStats[passNum][bench] = stats[bench][passNum] + + return passStats + +def createSpreadsheets(stats, output): + if 'csv' not in output[-3:]: + output += '.csv' + + with open(output, 'w', newline='') as file: + # Column header for csv file + fieldnames = [ + 'Benchmark', + 'Regions processed', + 'Sched. Reverted', + 'Passed to B&B', + 'Optimal and improved', + 'Optimal and not improved', + 'Timed out and improved', + 'Timed out and not improved', + 'Avg. Region size passed to B&B', + 'Largest optimal region', + 'Largest improved region' + ] + writer = csv.DictWriter(file, fieldnames=fieldnames) + + for passNum in stats: + totalProcessed = 0 + totalReverted = 0 + totalEnumCnt = 0 + totalOptImpr = 0 + totalOptNotImpr = 0 + totalTimedoutImpr = 0 + totalTimedOutNotImpr = 0 + totalLargestOptRegion = -1 + totalLargestImprRegion = -1 + totalInstrToEnum = 0 + + writer.writerow({'Benchmark': 'Pass No. {}'.format(passNum)}) + writer.writeheader() + for bench in stats[passNum]: + # Format output with format: # (#.##%) + passedToEnum = getPercentageString( + stats[passNum][bench]['EnumCnt'], stats[passNum][bench]['TotalProcessed']) + optImprv = getPercentageString( + stats[passNum][bench]['OptImpr'], stats[passNum][bench]['EnumCnt']) + optNotImprv = getPercentageString( + stats[passNum][bench]['OptNotImpr'], stats[passNum][bench]['EnumCnt']) + timedoutImpr = getPercentageString( + stats[passNum][bench]['TimeoutImpr'], stats[passNum][bench]['EnumCnt']) + timedoutNotImpr = getPercentageString( + stats[passNum][bench]['TimeoutNotImpr'], stats[passNum][bench]['EnumCnt']) + + # Write the current bench's final formatted string to csv file + writer.writerow({ + 'Benchmark': bench, + 'Regions processed': stats[passNum][bench]['TotalProcessed'], + 'Sched. Reverted': stats[passNum][bench]['SchedRevert'], + 'Passed to B&B': passedToEnum, + 'Optimal and improved': optImprv, + 'Optimal and not improved': optNotImprv, + 'Timed out and improved': timedoutImpr, + 'Timed out and not improved': timedoutNotImpr, + 'Avg. 
Region size passed to B&B': stats[passNum][bench]['AverageSizeToEnum'], + 'Largest optimal region': stats[passNum][bench]['LargestOptimalRegion'], + 'Largest improved region': stats[passNum][bench]['LargestImprovedRegion'] + }) + + # Get stats for the overall run + totalProcessed += stats[passNum][bench]['TotalProcessed'] + totalReverted += stats[passNum][bench]['SchedRevert'] + totalEnumCnt += stats[passNum][bench]['EnumCnt'] + totalOptImpr += stats[passNum][bench]['OptImpr'] + totalOptNotImpr += stats[passNum][bench]['OptNotImpr'] + totalTimedoutImpr += stats[passNum][bench]['TimeoutImpr'] + totalTimedOutNotImpr += stats[passNum][bench]['TimeoutNotImpr'] + if totalLargestOptRegion < stats[passNum][bench]['LargestOptimalRegion']: + totalLargestOptRegion = stats[passNum][bench]['LargestOptimalRegion'] + if totalLargestImprRegion < stats[passNum][bench]['LargestImprovedRegion']: + totalLargestImprRegion = stats[passNum][bench]['LargestImprovedRegion'] + totalInstrToEnum += stats[passNum][bench]['TotalInstr'] + + passedToEnum_ = getPercentageString(totalEnumCnt, totalProcessed) + optImprv_ = getPercentageString(totalOptImpr, totalEnumCnt) + optNotImprv_ = getPercentageString(totalOptNotImpr, totalEnumCnt) + timedoutImpr_ = getPercentageString(totalTimedoutImpr, totalEnumCnt) + timedoutNotImpr_ = getPercentageString( + totalTimedOutNotImpr, totalEnumCnt) + totalCurAvgToEnum = -1 + if totalEnumCnt != 0: + totalCurAvgToEnum = totalInstrToEnum / totalEnumCnt + writer.writerow({ + 'Benchmark': 'Overall', + 'Regions processed': totalProcessed, + 'Sched. Reverted': totalReverted, + 'Passed to B&B': passedToEnum_, + 'Optimal and improved': optImprv_, + 'Optimal and not improved': optNotImprv_, + 'Timed out and improved': timedoutImpr_, + 'Timed out and not improved': timedoutNotImpr_, + 'Avg. 
Region size passed to B&B': totalCurAvgToEnum, + 'Largest optimal region': totalLargestOptRegion, + 'Largest improved region': totalLargestImprRegion + }) + + writer.writerow({'Benchmark': ''}) + def main(args): + if args.verbose: + logging.basicConfig(level=logging.DEBUG) + + filePaths = get_bench_log_paths(args.inputFolder, args.benchmark) + # Start stats collection - parseStats(args.inputFolder) + stats = parseStats(filePaths) if args.verbose: - printStats() + printStats(stats) + + finalStats = separatePasses(stats) if not args.disable: - createSpreadsheets(args.output) + if args.output is None: + # Name the output after the input run directory, e.g. 'optsched-stats-optsched-run-01'. + filename = 'optsched-stats-' + os.path.basename(os.path.normpath(args.inputFolder)) + else: + filename = args.output + + createSpreadsheets(finalStats, filename) + if __name__ == '__main__': - parser = argparse.ArgumentParser(description='Script to extract OptSched stats', \ - formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser = argparse.ArgumentParser(description='Script to extract OptSched stats', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + + parser.add_argument(dest='inputFolder', + help='The path to a benchmark directory') parser.add_argument('--verbose', '-v', action='store_true', default=False, @@ -338,7 +327,6 @@ def main(args): help='Print the stats to terminal') parser.add_argument('--output', '-o', - default='optsched-stats', dest='output', help='Output spreadsheet filepath') @@ -347,10 +335,11 @@ def main(args): dest='disable', help='Disable spreadsheet output.') - parser.add_argument('--input', '-i', - default='.', - dest='inputFolder', - help='The path to scan for benchmark directories') + parser.add_argument('--benchmark', '-b', + default='plaid', + choices=['plaid', 'shoc'], + dest='benchmark', + help='Select the benchmarking suite to parse for.') args = parser.parse_args() diff --git a/util/plaidbench/get-sched-length.py b/util/plaidbench/get-sched-length.py index 7b230318..6aa0ed5a 100644 --- a/util/plaidbench/get-sched-length.py +++ b/util/plaidbench/get-sched-length.py @@ -1,16 +1,15 @@ #!/usr/bin/python3 ''' ********************************************************************************** -Description: Extract schedule length stats from plaidbench runs. +Description: Extract schedule length stats from a plaidbench run. Author: Vang Thao Created: December 30, 2019 -Last Update: December 30, 2019 +Last Update: September 2020 ********************************************************************************** OUTPUT: - This script takes in data from plaidbench runs and output a spreadsheet - containing the average schedule length for each benchmark and the overall - average schedule length. + This script takes in data from a plaidbench run and outputs a spreadsheet + containing the average schedule length for each benchmark. Spreadsheet 1: schedule-length.xlsx Requirements: @@ -21,169 +20,112 @@ HOW TO USE: 1.) Run a plaidbench benchmarks with run-plaidbench.sh to generate a directory containing the results for the run. - 2.) Move the directory into a separate folder containing only the - directories generated by the script. - 3.) Pass the path to the folder as an input to this script with - the -i option. + 2.) Pass the path to the folder as an input to this script. Example: - ./get-sched-length.py -i /home/tom/plaidbench-runs - - where plaidbench-runs/ contains - plaidbench-optsched-01/ - plaidbench-optsched-02/ - ... - plaidbench-amd-01/ - ...
+ ./get-sched-length.py /home/tom/plaidbench-run-01 ''' +import argparse +import logging import os import re -import argparse +import sys from openpyxl import Workbook from openpyxl.styles import Font -# For AMD -RE_DAG_NAME = re.compile(r'Processing DAG (.*) with') -RE_SCHED_LENGTH = re.compile(r'The list schedule is of length (\d+) and') - -# For OptSched -RE_PASS_NUM = re.compile(r'End of (.*) pass through') -RE_DAG_INFO = re.compile(r'INFO: Best schedule for DAG (.*) has cost (\d+) and length (\d+). The schedule is (.*) \(Time') - -# Contains all of the stats -benchStats = {} -# Contain cumulative stats for the run -cumulativeStats = {} - -# List of benchmark names -benchmarks = [ - 'densenet121', - 'densenet169', - 'densenet201', - 'inception_resnet_v2', - 'inception_v3', - 'mobilenet', - 'nasnet_large', - 'nasnet_mobile', - 'resnet50', - 'vgg16', - 'vgg19', - 'xception', - 'imdb_lstm', -] -def parseStats(inputFolder, ignoreFolders): - scanDirPath = os.path.abspath(inputFolder) - - # Get name of all directories in the specified folder - subfolders = [f.name for f in os.scandir(path=scanDirPath) if f.is_dir() ] - - # For each folder - for folderName in subfolders: - if folderName in ignoreFolders: - continue - name = folderName.split('-') - - # Get the run number from the end - # of the folder name - runNumber = name[-1] - - # Get the name of the run - # and exclude the run number - nameOfRun = '-'.join(name[:-1]) - - # Create an entry in the stats for the - # name of the run - if (nameOfRun not in benchStats): - benchStats[nameOfRun] = {} - cumulativeStats[nameOfRun] = {} - - cumulativeStats[nameOfRun]['average'] = 0.0 - cumulativeStats[nameOfRun]['total'] = 0.0 - cumulativeStats[nameOfRun]['numRegions'] = 0 - cumulativeStats[nameOfRun]['maxLength'] = 0 - - for bench in benchmarks: - # Get the path to the log file - currentPath = os.path.join(inputFolder, folderName) - currentPath = os.path.join(currentPath, bench) - currentLogFile = os.path.join(currentPath, bench + '.log') - - stats = {} - stats['average'] = 0.0 - stats['total'] = 0.0 - stats['numRegions'] = 0 - stats['maxLength'] = 0 - - # First check if log file exists. - if os.path.exists(currentLogFile): - # Open log file if it exists. - with open(currentLogFile) as file: - # Read the whole log file - # and split the scheduling - # regions into a list - log = file.read() - blocks = log.split('********** Opt Scheduling **********')[1:] - - # Iterate over each scheduling region - for block in blocks: - dagName = '' - schedLength = 0 - - # Skip first pass because it isn't the - # final schedule - getPassNum = RE_PASS_NUM.search(block) - if getPassNum: - passNum = getPassNum.group(1) - if passNum == 'first': - continue - - # First check if B&B is enabled because - # with B&B enabled, the final output will - # be different. - # If B&B is not enabled, check for - # schedule from heuristic. 
- DAGInfo = RE_DAG_INFO.search(block) - if (DAGInfo): - dagName = DAGInfo.group(1) - schedLength = int(DAGInfo.group(3)) - else: - getSchedLength = RE_SCHED_LENGTH.search(block) - schedLength = int(getSchedLength.group(1)) - - stats['total'] += schedLength - stats['numRegions'] += 1 - - if stats['maxLength'] < schedLength: - stats['maxLength'] = schedLength - - if stats['numRegions'] != 0: - stats['average'] = stats['total']/stats['numRegions'] - - benchStats[nameOfRun][bench] = stats - - cumulativeStats[nameOfRun]['total'] += stats['total'] - cumulativeStats[nameOfRun]['numRegions'] += stats['numRegions'] - if cumulativeStats[nameOfRun]['maxLength'] < stats['maxLength']: - cumulativeStats[nameOfRun]['maxLength'] = stats['maxLength'] - - if cumulativeStats[nameOfRun]['numRegions'] != 0: - cumulativeStats[nameOfRun]['average'] = float(cumulativeStats[nameOfRun]['total']) \ - / cumulativeStats[nameOfRun]['numRegions'] - -def printStats(): - for nameOfRun in benchStats: - print('{}'.format(nameOfRun)) - for bench in benchmarks: - print(' {} : Average: {:0.2f} Max : {}'.format(bench, - benchStats[nameOfRun][bench]['average'], - benchStats[nameOfRun][bench]['maxLength'])) - print(' Overall Average : {:0.2f} Overall Max : {}'.format(cumulativeStats[nameOfRun]['average'], - cumulativeStats[nameOfRun]['maxLength'])) - -def createSpreadsheets(output): - if 'xls' not in output[-4:]: - output += '.xlsx' +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from readlogs import * + +def parseStats(filePaths): + # Get logger + logger = logging.getLogger('parseStats') + + # Overall stats for the benchmark suite + stats = {} + + for bench in filePaths: + curStats = {} + curStats['average'] = -1.0 + curStats['total'] = 0.0 + curStats['numRegions'] = 0 + curStats['maxLength'] = -1 + + # First check if log file exists. + if os.path.exists(filePaths[bench]): + # Open log file if it exists. + with open(filePaths[bench]) as file: + # Read the whole log file + # and split the scheduling + # regions into a list + log = file.read() + blocks = split_blocks(log) + + # Iterate over each scheduling region + for block in blocks: + events = keep_only_first_event(parse_events(block)) + + # Skip first pass because it isn't the + # final schedule + if ('PassFinished' in events.keys() and events['PassFinished']['num'] == 1): + continue + + # First check if B&B is enabled because + # with B&B enabled, the final output will + # be different. + # If B&B is not enabled, check for + # schedule from heuristic. 
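The sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) line that now appears at the top of these scripts exists only to make the shared readlogs package importable: the two dirname calls climb from the script's location up to util/, which contains readlogs/__init__.py. A quick illustration of the path arithmetic (the checkout path shown is hypothetical):

    import os

    script = '/home/tom/OptSched/util/plaidbench/get-sched-length.py'  # hypothetical checkout
    os.path.dirname(script)                   # '/home/tom/OptSched/util/plaidbench'
    os.path.dirname(os.path.dirname(script))  # '/home/tom/OptSched/util', home of readlogs/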
+ schedLength = -1 + if ('Enumerating' in events.keys()): + schedLength = events['BestResult']['length'] + else: + schedLength = events['HeuristicResult']['length'] + + curStats['total'] += schedLength + curStats['numRegions'] += 1 + + if curStats['maxLength'] < schedLength: + curStats['maxLength'] = schedLength + + if curStats['numRegions'] != 0: + curStats['average'] = curStats['total']/curStats['numRegions'] + + stats[bench] = curStats + + return stats + + +def printStats(stats): + overallRegions = 0 + overallTotal = 0 + overallAverage = -1 + overallMaxLength = -1 + + for bench in stats: + print('{} : Average: {:0.2f}, Max : {}, Regions: {}'.format( + bench, + stats[bench]['average'], + stats[bench]['maxLength'], + stats[bench]['numRegions'])) + + overallRegions += stats[bench]['numRegions'] + overallTotal += stats[bench]['total'] + + if overallMaxLength < stats[bench]['maxLength']: + overallMaxLength = stats[bench]['maxLength'] + + if overallRegions != 0: + overallAverage = overallTotal / overallRegions + + print('Overall : Average: {:0.2f} Max : {}, Regions: {}'.format( + overallAverage, + overallMaxLength, + overallRegions)) + + +def createSpreadsheets(stats, outputFile): + if 'xls' not in outputFile[-4:]: + outputFile += '.xlsx' # Create new excel worksheet wb = Workbook() @@ -191,59 +133,78 @@ def createSpreadsheets(output): # Grab the active worksheet ws = wb.active - # Insert title and benchmark names + # Insert title ws['A1'] = 'Benchmarks' ws['A1'].font = Font(bold=True) - row = 3 - for bench in benchmarks: - ws['A' + str(row)] = bench + # Stats entry + col = 'B' + row = 1 + ws[col+str(row)] = 'Average Sched. Length' + ws[chr(ord(col)+1)+str(row)] = 'Max Sched. Length' + ws[chr(ord(col)+2)+str(row)] = 'Regions' + + overallRegions = 0 + overallTotal = 0 + overallAverage = -1 + overallMaxLength = -1 + row = 2 + for bench in stats: + ws['A'+str(row)] = bench + ws[col+str(row)] = stats[bench]['average'] + overallTotal += stats[bench]['total'] + + ws[chr(ord(col)+1)+str(row)] = stats[bench]['maxLength'] + if overallMaxLength < stats[bench]['maxLength']: + overallMaxLength = stats[bench]['maxLength'] + + ws[chr(ord(col)+2)+str(row)] = stats[bench]['numRegions'] + overallRegions += stats[bench]['numRegions'] row += 1 - ws['A' + str(row)] = 'Overall' - ws['A' + str(row)].font = Font(bold=True) + if overallRegions != 0: + overallAverage = overallTotal / overallRegions + + ws['A'+str(row)] = 'Overall' + ws['A'+str(row)].font = Font(bold=True) + ws[col+str(row)] = overallAverage + ws[chr(ord(col)+1)+str(row)] = overallMaxLength + ws[chr(ord(col)+2)+str(row)] = overallRegions + + wb.save(outputFile) - # Stats entry - col = 'B' - for nameOfRun in benchStats: - row = 1 - ws[col + str(row)] = nameOfRun - row = 2 - ws[col+str(row)] = 'Average Sched. Length' - ws[chr(ord(col)+1)+str(row)] = 'Max Sched. Length' - - row = 3 - for bench in benchmarks: - ws[col+str(row)] = benchStats[nameOfRun][bench]['average'] - ws[chr(ord(col)+1)+str(row)] = benchStats[nameOfRun][bench]['maxLength'] - row += 1 - ws[col+str(row)] = cumulativeStats[nameOfRun]['average'] - ws[chr(ord(col)+1)+str(row)] = cumulativeStats[nameOfRun]['maxLength'] - - # Convert column char to ASCII value - # then increment it and convert - # back into char. Used to go to next - # column for next test run. 
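Note that the overall figures reported by printStats and createSpreadsheets are region-weighted: the average is recomputed as overallTotal / overallRegions rather than by averaging the per-benchmark averages, so benchmarks with more scheduling regions contribute proportionally more. A small worked example with hypothetical numbers:

    # bench A: 10 regions, summed length 200  -> per-bench average 20.0
    # bench B: 90 regions, summed length 2700 -> per-bench average 30.0
    #
    # Region-weighted overall: (200 + 2700) / (10 + 90) = 29.0
    # Averaging the two per-bench averages would give 25.0 and over-weight bench A.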
- col = chr(ord(col)+2) - - wb.save(output) def main(args): - # Parse folders to ignore into a list - ignoreFolders = args.ignoreFolders.split(',') + if args.verbose: + logging.basicConfig(level=logging.DEBUG) + + # Get filepaths for the selected benchmark suite + filePaths = get_bench_log_paths(args.inputFolder, args.benchmark) # Start stats collection - parseStats(args.inputFolder, ignoreFolders) + stats = parseStats(filePaths) + # Print stats if enabled if args.verbose: - printStats() + printStats(stats) + # Create spreadsheet if not args.disable: - createSpreadsheets(args.output) + if args.output is None: + # Name the output after the input run directory, e.g. 'schedule-length-plaidbench-run-01'. + filename = 'schedule-length-' + os.path.basename(os.path.normpath(args.inputFolder)) + else: + filename = args.output + + createSpreadsheets(stats, filename) + if __name__ == '__main__': parser = argparse.ArgumentParser(description='Script to extract average schedule length.', - formatter_class=argparse.ArgumentDefaultsHelpFormatter) + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + + parser.add_argument(dest='inputFolder', + help='The path to a benchmark directory') parser.add_argument('--verbose', '-v', action='store_true', default=False, @@ -251,25 +212,19 @@ def main(args): help='Print average schedule lengths to terminal') parser.add_argument('--output', '-o', - default='schedule-length', dest='output', help='Output spreadsheet filepath') parser.add_argument('--disable', '-d', - action='store_true', default=False, - dest='disable', + action='store_true', + default=False, + dest='disable', help='Disable spreadsheet output.') - parser.add_argument('--input', '-i', - default='.', - dest='inputFolder', - help='The path to scan for benchmark directories') - - parser.add_argument('--ignore', - type=str, - default='', - dest='ignoreFolders', - help='List of folders to ignore separated by semi-colon') + parser.add_argument('--benchmark', '-b', + default='plaid', + choices=['plaid', 'shoc'], + dest='benchmark', + help='Select the benchmarking suite to parse for.') args = parser.parse_args() diff --git a/util/plaidbench/plaidbench-validation-test.py b/util/plaidbench/plaidbench-validation-test.py index ba825fe5..2d87f02e 100644 --- a/util/plaidbench/plaidbench-validation-test.py +++ b/util/plaidbench/plaidbench-validation-test.py @@ -1,4 +1,4 @@ -#/usr/bin/python3 +#!/usr/bin/python3 ''' ********************************************************************************** Description: Validation script for OptSched with the plaidbench benchmarking @@ -25,42 +25,22 @@ 2.) Enter in the path to those directories as arguments to this script ''' -import sys -import re -import os import argparse +import logging +import os +import sys -# List of benchmark names -benchmarks = [ - 'densenet121', - 'densenet169', - 'densenet201', - 'inception_resnet_v2', - 'inception_v3', - 'mobilenet', - 'nasnet_large', - 'nasnet_mobile', - 'resnet50', - 'vgg16', - 'vgg19', - 'xception', - 'imdb_lstm', -] - -# Parse for DAG stats -RE_DAG_COST_LOWER_BOUND = re.compile(r'Lower bound of cost before scheduling: (\d+)') -RE_DAG_COST = re.compile(r'INFO: Best schedule for DAG (.*) has cost (\d+) and length (\d+).
The schedule is (.*) \(Time') -# Parse for passthrough number -RE_PASS_NUM = re.compile(r'End of (.*) pass through') +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from readlogs import * # Store DAGs stats for each benchmark and passes dags = [] # Data structure # Run number - # Benchmark Name - # Passthrough Number - # DAG name - # DAG stats +# Benchmark Name +# Passthrough Number +# DAG name +# DAG stats # dags[Run number][Benchmark name][Passthrough number][Dag name][DAG stats] # Store number of DAGs @@ -68,9 +48,107 @@ # Comparison function, takes in a pass number # Valid arguments: 'first' or 'second' -def compareDags(displayMismatches, displayNumLargest, displayNumSmallest, passNum): - if numDags[0][passNum] != numDags[1][passNum]: - print('Error: Different number of dags in each log file for pass {}.'.format(passNum)) + + +def parseStats(filePaths): + # Get logger + logger = logging.getLogger('parseStats') + + # Temp variable to hold the benchmark's stats + stats = {} + + # Collect DAG info from the two run results + for i in range(0, 2): + stats[i] = {} + for bench in filePaths[i]: + logger.debug('Verifying file {} exists'.format( + filePaths[i][bench])) + if os.path.exists(filePaths[i][bench]): + logger.debug( + 'File found! Processing file {}'.format(filePaths[i][bench])) + # Open log file + with open(filePaths[i][bench]) as file: + # Declare containers for this benchmark + curStats = {} + # Use a list instead of a dict to store results for each + # scheduling region to account for duplicate kernels. The + # order of the scheduling regions should not change between + # runs. + curStats['first'] = [] + curStats['second'] = [] + # Read and split scheduling regions + log = file.read() + blocks = split_blocks(log) + for block in blocks: + events = keep_only_first_event(parse_events(block)) + # Get pass num + passNum = 'first' + if 'PassFinished' in events.keys(): + passNumInt = events['PassFinished']['num'] + if passNumInt == 1: + passNum = 'first' + elif passNumInt == 2: + passNum = 'second' + + # Get DAG stats + dagName = events['BestResult']['name'] + dagCost = events['BestResult']['cost'] + \ + events['CostLowerBound']['cost'] + dagLength = events['BestResult']['length'] + dagIsOptimal = events['BestResult']['optimal'] + dagInstrCount = events['ProcessDag']['num_instructions'] + + if events['BestResult']['cost'] < 0: + raise AssertionError("Cost is negative in the following block:\n" + block) + + # Add this DAG's stats to temp stats container + curStats[passNum].append([ + dagName, + dagCost, + dagLength, + dagIsOptimal, + dagInstrCount + ]) + + stats[i][bench] = curStats + else: + logger.warning( + 'Cannot find log file for benchmark {}.'.format(bench)) + + # printStats(stats) + return stats + + +def printStats(stats): + for index in stats: + print('Index: {}'.format(index)) + for bench in stats[index]: + print(' Bench: {}'.format(bench)) + for passNum in stats[index][bench]: + print(' {} pass'.format(passNum.capitalize())) + for region in stats[index][bench][passNum]: + print(' {}: dag cost: {}, length: {}, optimal: {}'. + format(region[0], region[1], region[2], region[3])) + + +def compareDags(displayMismatches, displayNumLargest, displayNumSmallest, stats, passNum): + # Get logger + logger = logging.getLogger('compareDags') + + totalDagCount1 = 0 + totalDagCount2 = 0 + + # Compare the number of scheduling regions in each test. We require the + # number of scheduling regions to be the same. 
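One detail worth calling out in the parseStats above: the recorded DAG cost is reconstructed as an absolute value. The BestResult event carries a cost relative to the region's lower bound, so CostLowerBound is added back before the two runs are compared, mirroring what the old regex-based code did with RE_DAG_COST_LOWER_BOUND. With hypothetical numbers:

    # events['CostLowerBound']['cost'] = 120   # static lower bound for the region
    # events['BestResult']['cost']     = 3     # cost above that lower bound
    # dagCost = 3 + 120 = 123                  # absolute cost, comparable across logs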
+ for bench in stats[0]: + totalDagCount1 += len(stats[0][bench][passNum]) + + for bench in stats[1]: + totalDagCount2 += len(stats[1][bench][passNum]) + + if totalDagCount1 != totalDagCount2: + logger.critical('Different number of dags in each log file.') + sys.exit() # The number of blocks that are optimal in both logs. optimalInBoth = 0 @@ -91,47 +169,66 @@ def compareDags(displayMismatches, displayNumLargest, displayNumSmallest, passNu # Dictionary with the sizes of the mismatches for each mismatched block and the size of the block. mismatches = {} - for bench in benchmarks: - for dagName in dags[0][bench][passNum]: - if dagName not in dags[1][bench][passNum]: - print('Error: Could not find DAG {} in benchmark {} in the second log file.'.format(\ - dagName, bench)) - continue - dag1 = dags[0][bench][passNum][dagName] - dag2 = dags[1][bench][passNum][dagName] - if dag1['isOptimal'] and dag2['isOptimal']: - optimalInBoth+=1 - if dag1['cost'] != dag2['cost']: + for bench in stats[0]: + for i in range(len(stats[0][bench][passNum])): + dag1 = stats[0][bench][passNum][i] + dag2 = stats[1][bench][passNum][i] + + dagName1 = dag1[0] + dagName2 = dag2[0] + dagCost1 = dag1[1] + dagCost2 = dag2[1] + dagLength1 = dag1[2] + dagLength2 = dag2[2] + dagIsOptimal1 = dag1[3] + dagIsOptimal2 = dag2[3] + dagInstrCount1 = dag1[4] + dagInstrCount2 = dag2[4] + + if dagName1 != dagName2 or dagInstrCount1 != dagInstrCount2: + logger.critical( + 'Processing {} in file 1 but could not find a match at the same location in file 2.'.format(dagName1)) + print('Expected {} with instr {} but found {} with instr {} instead'.format( + dagName1, dagInstrCount1, dagName2, dagInstrCount2)) + sys.exit() + + if dagIsOptimal1 and dagIsOptimal2: + optimalInBoth += 1 + if dagCost1 != dagCost2: # There was a mismatch where blocks are optimal in both logs but have different costs misNonEqual += 1 - mismatches[dagName] = {} - mismatches[dagName]['length'] = dag1['length'] - mismatches[dagName]['misSize'] = abs(dag1['cost'] - dag2['cost']) - #print('Mismatch for dag ' + dagName + ' (Both optimal with non-equal cost)') - - elif dag1['isOptimal']: - optimalLog1+=1 - if dag1['cost'] > dag2['cost']: + mismatches[dagName1] = {} + mismatches[dagName1]['length'] = dagLength1 + mismatches[dagName1]['misSize'] = abs( + dagCost1 - dagCost2) + # print('Mismatch for dag ' + dagName + ' (Both optimal with non-equal cost)') + + elif dagIsOptimal1: + optimalLog1 += 1 + if dagCost1 > dagCost2: # There was a mismatch where block is optimal in log 1 but it has a higher cost than the non-optimal block in log 2 misBlk1Opt += 1 - mismatches[dagName] = {} - mismatches[dagName]['length'] = dag1['length'] - mismatches[dagName]['misSize'] = dag1['cost'] - dag2['cost'] - #print('Mismatch for dag ' + dagName + ' (Only optimal in log 1 but has higher cost than the non-optimal block in log 2)') - - elif dag2['isOptimal']: - optimalLog2+=1 - if dag2['cost'] > dag1['cost']: + mismatches[dagName1] = {} + mismatches[dagName1]['length'] = dagLength1 + mismatches[dagName1]['misSize'] = dagCost1 - \ + dagCost2 + # print('Mismatch for dag ' + dagName + ' (Only optimal in log 1 but has higher cost than the non-optimal block in log 2)') + + elif dagIsOptimal2: + optimalLog2 += 1 + if dagCost2 > dagCost1: # There was a mismatch where block is optimal in log 2 but it has a higher cost than the non-optimal block in log 1 misBlk2Opt += 1 - mismatches[dagName] = {} - mismatches[dagName]['length'] = dag1['length'] - mismatches[dagName]['misSize'] = dag2['cost'] - dag1['cost'] 
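Since each region is stored above as a positional list [name, cost, length, isOptimal, instrCount], compareDags has to unpack it by index (dag1[0], dag1[1], and so on). If this code is touched again, a NamedTuple would make those accesses self-documenting; a sketch of that alternative, not what this patch does:

    from typing import NamedTuple

    class RegionStats(NamedTuple):
        name: str
        cost: int
        length: int
        is_optimal: bool
        instr_count: int

    # curStats[passNum].append(RegionStats(dagName, dagCost, dagLength,
    #                                      dagIsOptimal, dagInstrCount))
    # ...and compareDags could compare dag1.cost with dag2.cost instead of dag1[1] with dag2[1].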
- #print('Mismatch for dag ' + dagName + ' (Only optimal in log 2 but has higher cost than the non-optimal block in log 1)') + mismatches[dagName1] = {} + mismatches[dagName1]['length'] = dagLength1 + mismatches[dagName1]['misSize'] = dagCost2 - \ + dagCost1 + # print('Mismatch for dag ' + dagName + ' (Only optimal in log 2 but has higher cost than the non-optimal block in log 1)') + print('Optimal Block Stats for {} pass'.format(passNum)) print('-----------------------------------------------------------') - print('Blocks in log file 1: {}'.format(numDags[0][passNum])) - print('Blocks in log file 2: {}'.format(numDags[1][passNum])) + print('Blocks in log file 1: {}'.format(totalDagCount1)) + print('Blocks in log file 2: {}'.format(totalDagCount2)) print('Blocks that are optimal in both files: ' + str(optimalInBoth)) print('Blocks that are optimal in log 1 but not in log 2: ' + str(optimalLog1)) print('Blocks that are optimal in log 2 but not in log 1: ' + str(optimalLog2)) @@ -147,93 +244,68 @@ def compareDags(displayMismatches, displayNumLargest, displayNumSmallest, passNu if displayMismatches: if numLarMisPrt != 0: - print('The ' + str(numLarMisPrt) + ' mismatched blocks with the largest difference in cost') + print('The ' + str(numLarMisPrt) + + ' mismatched blocks with the largest difference in cost') print('-----------------------------------------------------------') - sortedMaxMis = sorted(mismatches.items(), key=lambda i: (mismatches[i[0]]['misSize'], i[0]), reverse=True) + sortedMaxMis = sorted(mismatches.items(), key=lambda i: ( + mismatches[i[0]]['misSize'], i[0]), reverse=True) i = 1 for block in sortedMaxMis[:numLarMisPrt]: print(str(i) + ':') - print('Block Name: ' + block[0] + '\nLength: ' + str(block[1]['length']) + '\nDifference in cost: ' + str(block[1]['misSize'])) + print('Block Name: ' + block[0] + '\nLength: ' + str( + block[1]['length']) + '\nDifference in cost: ' + str(block[1]['misSize'])) i += 1 print('-----------------------------------------------------------\n') if numSmlBlkPrt != 0: print('The smallest ' + str(numSmlBlkPrt) + ' mismatched blocks') print('-----------------------------------------------------------') - sortedMisSize = sorted(mismatches.items(), key=lambda i: (mismatches[i[0]]['length'], i[0])) + sortedMisSize = sorted(mismatches.items(), key=lambda i: ( + mismatches[i[0]]['length'], i[0])) i = 1 for block in sortedMisSize[:numSmlBlkPrt]: print(str(i) + ':') - print('Block Name: ' + block[0] + '\nLength: ' + str(block[1]['length']) + '\nDifference in cost: ' + str(block[1]['misSize'])) + print('Block Name: ' + block[0] + '\nLength: ' + str( + block[1]['length']) + '\nDifference in cost: ' + str(block[1]['misSize'])) i += 1 print('-----------------------------------------------------------') - def main(args): - directories = [args.directory1, args.directory2] - # Collect DAG info from the two run results - for i in range(0,2): - # Temp variable to hold the number of DAGs - tempNumDags = {} - tempNumDags['first'] = 0 - tempNumDags['second'] = 0 - - # Temp variable to hold the benchmark's stats - benchStats = {} - for bench in benchmarks: - # Declare containers for this benchmark - benchStats[bench] = {} - benchStats[bench]['first'] = {} - benchStats[bench]['second'] = {} - - # Open log file - currentPath = os.path.join(directories[i], bench) - currentLogFile = os.path.join(currentPath, bench + '.log') - with open(currentLogFile) as logfile: - # Read and split scheduling regions - log = logfile.read() - blocks = log.split('********** Opt Scheduling 
**********')[1:] - for block in blocks: - # Get pass num - getPass = RE_PASS_NUM.search(block) - passNum = getPass.group(1) - - # Get DAG stats - dagLwrBound = RE_DAG_COST_LOWER_BOUND.search(block) - dagStats = RE_DAG_COST.search(block) - dag = {} - dagName = dagStats.group(1) - dag['dagName'] = dagName - dag['cost'] = int(dagStats.group(2)) + int(dagLwrBound.group(1)) - dag['length'] = dagStats.group(3) - dag['isOptimal'] = (dagStats.group(4) == 'optimal') - - # Add this DAG's stats to temp stats container - benchStats[bench][passNum][dagName] = dag - # Record number of DAGs - tempNumDags[passNum] += 1 - - # Move temp stats to global vars - dags.append(benchStats) - numDags.append(tempNumDags) + if args.verbose: + logging.basicConfig(level=logging.DEBUG) + + filePaths1 = get_bench_log_paths(args.directory1, args.benchmark) + filePaths2 = get_bench_log_paths(args.directory2, args.benchmark) + + filePaths = [filePaths1, filePaths2] + + stats = parseStats(filePaths) # Compare DAGs from first passthrough - compareDags(args.displayMismatches, args.displayNumLargest, args.displayNumSmallest, 'first') + compareDags(args.displayMismatches, args.displayNumLargest, + args.displayNumSmallest, stats, 'first') # Compare DAGs from second passthrough - compareDags(args.displayMismatches, args.displayNumLargest, args.displayNumSmallest, 'second') + compareDags(args.displayMismatches, args.displayNumLargest, + args.displayNumSmallest, stats, 'second') + if __name__ == '__main__': - parser = argparse.ArgumentParser(description='Validation script for OptSched on plaidbench', \ - formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser = argparse.ArgumentParser(description='Validation script for OptSched', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + + parser.add_argument('--verbose', '-v', + action='store_true', default=False, + dest='verbose', + help='Print the stats to terminal') # Run 1 directory for plaidbench parser.add_argument('directory1', - help='Directory containing a plaidbench run') + help='Directory containing a benchmark run') # Run 2 directory for plaidbench parser.add_argument('directory2', - help='Directory containing a plaidbench run') + help='Directory containing a benchmark run') # Option to display mismatches, defaults to off parser.add_argument('--mismatches', '-m', action='store_true', @@ -251,6 +323,12 @@ def main(args): dest='displayNumSmallest', help='Print out x number of mismatches with smallest number of instructions. 
Requires display mismatches.') + parser.add_argument('--benchmark', '-b', + default='plaid', + choices=['plaid', 'shoc'], + dest='benchmark', + help='Select the benchmarking suite to parse for.') + args = parser.parse_args() main(args) diff --git a/util/plaidbench/run-plaidbench.py b/util/plaidbench/run-plaidbench.py index 2b112d7a..e2fd3827 100755 --- a/util/plaidbench/run-plaidbench.py +++ b/util/plaidbench/run-plaidbench.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 -import subprocess import argparse import os +import subprocess #************************************************************************************** #Description: Run all plaidbench benchmarks and redirect output to a directory diff --git a/util/readlogs/__init__.py b/util/readlogs/__init__.py index 68317430..519dce8c 100644 --- a/util/readlogs/__init__.py +++ b/util/readlogs/__init__.py @@ -1,4 +1,95 @@ import json +import os +import re + +# Ignore these functions on the AMDGPU +# They are emitted before scheduling +OPT_IGNORE = [ + 'copyBufferRect', + 'copyBufferRectAligned', + 'copyBuffer', + 'copyBufferAligned', + 'fillBuffer', + 'copyBufferToImage', + 'copyImageToBuffer', + 'copyImage', + 'copyImage1DA', + 'fillImage', + 'scheduler' +] + +OPT_RE_OCCUPANCY = re.compile(r'Final occupancy for function (.*):(\d+)') +OPT_RE_REVERT_SCHED = re.compile( + r'Reverting Scheduling because of a decrease in occupancy from') + +def get_bench_log_paths(inputFolder, benchmark): + ''' + Returns a `dict[benchmark --> path/to/benchmark.log]` mapping each benchmark to its corresponding log file. + + Parameters: + inputFolder -- A string containing the name of a directory with plaidbench or SHOC results. + benchmark -- Commandline argument selecting the benchmarking suite ('shoc' or 'plaid'). + ''' + + OPT_SHOC_BENCHMARKS = [ + # 'BusSpeedDownload', + # 'BusSpeedReadback', + # 'MaxFlops', + # 'DeviceMemory', + # 'KernelCompile', + # 'QueueDelay', + # 'BFS', + 'FFT', + 'GEMM', + 'MD', + # 'MD5Hash', + # 'Reduction', + # 'Scan', + 'Sort', + 'Spmv', + 'Stencil2D', + # 'Triad', + # 'S3D' + ] + + OPT_PLAID_BENCHMARKS = [ + 'densenet121', + 'densenet169', + 'densenet201', + 'inception_resnet_v2', + 'inception_v3', + 'mobilenet', + 'nasnet_large', + 'nasnet_mobile', + 'resnet50', + 'vgg16', + 'vgg19', + 'xception', + 'imdb_lstm', + ] + + filepaths = {} + + # Do a lowercase string comparison to determine the benchmark suite + suite = benchmark.lower() + + # Paths for shoc benchmarks + if suite == 'shoc': + logDirectory = os.path.join(inputFolder, 'Logs') + for bench in OPT_SHOC_BENCHMARKS: + filename = 'dev0_{}.err'.format(bench) + filepath = os.path.join(logDirectory, filename) + filepaths[bench] = filepath + + # Paths for PlaidML benchmarks + elif suite == 'plaid': + for bench in OPT_PLAID_BENCHMARKS: + benchmarkDirectory = os.path.join(inputFolder, bench) + filename = '{}.log'.format(bench) + filepath = os.path.join(benchmarkDirectory, filename) + filepaths[bench] = filepath + + return filepaths def split_blocks(log): ''' @@ -60,3 +151,14 @@ def parse_as_singular_events(logs): for k, v in logs.items(): if len(v) != 1: raise AssertionError('Duplicate log events for event ' + k) return {k: v[0] for k, v in logs.items()} + + +def getPercentageString(num, dem): + ''' + Return `num` formatted together with its percentage of `dem`, e.g. '12 (48.00%)'. + Returns '0 (0.00%)' when the denominator is zero. + ''' + if dem == 0: + return '0 (0.00%)' + + formattedPcnt = num / dem * 100 + return '{} ({:.2f}%)'.format(num, formattedPcnt)
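Taken together, the readlogs helpers give every extraction script the same pipeline: resolve per-benchmark log paths, split each log into scheduling-region blocks, and turn each block into a dictionary of events keyed by event name. A hedged end-to-end sketch of how the scripts above use it; the event names and fields shown are the ones consumed in this patch, and the run-directory name is only an example:

    from readlogs import (get_bench_log_paths, split_blocks, parse_events,
                          keep_only_first_event, getPercentageString)

    filePaths = get_bench_log_paths('plaidbench-optsched-01/', 'plaid')
    # e.g. filePaths['resnet50'] == 'plaidbench-optsched-01/resnet50/resnet50.log'

    with open(filePaths['resnet50']) as f:
        blocks = split_blocks(f.read())

    enumerated = 0
    for block in blocks:
        events = keep_only_first_event(parse_events(block))
        # Keys used by the scripts in this patch include 'ProcessDag', 'PassFinished',
        # 'Enumerating', 'DagSolvedOptimally', 'DagTimedOut', 'BestResult',
        # 'HeuristicResult', and 'CostLowerBound'.
        if 'Enumerating' in events:
            enumerated += 1

    print(getPercentageString(enumerated, len(blocks)))  # e.g. '12 (48.00%)'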