Skip to content

Commit

Permalink
Fix import utils
Browse files Browse the repository at this point in the history
  • Loading branch information
Quincunx271 committed Aug 4, 2021
1 parent d9bbcdd commit 5363c47
Show file tree
Hide file tree
Showing 2 changed files with 82 additions and 68 deletions.
5 changes: 2 additions & 3 deletions util/analyze/imports/import_cpu2006.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#!/usr/bin/env python3

import os
import re

from . import import_utils

Expand All @@ -13,8 +12,8 @@ def parse(file):
with open(file, 'r') as f:
return import_utils.parse_multi_bench_file(
f.read(),
benchstart=re.compile(r'Building (?P<name>\S*)'),
filename=re.compile(r'/[fc]lang\b.*\s(\S+\.\S+)\n'))
benchstart=r'Building (?P<name>\S*)',
filename=r'/[fc]lang\b.*\s(\S+\.\S+)\n')


if __name__ == '__main__':
Expand Down
145 changes: 80 additions & 65 deletions util/analyze/imports/import_utils.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
import pickle
import json
import itertools
import json
import pickle
import re
import sys
from collections import namedtuple
from dataclasses import dataclass
from typing import List, Match, Optional, Pattern, Union

from .._types import Logs, Benchmark, Block
from .._types import Benchmark, Block, Logs

# Matches a log line that carries a ProcessDag event and captures the value of
# its "name" field in the named group `name`.
_RE_REGION_INFO = re.compile(r'EVENT:.*ProcessDag.*"name": "(?P<name>[^"]*)"')
# Literal banner line used to split a benchmark's log into scheduling regions.
_REGION_DELIMITER = 'INFO: ********** Opt Scheduling **********'
# Compiled form of the banner; re.escape makes the asterisks match literally.
_RE_REGION_DELIMITER = re.compile(re.escape(_REGION_DELIMITER))


def import_main(parsefn, *, description):
Expand All @@ -24,18 +26,39 @@ def import_main(parsefn, *, description):
pickle.dump(result, f)


def parse_multi_bench_file(logtext: str, *, benchstart: Union[Pattern, str], filename: Optional[Union[Pattern, str]] = None):
    '''
    Parses a log that contains the output of several benchmarks.

    logtext: the full text of the log.
    benchstart: pattern (or pattern source) marking the start of a benchmark.
        Its named groups become the benchmark's info; _parse_benchmark reads
        the `name` group.
    filename: optional pattern (or pattern source) handed to _parse_benchmark
        as `filenamere` to locate file names inside each benchmark.

    Returns a Logs built from one parsed benchmark per `benchstart` match, in
    log order. A log with no match yields a Logs with no benchmarks.
    '''
    # re.compile accepts an already-compiled pattern and returns it unchanged,
    # so both strings and patterns are normalised here.
    if filename is not None:
        filename = re.compile(filename)
    benchstart = re.compile(benchstart)

    bench_matches = list(benchstart.finditer(logtext))
    # A benchmark ends where the next one starts; the last one runs to the
    # end of the log.
    bench_ends = [m.start() for m in bench_matches[1:]] + [len(logtext)]

    benchmarks = []
    for index, (benchm, end) in enumerate(zip(bench_matches, bench_ends)):
        # If this is the first benchmark in the file, we want to start from the
        # start of the file so that we don't lose any information.
        start = 0 if index == 0 else benchm.start()
        # The RE can specify any extra properties.
        benchmarks.append(_parse_benchmark(benchm.groupdict(), logtext,
                                           start, end,
                                           filenamere=filename))

    return Logs(benchmarks)


def parse_single_bench_file(logtext, *, benchname, filename=None):
def parse_single_bench_file(logtext, *, benchname, filename: Optional[Union[Pattern, str]] = None):
if filename is not None:
filename = re.compile(filename)
return Logs([
_parse_benchmark(
{'name': benchname},
Expand All @@ -45,21 +68,10 @@ def parse_single_bench_file(logtext, *, benchname, filename=None):
])


_FileInfo = namedtuple('_FileInfo', ('filename', 'from_pos'))


def _each_cons(iterable, n):
'''
Iterates over each consecutive n items of the iterable.
_each_cons((1, 2, 3, 4), 2) # (1, 2), (2, 3), (3, 4)
'''
iters = [None] * n
iters[0] = iter(iterable)
for i in range(1, n):
iters[i - 1], iters[i] = itertools.tee(iters[i - 1])
next(iters[i], None)
return zip(*iters)
@dataclass
class _FileInfo:
filename: Optional[str]
from_pos: int


class _DummyEnd:
Expand All @@ -73,65 +85,68 @@ def end(self):
return self._end


def _splititer(regex, text, pos=0, endpos=None):
    '''
    'Splits' the string by the regular expression, using an iterable.
    Returns both where the regex matches and where it matched next (or the end).
    '''
    # NOTE(review): the search stops before `endpos`, yet the end sentinel sits
    # at `endpos + 1`; confirm whether callers rely on that extra character.
    if endpos is None:
        endpos = len(text) - 1

    return _each_cons(
        itertools.chain(regex.finditer(text, pos, endpos),
                        (_DummyEnd(endpos + 1),)),
        2
    )


def _filename_info(filenamere: Optional[Pattern], logtext: str, start: int, end: int) -> List[_FileInfo]:
    '''
    Finds every file name mentioned in logtext[start:end].

    filenamere: compiled pattern whose group 1 captures the file name, or None
        when the log carries no file names.

    Returns one _FileInfo per match, in log order, with `from_pos` set to the
    end of the match. Returns an empty list when filenamere is None or nothing
    matches.
    '''
    if filenamere is None:
        return []

    return [
        _FileInfo(filename=filem.group(1), from_pos=filem.end())
        for filem in filenamere.finditer(logtext, start, end)
    ]

def _parse_benchmark(info: dict, logtext: str, start: int, end: int, *, filenamere: Optional[Pattern]):
    '''
    Parses the benchmark found in logtext[start:end] into its blocks.

    info: properties of the benchmark; must contain 'name'.
    filenamere: compiled pattern locating file names (group 1), or None.

    The range is split at every region delimiter and each piece is parsed by
    _parse_block. Returns a Benchmark built from `info` and the parsed blocks;
    a range without any delimiter yields a Benchmark with no blocks.
    '''
    bench_name = info['name']

    files: List[_FileInfo] = _filename_info(filenamere, logtext, start, end)
    if not files:
        # We have an unknown file starting from the very beginning
        files = [_FileInfo(filename=None, from_pos=start)]

    # Allow us to peek ahead by giving a dummy "file" at the end which will never match a block
    files.append(_FileInfo(filename=None, from_pos=end))
    assert len(files) >= 2
    file_pos = 0

    # Materialise the matches so that a range without any delimiter simply
    # produces no blocks instead of failing while pairing the matches up.
    region_matches = list(_RE_REGION_DELIMITER.finditer(logtext, start, end))
    # A block ends where the next delimiter starts; the last one runs to `end`.
    region_ends = [m.start() for m in region_matches[1:]] + [end]

    blocks = []

    for index, (regionm, blk_end) in enumerate(zip(region_matches, region_ends)):
        region_start = regionm.end()
        # Several files may have started since the previous block; skip ahead
        # to the last one that begins before this region.
        while file_pos + 1 < len(files) and region_start > files[file_pos + 1].from_pos:
            file_pos += 1

        assert region_start > files[file_pos].from_pos

        regioninfo = {
            # The delimiter carries no name; _parse_block fills this in from
            # the block's ProcessDag event.
            'name': None,
            'file': files[file_pos].filename,
            'benchmark': bench_name,
        }
        # The first block also keeps whatever precedes the first delimiter.
        blk_start = start if index == 0 else regionm.start()
        blocks.append(_parse_block(regioninfo, logtext,
                                   blk_start, blk_end))

    return Benchmark(info, blocks)


def _parse_block(info, logtext: str, start, end):
    '''
    Parses the block found in logtext[start:end].

    info: dict describing the block; its 'name' entry is set in place from the
        first ProcessDag event of the block.

    Returns a Block built from `info`, the raw text of the range and the
    parsed events. Fails an assertion if the range has no ProcessDag event.
    '''
    events = _parse_events(logtext, start, end)
    raw_log = logtext[start:end]
    assert 'ProcessDag' in events, 'block log contains no ProcessDag event'
    # The block is named after its first ProcessDag event.
    info['name'] = events['ProcessDag'][0]['name']

    return Block(info, raw_log, events)

Expand Down

0 comments on commit 5363c47

Please sign in to comment.