Skip to content

Commit

Permalink
WIP parallelized snowcat
Browse files (browse the repository at this point in the history)
  • Loading branch information
Michael Gilbert committed Nov 21, 2024
1 parent 71c12c3 commit 30e690c
Show file tree
Hide file tree
Showing 2 changed files with 73 additions and 30 deletions.
73 changes: 43 additions & 30 deletions pytimeloop/fastfusion/mapper/per_einsum_mapper_snowcat.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
from copy import deepcopy
from collections import defaultdict
from collections.abc import Callable, Set
from itertools import combinations, product, permutations
from functools import reduce
from operator import or_, mul

from joblib import Parallel, delayed

from combinatorics.dependent_product import dependent_product
from combinatorics.splitter import split_dependent_product

Expand Down Expand Up @@ -109,39 +112,49 @@ def mac(mapping):
parallelized_spaces, task_spaces = \
split_dependent_product(n_split_min=n_jobs, spaces=subspaces)

Parallel(n_jobs=n_jobs)
partial_mappings = list(dependent_product(parallelized_spaces))

count = 0
print(len(list(dependent_product(subspaces))))
for partial_mapping in dependent_product(subspaces):
_, compiled_results = compile_mapping(
partial_mapping, workload, analyzer
)
tile_shape_explorer = explore_tile_shape(
partial_mapping,
einsum_shape,
compiled_results,
max_capacity,
max_fanout,
)
# HACKY: Pop out the subspace object as the first in the iterator
shape_subspace = next(tile_shape_explorer)

for shape, res in tile_shape_explorer:
count += 1
is_pareto, fulltiling = process_result(
res,
shape,
data[einsum_id],
einsum_id,
intermediate_tensors,
def per_worker_exploration(*args):
analyzer = LooptreeWorkloadDependencyAnalyzer(workload)
local_task_spaces = deepcopy(task_spaces)
local_task_spaces[0] = lambda : task_spaces[0](*args)
for partial_mapping in dependent_product(local_task_spaces):
_, compiled_results = compile_mapping(
partial_mapping, workload, analyzer
)
tile_shape_explorer = explore_tile_shape(
partial_mapping,
bindings,
workload,
energy_dict,
equivalent_groups,
explore_fusion_uneven=explore_glb_uneven
einsum_shape,
compiled_results,
max_capacity,
max_fanout,
)
# HACKY: Pop out the subspace object as the first in the iterator
shape_subspace = next(tile_shape_explorer)

for shape, res in tile_shape_explorer:
is_pareto, fulltiling = process_result(
res,
shape,
data[einsum_id],
einsum_id,
intermediate_tensors,
partial_mapping,
bindings,
workload,
energy_dict,
equivalent_groups,
explore_fusion_uneven=explore_glb_uneven
)
print(len(list(dependent_product(subspaces))))

# for pm in partial_mappings:
# per_worker_exploration(*pm)
result = Parallel(n_jobs=n_jobs)(delayed(per_worker_exploration)(*pm)
for pm in partial_mappings)

count = 0
print(len(list(dependent_product(subspaces))))
# if is_pareto:
# shape_subspace.register_pareto_point()

Expand Down
30 changes: 30 additions & 0 deletions tests/test_configs/snowcat.arch.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Architecture description used by the snowcat test configuration.
# It lists three components under architecture.nodes, in this order:
# MainMemory (DRAM), GlobalBuffer (SRAM) and MACC (intmac).
# NOTE(review): the block nesting below was reconstructed from a copy that
# had lost its indentation -- confirm against the file in the repository.
variables:
  global_cycle_seconds: 1e-9
  technology: "45nm"

architecture:
  version: 0.4
  nodes:
    # Backing store; only 'read' and 'write' actions are required of it.
    - !Component
      name: MainMemory
      class: DRAM
      attributes: {width: 256, block_size: 32, word_bits: 8, datawidth: 8}
      required_actions: ['read', 'write']
    # On-chip buffer with the same word settings as MainMemory
    # (block_size 32, word_bits 8, datawidth 8).
    - !Component
      name: GlobalBuffer
      class: SRAM
      attributes:
        # presumably depth is the number of rows and width the bits per
        # row (16384 x 512 bits) -- TODO confirm against the SRAM class.
        depth: 16384
        width: 512
        block_size: 32
        word_bits: 8
        datawidth: 8
        # All ports are combined read/write; no dedicated read-only or
        # write-only ports are declared.
        n_rdwr_ports: 2
        n_rd_ports: 0
        n_wr_ports: 0
      required_actions: ['read', 'write']
    # Compute unit; cycle_time matches variables.global_cycle_seconds (1e-9).
    - !Component
      name: MACC
      class: intmac
      attributes: {datawidth: 8, width: 8, cycle_time: 1e-9}
      required_actions: ['compute']

0 comments on commit 30e690c

Please sign in to comment.