From 1cde00fca2f4b41e4608ae20800223d82f223bec Mon Sep 17 00:00:00 2001 From: Michael Gilbert Date: Mon, 11 Nov 2024 07:58:44 -0500 Subject: [PATCH] Deduplicate similar Einsums --- pytimeloop/fastfusion/layerdeduplication.py | 115 +++++++++--------- pytimeloop/fastfusion/mapper/mapper2.py | 106 +++++++++++++++- .../fastfusion/mapper/per_einsum_mapper.py | 5 +- .../fastfusion/mapper/shape_subspace2.py | 96 +++++++++++++++ tests/fastfusion/test_layerdeduplication.py | 26 +++- .../cascaded_mm_multi_32.workload.yaml | 32 ++--- 6 files changed, 298 insertions(+), 82 deletions(-) create mode 100644 pytimeloop/fastfusion/mapper/shape_subspace2.py diff --git a/pytimeloop/fastfusion/layerdeduplication.py b/pytimeloop/fastfusion/layerdeduplication.py index 0fb62b0..91c855f 100644 --- a/pytimeloop/fastfusion/layerdeduplication.py +++ b/pytimeloop/fastfusion/layerdeduplication.py @@ -1,5 +1,5 @@ from collections import defaultdict -from itertools import permutations +from itertools import permutations, product from pytimeloop.looptree.mapping_utilities import get_intermediate_tensors @@ -26,69 +26,72 @@ def is_equivalent(einsum_id1, einsum_id2, workload, analyzer): if einsum2_output_tensor is None: einsum2_output_tensor = set() - einsum1_tensors = einsum1_input_tensors | einsum1_output_tensor - einsum2_tensors = einsum2_input_tensors | einsum2_output_tensor - intermediate_tensors = get_intermediate_tensors(workload) - tensor_properties = defaultdict(set) - for tensor in einsum1_input_tensors | einsum2_input_tensors: - tensor_properties[tensor].add('input') - for tensor in einsum1_output_tensor | einsum2_output_tensor: - tensor_properties[tensor].add('input') - for tensor in intermediate_tensors: - if tensor not in tensor_properties: - continue - tensor_properties[tensor].add('intermediate') - tensor_properties = { - tensor: frozenset(properties) - for tensor, properties in tensor_properties.items() - } - property_to_tensors = defaultdict(lambda: (set(), set())) - for tensor, property in tensor_properties: - tensor_sets = property_to_tensors[property] - if tensor in einsum1_tensors: - tensor_sets[0].add(tensor) - else: - tensor_sets[1].add(tensor) - - for tensor_sets in property_to_tensors.values(): - if len(tensor_sets[0]) != len(tensor_sets[1]): - return None, None - - - - # Check if we can rename einsum1 ranks to create einsum2 - for renamed_ranks in permutations(einsum2_ranks): - rank_renaming = { - r1: r2 for r1, r2 in zip(einsum1_ranks, renamed_ranks) + all_tensor_properties = [] + all_tensors = [ + (einsum1_input_tensors, einsum1_output_tensor), + (einsum2_input_tensors, einsum2_output_tensor) + ] + for input_tensors, output_tensors in all_tensors: + tensor_properties = defaultdict(set) + for tensor in input_tensors: + tensor_properties[tensor].add('input') + for tensor in output_tensors: + tensor_properties[tensor].add('output') + for tensor in tensor_properties: + if tensor in intermediate_tensors: + tensor_properties[tensor].add('intermediate') + tensor_properties = { + tensor: frozenset(properties) + for tensor, properties in tensor_properties.items() } - # for tensor_renaming in get_tensor_renamings(property_to_tensors): - for renamed_input_tensors in permutations(einsum2_input_tensors): - input_tensor_renaming = { - t1: t2 for t1, t2 - in zip(einsum1_input_tensors, renamed_input_tensors) + all_tensor_properties.append(tensor_properties) + + property_to_tensors = defaultdict(lambda: (set(), set())) + for i, tensor_properties in enumerate(all_tensor_properties): + for tensor, property in 
tensor_properties.items(): + tensor_sets = property_to_tensors[property] + tensor_sets[i].add(tensor) + + # Check if we can rename tensors in einsum1 to einsum2 + for tensor_renaming in tensor_renamings(property_to_tensors): + # Check if we can rename einsum1 ranks to create einsum2 + for renamed_ranks in permutations(einsum2_ranks): + rank_renaming = { + r1: r2 for r1, r2 in zip(einsum1_ranks, renamed_ranks) } - for renamed_output_tensors in permutations(einsum2_output_tensor): - output_tensor_renaming = { - t1: t2 for t1, t2 - in zip(einsum1_output_tensor, renamed_output_tensors) - } - tensor_renaming = input_tensor_renaming | output_tensor_renaming + if not _shape_is_equivalent(rank_renaming, workload): + continue - if not _shape_is_equivalent(rank_renaming, workload): - continue + if not _dependency_is_equivalent(einsum_id1, + einsum_id2, + rank_renaming, + tensor_renaming, + analyzer): + continue - if not _dependency_is_equivalent(einsum_id1, - einsum_id2, - rank_renaming, - tensor_renaming, - analyzer): - continue + return rank_renaming, tensor_renaming + return None, None - return rank_renaming, tensor_renaming - return None, None +def tensor_renamings(property_to_tensors): + for tensors_of_1, tensors_of_2 in property_to_tensors.values(): + if len(tensors_of_1) != len(tensors_of_2): + return + + all_tensors_of_1 = [ + t + for tensors_of_1, _ in property_to_tensors.values() + for t in tensors_of_1 + ] + permutations_of_tensor_2_by_property = [] + for _, tensors_of_2 in property_to_tensors.values(): + permutations_of_tensor_2_by_property.append(permutations(tensors_of_2)) + for permutation_of_2 in product(*permutations_of_tensor_2_by_property): + permutation_of_2 = tuple(t for tupl in permutation_of_2 for t in tupl) + renaming = dict(zip(all_tensors_of_1, permutation_of_2)) + yield renaming def _shape_is_equivalent(rank_renaming, workload): diff --git a/pytimeloop/fastfusion/mapper/mapper2.py b/pytimeloop/fastfusion/mapper/mapper2.py index d5cb462..583a74d 100644 --- a/pytimeloop/fastfusion/mapper/mapper2.py +++ b/pytimeloop/fastfusion/mapper/mapper2.py @@ -1,4 +1,5 @@ from collections import defaultdict +from copy import deepcopy import logging.handlers from pathlib import Path import logging @@ -17,6 +18,7 @@ from pytimeloop.fastfusion.layerdeduplication import is_equivalent from pytimeloop.fastfusion.mapper.logging import make_queue_and_listener from pytimeloop.fastfusion.mapper.per_einsum_mapper import get_top_loop_jobs, mapper_place_fusion_level +from pytimeloop.fastfusion.sim import Tiling, Loop, TensorStorage from pytimeloop.timeloopfe.v4 import Ert from pytimeloop.timeloopfe.common.backend_calls import call_accelergy_verbose @@ -37,6 +39,8 @@ def mapper( log_queue, log_queue_listener = make_queue_and_listener() workload = LooptreeWorkload.parse_cfg(config.root["problem"]) + analyzer = LooptreeWorkloadDependencyAnalyzer(workload) + equivalent_groups = EquivalentGroups.from_workload(workload, analyzer) einsum_name_to_id = workload.einsum_name_to_id() @@ -47,8 +51,15 @@ def mapper( ert = Ert(ert_dict["ERT"]) energy_dict = ert.to_dict() + grouped_similar_einsums = convert_rank_to_group_renaming( + detect_similar_einsums(workload, analyzer), + equivalent_groups + ) + logger.info(f"Found {len(grouped_similar_einsums)} unique Einsums\n" + + f"\tConverter: {grouped_similar_einsums}") + args = get_top_loop_jobs( - einsum_name_to_id=einsum_name_to_id, + einsums_to_explore=list(grouped_similar_einsums.keys()), config=config, pe_array_constraint=pe_array_constraint, 
mac_array_constraint=mac_array_constraint, @@ -61,11 +72,11 @@ def mapper( ) print(f'Number of jobs: {len(args)}') - - logger.debug("Starting workers") + n_workers = 128 + logger.debug(f"Starting {n_workers} workers") log_queue_listener.start() - result = Parallel(n_jobs=128)( + result = Parallel(n_jobs=n_workers)( delayed(mapper_place_fusion_level)(**a) for a in args ) data = defaultdict(dict) @@ -78,5 +89,92 @@ def mapper( log_queue_listener.stop() logger.info(f"Mapper finished for {spec}") + + generated_data = {} + logger.info(f"Generating data for non-unique Einsums") + for ref_einsum, others in grouped_similar_einsums.items(): + for other_einsum, (rank_renaming, tensor_renaming) in others.items(): + logger.info(f"Generating data for {other_einsum}. " + + f"Rank renaming={rank_renaming}. " + + f"Tensor renaming={tensor_renaming}") + generated_data[other_einsum] = generate_data(data[ref_einsum], + rank_renaming, + tensor_renaming) + + for einsum, mapping in generated_data.items(): + data[einsum] = mapping + + logger.info(f"Final set of Einsums: {set(data.keys())}") + + # data has to come out in sorted Einsum-id order + data = {k: v for k, v in sorted(data.items(), key=lambda item: item[0])} + return data + +def generate_data(data, rank_renaming, tensor_renaming): + return { + _convert_tiling(tiling, rank_renaming, tensor_renaming) + : + _convert_stats(stats, rank_renaming, tensor_renaming) + for tiling, stats in data.items() + } + + +def _convert_tiling(tiling: Tiling, rank_renaming, tensor_renaming): + return Tiling( + loops=tuple(Loop(rank_renaming[l.rank_id], l.bound, l.is_spatial) + for l in tiling.loops), + tensors=frozenset(TensorStorage(tensor_renaming[ts.tensor_id], + ts.backer_id, + ts.above_loop_index, + ts.tile_size) + for ts in tiling.tensors) + ) + + +def _convert_stats(stats, rank_renaming, tensor_renaming): + return deepcopy(stats) + + +def detect_similar_einsums(workload, analyzer, return_all_as_unique=False): + if return_all_as_unique: + return {ref: {} for ref in workload.einsum_id_to_name()} + + ref_to_other_einsums = {} + for einsum in workload.einsum_id_to_name(): + found = False + for ref_einsum in ref_to_other_einsums: + rank_renaming, tensor_renaming = is_equivalent(ref_einsum, + einsum, + workload, + analyzer) + if rank_renaming is not None: + ref_to_other_einsums[ref_einsum][einsum] = (rank_renaming, + tensor_renaming) + found = True + break + if not found: + ref_to_other_einsums[einsum] = {} + return ref_to_other_einsums + + +def convert_rank_to_group_renaming(ref_to_other_einsums, equiv_ranks): + return { + ref: { + other: (_convert_rank_renaming(rank_renaming, equiv_ranks), + tensor_renaming) + for other, (rank_renaming, tensor_renaming) in others.items() + } + for ref, others in ref_to_other_einsums.items() + } + + +def _convert_rank_renaming(rank_renaming, equiv_ranks): + # The Tiling class uses string ids + return { + str(equiv_ranks.rank_to_group_id[r1]) + : + str(equiv_ranks.rank_to_group_id[r2]) + for r1, r2 in rank_renaming.items() + } diff --git a/pytimeloop/fastfusion/mapper/per_einsum_mapper.py b/pytimeloop/fastfusion/mapper/per_einsum_mapper.py index d91a42c..a758d69 100644 --- a/pytimeloop/fastfusion/mapper/per_einsum_mapper.py +++ b/pytimeloop/fastfusion/mapper/per_einsum_mapper.py @@ -83,6 +83,7 @@ def add_storage(self, target, dspaces, idx=None): else: self.mapping.insert(idx, node) + @log_worker(f"{__name__}:_mapper_place_fusion_level") def mapper_place_fusion_level( config, @@ -216,13 +217,13 @@ def get_top_loop_jobs( spec, 
explore_glb_uneven,
     explore_pe_uneven,
-    einsum_name_to_id,
+    einsums_to_explore,
     energy_dict,
     log_queue=None,
     verbose_stream=None,
 ):
     args = []
-    for einsum_id in einsum_name_to_id.values():
+    for einsum_id in einsums_to_explore:
         # if log_queue is not None:
         #     log_queue.info(f"[{einsum_id}] Exploring mapspace of Einsum {einsum_id}")
         logfunc = lambda msg: None # log_queue.debug(f"[{einsum_id}] " + msg)
diff --git a/pytimeloop/fastfusion/mapper/shape_subspace2.py b/pytimeloop/fastfusion/mapper/shape_subspace2.py
new file mode 100644
index 0000000..3480fd0
--- /dev/null
+++ b/pytimeloop/fastfusion/mapper/shape_subspace2.py
@@ -0,0 +1,96 @@
+"""
+General idea:
+- Often, higher hardware utilization leads to better metrics.
+- When that is not the case, the cause is that the hardware resource is
+  shared, and higher utilization by one user trades off against
+  utilization by another.
+
+In terms of tile shape, the utilization of interest is buffer
+utilization: a smaller loop factor gives a larger tile shape for
+temporal loops, and thus higher buffer utilization.
+Spatial loops are more complicated because a larger tile shape means
+lower parallel hardware utilization.
+
+To keep the shape iterator generic, we allow tagging a particular
+loop with hints:
+- Maximize tile shape
+- Minimize tile shape
+- Explore all
+The maximize/minimize tile shape tags cause the iterator to attempt
+to quickly find the largest/smallest *valid* tile shape.
+"""
+from collections.abc import Callable
+from enum import Enum
+
+from .shape_subspace import ShapeSubspace
+
+
+class IteratorHint(Enum):
+    MAXIMIZE = 0
+    MINIMIZE = 1
+    EXPLORE = 2
+
+
+class FastShapeSubspaceIterator:
+    def __init__(self,
+                 shape_subspace: ShapeSubspace,
+                 hints: list[IteratorHint]):
+        self.shape_subspace = shape_subspace
+        self.hints = hints
+
+    def explore_idx(self, idx: int, min_val: int, max_val: int,
+                    evaluator: "Callable[[int], EvaluationResult]"):
+        # Explore the tile shape of loop `idx` within [min_val, max_val]
+        # according to the hint attached to that loop.
+        hint = self.hints[idx]
+        if hint == IteratorHint.MAXIMIZE:
+            # Quickly find the largest valid tile shape.
+            return binary_search(min_val, max_val, evaluator,
+                                 search_max=True)
+        elif hint == IteratorHint.MINIMIZE:
+            # Quickly find the smallest valid tile shape.
+            return binary_search(min_val, max_val, evaluator,
+                                 search_max=False)
+        elif hint == IteratorHint.EXPLORE:
+            # Exhaustively enumerate all valid tile shapes.
+            return [v for v in range(min_val, max_val + 1)
+                    if evaluator(v) == EvaluationResult.VALID]
+        else:
+            raise ValueError(f"Unknown hint {hint}")
+
+
+class EvaluationResult(Enum):
+    TOO_SMALL = 0
+    VALID = 1
+    TOO_LARGE = 2
+
+
+def binary_search(min: int,
+                  max: int,
+                  evaluate: Callable[[int], EvaluationResult],
+                  search_max: bool):
+    if min > max:
+        raise ValueError("min must be less than or equal to max")
+
+    while min < max - 1:
+        cur = (min + max) // 2
+        cur_result = evaluate(cur)
+        if cur_result == EvaluationResult.TOO_LARGE:
+            max = cur - 1
+        elif cur_result == EvaluationResult.TOO_SMALL:
+            min = cur + 1
+        else:
+            if search_max:
+                min = cur
+            else:
+                max = cur
+
+    # The range may collapse to a single point; deduplicate candidates.
+    if min < max:
+        evaluate_order = [max, min] if search_max else [min, max]
+    else:
+        evaluate_order = [min]
+    for cur in evaluate_order:
+        if evaluate(cur) == EvaluationResult.VALID:
+            return cur
+    return None
diff --git a/tests/fastfusion/test_layerdeduplication.py b/tests/fastfusion/test_layerdeduplication.py
index 667406f..f794012 100644
--- a/tests/fastfusion/test_layerdeduplication.py
+++ b/tests/fastfusion/test_layerdeduplication.py
@@ -7,7 +7,7 @@
 
 
 class TestLayerDeduplication(LoadConfigMixin, unittest.TestCase):
-    def test_is_equivalent_mismatch(self):
+    def test_is_equivalent_mismatch_from_shape(self):
         config, spec = self.load_config([
             'four_level.arch.yaml',
             'cascaded_mm.workload.yaml'
@@ -21,7 +21,7 @@
         self.assertIs(rank_renaming, None)
self.assertIs(tensor_renaming, None) - def test_is_equivalent_match(self): + def test_is_equivalent_mismatch_from_tensor_roles(self): config, spec = self.load_config([ 'four_level.arch.yaml', 'cascaded_mm_32.workload.yaml' @@ -32,5 +32,23 @@ def test_is_equivalent_match(self): rank_renaming, tensor_renaming = \ is_equivalent(0, 1, workload, analyzer) - self.assertEqual(rank_renaming, {0: 9, 1: 10, 2: 11}) - self.assertEqual(tensor_renaming, {0: 2, 1: 3, 2: 4}) \ No newline at end of file + self.assertEqual(rank_renaming, None) + self.assertEqual(tensor_renaming, None) + + def test_is_equivalent_matches(self): + config, spec = self.load_config([ + 'four_level.arch.yaml', + 'cascaded_mm_multi_32.workload.yaml' + ]) + workload = LooptreeWorkload.parse_cfg(config.root['problem']) + analyzer = LooptreeWorkloadDependencyAnalyzer(workload) + + rank_renaming, tensor_renaming = \ + is_equivalent(0, 1, workload, analyzer) + self.assertEqual(rank_renaming, None) + self.assertEqual(tensor_renaming, None) + + rank_renaming, tensor_renaming = \ + is_equivalent(1, 2, workload, analyzer) + self.assertEqual(rank_renaming, {9: 16, 10: 17, 11: 18}) + self.assertEqual(tensor_renaming, {2: 4, 3: 5, 4: 6}) \ No newline at end of file diff --git a/tests/test_configs/cascaded_mm_multi_32.workload.yaml b/tests/test_configs/cascaded_mm_multi_32.workload.yaml index fe5ebac..848c8dd 100644 --- a/tests/test_configs/cascaded_mm_multi_32.workload.yaml +++ b/tests/test_configs/cascaded_mm_multi_32.workload.yaml @@ -6,7 +6,7 @@ problem: - {name: Fmap1, dimensions: [ Fmap1_C, Fmap1_P ], projection: '[ C1, P1 ]'} - {name: Filter1, dimensions: [ Filter1_C, Filter1_M ], projection: '[ C1, M1 ]'} - {name: Fmap2, dimensions: [ Fmap2_C, Fmap2_P ], projection: '[ M1, P1 ]', read_write: True} - instance: 0 <= P1 < 32 and 0 <= M1 < 32 and 0 <= C1 < 32 + instance: 0 <= P1 < 2 and 0 <= M1 < 2 and 0 <= C1 < 2 - shape: name: Fc2 @@ -15,7 +15,7 @@ problem: - {name: Fmap2, dimensions: [ Fmap2_C, Fmap2_P ], projection: '[ C2, P2 ]'} - {name: Filter2, dimensions: [ Filter2_C, Filter2_M ], projection: '[ C2, M2 ]'} - {name: Fmap3, dimensions: [ Fmap3_C, Fmap3_P ], projection: '[ M2, P2 ]', read_write: True} - instance: 0 <= P2 < 32 and 0 <= M2 < 32 and 0 <= C2 < 32 + instance: 0 <= P2 < 2 and 0 <= M2 < 2 and 0 <= C2 < 2 - shape: name: Fc3 @@ -24,7 +24,7 @@ problem: - {name: Fmap3, dimensions: [ Fmap3_C, Fmap3_P ], projection: '[ C3, P3 ]'} - {name: Filter3, dimensions: [ Filter3_C, Filter3_M ], projection: '[ C3, M3 ]'} - {name: Fmap4, dimensions: [ Fmap4_C, Fmap4_P ], projection: '[ M3, P3 ]', read_write: True} - instance: 0 <= P3 < 32 and 0 <= M3 < 32 and 0 <= C3 < 32 + instance: 0 <= P3 < 2 and 0 <= M3 < 2 and 0 <= C3 < 2 - shape: name: Fc4 @@ -33,7 +33,7 @@ problem: - {name: Fmap4, dimensions: [ Fmap4_C, Fmap4_P ], projection: '[ C4, P4 ]'} - {name: Filter4, dimensions: [ Filter4_C, Filter4_M ], projection: '[ C4, M4 ]'} - {name: Fmap5, dimensions: [ Fmap5_C, Fmap5_P ], projection: '[ M4, P4 ]', read_write: True} - instance: 0 <= P4 < 32 and 0 <= M4 < 32 and 0 <= C4 < 32 + instance: 0 <= P4 < 2 and 0 <= M4 < 2 and 0 <= C4 < 2 - shape: name: Fc5 @@ -42,7 +42,7 @@ problem: - {name: Fmap5, dimensions: [ Fmap5_C, Fmap5_P ], projection: '[ C5, P5 ]'} - {name: Filter5, dimensions: [ Filter5_C, Filter5_M ], projection: '[ C5, M5 ]'} - {name: Fmap6, dimensions: [ Fmap6_C, Fmap6_P ], projection: '[ M5, P5 ]', read_write: True} - instance: 0 <= P5 < 32 and 0 <= M5 < 32 and 0 <= C5 < 32 + instance: 0 <= P5 < 2 and 0 <= M5 < 2 and 0 <= C5 < 2 - 
shape: name: Fc6 @@ -51,7 +51,7 @@ problem: - {name: Fmap6, dimensions: [ Fmap6_C, Fmap6_P ], projection: '[ C6, P6 ]'} - {name: Filter6, dimensions: [ Filter6_C, Filter6_M ], projection: '[ C6, M6 ]'} - {name: Fmap7, dimensions: [ Fmap7_C, Fmap7_P ], projection: '[ M6, P6 ]', read_write: True} - instance: 0 <= P6 < 32 and 0 <= M6 < 32 and 0 <= C6 < 32 + instance: 0 <= P6 < 2 and 0 <= M6 < 2 and 0 <= C6 < 2 - shape: name: Fc7 @@ -60,7 +60,7 @@ problem: - {name: Fmap7, dimensions: [ Fmap7_C, Fmap7_P ], projection: '[ C7, P7 ]'} - {name: Filter7, dimensions: [ Filter7_C, Filter7_M ], projection: '[ C7, M7 ]'} - {name: Fmap8, dimensions: [ Fmap8_C, Fmap8_P ], projection: '[ M7, P7 ]', read_write: True} - instance: 0 <= P7 < 32 and 0 <= M7 < 32 and 0 <= C7 < 32 + instance: 0 <= P7 < 2 and 0 <= M7 < 2 and 0 <= C7 < 2 - shape: name: Fc8 @@ -69,7 +69,7 @@ problem: - {name: Fmap8, dimensions: [ Fmap8_C, Fmap8_P ], projection: '[ C8, P8 ]'} - {name: Filter8, dimensions: [ Filter8_C, Filter8_M ], projection: '[ C8, M8 ]'} - {name: Fmap9, dimensions: [ Fmap9_C, Fmap9_P ], projection: '[ M8, P8 ]', read_write: True} - instance: 0 <= P8 < 32 and 0 <= M8 < 32 and 0 <= C8 < 32 + instance: 0 <= P8 < 2 and 0 <= M8 < 2 and 0 <= C8 < 2 # - shape: # name: Fc9 @@ -78,7 +78,7 @@ problem: # - {name: Fmap9, dimensions: [ Fmap9_C, Fmap9_P ], projection: '[ C9, P9 ]'} # - {name: Filter9, dimensions: [ Filter9_C, Filter9_M ], projection: '[ C9, M9 ]'} # - {name: Fmap10, dimensions: [ Fmap10_C, Fmap10_P ], projection: '[ M9, P9 ]', read_write: True} - # instance: 0 <= P9 < 32 and 0 <= M9 < 32 and 0 <= C9 < 32 + # instance: 0 <= P9 < 2 and 0 <= M9 < 2 and 0 <= C9 < 2 # - shape: # name: Fc10 @@ -87,7 +87,7 @@ problem: # - {name: Fmap10, dimensions: [ Fmap10_C, Fmap10_P ], projection: '[ C10, P10 ]'} # - {name: Filter10, dimensions: [ Filter10_C, Filter10_M ], projection: '[ C10, M10 ]'} # - {name: Fmap11, dimensions: [ Fmap11_C, Fmap11_P ], projection: '[ M10, P10 ]', read_write: True} - # instance: 0 <= P10 < 32 and 0 <= M10 < 32 and 0 <= C10 < 32 + # instance: 0 <= P10 < 2 and 0 <= M10 < 2 and 0 <= C10 < 2 # - shape: # name: Fc11 @@ -96,7 +96,7 @@ problem: # - {name: Fmap11, dimensions: [ Fmap11_C, Fmap11_P ], projection: '[ C11, P11 ]'} # - {name: Filter11, dimensions: [ Filter11_C, Filter11_M ], projection: '[ C11, M11 ]'} # - {name: Fmap12, dimensions: [ Fmap12_C, Fmap12_P ], projection: '[ M11, P11 ]', read_write: True} - # instance: 0 <= P11 < 32 and 0 <= M11 < 32 and 0 <= C11 < 32 + # instance: 0 <= P11 < 2 and 0 <= M11 < 2 and 0 <= C11 < 2 # - shape: # name: Fc12 @@ -105,7 +105,7 @@ problem: # - {name: Fmap12, dimensions: [ Fmap12_C, Fmap12_P ], projection: '[ C12, P12 ]'} # - {name: Filter12, dimensions: [ Filter12_C, Filter12_M ], projection: '[ C12, M12 ]'} # - {name: Fmap13, dimensions: [ Fmap13_C, Fmap13_P ], projection: '[ M12, P12 ]', read_write: True} - # instance: 0 <= P12 < 32 and 0 <= M12 < 32 and 0 <= C12 < 32 + # instance: 0 <= P12 < 2 and 0 <= M12 < 2 and 0 <= C12 < 2 # - shape: # name: Fc13 @@ -114,7 +114,7 @@ problem: # - {name: Fmap13, dimensions: [ Fmap13_C, Fmap13_P ], projection: '[ C13, P13 ]'} # - {name: Filter13, dimensions: [ Filter13_C, Filter13_M ], projection: '[ C13, M13 ]'} # - {name: Fmap14, dimensions: [ Fmap14_C, Fmap14_P ], projection: '[ M13, P13 ]', read_write: True} - # instance: 0 <= P13 < 32 and 0 <= M13 < 32 and 0 <= C13 < 32 + # instance: 0 <= P13 < 2 and 0 <= M13 < 2 and 0 <= C13 < 2 # - shape: # name: Fc14 @@ -123,7 +123,7 @@ problem: # - {name: Fmap14, dimensions: [ 
Fmap14_C, Fmap14_P ], projection: '[ C14, P14 ]'} # - {name: Filter14, dimensions: [ Filter14_C, Filter14_M ], projection: '[ C14, M14 ]'} # - {name: Fmap15, dimensions: [ Fmap15_C, Fmap15_P ], projection: '[ M14, P14 ]', read_write: True} - # instance: 0 <= P14 < 32 and 0 <= M14 < 32 and 0 <= C14 < 32 + # instance: 0 <= P14 < 2 and 0 <= M14 < 2 and 0 <= C14 < 2 # - shape: # name: Fc15 @@ -132,7 +132,7 @@ problem: # - {name: Fmap15, dimensions: [ Fmap15_C, Fmap15_P ], projection: '[ C15, P15 ]'} # - {name: Filter15, dimensions: [ Filter15_C, Filter15_M ], projection: '[ C15, M15 ]'} # - {name: Fmap16, dimensions: [ Fmap16_C, Fmap16_P ], projection: '[ M15, P15 ]', read_write: True} - # instance: 0 <= P15 < 32 and 0 <= M15 < 32 and 0 <= C15 < 32 + # instance: 0 <= P15 < 2 and 0 <= M15 < 2 and 0 <= C15 < 2 # - shape: # name: Fc16 @@ -141,4 +141,4 @@ problem: # - {name: Fmap16, dimensions: [ Fmap16_C, Fmap16_P ], projection: '[ C16, P16 ]'} # - {name: Filter16, dimensions: [ Filter16_C, Filter16_M ], projection: '[ C16, M16 ]'} # - {name: Fmap17, dimensions: [ Fmap17_C, Fmap17_P ], projection: '[ M16, P16 ]', read_write: True} - # instance: 0 <= P16 < 32 and 0 <= M16 < 32 and 0 <= C16 < 32 \ No newline at end of file + # instance: 0 <= P16 < 2 and 0 <= M16 < 2 and 0 <= C16 < 2 \ No newline at end of file
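
---
Reviewer notes (illustrative sketches, not part of the patch):

1. The renaming search in layerdeduplication.py only pairs tensors that
   share the same property set (input/output/intermediate), which prunes
   the search over all tensor bijections. A minimal self-contained sketch
   of that enumeration, using toy tensor names in place of workload
   tensor ids:

    from itertools import permutations, product

    def tensor_renamings(property_to_tensors):
        # No bijection can exist unless each property class has equally
        # many tensors on both sides.
        for tensors_of_1, tensors_of_2 in property_to_tensors.values():
            if len(tensors_of_1) != len(tensors_of_2):
                return
        all_tensors_of_1 = [
            t
            for tensors_of_1, _ in property_to_tensors.values()
            for t in tensors_of_1
        ]
        # Permute each property class of einsum2 independently; the
        # cross product yields every property-preserving bijection.
        per_class_permutations = [
            permutations(tensors_of_2)
            for _, tensors_of_2 in property_to_tensors.values()
        ]
        for permutation in product(*per_class_permutations):
            flat = tuple(t for tupl in permutation for t in tupl)
            yield dict(zip(all_tensors_of_1, flat))

    # Two inputs and one output per Einsum: only same-role tensors may
    # be matched, so 2! * 1! = 2 candidate renamings are generated
    # instead of 3! = 6.
    property_to_tensors = {
        frozenset({'input'}): ({'A', 'B'}, {'X', 'Y'}),
        frozenset({'output'}): ({'C'}, {'Z'}),
    }
    for renaming in tensor_renamings(property_to_tensors):
        print(renaming)  # {'A': 'X', 'B': 'Y', 'C': 'Z'} and the swap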
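
2. How the hinted search in shape_subspace2.py is meant to be used,
   assuming the patch is applied in the pytimeloop tree. The evaluator
   below is a stand-in for a real buffer-capacity check (the name
   check_capacity and the limit of 12 are made up for illustration);
   binary_search and EvaluationResult are the definitions added by this
   patch:

    from pytimeloop.fastfusion.mapper.shape_subspace2 import (
        EvaluationResult, binary_search)

    def check_capacity(tile_shape: int) -> EvaluationResult:
        # Stand-in evaluation: tile shapes above 12 overflow the buffer.
        if tile_shape > 12:
            return EvaluationResult.TOO_LARGE
        return EvaluationResult.VALID

    # MAXIMIZE hint: the largest valid tile shape in [1, 32] is found in
    # O(log n) evaluations rather than by trying all 32 shapes.
    assert binary_search(1, 32, check_capacity, search_max=True) == 12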
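
3. What generate_data in mapper2.py does to each cached mapping: a
   reference Einsum's tilings are reused for an equivalent Einsum by
   renaming rank and tensor ids. The dataclasses below are hypothetical
   stand-ins modeling only the fields of Loop/TensorStorage from
   pytimeloop.fastfusion.sim that _convert_tiling touches; the renaming
   values mirror the new test case:

    from dataclasses import dataclass

    @dataclass(frozen=True)
    class Loop:
        rank_id: str
        bound: int
        is_spatial: bool

    @dataclass(frozen=True)
    class TensorStorage:
        tensor_id: str
        backer_id: int
        above_loop_index: int
        tile_size: int

    rank_renaming = {'9': '16', '10': '17', '11': '18'}
    tensor_renaming = {'2': '4', '3': '5', '4': '6'}

    loops = (Loop('9', 4, False), Loop('10', 8, True))
    tensors = frozenset({TensorStorage('2', 0, 1, 64)})

    # Mirror of _convert_tiling: only ids change; loop bounds, storage
    # levels, and tile sizes carry over unchanged.
    renamed_loops = tuple(
        Loop(rank_renaming[l.rank_id], l.bound, l.is_spatial)
        for l in loops)
    renamed_tensors = frozenset(
        TensorStorage(tensor_renaming[t.tensor_id], t.backer_id,
                      t.above_loop_index, t.tile_size)
        for t in tensors)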