From f7036d7258b6506bbc472787532cb4be29d6341e Mon Sep 17 00:00:00 2001 From: Olivia Hsu Date: Fri, 10 Feb 2023 11:52:19 -0800 Subject: [PATCH 01/39] Add in fixes for suitesparse_runner script --- Makefile | 12 ++++++------ scripts/suitesparse_runner.sh | 25 +++++++++++-------------- 2 files changed, 17 insertions(+), 20 deletions(-) diff --git a/Makefile b/Makefile index 0a4882a9..b3ace47e 100644 --- a/Makefile +++ b/Makefile @@ -31,12 +31,12 @@ endif ifeq ("$(NEVA)","ON") CMD := OMP_PROC_BIND=true LD_LIBRARY_PATH=compiler/build/lib/:$(LD_LIBRARY_PATH) numactl -C 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 -m 0 compiler/build/taco-bench $(BENCHFLAGS) - export SUITESPARSE_PATH=/nobackup/owhsu/sparse-datasets/suitesparse/ - export FROSTT_PATH=/nobackup/owhsu/sparse-datasets/frostt/ - export SUITESPARSE_FORMATTED_PATH=/nobackup/owhsu/sparse-datasets/suitesparse-formatted - export FROSTT_FORMATTED_TACO_PATH=/nobackup/owhsu/sparse-datasets/frostt-formatted/taco-tensor - export FROSTT_FORMATTED_PATH=/nobackup/owhsu/sparse-datasets/frostt-formatted - export TACO_TENSOR_PATH=/nobackup/owhsu/sparse-datasets + #export SUITESPARSE_PATH=/nobackup/owhsu/sparse-datasets/suitesparse/ + #export FROSTT_PATH=/nobackup/owhsu/sparse-datasets/frostt/ + #export SUITESPARSE_FORMATTED_PATH=/nobackup/owhsu/sparse-datasets/suitesparse-formatted + #export FROSTT_FORMATTED_TACO_PATH=/nobackup/owhsu/sparse-datasets/frostt-formatted/taco-tensor + #export FROSTT_FORMATTED_PATH=/nobackup/owhsu/sparse-datasets/frostt-formatted + #export TACO_TENSOR_PATH=/nobackup/owhsu/sparse-datasets else ifeq ("$(LANKA)", "ON") CMD := OMP_PROC_BIND=true LD_LIBRARY_PATH=compiler/build/lib/:$(LD_LIBRARY_PATH) numactl -C 0,2,4,6,8,10,24,26,28,30,32,34 -m 0 compiler/build/taco-bench $(BENCHFLAGS) export SUITESPARSE_PATH=/data/scratch/changwan/florida_all diff --git a/scripts/suitesparse_runner.sh b/scripts/suitesparse_runner.sh index 33b95b6b..b933021e 100755 --- a/scripts/suitesparse_runner.sh +++ b/scripts/suitesparse_runner.sh @@ -8,28 +8,25 @@ set -u cwd=$(pwd) sspath=$SUITESPARSE_PATH + # LANKA -if [ $2 -eq 1 ]; then - lanka=ON - neva=OFF -elif [ $2 -eq 2 ]; then - lanka=OFF - neva=ON -else - lanka=OFF - neva=OFF -fi + if [ $2 -eq 1 ]; then + lanka=ON + neva=OFF + elif [ $2 -eq 2 ]; then + lanka=OFF + neva=ON + else + fi out=suitesparse-bench/taco mkdir -p "$out" while read line; do - if [ $2 -eq 1 ]; then + if [ $LANKA -eq 1 ]; then matrix="$sspath/$line/$line.mtx" - elif [ $2 -eq 2 ]; then - matrix="$sspath/$line.mtx" - else + else matrix="$sspath/$line.mtx" fi csvout="$out/result-$line.csv" From d485cdaf9a46b1976c066a322adbd14c9b0d43c9 Mon Sep 17 00:00:00 2001 From: Olivia Hsu Date: Fri, 10 Feb 2023 13:23:03 -0800 Subject: [PATCH 02/39] Fixes to suitesparse_runner --- Makefile | 32 ++++++++++++-------------------- scripts/suitesparse_runner.sh | 4 +++- 2 files changed, 15 insertions(+), 21 deletions(-) diff --git a/Makefile b/Makefile index b3ace47e..b65225ed 100644 --- a/Makefile +++ b/Makefile @@ -31,20 +31,20 @@ endif ifeq ("$(NEVA)","ON") CMD := OMP_PROC_BIND=true LD_LIBRARY_PATH=compiler/build/lib/:$(LD_LIBRARY_PATH) numactl -C 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 -m 0 compiler/build/taco-bench $(BENCHFLAGS) - #export SUITESPARSE_PATH=/nobackup/owhsu/sparse-datasets/suitesparse/ - #export FROSTT_PATH=/nobackup/owhsu/sparse-datasets/frostt/ - #export SUITESPARSE_FORMATTED_PATH=/nobackup/owhsu/sparse-datasets/suitesparse-formatted - #export FROSTT_FORMATTED_TACO_PATH=/nobackup/owhsu/sparse-datasets/frostt-formatted/taco-tensor - 
#export FROSTT_FORMATTED_PATH=/nobackup/owhsu/sparse-datasets/frostt-formatted - #export TACO_TENSOR_PATH=/nobackup/owhsu/sparse-datasets + # export SUITESPARSE_PATH=/nobackup/owhsu/sparse-datasets/suitesparse/ + # export FROSTT_PATH=/nobackup/owhsu/sparse-datasets/frostt/ + # export SUITESPARSE_FORMATTED_PATH=/nobackup/owhsu/sparse-datasets/suitesparse-formatted + # export FROSTT_FORMATTED_TACO_PATH=/nobackup/owhsu/sparse-datasets/frostt-formatted/taco-tensor + # export FROSTT_FORMATTED_PATH=/nobackup/owhsu/sparse-datasets/frostt-formatted + # export TACO_TENSOR_PATH=/nobackup/owhsu/sparse-datasets else ifeq ("$(LANKA)", "ON") CMD := OMP_PROC_BIND=true LD_LIBRARY_PATH=compiler/build/lib/:$(LD_LIBRARY_PATH) numactl -C 0,2,4,6,8,10,24,26,28,30,32,34 -m 0 compiler/build/taco-bench $(BENCHFLAGS) - export SUITESPARSE_PATH=/data/scratch/changwan/florida_all - export FROSTT_PATH=/data/scratch/owhsu/datasets/frostt - export TACO_TENSOR_PATH=/data/scratch/owhsu/datasets - export SUITESPARSE_FORMATTED_PATH=/data/scratch/owhsu/datasets/suitesparse-formatted - export FROSTT_FORMATTED_TACO_PATH=/data/scratch/owhsu/datasets/frostt-formatted/taco-tensor - export FROSTT_FORMATTED_PATH=/data/scratch/owhsu/datasets/frostt-formatted + # export SUITESPARSE_PATH=/data/scratch/changwan/florida_all + # export FROSTT_PATH=/data/scratch/owhsu/datasets/frostt + # export TACO_TENSOR_PATH=/data/scratch/owhsu/datasets + # export SUITESPARSE_FORMATTED_PATH=/data/scratch/owhsu/datasets/suitesparse-formatted + # export FROSTT_FORMATTED_TACO_PATH=/data/scratch/owhsu/datasets/frostt-formatted/taco-tensor + # export FROSTT_FORMATTED_PATH=/data/scratch/owhsu/datasets/frostt-formatted else CMD := LD_LIBRARY_PATH=compiler/build/lib/:$(LD_LIBRARY_PATH) compiler/build/taco-bench $(BENCHFLAGS) endif @@ -114,14 +114,6 @@ suitesparse-formats: guard-SUITESPARSE_FORMATTED_PATH guard-SUITESPARSE_PATH frostt-formats: taco/build guard-FROSTT_FORMATTED_PATH guard-FROSTT_PATH ./scripts/generate_frostt_formats.sh -.PHONY: env -env: - export SUITESPARSE_PATH=/nobackup/owhsu/sparse-datasets/suitesparse/ - export FROSTT_PATH=/nobackup/owhsu/sparse-datasets/frostt/ - export SUITESPARSE_FORMATTED_PATH=/nobackup/owhsu/sparse-datasets/suitesparse-formatted - export FROSTT_FORMATTED_TACO_PATH=/nobackup/owhsu/sparse-datasets/frostt-formatted/taco-tensor - export FROSTT_FORMATTED_PATH=/nobackup/owhsu/sparse-datasets/frostt-formatted - .PHONY: pydepends pydepends: conda env export > environment.yml diff --git a/scripts/suitesparse_runner.sh b/scripts/suitesparse_runner.sh index b933021e..8aa4aace 100755 --- a/scripts/suitesparse_runner.sh +++ b/scripts/suitesparse_runner.sh @@ -17,6 +17,8 @@ sspath=$SUITESPARSE_PATH lanka=OFF neva=ON else + lanka=OFF + neva=OFF fi out=suitesparse-bench/taco @@ -24,7 +26,7 @@ out=suitesparse-bench/taco mkdir -p "$out" while read line; do - if [ $LANKA -eq 1 ]; then + if [ $2 -eq 1 ]; then matrix="$sspath/$line/$line.mtx" else matrix="$sspath/$line.mtx" From f8b0fa5c5c8318bbd6bf5746800665d9458a195e Mon Sep 17 00:00:00 2001 From: Olivia Hsu Date: Fri, 10 Feb 2023 13:40:00 -0800 Subject: [PATCH 03/39] Add in fix to bench_csv_aggregator --- scripts/bench_csv_aggregator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/bench_csv_aggregator.py b/scripts/bench_csv_aggregator.py index 58a6f1f1..c24b7789 100644 --- a/scripts/bench_csv_aggregator.py +++ b/scripts/bench_csv_aggregator.py @@ -24,7 +24,7 @@ def aggregateTacoBenches(folder, outfile, taco=False, labelSet=None): # Discard the first 9 
lines. This corresponds to the # google-benchmark generated header. if taco: - for i in range(0, 10): + for i in range(0, 9): f.readline() # Open the rest of the file as a CSV. reader = csv.reader(f) From 704d00c8d196a343b1f0a318711d1a5e4da4b943 Mon Sep 17 00:00:00 2001 From: Olivia Hsu Date: Fri, 10 Feb 2023 15:24:51 -0800 Subject: [PATCH 04/39] Add in info about scripts --- compiler/README.md | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 compiler/README.md diff --git a/compiler/README.md b/compiler/README.md new file mode 100644 index 00000000..cfa57852 --- /dev/null +++ b/compiler/README.md @@ -0,0 +1,6 @@ +Name | Tags | Description | +-------------------------------- +`./scripts/suitesparse_runner.sh <0|1|2>` | `cpu-taco`, `ss`, `machine` | Gets the TACO CPU runtime baselines for SuiteSparse and stores it to `suitesparse-bench/taco/`. +`python scripts/bench_csv_aggregator.py [--taco] [--label_set_file ]` | `cpu-taco`, `sam`, `ss`, `frostt` | Aggregates all csvs from a directory into one file. Can do this for either the TACO-generated (`--taco`) or SAM-generated CSVs + + From f6b558e81d5ccb67461b2de331cbfddd11e18962 Mon Sep 17 00:00:00 2001 From: Olivia Hsu Date: Fri, 10 Feb 2023 15:33:42 -0800 Subject: [PATCH 05/39] Fix a README mistake --- compiler/README.md | 6 ------ scripts/README.md | 5 +++-- 2 files changed, 3 insertions(+), 8 deletions(-) delete mode 100644 compiler/README.md diff --git a/compiler/README.md b/compiler/README.md deleted file mode 100644 index cfa57852..00000000 --- a/compiler/README.md +++ /dev/null @@ -1,6 +0,0 @@ -Name | Tags | Description | --------------------------------- -`./scripts/suitesparse_runner.sh <0|1|2>` | `cpu-taco`, `ss`, `machine` | Gets the TACO CPU runtime baselines for SuiteSparse and stores it to `suitesparse-bench/taco/`. -`python scripts/bench_csv_aggregator.py [--taco] [--label_set_file ]` | `cpu-taco`, `sam`, `ss`, `frostt` | Aggregates all csvs from a directory into one file. Can do this for either the TACO-generated (`--taco`) or SAM-generated CSVs - - diff --git a/scripts/README.md b/scripts/README.md index b004db54..ae9b9207 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -1,4 +1,5 @@ ----------------------------------- | File Name | Usage | Description | ----------------------------------- -| +`./scripts/suitesparse_runner.sh <0|1|2>` | `cpu-taco`, `ss`, `machine` | Gets the TACO CPU runtime baselines for SuiteSparse and stores it to `suitesparse-bench/taco/`. +`python scripts/bench_csv_aggregator.py [--taco] [--label_set_file ]` | `cpu-taco`, `sam`, `ss`, `frostt` | Aggregates all csvs from a directory into one file. Can do this for either the TACO-generated (`--taco`) or SAM-generated CSVs + From 394581b5d55c562297ce87d0837bdfea9dfbf8d5 Mon Sep 17 00:00:00 2001 From: Olivia Hsu Date: Fri, 10 Feb 2023 15:34:35 -0800 Subject: [PATCH 06/39] Get md table working --- scripts/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/README.md b/scripts/README.md index ae9b9207..468459ef 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -1,5 +1,5 @@ | File Name | Usage | Description | ----------------------------------- -`./scripts/suitesparse_runner.sh <0|1|2>` | `cpu-taco`, `ss`, `machine` | Gets the TACO CPU runtime baselines for SuiteSparse and stores it to `suitesparse-bench/taco/`. -`python scripts/bench_csv_aggregator.py [--taco] [--label_set_file ]` | `cpu-taco`, `sam`, `ss`, `frostt` | Aggregates all csvs from a directory into one file. 
Can do this for either the TACO-generated (`--taco`) or SAM-generated CSVs +| `./scripts/suitesparse_runner.sh <0|1|2>` | `cpu-taco`, `ss`, `machine` | Gets the TACO CPU runtime baselines for SuiteSparse and stores it to `suitesparse-bench/taco/` | +| `python scripts/bench_csv_aggregator.py [--taco] [--label_set_file ]` | `cpu-taco`, `sam`, `ss`, `frostt` | Aggregates all csvs from a directory into one file. Can do this for either the TACO-generated (`--taco`) or SAM-generated CSVs | From afabcc421686029631e6f37b819a2e5da8d0a7de Mon Sep 17 00:00:00 2001 From: Olivia Hsu Date: Fri, 10 Feb 2023 16:41:56 -0800 Subject: [PATCH 07/39] Update README.md --- scripts/README.md | 52 +++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 48 insertions(+), 4 deletions(-) diff --git a/scripts/README.md b/scripts/README.md index 468459ef..a4e0fac6 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -1,5 +1,49 @@ -| File Name | Usage | Description | ------------------------------------ -| `./scripts/suitesparse_runner.sh <0|1|2>` | `cpu-taco`, `ss`, `machine` | Gets the TACO CPU runtime baselines for SuiteSparse and stores it to `suitesparse-bench/taco/` | -| `python scripts/bench_csv_aggregator.py [--taco] [--label_set_file ]` | `cpu-taco`, `sam`, `ss`, `frostt` | Aggregates all csvs from a directory into one file. Can do this for either the TACO-generated (`--taco`) or SAM-generated CSVs | +# Tags +1. `cpu-taco` means it is used to get baseline runtime for TACO running on the CPU +2. `sam` means it is used to get cycles for SAM +3. `format` means it is used to format the data into the correct datastructures for SAM +4. `ss` means it is used to run SuiteSparse benchmarks +5. `frostt` means it is used to run FROSTT benchmarks +6. `synth` means it is used to run synthetic data benchmarks +7. `machine` means it needs an argument to tell the script about the machine running (local, Neva/Kiwi, or Lanka) +8. `sam-mem` means it is used for the SAM memory modeling simulator +9. `artifact` means it is used for the [SAM ASPLOS '23 artifact evaluation](https://github.com/weiya711/sam-artifact) (ASPLOS23 AE) +10. `plot` means it is used to plot data + +# Command and Desciption + +1. `./scriptsadvanced_simulator_runner.sh ... TODO` +2. `python scripts/artifact_docker_copy.py --output_dir --docker_id ` + + Tags: `artifact` + Description: Extracts all figures from docker to your local machine (see [Validate All Results](https://github.com/weiya711/sam-artifact#Validate-All-Results) in the SAM artifact evaluation README +3. `python scripts/bench_csv_aggregator.py [--taco] [--label_set_file ]` + + Tags: `cpu-taco`, `sam`, `ss`, `frostt` + Description: Aggregates all csvs from a directory into one file. Can do this for either the TACO-generated (`--taco`) or SAM-generated CSVs +4. `./scripts/clean_memory_model.sh` + + Tags: `sam-mem`, `artifact` + Description: Cleans all directories related to the SAM memory modeling simulator and is used in the ASPLOS23 AE +5. `python scripts/collect_node_counts.py [--sam_graphs --output_log ] + + Tags: `artifact` + Description: `make sam` must be run before this script. This generates Table 1 in the ASPLOS '23 SAM paper for the AE +6. `python converter.py ... TODO` + Tags: `sam` + Description: Converts JSONs to CSVs for SAM/pytest benchmarks +7. `python scripts/datastructure_suitesparse.py ... TODO` + Note: Do not use this, the `generate_suitesparse_formats.sh` script should be used instead +8. `python datastructure_tns.py` + TODO +9. 
`python divvy_runs.py` + TODO + + +43. `./scripts/suitesparse_runner.sh <0|1|2>` + + Tags: `cpu-taco`, `ss`, `machine` + Description: Gets the TACO CPU runtime baselines for SuiteSparse and stores it to `suitesparse-bench/taco/` + +50. From 9c3109bd1b16cb27d0c2cca473bb721f8851385e Mon Sep 17 00:00:00 2001 From: Olivia Hsu Date: Fri, 17 Feb 2023 10:18:48 -0800 Subject: [PATCH 08/39] Add in scripts for onyx tiling --- sam/onyx/synthetic/generate_fixed_nnz_mats.py | 20 +++++++++++++++---- sam/sim/src/channel.py | 1 + sam/sim/src/tiling/memory_config_onyx.yaml | 4 ++-- sam/util.py | 4 ++-- scripts/generate_sparsity_sweep_mem_model.sh | 2 +- scripts/prepare_files.sh | 4 ++-- 6 files changed, 24 insertions(+), 11 deletions(-) diff --git a/sam/onyx/synthetic/generate_fixed_nnz_mats.py b/sam/onyx/synthetic/generate_fixed_nnz_mats.py index ec099dfb..6671fcc2 100644 --- a/sam/onyx/synthetic/generate_fixed_nnz_mats.py +++ b/sam/onyx/synthetic/generate_fixed_nnz_mats.py @@ -1,7 +1,7 @@ import scipy.io import scipy.sparse import numpy as np - +import argparse def generate_mat(nnz, dim): return scipy.sparse.random(dim, dim, nnz / (dim**2), data_rvs=np.ones) @@ -14,9 +14,21 @@ def write_mtx(path, t): if __name__ == "__main__": seed = 0 np.random.seed(seed) - # 1024 - dims = list(range(1024, 15721, 1336)) - nnzs = [5000, 10000, 25000, 50000] + + parser = argparse.ArgumentParser(description="Create some random matrices of given nnz and dim") + parser.add_argument('--nnz', type=int, nargs='+', help='nnz') + parser.add_argument('--dim', type=int, nargs='+', help='dim') + parser.add_argument('--extensor', action='store_true', help='generate extensor dims and nnzs') + args = parser.parse_args() + + + if args.extensor: + dims = list(range(1024, 15721, 1336)) + nnzs = [5000, 10000, 25000, 50000] + else: + dims = args.dim + nnzs = args.nnz + print("RUNNING:", dims, nnzs) for nnz in nnzs: for dim in dims: diff --git a/sam/sim/src/channel.py b/sam/sim/src/channel.py index 87fd36fe..6e20222f 100644 --- a/sam/sim/src/channel.py +++ b/sam/sim/src/channel.py @@ -219,6 +219,7 @@ def input_token_(self, token): self.downstream_token = token +# FIXME: Follow code style and fix class naming convention and make sure it's base is primitive... class memory_block(): def __init__(self, name="B", skip_blocks=False, element_size=2, level=None, indexes=2, size=1000 * 2, nbuffer=False, latency=10, debug=False, bandwidth=2, diff --git a/sam/sim/src/tiling/memory_config_onyx.yaml b/sam/sim/src/tiling/memory_config_onyx.yaml index befb9cad..74c80c27 100644 --- a/sam/sim/src/tiling/memory_config_onyx.yaml +++ b/sam/sim/src/tiling/memory_config_onyx.yaml @@ -15,5 +15,5 @@ Bytes_per_element: 2 # Number n_levels: 3 level_names: ["Main", "Glb", "Mem"] Main_tile_size: None -Glb_tile_size: 16 # 16 # 120 # n = (nxn) elements -Mem_tile_size: 128 # Size of one dense dimension. 8 = (8x8) +Glb_tile_size: 8 # 8 = (8x8) = 64 elements +Mem_tile_size: 45 # Size of one dense dimension. 
45 = (45*45) = 2025 diff --git a/sam/util.py b/sam/util.py index 96cc15c5..7044e94d 100644 --- a/sam/util.py +++ b/sam/util.py @@ -7,7 +7,7 @@ import shutil import numpy as np import math -import pydata +import sparse from pathlib import Path from dataclasses import dataclass @@ -601,7 +601,7 @@ def safeCastPydataTensorToInts(tensor): # else: # data[i] = int(tensor.data[i]) data[i] = round_sparse(tensor.data[i]) - return pydata.sparse.COO(tensor.coords, data, tensor.shape) + return sparse.COO(tensor.coords, data, tensor.shape) def parse_taco_format(infilename, outdir, tensorname, format_str): diff --git a/scripts/generate_sparsity_sweep_mem_model.sh b/scripts/generate_sparsity_sweep_mem_model.sh index 4ac85ed3..67a339b9 100755 --- a/scripts/generate_sparsity_sweep_mem_model.sh +++ b/scripts/generate_sparsity_sweep_mem_model.sh @@ -2,7 +2,7 @@ SECONDS=0 mkdir extensor_mtx cd extensor_mtx -python ../sam/onyx/synthetic/generate_fixed_nnz_mats.py +python ../sam/onyx/synthetic/generate_fixed_nnz_mats.py --extensor cd .. ELAPSED="Elapsed: $(($SECONDS / 3600))hrs $((($SECONDS / 60) % 60))min $(($SECONDS % 60))sec" printf "$ELAPSED" diff --git a/scripts/prepare_files.sh b/scripts/prepare_files.sh index bed6796c..f14f0380 100755 --- a/scripts/prepare_files.sh +++ b/scripts/prepare_files.sh @@ -9,6 +9,6 @@ basedir=$(pwd) rm -rf $basedir/tiles/* -./scripts/tile_ext.sh $1 memory_config_extensor_17M_llb.yaml +./scripts/tile_ext.sh $1 $2 -python scripts/generate_gold_matmul_tiled.py --yaml_name memory_config_extensor_17M_llb.yaml +python scripts/generate_gold_matmul_tiled.py --yaml_name $2 From 5968062fe822792ce92901b48d4407d884fd4afb Mon Sep 17 00:00:00 2001 From: Olivia Hsu Date: Fri, 17 Feb 2023 10:26:54 -0800 Subject: [PATCH 09/39] Add in file, oops --- scripts/prepare_tiles_onyx.sh | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100755 scripts/prepare_tiles_onyx.sh diff --git a/scripts/prepare_tiles_onyx.sh b/scripts/prepare_tiles_onyx.sh new file mode 100755 index 00000000..7bea7baf --- /dev/null +++ b/scripts/prepare_tiles_onyx.sh @@ -0,0 +1,33 @@ +#!/bin/bash +#sbatch -n 1 +#sbatch --mem 120000 +#sbatch -p lanka-v3 +#sbatch --exclusive + + +basedir=$(pwd) +yaml_fname=memory_config_onyx.yaml +line=random_sparsity + +nnz=$1 +dim=$2 +echo "running for point nnz=$nnz and dimsize=$dim" + +export sam_home=$basedir +export tiled_suitesparse_formatted_path=${sam_home}/tiles/matmul_ikj/formatted +export tiled_output_path=${sam_home}/tiles/matmul_ikj/output/ + +pushd . + +mkdir extensor_mtx +cd extensor_mtx +python ../sam/onyx/synthetic/generate_fixed_nnz_mats.py --nnz $nnz --dim $dim +cd .. 
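In generate_fixed_nnz_mats.py the `--nnz`/`--dim` pair turns into a density of nnz / dim**2 that is handed straight to `scipy.sparse.random`, so each sweep point fixes the sparsity level of the generated matrix. A minimal sketch of that relationship (the 5000/1024 pair is just one of the sweep points listed above):

```python
# Sketch of the nnz/dim -> density relationship used by generate_fixed_nnz_mats.py.
import numpy as np
import scipy.sparse

nnz, dim = 5000, 1024                    # example sweep point; any --nnz/--dim pair works the same way
density = nnz / (dim ** 2)               # ~0.0048 for this pair
mat = scipy.sparse.random(dim, dim, density, data_rvs=np.ones)
print(f"requested nnz={nnz}, generated nnz={mat.nnz}, density={density:.4%}")
```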
+ +mkdir -p $path + +mkdir -p $basedir/tiles/ +rm -rf $basedir/tiles/* + +./scripts/prepare_files.sh extensor_${nnz}_${dim}.mtx $yaml_fname + From b4dba5dfcbb770e02f7577ef0fd72090cbf35e21 Mon Sep 17 00:00:00 2001 From: Olivia Hsu Date: Fri, 7 Apr 2023 09:41:53 -0700 Subject: [PATCH 10/39] Add in fixes to tile script and add in a suitesparse runner --- sam/sim/src/tiling/tile.py | 7 ++-- scripts/suitesparse_memory_model_runner.sh | 38 ++++++++++++++++++++++ 2 files changed, 42 insertions(+), 3 deletions(-) create mode 100755 scripts/suitesparse_memory_model_runner.sh diff --git a/sam/sim/src/tiling/tile.py b/sam/sim/src/tiling/tile.py index 2ff1afe4..79d773d5 100644 --- a/sam/sim/src/tiling/tile.py +++ b/sam/sim/src/tiling/tile.py @@ -12,8 +12,9 @@ from sam.util import SuiteSparseTensor, InputCacheSuiteSparse, ScipyTensorShifter from sam.sim.src.tiling.process_expr import parse_all, update_dict -SAM_STRS = {"matmul_ikj": "X(i,j)=B(i,k)*C(k,j) -f=X:ss -f=B:ss:1,0 -f=C:ss -s=reorder(k,i,j)"} - +SAM_STRS = {"matmul_kij": "X(i,j)=B(i,k)*C(k,j) -f=X:ss -f=B:ss:1,0 -f=C:ss -s=reorder(k,i,j)", + "matmul_ikj": "X(i,j)=B(i,k)*C(k,j) -f=X:ss -f=B:ss -f=C:ss -s=reorder(i,k,j)", + "matmul_ijk": "X(i,j)=B(i,k)*C(k,j) -f=X:ss -f=B:ss -f=C:ss:1,0 -s=reorder(i,j,k)"} def print_dict(dd): for k, v in dd.items(): @@ -275,7 +276,7 @@ def cotile_multilevel_coo(app_str, hw_config_fname, tensors, output_dir_path): parser = argparse.ArgumentParser(description='Tile matrices') parser.add_argument("--input_tensor", type=str, default=None) parser.add_argument("--gen_tensor", action="store_true") - parser.add_argument("--cotile", type=str, default=None) + parser.add_argument("--cotile", type=str, default=None, description="Name of kernel if it needs to be cotiled") parser.add_argument("--output_dir_path", type=str, default="./tiles") parser.add_argument("--hw_config", type=str, default=None) parser.add_argument("--multilevel", action="store_true") diff --git a/scripts/suitesparse_memory_model_runner.sh b/scripts/suitesparse_memory_model_runner.sh new file mode 100755 index 00000000..0e9c2dd0 --- /dev/null +++ b/scripts/suitesparse_memory_model_runner.sh @@ -0,0 +1,38 @@ +#!/bin/bash +#SBATCH -N 1 +#SBATCH --mem 120000 +#SBATCH -p lanka-v3 +#SBATCH --exclusive + +benchout=memory_model_out + +basedir=$(pwd) +bench=matmul_ikj_tile_pipeline_final +yaml_fname=memory_config_onyx.yaml +path=$basedir/$benchout + +fname=$1 + +echo "Running for suitesparse $fname" + +export SAM_HOME=$basedir +export TILED_SUITESPARSE_FORMATTED_PATH=${SAM_HOME}/tiles/matmul_ikj/formatted +export TILED_OUTPUT_PATH=${SAM_HOME}/tiles/matmul_ikj/output/ + +pushd . 
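The SAM_STRS entries added to tile.py above bundle everything the tiler needs into one string: the index expression, one `-f=` format/mode-permutation flag per tensor, and the `-s=reorder(...)` loop order. A minimal sketch of how one entry decomposes (illustrative only; the real parsing goes through parse_sam_input and parse_all in process_expr):

```python
# Illustrative decomposition of one SAM_STRS entry; the actual code uses
# parse_sam_input()/parse_all() from sam.sim.src.tiling.process_expr.
sam_str = "X(i,j)=B(i,k)*C(k,j) -f=X:ss -f=B:ss -f=C:ss:1,0 -s=reorder(i,j,k)"

expr, *flags = sam_str.split(" -")
lhs, rhs = expr.split("=")

# Index variables per tensor: X -> ['i', 'j'], B -> ['i', 'k'], C -> ['k', 'j']
ivars = {}
for term in [lhs] + rhs.replace("*", "+").split("+"):
    name, inds = term.split("(")
    ivars[name] = inds.rstrip(")").split(",")

formats = [f[len("f="):] for f in flags if f.startswith("f=")]   # ['X:ss', 'B:ss', 'C:ss:1,0']
loop_order = [f for f in flags if f.startswith("s=")]            # ['s=reorder(i,j,k)']
print(ivars, formats, loop_order)
```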
+ +mkdir -p $path + +mkdir -p $basedir/tiles/ +rm -rf $basedir/tiles/* + +./scripts/prepare_files.sh $fname.mtx $yaml_fname + +cd $basedir/sam/sim +pytest test/advanced-simulator/test_$bench.py --ssname $fname -s --check-gold --skip-empty --nbuffer --yaml_name=$yaml_fname --benchmark-json=$path/mem_model_$fname.json + +python $basedir/scripts/converter.py --json_name $path/mem_model_$fname.json + +python3 $basedir/scripts/bench_csv_aggregator.py $path $basedir/$benchout/$bench.csv + +popd From cc9667fff404824e9678e12be0c7f9185d2ffdbb Mon Sep 17 00:00:00 2001 From: Olivia Hsu Date: Fri, 26 May 2023 09:25:05 -0700 Subject: [PATCH 11/39] Add in elemmul to cpu runs --- compiler/baseline.cpp | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/compiler/baseline.cpp b/compiler/baseline.cpp index 9919a762..f911d9b0 100644 --- a/compiler/baseline.cpp +++ b/compiler/baseline.cpp @@ -315,7 +315,8 @@ enum SuiteSparseOp { SDDMM = 4, MATTRANSMUL = 5, RESIDUAL = 6, - MMADD = 7 + MMADD = 7, + MMMUL = 8 }; std::string opName(SuiteSparseOp op) { @@ -341,6 +342,9 @@ std::string opName(SuiteSparseOp op) { case MMADD: { return "mmadd"; } + case MMMUL: { + return "mmmul" + } default: return ""; } @@ -467,6 +471,13 @@ static void bench_suitesparse(benchmark::State &state, SuiteSparseOp op, int fil result(i, j) = ssTensor(i, j) + otherShifted(i, j); break; } + case MMMUL: { + result = Tensor("result", ssTensor.getDimensions(), ssTensor.getFormat(), fill_value); + + IndexVar i, j, k; + result(i, j) = ssTensor(i, j) * otherShifted(i, j); + break; + } case MATTRANSMUL: { result = Tensor("result", {DIM1}, Format(Sparse), fill_value); @@ -516,4 +527,5 @@ static void bench_suitesparse(benchmark::State &state, SuiteSparseOp op, int fil // TODO: need to fix for DCSC for this TACO_BENCH_ARGS(bench_suitesparse, mat_mattransmul, MATTRANSMUL); TACO_BENCH_ARGS(bench_suitesparse, matmul_spmm, SPMM); + TACO_BENCH_ARGS(bench_suitesparse, mat_elemmul, MMMUL); From 95562954d327b44ed6b90978ecf4c03de1eebf7b Mon Sep 17 00:00:00 2001 From: Akhilesh Varadan Balasingam Date: Wed, 19 Jul 2023 10:34:16 -0700 Subject: [PATCH 12/39] tiling for apps --- sam/sim/src/tiling/tile.py | 29 ++++++++++++++++++---- scripts/prepare_files.sh | 2 +- scripts/suitesparse_memory_model_runner.sh | 16 ++++++++---- scripts/tile_ext.sh | 9 ++++--- setup_tiling_mat.py | 25 +++++++++++++++++++ 5 files changed, 67 insertions(+), 14 deletions(-) create mode 100644 setup_tiling_mat.py diff --git a/sam/sim/src/tiling/tile.py b/sam/sim/src/tiling/tile.py index 79d773d5..7674afeb 100644 --- a/sam/sim/src/tiling/tile.py +++ b/sam/sim/src/tiling/tile.py @@ -6,15 +6,24 @@ import yaml import copy import pickle +import random from itertools import compress from pathlib import Path + +import sys +custom_path = '/home/avb03/sam' +sys.path.append(custom_path) + from sam.util import SuiteSparseTensor, InputCacheSuiteSparse, ScipyTensorShifter from sam.sim.src.tiling.process_expr import parse_all, update_dict SAM_STRS = {"matmul_kij": "X(i,j)=B(i,k)*C(k,j) -f=X:ss -f=B:ss:1,0 -f=C:ss -s=reorder(k,i,j)", "matmul_ikj": "X(i,j)=B(i,k)*C(k,j) -f=X:ss -f=B:ss -f=C:ss -s=reorder(i,k,j)", - "matmul_ijk": "X(i,j)=B(i,k)*C(k,j) -f=X:ss -f=B:ss -f=C:ss:1,0 -s=reorder(i,j,k)"} + "matmul_ijk": "X(i,j)=B(i,k)*C(k,j) -f=X:ss -f=B:ss -f=C:ss:1,0 -s=reorder(i,j,k)", + "mat_elemadd": "X(i,j)=B(i,j)+C(i,j) -f=X:ss -f=B:ss -f=C:ss:1,0 -s=reorder(i,j,k)", + "mat_elemmul": "X(i,j)=B(i,j)*C(i,j) -f=X:ss -f=B:ss -f=C:ss:1,0 -s=reorder(i,j,k)", + "mat_mattransmul": 
"X(i,j)=B(i,j)*c(j)+d(i) -f=X:ss -f=B:ss -f=c:ss -f=d:ss -s=reorder(i,j,k)"} def print_dict(dd): for k, v in dd.items(): @@ -186,8 +195,17 @@ def get_other_tensors(app_str, tensor): elif "mat_sddmm" in app_str: pass - elif "mat_mattransmul" in app_str or "mat_residual" in app_str: - pass + elif "mat_mattransmul" in app_str: + print("Writing other tensors...") + rows, cols = tensor.shape # i,j + tensor_c = scipy.sparse.random(cols, 1) + tensor_d = scipy.sparse.random(rows, 1) + + tensors.append(tensor_c) + tensors.append(tensor_d) + + elif "mat_residual" in app_str: + pass elif "mat_vecmul" in app_str: pass else: @@ -202,6 +220,7 @@ def cotile_multilevel_coo(app_str, hw_config_fname, tensors, output_dir_path): tensors = get_other_tensors(app_str, tensors[0]) names, format_permutations, ivars = parse_sam_input(args.cotile) + print(ivars) sizes_dict = {} for i, name in enumerate(names): @@ -275,8 +294,8 @@ def cotile_multilevel_coo(app_str, hw_config_fname, tensors, output_dir_path): if __name__ == "__main__": parser = argparse.ArgumentParser(description='Tile matrices') parser.add_argument("--input_tensor", type=str, default=None) - parser.add_argument("--gen_tensor", action="store_true") - parser.add_argument("--cotile", type=str, default=None, description="Name of kernel if it needs to be cotiled") + parser.add_argument("--gen_tensor", action="store_false") + parser.add_argument("--cotile", type=str, default=None) parser.add_argument("--output_dir_path", type=str, default="./tiles") parser.add_argument("--hw_config", type=str, default=None) parser.add_argument("--multilevel", action="store_true") diff --git a/scripts/prepare_files.sh b/scripts/prepare_files.sh index f14f0380..4e3c82ed 100755 --- a/scripts/prepare_files.sh +++ b/scripts/prepare_files.sh @@ -11,4 +11,4 @@ rm -rf $basedir/tiles/* ./scripts/tile_ext.sh $1 $2 -python scripts/generate_gold_matmul_tiled.py --yaml_name $2 +python3 scripts/generate_gold_matmul_tiled.py --yaml_name $2 diff --git a/scripts/suitesparse_memory_model_runner.sh b/scripts/suitesparse_memory_model_runner.sh index 0e9c2dd0..ccea10e6 100755 --- a/scripts/suitesparse_memory_model_runner.sh +++ b/scripts/suitesparse_memory_model_runner.sh @@ -7,17 +7,22 @@ benchout=memory_model_out basedir=$(pwd) -bench=matmul_ikj_tile_pipeline_final +# bench=matmul_ijk_tile_pipeline_final yaml_fname=memory_config_onyx.yaml path=$basedir/$benchout fname=$1 +appname=$2 + echo "Running for suitesparse $fname" export SAM_HOME=$basedir -export TILED_SUITESPARSE_FORMATTED_PATH=${SAM_HOME}/tiles/matmul_ikj/formatted -export TILED_OUTPUT_PATH=${SAM_HOME}/tiles/matmul_ikj/output/ +# export TILED_SUITESPARSE_FORMATTED_PATH=${SAM_HOME}/tiles/matmul_ijk/formatted +# export TILED_OUTPUT_PATH=${SAM_HOME}/tiles/matmul_ijk/output/ + +export TILED_SUITESPARSE_FORMATTED_PATH=${SAM_HOME}/tiles/${appname}/formatted +export TILED_OUTPUT_PATH=${SAM_HOME}/tiles/${appname}/output/ pushd . 
@@ -29,9 +34,10 @@ rm -rf $basedir/tiles/* ./scripts/prepare_files.sh $fname.mtx $yaml_fname cd $basedir/sam/sim -pytest test/advanced-simulator/test_$bench.py --ssname $fname -s --check-gold --skip-empty --nbuffer --yaml_name=$yaml_fname --benchmark-json=$path/mem_model_$fname.json +# python3 -m pytest test/advanced-simulator/test_$bench.py --ssname $fname -s --check-gold --skip-empty --nbuffer --yaml_name=$yaml_fname --benchmark-json=$path/mem_model_$fname.json +# pytest test/advanced-simulator/test_$bench.py --ssname $fname -s --check-gold --skip-empty --nbuffer --yaml_name=$yaml_fname --benchmark-json=$path/mem_model_$fname.json -python $basedir/scripts/converter.py --json_name $path/mem_model_$fname.json +# python3 $basedir/scripts/converter.py --json_name $path/mem_model_$fname.json python3 $basedir/scripts/bench_csv_aggregator.py $path $basedir/$benchout/$bench.csv diff --git a/scripts/tile_ext.sh b/scripts/tile_ext.sh index 2c8a54e3..46c56e70 100755 --- a/scripts/tile_ext.sh +++ b/scripts/tile_ext.sh @@ -1,7 +1,10 @@ #!/bin/bash BENCHMARKS=( - matmul_ikj +# matmul_ijk +# mat_elemadd + # mat_elemmul + mat_mattransmul ) # THIS FILE MUST BE RUN FROM sam/ location @@ -22,10 +25,10 @@ for b in ${!BENCHMARKS[@]}; do rm -rf $basedir/tiles/* echo "Tiling mtx file" - python $basedir/sam/sim/src/tiling/tile.py --extensor --input_path $ext_path --cotile $bench --multilevel --hw_config $basedir/sam/sim/src/tiling/$2 + python3 $basedir/sam/sim/src/tiling/tile.py --extensor --input_path $ext_path --cotile $bench --multilevel --hw_config $basedir/sam/sim/src/tiling/$2 echo "Generating input format files for $ext_path..." - python $basedir/scripts/datastructure_suitesparse.py -n temp -hw -b $bench --input $basedir/tiles/$bench/mtx/ --output_dir_path $basedir/tiles/$bench/formatted --tiles + python3 $basedir/scripts/datastructure_suitesparse.py -n temp -hw -b $bench --input $basedir/tiles/$bench/mtx/ --output_dir_path $basedir/tiles/$bench/formatted --tiles done diff --git a/setup_tiling_mat.py b/setup_tiling_mat.py new file mode 100644 index 00000000..580dedcf --- /dev/null +++ b/setup_tiling_mat.py @@ -0,0 +1,25 @@ +import subprocess +import glob +import shutil +import os + +data = ['rel5'] +# app_name = "mat_elemadd" +app_name = "mat_elemmul" +# data_file = open("scripts/tensor_names/suitesparse_valid_mid50.txt") +# data_file_lines = data_file.readlines() +# for line in data_file_lines: +# data.append(line[:-1]) + +for datum in data: + mtx_file = glob.glob(f"/nobackup/owhsu/sparse-datasets/suitesparse/{datum}.mtx")[0] + shutil.copy(mtx_file,f"extensor_mtx/{datum}.mtx") + + command = f"./scripts/suitesparse_memory_model_runner.sh {datum} {app_name}" + os.system(command) + + copy_rename = f"cp -r tiles/{app_name} tiles_compiled/{app_name}_{datum}" + os.system(copy_rename) + + docker_copy_command = f"docker cp tiles_compiled/{app_name}_{datum} avb03-sparse-tiling:/aha/garnet/tiles_{app_name}_{datum}" + os.system(docker_copy_command) \ No newline at end of file From 3074adeaba1aab09f237e278fdb22af3f114ae82 Mon Sep 17 00:00:00 2001 From: Akhilesh Varadan Balasingam Date: Wed, 19 Jul 2023 16:31:51 -0700 Subject: [PATCH 13/39] mat_mattransmul prob --- sam/sim/src/tiling/tile.py | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/sam/sim/src/tiling/tile.py b/sam/sim/src/tiling/tile.py index 7674afeb..942035b2 100644 --- a/sam/sim/src/tiling/tile.py +++ b/sam/sim/src/tiling/tile.py @@ -23,12 +23,17 @@ "matmul_ijk": "X(i,j)=B(i,k)*C(k,j) -f=X:ss -f=B:ss 
-f=C:ss:1,0 -s=reorder(i,j,k)", "mat_elemadd": "X(i,j)=B(i,j)+C(i,j) -f=X:ss -f=B:ss -f=C:ss:1,0 -s=reorder(i,j,k)", "mat_elemmul": "X(i,j)=B(i,j)*C(i,j) -f=X:ss -f=B:ss -f=C:ss:1,0 -s=reorder(i,j,k)", - "mat_mattransmul": "X(i,j)=B(i,j)*c(j)+d(i) -f=X:ss -f=B:ss -f=c:ss -f=d:ss -s=reorder(i,j,k)"} + "mat_mattransmul": "X(i,j)=B(j,i)*c(j)+d(i) -f=X:ss -f=B:ss -f=c:ss:0 -f=d:ss:0 -s=reorder(i,j)"} def print_dict(dd): for k, v in dd.items(): print(k, ":", v) +def print_ast(node): + for child in ast.iter_child_nodes(node): + print_ast(child) + print(node) + def get_ivars(names, expr): [lhs, rhs] = expr.split("=") @@ -95,7 +100,12 @@ def tile_coo(tensor, ivar_map, split_map, new_ivar_order=None): tile_sizes = dict() order = len(tensor.shape) - tensor_points = tensor.todok() + tensor_coo = scipy.sparse.coo_matrix(tensor) + tensor_points = tensor_coo.todok() + + print("ivar_map: ", ivar_map) + print("split_map: ", split_map) + print("order = ", order) new_shape = [] for lvl in range(order): @@ -158,10 +168,15 @@ def cotile_coo(tensor_names, tensors, permutation_strs, ivar_strs, split_map): tensor_format = permutation_strs[i] ivar_map = dict() order = len(tensor.shape) + print("order is ", order) for dim in range(order): + print("tensor format: ", tensor_format) + print("dim is ", dim) + print("tensor_format[dim:dim+1] is ", tensor_format[dim:dim+1]) lvl_permutation = tensor_format[dim:dim + 1][0] ivar = ivar_strs[i][dim] ivar_map[lvl_permutation] = ivar + print("ivar_map is ", ivar_map) tiles, tile_sizes = tile_coo(tensor, ivar_map, split_map) tiled_tensors[tensor_name] = tiles @@ -198,8 +213,8 @@ def get_other_tensors(app_str, tensor): elif "mat_mattransmul" in app_str: print("Writing other tensors...") rows, cols = tensor.shape # i,j - tensor_c = scipy.sparse.random(cols, 1) - tensor_d = scipy.sparse.random(rows, 1) + tensor_c = scipy.sparse.random(cols, 1).toarray().flatten() + tensor_d = scipy.sparse.random(rows, 1).toarray().flatten() tensors.append(tensor_c) tensors.append(tensor_d) @@ -220,8 +235,8 @@ def cotile_multilevel_coo(app_str, hw_config_fname, tensors, output_dir_path): tensors = get_other_tensors(app_str, tensors[0]) names, format_permutations, ivars = parse_sam_input(args.cotile) - print(ivars) + import pdb; pdb.set_trace(); sizes_dict = {} for i, name in enumerate(names): tensor = tensors[i] @@ -257,6 +272,8 @@ def cotile_multilevel_coo(app_str, hw_config_fname, tensors, output_dir_path): if cotiled is None: # First iteration of tiling + print("tensor shapes: ", tensors[0].shape, " ", tensors[1].shape, " ", tensors[2].shape) + print("format_permutations: ", format_permutations) cotiled, cotiled_sizes = cotile_coo(names, tensors, format_permutations, ivars, split_map) else: # recursively tile the blocks From 5df6eacaebb0589b12f9d713890149ab93f6a145 Mon Sep 17 00:00:00 2001 From: Akhilesh Varadan Balasingam Date: Wed, 19 Jul 2023 16:36:50 -0700 Subject: [PATCH 14/39] setup script added --- setup_tiling_mat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup_tiling_mat.py b/setup_tiling_mat.py index 580dedcf..4d156366 100644 --- a/setup_tiling_mat.py +++ b/setup_tiling_mat.py @@ -5,7 +5,7 @@ data = ['rel5'] # app_name = "mat_elemadd" -app_name = "mat_elemmul" +app_name = "mat_mattransmul" # data_file = open("scripts/tensor_names/suitesparse_valid_mid50.txt") # data_file_lines = data_file.readlines() # for line in data_file_lines: From ceb4ff17b6a44ece7c16ed707a3c7a2d5b12d49b Mon Sep 17 00:00:00 2001 From: Olivia Hsu Date: Wed, 26 Jul 2023 17:29:33 -0700 
Subject: [PATCH 15/39] Add in tiling script to tile tensors for mattransmul. Frostt tensors are IP --- sam/sim/src/tiling/tile.py | 182 ++++++++++++++++++++++++++++++------- sam/util.py | 31 ++++--- scripts/tile_ext.sh | 2 +- setup_tiling_mat.py | 8 +- 4 files changed, 173 insertions(+), 50 deletions(-) diff --git a/sam/sim/src/tiling/tile.py b/sam/sim/src/tiling/tile.py index 942035b2..462da75d 100644 --- a/sam/sim/src/tiling/tile.py +++ b/sam/sim/src/tiling/tile.py @@ -7,6 +7,7 @@ import copy import pickle import random +import sparse from itertools import compress from pathlib import Path @@ -15,7 +16,7 @@ custom_path = '/home/avb03/sam' sys.path.append(custom_path) -from sam.util import SuiteSparseTensor, InputCacheSuiteSparse, ScipyTensorShifter +from sam.util import SuiteSparseTensor, InputCacheSuiteSparse, ScipyTensorShifter, PydataSparseTensorDumper, SUITESPARSE_PATH, FROSTT_PATH from sam.sim.src.tiling.process_expr import parse_all, update_dict SAM_STRS = {"matmul_kij": "X(i,j)=B(i,k)*C(k,j) -f=X:ss -f=B:ss:1,0 -f=C:ss -s=reorder(k,i,j)", @@ -88,6 +89,72 @@ def parse_sam_input(string): ivars = [ivars[tensor] for tensor in tensors] return tensors, permutations, ivars +# Outputs Pydata/sparse tensor tiles, given a pydata/sparse tensor (DOK or COO) +# ASSUME: tensor is a scipy.sparse.coo_matrix +# TODO: new_ivar_order right now is assumed to be one fixed order +# In the future, will have to take into acocunt all reorderings +def tile_tensor(tensor, ivar_map, split_map, new_ivar_order=None): + human_readable = False + + tiles = dict() + tile_sizes = dict() + order = len(tensor.shape) + + tensor_coo = sparse.COO(tensor) + tensor_points = sparse.DOK.from_coo(tensor_coo) + + print("ivar_map: ", ivar_map) + print("split_map: ", split_map) + print("order = ", order) + + new_shape = [] + for lvl in range(order): + ivar = ivar_map[lvl] + sf = split_map[ivar] + new_shape.append(sf) + + for crds, val in tensor_points.data.items(): + point = list(crds) + + new_point = [] + tile_id = [] + for lvl in range(order): + ivar = ivar_map[lvl] + sf = split_map[ivar] + + new_point.append(point[lvl] % sf) + tile_id.append(int(point[lvl] / sf)) + + # Add in value to the new_point as well + new_point.append(val) + tile_id = tuple(tile_id) + + if tile_id in tiles: + tiles[tile_id].append(new_point) + else: + tiles[tile_id] = [new_point] + + # sort the new coo lists + for key, val in tiles.items(): + if human_readable: + dok = sorted(val) + else: + dok = sparse.DOK(tuple(new_shape)) + for point in val: + dok[tuple(point[0:-1])] = point[-1] + + tiles[key] = dok + + for tile_id, tile_dok in tiles.items(): + tile = tile_dok.to_coo() + # FIXME: This size number isn't correct for tensor tiles + nonempty_rows = tile.nnz + nonempty_row_ind = np.where(nonempty_rows > 0)[0] + tile_sizes[tile_id] = tile.nnz * 2 + 2 * len(nonempty_row_ind) + 3 + + return tiles, tile_sizes + + # Outputs COO tiles, given a COO tensor # ASSUME: tensor is a scipy.sparse.coo_matrix @@ -159,7 +226,7 @@ def tile_coo(tensor, ivar_map, split_map, new_ivar_order=None): # permutation_strs: list of permutation_strs [ss01, ss10] following tensor_names (from SAM) # ivar_strs: list of ivar_strs ["ik", "kj"] following tensor_names (from SAM) # split_map: dictionary of split factors (from hardware) -def cotile_coo(tensor_names, tensors, permutation_strs, ivar_strs, split_map): +def cotile_coo(tensor_names, tensors, permutation_strs, ivar_strs, split_map, higher_order=False): tiled_tensors = dict() tiled_tensor_sizes = dict() @@ -178,7 +245,11 @@ def 
cotile_coo(tensor_names, tensors, permutation_strs, ivar_strs, split_map): ivar_map[lvl_permutation] = ivar print("ivar_map is ", ivar_map) - tiles, tile_sizes = tile_coo(tensor, ivar_map, split_map) + if higher_order: + tiles, tile_sizes = tile_tensor(tensor, ivar_map, split_map) + else: + tiles, tile_sizes = tile_coo(tensor, ivar_map, split_map) + tiled_tensors[tensor_name] = tiles tiled_tensor_sizes[tensor_name] = tile_sizes @@ -231,7 +302,7 @@ def get_other_tensors(app_str, tensor): return tensors -def cotile_multilevel_coo(app_str, hw_config_fname, tensors, output_dir_path): +def cotile_multilevel_coo(app_str, hw_config_fname, tensors, output_dir_path, higher_order=False): tensors = get_other_tensors(app_str, tensors[0]) names, format_permutations, ivars = parse_sam_input(args.cotile) @@ -274,7 +345,7 @@ def cotile_multilevel_coo(app_str, hw_config_fname, tensors, output_dir_path): # First iteration of tiling print("tensor shapes: ", tensors[0].shape, " ", tensors[1].shape, " ", tensors[2].shape) print("format_permutations: ", format_permutations) - cotiled, cotiled_sizes = cotile_coo(names, tensors, format_permutations, ivars, split_map) + cotiled, cotiled_sizes = cotile_coo(names, tensors, format_permutations, ivars, split_map, higher_order) else: # recursively tile the blocks new_cotiled = {} @@ -284,9 +355,13 @@ def cotile_multilevel_coo(app_str, hw_config_fname, tensors, output_dir_path): new_cotiled[name] = {} new_cotiled_sizes[name] = {} for tile_id, tile in cotiled[name].items(): - new_cotiled_temp, new_cotiled_sizes_temp = cotile_coo(name, [tile.tocoo()], + if higher_order: + tile_in_coo = tile.to_coo() + else: + tile_in_coo = tile.tocoo() + new_cotiled_temp, new_cotiled_sizes_temp = cotile_coo(name, [tile_in_coo], [format_permutations[i]], [ivars[i]], - split_map) + split_map, higher_order) for kk, vv in copy.deepcopy(new_cotiled_temp)[name].items(): new_tile_id = tuple(list(tile_id) + list(kk)) @@ -309,31 +384,66 @@ def cotile_multilevel_coo(app_str, hw_config_fname, tensors, output_dir_path): inputCache = InputCacheSuiteSparse() if __name__ == "__main__": - parser = argparse.ArgumentParser(description='Tile matrices') - parser.add_argument("--input_tensor", type=str, default=None) - parser.add_argument("--gen_tensor", action="store_false") - parser.add_argument("--cotile", type=str, default=None) - parser.add_argument("--output_dir_path", type=str, default="./tiles") - parser.add_argument("--hw_config", type=str, default=None) - parser.add_argument("--multilevel", action="store_true") - parser.add_argument("--input_path", type=str, default=None) - parser.add_argument("--extensor", action="store_true") + parser = argparse.ArgumentParser(description='script that tiles tensors') + parser.add_argument("--tensor_type", choices=['ex', 'gen', 'file', 'ss', 'frostt'], help='The \ + tiles, tile_sizes = tile_coo(tensor, ivar_map, split_map) \ + type of tensor to tile: extensor(ex), generated (gen), \ + SuiteSparse (ss), FROSTT (frostt), or input file (file)') + parser.add_argument("--higher_order", action="store_true", help="If \ + true then we want to process a higher-order tensor. With higher-order set to true, if \ + 'tensor_type' is: \ + \n 'gen' then a 3-tensor is generated instead of matrix. \ + \n 'file' then a .tns file is read instead of a .mtx file. \ + \n 'ss' then other matrices used with SuiteSparse are .tns instead of .mtx files. 
\ + \n 'frostt' should always have 'higher_order' set as true.") + + parser.add_argument("--input_tensor", type=str, default=None, + help="Input tensor NAME if tensor_type is set to 'file'. \ + This is for use with SuiteSparse or FROSTT") + parser.add_argument("--input_path", type=str, default=None, help="Input tensor path") + parser.add_argument("--output_dir_path", type=str, default="./tiles", + help='Output path, directory where tiles get written to') + parser.add_argument("--hw_config", type=str, default=None, + help='Path to the hardware config yaml') + + parser.add_argument("--cotile", type=str, default=None, help='If \ + this is true cotile multiple tensors, else tile one tensor only') + parser.add_argument("--multilevel", action="store_true", help='If \ + multilevel is true there will exist more than one level of tiles, \ + else only tile once') + parser.add_argument("--seed", type=int, default=0, help="Random seed") args = parser.parse_args() + random.seed(args.seed) + np.random.seed(args.seed) + tensor = None cwd = os.getcwd() - if args.gen_tensor: - tensor = scipy.sparse.random(16, 16) - elif args.extensor: + if args.tensor_type == "gen": + if args.higher_order: + tensor = sparse.COO(sparse.random((16, 16, 16))) + else: + tensor = scipy.sparse.random(16, 16) + elif args.tensor_type == "ex": tensor = scipy.io.mmread(args.input_path) + elif args.tensor_type == "ss": + assert args.input_tensor is not None + tensor_path = os.path.join(SUITESPARSE_PATH, args.input_tensor + ".mtx") + ss_tensor = SuiteSparseTensor(tensor_path) + tensor = inputCache.load(ss_tensor, False) + elif args.tensor_type == "frostt": + assert args.input_tensor is not None + assert args.higher_order + + tensor_path = os.path.join(FROSTT_PATH, args.input_tensor + ".tns") + + # FIXME: This is broken + frostt_tensor = FrosttTensor(tensor_path) + tensor = inputCache.load(ss_tensor, False) + else: - assert args.input_tensor is not None - SS_PATH = os.getenv('SUITESPARSE_PATH', default=os.path.join(cwd, 'suitesparse')) - # print("PATH:", SS_PATH) - tensor_path = os.path.join(SS_PATH, args.input_tensor + ".mtx") - ss_tensor = SuiteSparseTensor(tensor_path) - tensor = inputCache.load(ss_tensor, False) + raise ValueError("This choice of 'tensor_type' is unreachable") split_map = {"i": 16, "j": 16, "k": 16} @@ -345,7 +455,6 @@ def cotile_multilevel_coo(app_str, hw_config_fname, tensors, output_dir_path): print("TILES:") print_dict(tiles) else: - output_mtx_name = os.path.join(args.output_dir_path, args.cotile, "mtx") output_mtx_path = Path(output_mtx_name) output_mtx_path.mkdir(parents=True, exist_ok=True) @@ -354,21 +463,30 @@ def cotile_multilevel_coo(app_str, hw_config_fname, tensors, output_dir_path): if args.multilevel: assert args.cotile is not None cotiled_tensors = cotile_multilevel_coo(args.cotile, args.hw_config, [tensor], - os.path.join(args.output_dir_path, args.cotile)) + os.path.join(args.output_dir_path, + args.cotile), + args.higher_order) elif args.cotile is not None: tensor2 = scipy.sparse.random(tensor.shape[0], tensor.shape[1]) names, format_permutations, ivars = parse_sam_input(args.cotile) - cotiled_tensors = cotile_coo(names, [tensor, tensor2], format_permutations, ivars, split_map) + cotiled_tensors = cotile_coo(names, [tensor, tensor2], + format_permutations, ivars, split_map, args.higher_order) # print(cotiled_tensors) names = cotiled_tensors.keys() for name in names: for tile_id, tile in cotiled_tensors[name].items(): [str(item) for item in tile_id] - filename = "tensor_" + name + "_tile_" + 
"_".join([str(item) for item in tile_id]) + ".mtx" + filename = "tensor_" + name + "_tile_" + "_".join([str(item) for item in tile_id]) + filename += ".tns" if args.higher_order else ".mtx" mtx_path_name = os.path.join(output_mtx_name, filename) print(tile) - print(mtx_path_name, cwd) - scipy.io.mmwrite(mtx_path_name, tile) - print(os.path.exists(mtx_path_name)) + print("Output path:", mtx_path_name) + + if args.higher_order: + tns_dumper = PydataSparseTensorDumper() + print(tile.shape) + tns_dumper.dump(tile, mtx_path_name) + else: + scipy.io.mmwrite(mtx_path_name, tile) diff --git a/sam/util.py b/sam/util.py index 7044e94d..1d7588a4 100644 --- a/sam/util.py +++ b/sam/util.py @@ -143,21 +143,22 @@ def load(self, path): # PydataSparseTensorLoader loads a sparse tensor from a file into # a pydata.sparse tensor. -# class PydataSparseTensorLoader: -# def __init__(self): -# self.loader = TnsFileLoader() -# -# def load(self, path): -# dims, coords, values = self.loader.load(path) -# return sparse.COO(coords, values, tuple(dims)) -# -# # PydataSparseTensorDumper dumps a sparse tensor to a the desired file. -# class PydataSparseTensorDumper: -# def __init__(self): -# self.dumper = TnsFileDumper() -# -# def dump(self, tensor, path): -# self.dumper.dump_dict_to_file(tensor.shape, sparse.DOK(tensor).data, path) +class PydataSparseTensorLoader: + def __init__(self): + self.loader = TnsFileLoader() + + def load(self, path): + dims, coords, values = self.loader.load(path) + return sparse.COO(coords, values, tuple(dims)) + +# PydataSparseTensorDumper dumps a sparse tensor to a the desired file. +class PydataSparseTensorDumper: + def __init__(self): + self.dumper = TnsFileDumper() + + def dump(self, tensor, path): + assert isinstance(tensor, sparse.DOK), "The tensor needs to be a pydata/sparse DOK format" + self.dumper.dump_dict_to_file(tensor.shape, tensor.data, path) # # # diff --git a/scripts/tile_ext.sh b/scripts/tile_ext.sh index 46c56e70..1df28a71 100755 --- a/scripts/tile_ext.sh +++ b/scripts/tile_ext.sh @@ -25,7 +25,7 @@ for b in ${!BENCHMARKS[@]}; do rm -rf $basedir/tiles/* echo "Tiling mtx file" - python3 $basedir/sam/sim/src/tiling/tile.py --extensor --input_path $ext_path --cotile $bench --multilevel --hw_config $basedir/sam/sim/src/tiling/$2 + python3 $basedir/sam/sim/src/tiling/tile.py --tensor_type dataset --input_path $ext_path --cotile $bench --multilevel --hw_config $basedir/sam/sim/src/tiling/$2 echo "Generating input format files for $ext_path..." 
python3 $basedir/scripts/datastructure_suitesparse.py -n temp -hw -b $bench --input $basedir/tiles/$bench/mtx/ --output_dir_path $basedir/tiles/$bench/formatted --tiles diff --git a/setup_tiling_mat.py b/setup_tiling_mat.py index 4d156366..0c646b2d 100644 --- a/setup_tiling_mat.py +++ b/setup_tiling_mat.py @@ -3,6 +3,8 @@ import shutil import os +from sam.util import SUITESPARSE_PATH + data = ['rel5'] # app_name = "mat_elemadd" app_name = "mat_mattransmul" @@ -12,14 +14,16 @@ # data.append(line[:-1]) for datum in data: - mtx_file = glob.glob(f"/nobackup/owhsu/sparse-datasets/suitesparse/{datum}.mtx")[0] + mtx_file = glob.glob(f"{SUITESPARSE_PATH}/{datum}.mtx")[0] + os.makedirs("extensor_mtx", exist_ok=True) shutil.copy(mtx_file,f"extensor_mtx/{datum}.mtx") command = f"./scripts/suitesparse_memory_model_runner.sh {datum} {app_name}" os.system(command) + os.makedirs("tiles_compiled", exist_ok=True) copy_rename = f"cp -r tiles/{app_name} tiles_compiled/{app_name}_{datum}" os.system(copy_rename) docker_copy_command = f"docker cp tiles_compiled/{app_name}_{datum} avb03-sparse-tiling:/aha/garnet/tiles_{app_name}_{datum}" - os.system(docker_copy_command) \ No newline at end of file + os.system(docker_copy_command) From 9e1b84d1c21d0e2868e319c08fda5de8057da6c9 Mon Sep 17 00:00:00 2001 From: Olivia Hsu Date: Wed, 26 Jul 2023 17:46:14 -0700 Subject: [PATCH 16/39] Add pydata/sparse to requirements for CI --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 1b7cab3d..8671a953 100644 --- a/requirements.txt +++ b/requirements.txt @@ -25,5 +25,6 @@ PyYAML==6.0 requests==2.28.2 scipy==1.10.0 six==1.16.0 +sparse==0.13.0 tomli==2.0.1 tqdm==4.64.1 From 7a06b1f9a5830456e24629e0db7a17a71e2abd5e Mon Sep 17 00:00:00 2001 From: Olivia Hsu Date: Wed, 26 Jul 2023 17:54:45 -0700 Subject: [PATCH 17/39] Update gitignore --- .gitignore | 12 ++++--- sam/sim/src/tiling/tile.py | 73 ++++++++++++++++++++------------------ sam/util.py | 23 ++++++------ 3 files changed, 57 insertions(+), 51 deletions(-) diff --git a/.gitignore b/.gitignore index 82490b68..74d80687 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,9 @@ __pycache__/ *.txt *.out +# Generated Python Package files +*.egg-info/ + # Files for MacOS and IDEs .DS_store .idea/ @@ -38,13 +41,14 @@ compiler/benchmark/ # Generated SAM simulator tests */sim/test/apps/test_*.py -*.gv - +# Tensor files *.mtx *.tns -# Temporary matrices +# Temporary or generated tensor directories tmp_mat*/ +tiles/ +synthetic/ # Jupyter notebook checkpoints .ipynb_checkpoints/ @@ -52,5 +56,3 @@ tmp_mat*/ # Generated formatted tensor files tensor_*_mode_* -# Tensor files -tiles/ diff --git a/sam/sim/src/tiling/tile.py b/sam/sim/src/tiling/tile.py index 462da75d..f278baf7 100644 --- a/sam/sim/src/tiling/tile.py +++ b/sam/sim/src/tiling/tile.py @@ -1,35 +1,38 @@ -import numpy as np -import scipy.sparse -import os import argparse import ast -import yaml import copy +import os import pickle import random +import sys +from pathlib import Path + +import numpy as np +import scipy.sparse import sparse +import yaml -from itertools import compress -from pathlib import Path +from sam.util import SuiteSparseTensor, InputCacheSuiteSparse, ScipyTensorShifter, PydataSparseTensorDumper, \ + SUITESPARSE_PATH, FROSTT_PATH +from sam.sim.src.tiling.process_expr import parse_all -import sys +# FIXME: This should not be here... 
Set your SAM_HOME directory custom_path = '/home/avb03/sam' sys.path.append(custom_path) -from sam.util import SuiteSparseTensor, InputCacheSuiteSparse, ScipyTensorShifter, PydataSparseTensorDumper, SUITESPARSE_PATH, FROSTT_PATH -from sam.sim.src.tiling.process_expr import parse_all, update_dict - -SAM_STRS = {"matmul_kij": "X(i,j)=B(i,k)*C(k,j) -f=X:ss -f=B:ss:1,0 -f=C:ss -s=reorder(k,i,j)", +SAM_STRS = {"matmul_kij": "X(i,j)=B(i,k)*C(k,j) -f=X:ss -f=B:ss:1,0 -f=C:ss -s=reorder(k,i,j)", "matmul_ikj": "X(i,j)=B(i,k)*C(k,j) -f=X:ss -f=B:ss -f=C:ss -s=reorder(i,k,j)", "matmul_ijk": "X(i,j)=B(i,k)*C(k,j) -f=X:ss -f=B:ss -f=C:ss:1,0 -s=reorder(i,j,k)", "mat_elemadd": "X(i,j)=B(i,j)+C(i,j) -f=X:ss -f=B:ss -f=C:ss:1,0 -s=reorder(i,j,k)", "mat_elemmul": "X(i,j)=B(i,j)*C(i,j) -f=X:ss -f=B:ss -f=C:ss:1,0 -s=reorder(i,j,k)", "mat_mattransmul": "X(i,j)=B(j,i)*c(j)+d(i) -f=X:ss -f=B:ss -f=c:ss:0 -f=d:ss:0 -s=reorder(i,j)"} + def print_dict(dd): for k, v in dd.items(): print(k, ":", v) + def print_ast(node): for child in ast.iter_child_nodes(node): print_ast(child) @@ -89,6 +92,7 @@ def parse_sam_input(string): ivars = [ivars[tensor] for tensor in tensors] return tensors, permutations, ivars + # Outputs Pydata/sparse tensor tiles, given a pydata/sparse tensor (DOK or COO) # ASSUME: tensor is a scipy.sparse.coo_matrix # TODO: new_ivar_order right now is assumed to be one fixed order @@ -155,7 +159,6 @@ def tile_tensor(tensor, ivar_map, split_map, new_ivar_order=None): return tiles, tile_sizes - # Outputs COO tiles, given a COO tensor # ASSUME: tensor is a scipy.sparse.coo_matrix # TODO: new_ivar_order right now is assumed to be one fixed order @@ -239,7 +242,7 @@ def cotile_coo(tensor_names, tensors, permutation_strs, ivar_strs, split_map, hi for dim in range(order): print("tensor format: ", tensor_format) print("dim is ", dim) - print("tensor_format[dim:dim+1] is ", tensor_format[dim:dim+1]) + print("tensor_format[dim:dim+1] is ", tensor_format[dim:dim + 1]) lvl_permutation = tensor_format[dim:dim + 1][0] ivar = ivar_strs[i][dim] ivar_map[lvl_permutation] = ivar @@ -249,7 +252,7 @@ def cotile_coo(tensor_names, tensors, permutation_strs, ivar_strs, split_map, hi tiles, tile_sizes = tile_tensor(tensor, ivar_map, split_map) else: tiles, tile_sizes = tile_coo(tensor, ivar_map, split_map) - + tiled_tensors[tensor_name] = tiles tiled_tensor_sizes[tensor_name] = tile_sizes @@ -283,7 +286,7 @@ def get_other_tensors(app_str, tensor): pass elif "mat_mattransmul" in app_str: print("Writing other tensors...") - rows, cols = tensor.shape # i,j + rows, cols = tensor.shape # i,j tensor_c = scipy.sparse.random(cols, 1).toarray().flatten() tensor_d = scipy.sparse.random(rows, 1).toarray().flatten() @@ -291,7 +294,7 @@ def get_other_tensors(app_str, tensor): tensors.append(tensor_d) elif "mat_residual" in app_str: - pass + pass elif "mat_vecmul" in app_str: pass else: @@ -307,7 +310,8 @@ def cotile_multilevel_coo(app_str, hw_config_fname, tensors, output_dir_path, hi names, format_permutations, ivars = parse_sam_input(args.cotile) - import pdb; pdb.set_trace(); + import pdb + pdb.set_trace() sizes_dict = {} for i, name in enumerate(names): tensor = tensors[i] @@ -345,7 +349,8 @@ def cotile_multilevel_coo(app_str, hw_config_fname, tensors, output_dir_path, hi # First iteration of tiling print("tensor shapes: ", tensors[0].shape, " ", tensors[1].shape, " ", tensors[2].shape) print("format_permutations: ", format_permutations) - cotiled, cotiled_sizes = cotile_coo(names, tensors, format_permutations, ivars, split_map, 
higher_order) + cotiled, cotiled_sizes = cotile_coo(names, tensors, format_permutations, ivars, split_map, + higher_order) else: # recursively tile the blocks new_cotiled = {} @@ -398,13 +403,13 @@ def cotile_multilevel_coo(app_str, hw_config_fname, tensors, output_dir_path, hi \n 'frostt' should always have 'higher_order' set as true.") parser.add_argument("--input_tensor", type=str, default=None, - help="Input tensor NAME if tensor_type is set to 'file'. \ + help="Input tensor NAME if tensor_type is set to 'file'. \ This is for use with SuiteSparse or FROSTT") parser.add_argument("--input_path", type=str, default=None, help="Input tensor path") parser.add_argument("--output_dir_path", type=str, default="./tiles", - help='Output path, directory where tiles get written to') + help='Output path, directory where tiles get written to') parser.add_argument("--hw_config", type=str, default=None, - help='Path to the hardware config yaml') + help='Path to the hardware config yaml') parser.add_argument("--cotile", type=str, default=None, help='If \ this is true cotile multiple tensors, else tile one tensor only') @@ -428,22 +433,22 @@ def cotile_multilevel_coo(app_str, hw_config_fname, tensors, output_dir_path, hi elif args.tensor_type == "ex": tensor = scipy.io.mmread(args.input_path) elif args.tensor_type == "ss": - assert args.input_tensor is not None - tensor_path = os.path.join(SUITESPARSE_PATH, args.input_tensor + ".mtx") - ss_tensor = SuiteSparseTensor(tensor_path) - tensor = inputCache.load(ss_tensor, False) + assert args.input_tensor is not None + tensor_path = os.path.join(SUITESPARSE_PATH, args.input_tensor + ".mtx") + ss_tensor = SuiteSparseTensor(tensor_path) + tensor = inputCache.load(ss_tensor, False) elif args.tensor_type == "frostt": - assert args.input_tensor is not None - assert args.higher_order + assert args.input_tensor is not None + assert args.higher_order - tensor_path = os.path.join(FROSTT_PATH, args.input_tensor + ".tns") + tensor_path = os.path.join(FROSTT_PATH, args.input_tensor + ".tns") - # FIXME: This is broken - frostt_tensor = FrosttTensor(tensor_path) - tensor = inputCache.load(ss_tensor, False) + # FIXME: This is broken + frostt_tensor = FrosttTensor(tensor_path) + tensor = inputCache.load(frostt_tensor, False) else: - raise ValueError("This choice of 'tensor_type' is unreachable") + raise ValueError("This choice of 'tensor_type' is unreachable") split_map = {"i": 16, "j": 16, "k": 16} @@ -464,14 +469,14 @@ def cotile_multilevel_coo(app_str, hw_config_fname, tensors, output_dir_path, hi assert args.cotile is not None cotiled_tensors = cotile_multilevel_coo(args.cotile, args.hw_config, [tensor], os.path.join(args.output_dir_path, - args.cotile), + args.cotile), args.higher_order) elif args.cotile is not None: tensor2 = scipy.sparse.random(tensor.shape[0], tensor.shape[1]) names, format_permutations, ivars = parse_sam_input(args.cotile) cotiled_tensors = cotile_coo(names, [tensor, tensor2], - format_permutations, ivars, split_map, args.higher_order) + format_permutations, ivars, split_map, args.higher_order) # print(cotiled_tensors) names = cotiled_tensors.keys() diff --git a/sam/util.py b/sam/util.py index 1d7588a4..ff3d29bf 100644 --- a/sam/util.py +++ b/sam/util.py @@ -1,24 +1,20 @@ -import scipy.sparse -import scipy.io -import os import glob -import numpy import itertools -import shutil -import numpy as np import math -import sparse - -from pathlib import Path +import os +import shutil from dataclasses import dataclass +from pathlib import Path -import os 
-import math import numpy +import numpy as np +import scipy.io +import scipy.sparse +import sparse # All environment variables for SAM should live here or in make file cwd = os.getcwd() -SAM_HOME = os.getenv('HOSTNAME', default=cwd) +SAM_HOME = os.getenv('SAM_HOME', default=cwd) HOSTNAME = os.getenv('HOSTNAME', default="local") SUITESPARSE_PATH = os.getenv('SUITESPARSE_PATH', default=os.path.join(SAM_HOME, "data", "suitesparse")) SUITESPARSE_FORMATTED_PATH = os.getenv('SUITESPARSE_FORMATTED_PATH', default=os.path.join(SAM_HOME, "data", @@ -151,6 +147,7 @@ def load(self, path): dims, coords, values = self.loader.load(path) return sparse.COO(coords, values, tuple(dims)) + # PydataSparseTensorDumper dumps a sparse tensor to a the desired file. class PydataSparseTensorDumper: def __init__(self): @@ -159,6 +156,8 @@ def __init__(self): def dump(self, tensor, path): assert isinstance(tensor, sparse.DOK), "The tensor needs to be a pydata/sparse DOK format" self.dumper.dump_dict_to_file(tensor.shape, tensor.data, path) + + # # # From 1b45a01e952656b52034005dfab5e2361850773f Mon Sep 17 00:00:00 2001 From: Olivia Hsu Date: Wed, 26 Jul 2023 18:13:48 -0700 Subject: [PATCH 18/39] Fix pycodestyle for CI --- sam/sim/src/tiling/tile.py | 7 ++++--- sam/util.py | 14 ++++++++++++++ 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/sam/sim/src/tiling/tile.py b/sam/sim/src/tiling/tile.py index f278baf7..73ee06fd 100644 --- a/sam/sim/src/tiling/tile.py +++ b/sam/sim/src/tiling/tile.py @@ -12,8 +12,8 @@ import sparse import yaml -from sam.util import SuiteSparseTensor, InputCacheSuiteSparse, ScipyTensorShifter, PydataSparseTensorDumper, \ - SUITESPARSE_PATH, FROSTT_PATH +from sam.util import SUITESPARSE_PATH, SuiteSparseTensor, InputCacheSuiteSparse, ScipyTensorShifter, \ + FROSTT_PATH, FrosttTensor, PydataSparseTensorDumper, InputCacheTensor from sam.sim.src.tiling.process_expr import parse_all # FIXME: This should not be here... Set your SAM_HOME directory @@ -387,6 +387,7 @@ def cotile_multilevel_coo(app_str, hw_config_fname, tensors, output_dir_path, hi inputCache = InputCacheSuiteSparse() +inputCacheTensor = InputCacheTensor() if __name__ == "__main__": parser = argparse.ArgumentParser(description='script that tiles tensors') @@ -445,7 +446,7 @@ def cotile_multilevel_coo(app_str, hw_config_fname, tensors, output_dir_path, hi # FIXME: This is broken frostt_tensor = FrosttTensor(tensor_path) - tensor = inputCache.load(frostt_tensor, False) + tensor = inputCacheTensor.load(frostt_tensor, False) else: raise ValueError("This choice of 'tensor_type' is unreachable") diff --git a/sam/util.py b/sam/util.py index ff3d29bf..480b4101 100644 --- a/sam/util.py +++ b/sam/util.py @@ -546,6 +546,20 @@ def load(self, tensor, suiteSparse, cast, format_str): return self.tensor +# FrosttTensor represents a tensor in the FROSTT dataset. +class FrosttTensor: + def __init__(self, path): + self.path = path + self.__name__ = self.__str__() + + def __str__(self): + f = os.path.split(self.path)[1] + return f.replace(".tns", "") + + def load(self): + return PydataSparseTensorLoader().load(self.path) + + # PydataMatrixMarketTensorLoader loads tensors in the matrix market format # into pydata.sparse matrices. 
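# A minimal usage sketch for the FrosttTensor class added above (hedged; "example.tns"
# is a hypothetical file name under FROSTT_PATH, not one shipped with this patch):
#   frostt_tensor = FrosttTensor(os.path.join(FROSTT_PATH, "example.tns"))
#   str(frostt_tensor)            # -> "example" (".tns" suffix stripped by __str__)
#   coo = frostt_tensor.load()    # pydata/sparse COO via PydataSparseTensorLoader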
# class PydataMatrixMarketTensorLoader: From 875569dca8578277e0e24167a4f566076d91891f Mon Sep 17 00:00:00 2001 From: Akhilesh Varadan Balasingam Date: Wed, 26 Jul 2023 18:22:06 -0700 Subject: [PATCH 19/39] spmv sparsity sweep stuff --- generate_spmv_sparsity_sweep.py | 164 +++++++++++++++++++ sam/sim/src/tiling/memory_config_onyx.yaml | 2 +- sam/sim/src/tiling/tile.py | 99 ++++++++++-- sam/util.py | 13 +- scripts/datastructure_suitesparse.py | 7 +- scripts/datastructure_tns.py | 180 ++++++++++++++++----- scripts/datastructure_tns_old.py | 127 +++++++++++++++ scripts/generate_gold_matmul_tiled.py | 17 +- scripts/generate_suitesparse_formats.sh | 20 +-- scripts/prepare_files.sh | 2 +- setup_tiling_mat.py | 6 + spmv_sparsity_sweep.py | 42 +++++ 12 files changed, 605 insertions(+), 74 deletions(-) create mode 100644 generate_spmv_sparsity_sweep.py create mode 100644 scripts/datastructure_tns_old.py create mode 100644 spmv_sparsity_sweep.py diff --git a/generate_spmv_sparsity_sweep.py b/generate_spmv_sparsity_sweep.py new file mode 100644 index 00000000..3d2f00e0 --- /dev/null +++ b/generate_spmv_sparsity_sweep.py @@ -0,0 +1,164 @@ +#script to generate 50 random 3D tensors (seeded, produces same 50 each time) +import numpy as np +import random +import os +import scipy.io as sio +import scipy.sparse as sps +# from scipy.io import mmread + +# Set the seed value +#previously used to be this: seed_value = 42 +seed_value = 100 +random.seed(seed_value) +np.random.seed(seed_value) + +#generating matrix dimensions and storing results in an array, array size is 2, 1 matrix and 2 dimensions per matrix + +#conditions which need to be met for each set of 3 tensor dimensions: no dimension can't be 0, and can't have a tensor with more than 900 elements (meaning dimension1*dimension2*dimension3 <= 900) +#note try to make it so no dimension is 1 or 2 (gives slight issues later, esp 2nd and 3rd dimensions) +dimensions = [0] * 2 +dimensions_onematrix = [0] * 2 + +# x goes from 0 to __ (before 0 to 49) +for x in range(1): + # dimensions_onematrix[0] = random.randint(1,60) + # dimensions_onematrix[1] = random.randint(3,60) + + # while((dimensions_onetensor[0]*dimensions_onetensor[1]*dimensions_onetensor[2])>901): + # dimensions_onematrix[0] = random.randint(1,60) + # dimensions_onematrix[1] = random.randint(3,60) + # dimensions_onematrix[2] = random.randint(3,60) + dimensions_onematrix[0] = 10 + dimensions_onematrix[1] = 10 + + dimensions[x*3] = dimensions_onematrix[0] + dimensions[(x*3)+1] = dimensions_onematrix[1] + + dimensions_onematrix[0] = 0 + dimensions_onematrix[1] = 0 + #print('\n') + + +#Generating matrix values based on the dimensions now stored in the dimensions (2 elem) array +#i goes from 0 to __ (before 0 to 49) +matrix_num = 1 +randomNumber = 0 +numToInsert = 0 +countnnz = 0 +#can add in as many sparsity numbers here (num elements in the sparsities array = num matrices being generated) +sparsities = [0.5] +# NEED TO CHANGE suitesparse_path for this to work: frostt_path = os.environ['FROSTT_PATH'] +ss_path = '' +for i in range(1): + filename = os.path.join(ss_path, "rand_matrix"+str(matrix_num)+".mtx") + sparsity = sparsities[i] + f = open(filename, "w") + f.write('\n') + lineToAddInFile = "" + arr = np.zeros([dimensions[i*3],dimensions[(i*3)+1]], dtype=int) + for x in range(len(arr)): + for y in range(len(arr[x])): + #TO CHANGE SPARSITY: generate random number from 1 to 9; if 1,2,3,7,8,9 don't add a num in, only add if 4,5,6 + # randomNumber = random.randint(1,9) + randomNumber = random.random() + 
if(randomNumber>sparsity): + numToInsert = random.randint(1,100) + arr[x][y] = numToInsert + numToInsert = 0 + randomNumber = 0 + #print(arr[x][y][z]) + if(arr[x][y]!=0): + #tensor files are not 0 indexed - say want to insert a point at (0,0,0), then need to feed in (1,1,1) to the tensor file to insert at the (0,0,0) location + lineToAddInFile="" + str(x+1) + " " + str(y+1) + " " + str(arr[x][y]) + countnnz += 1 + f.write(lineToAddInFile + '\n') + # writing in first line in file: + with open(filename, 'r') as f: + content = f.read() + updated_content = ""+str(dimensions[i*3]) + " " + str(dimensions[i*3+1]) + " " + str(countnnz) + content + with open(filename, 'w') as f: + f.write(updated_content) + + with open(filename, 'r') as file: + data = file.readlines() + + header = data.pop(0) + num_rows, num_cols, num_nonzeros = map(int, header.strip().split()) + matrix_data = [] + row_indices = [] + col_indices = [] + for line in data: + row, col, value = map(float, line.strip().split()) + row_indices.append(int(row) - 1) # Convert to 0-based indexing + col_indices.append(int(col) - 1) # Convert to 0-based indexing + matrix_data.append(value) + matrix = sps.coo_matrix((matrix_data, (row_indices, col_indices)), shape=(num_rows, num_cols)) + output_file = os.path.join(ss_path, "rand_matrix"+str(matrix_num)+".mat") + sio.savemat(output_file, {'matrix': matrix}, do_compression=True) + + # vec = sps.random(dimensions[i*3+1], 1, 0, data_rvs=np.ones) + vec = np.ones(dimensions[i*3+1]) + output_file1 = os.path.join(ss_path, "rand_vector"+str(matrix_num)+".mat") + sio.savemat(output_file1, {'vector': vec}, do_compression=True) + + + # f.close() + # a = mmread(filename) + # a.toarray() + # scipy.io.savemat("rand_matrix"+str(matrix_num)+".mat", {'mydata': a}) + + # f.write(""+str(dimensions[i*3]) + " " + str(dimensions[i*3+1]) + " " + str(countnnz)) + # f.write("\n") + matrix_num = matrix_num + 1 + + +#first step: one randomly generated 3D tensor given first set dimensions +#Note: generally if 2/3 elems in a tensor is 0, it can be considered sparse +#approach: 2/3 of the time add in a 0, 1/3 of the time add in an integer from 0 to 100 (use randint to generate num from 1 to 9 inclusive, and depending on where the num is, insert number or not) +#print('dimensions:') +#print(dimensions[0]) +#print(dimensions[1]) +#print(dimensions[2]) +#print('tensor vals') + +""" +arr = np.zeros([dimensions[0],dimensions[1],dimensions[2]], dtype=int) +randomNumber = 0 +numToInsert = 0 +for x in range(len(arr)): + for y in range(len(arr[x])): + for z in range(len(arr[x][y])): + #generate random number from 1 to 9; if 1,2,3,7,8,9 don't add a num in, only add if 4,5,6 + randomNumber = random.randint(1,9) + if(randomNumber==4 or randomNumber==5 or randomNumber==6): + numToInsert = random.randint(1,100) + arr[x][y][z] = numToInsert + numToInsert = 0 + print(arr[x][y][z]) + + #lineToAddInFile="" + str(x) + " " + str(y) + " " + str(z) + " " + str(arr[x][y][z]) + #f.write(lineToAddInFile + '\n') + +print('dimensions:') +print(dimensions[3]) +print(dimensions[4]) +print(dimensions[5]) +print('tensor vals') +arr = np.zeros([dimensions[3],dimensions[4],dimensions[5]], dtype=int) +randomNumber = 0 +numToInsert = 0 +for x in range(len(arr)): + for y in range(len(arr[x])): + for z in range(len(arr[x][y])): + #generate random number from 1 to 9; if 1,2,3,7,8,9 don't add a num in, only add if 4,5,6 + randomNumber = random.randint(1,9) + if(randomNumber==4 or randomNumber==5 or randomNumber==6): + numToInsert = random.randint(1,100) + 
arr[x][y][z] = numToInsert + numToInsert = 0 + randomNumber = 0 + print(arr[x][y][z]) + + #lineToAddInFile="" + str(x) + " " + str(y) + " " + str(z) + " " + str(arr[x][y][z]) + #f.write(lineToAddInFile + '\n') +""" diff --git a/sam/sim/src/tiling/memory_config_onyx.yaml b/sam/sim/src/tiling/memory_config_onyx.yaml index 74c80c27..b565bce0 100644 --- a/sam/sim/src/tiling/memory_config_onyx.yaml +++ b/sam/sim/src/tiling/memory_config_onyx.yaml @@ -16,4 +16,4 @@ n_levels: 3 level_names: ["Main", "Glb", "Mem"] Main_tile_size: None Glb_tile_size: 8 # 8 = (8x8) = 64 elements -Mem_tile_size: 45 # Size of one dense dimension. 45 = (45*45) = 2025 +Mem_tile_size: 30 #45 # Size of one dense dimension. 45 = (45*45) = 2025 diff --git a/sam/sim/src/tiling/tile.py b/sam/sim/src/tiling/tile.py index 942035b2..9360f3e0 100644 --- a/sam/sim/src/tiling/tile.py +++ b/sam/sim/src/tiling/tile.py @@ -7,11 +7,16 @@ import copy import pickle import random +import sys from itertools import compress from pathlib import Path -import sys +custom_path = '/home/avb03/sam/scripts' +sys.path.append(custom_path) + +from util import FormatWriter, SuiteSparseTensor, InputCacheSuiteSparse + custom_path = '/home/avb03/sam' sys.path.append(custom_path) @@ -153,6 +158,68 @@ def tile_coo(tensor, ivar_map, split_map, new_ivar_order=None): return tiles, tile_sizes +def tile_coo_tensor_named(tensor_name, tensor, ivar_map, split_map, new_ivar_order=None): + human_readable = False + + tiles = dict() + tile_sizes = dict() + order = len(tensor.shape) + # if tensor_name == 'd' or tensor_name == 'c': + # order = 1 + + tensor_coo = scipy.sparse.coo_matrix(tensor) + tensor_points = tensor_coo.todok() + + print("ivar_map: ", ivar_map) + print("split_map: ", split_map) + print("order: ", order) + + new_shape = [] + for lvl in range(order): + ivar = ivar_map[lvl] + sf = split_map[ivar] + new_shape.append(sf) + + print("new_shape: ", new_shape) + for crds, val in tensor_points.items(): + point = list(crds) + + new_point = [] + tile_id = [] + for lvl in range(order): + ivar = ivar_map[lvl] + sf = split_map[ivar] + + new_point.append(point[lvl] % sf) + tile_id.append(int(point[lvl] / sf)) + + # Add in value to the new_point as well + new_point.append(val) + tile_id = tuple(tile_id) + + if tile_id in tiles: + tiles[tile_id].append(new_point) + else: + tiles[tile_id] = [new_point] + + # sort the new coo lists + for key, val in tiles.items(): + if human_readable: + dok = sorted(val) + else: + dok = scipy.sparse.dok_matrix(tuple(new_shape)) + for point in val: + dok[tuple(point[0:-1])] = point[-1] + + tiles[key] = dok + + for tile_id, tile_dok in tiles.items(): + tile = tile_dok.tocoo() + nonempty_rows = tile.getnnz(axis=1) + nonempty_row_ind = np.where(nonempty_rows > 0)[0] + tile_sizes[tile_id] = tile.nnz * 2 + 2 * len(nonempty_row_ind) + 3 + + return tiles, tile_sizes # tensor_names: list of tensor names [B,C,D] (from SAM) # tensors: list of scipy.sparse.coo_matrix following tensor_names (from SAM) @@ -168,7 +235,10 @@ def cotile_coo(tensor_names, tensors, permutation_strs, ivar_strs, split_map): tensor_format = permutation_strs[i] ivar_map = dict() order = len(tensor.shape) - print("order is ", order) + # if tensor_name == 'd' or tensor_name == 'c': + # order = 1 + print("tensor name = ", tensor_name) + print("order in cotile_coo = ", order) for dim in range(order): print("tensor format: ", tensor_format) print("dim is ", dim) @@ -178,7 +248,7 @@ def cotile_coo(tensor_names, tensors, permutation_strs, ivar_strs, split_map): 
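# Hedged worked example of the split that tile_coo_tensor_named above applies:
# with split_map = {"i": 16, "j": 16}, a nonzero at (row=37, col=5) gets
# tile_id = (37 // 16, 5 // 16) = (2, 0) and in-tile coordinate
# (37 % 16, 5 % 16) = (5, 5); the ivar_map built in this loop tells each
# storage level which index variable's split factor to use.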
ivar_map[lvl_permutation] = ivar print("ivar_map is ", ivar_map) - tiles, tile_sizes = tile_coo(tensor, ivar_map, split_map) + tiles, tile_sizes = tile_coo_tensor_named(tensor_name, tensor, ivar_map, split_map) tiled_tensors[tensor_name] = tiles tiled_tensor_sizes[tensor_name] = tile_sizes @@ -189,6 +259,8 @@ def get_other_tensors(app_str, tensor): tensors = [] tensors.append(tensor) + # formatWriter = FormatWriter(cast_int=True) + if "matmul" in app_str: print("Writing shifted...") shifted = ScipyTensorShifter().shiftLastMode(tensor) @@ -213,9 +285,13 @@ def get_other_tensors(app_str, tensor): elif "mat_mattransmul" in app_str: print("Writing other tensors...") rows, cols = tensor.shape # i,j - tensor_c = scipy.sparse.random(cols, 1).toarray().flatten() - tensor_d = scipy.sparse.random(rows, 1).toarray().flatten() + tensor_c = scipy.sparse.random(cols, 1, data_rvs=np.ones).toarray().flatten() + tensor_d = scipy.sparse.random(rows, 1, data_rvs=np.ones).toarray().flatten() + # # import pdb; pdb.set_trace() + + # tensor_c_coo = formatWriter.convert_format(tensor_c, "coo") + # tensor_d_coo = formatWriter.convert_format(tensor_d, "coo") tensors.append(tensor_c) tensors.append(tensor_d) @@ -236,7 +312,9 @@ def cotile_multilevel_coo(app_str, hw_config_fname, tensors, output_dir_path): names, format_permutations, ivars = parse_sam_input(args.cotile) - import pdb; pdb.set_trace(); + print("cotile_multilevel_coo tensors: ", tensors) + + # import pdb; pdb.set_trace(); sizes_dict = {} for i, name in enumerate(names): tensor = tensors[i] @@ -275,12 +353,13 @@ def cotile_multilevel_coo(app_str, hw_config_fname, tensors, output_dir_path): print("tensor shapes: ", tensors[0].shape, " ", tensors[1].shape, " ", tensors[2].shape) print("format_permutations: ", format_permutations) cotiled, cotiled_sizes = cotile_coo(names, tensors, format_permutations, ivars, split_map) + print("cotiled is ", cotiled) else: # recursively tile the blocks new_cotiled = {} new_cotiled_sizes = {} for i, name in enumerate(names): - + # import pdb; pdb.set_trace() new_cotiled[name] = {} new_cotiled_sizes[name] = {} for tile_id, tile in cotiled[name].items(): @@ -295,6 +374,7 @@ def cotile_multilevel_coo(app_str, hw_config_fname, tensors, output_dir_path): for kk, vv in copy.deepcopy(new_cotiled_sizes_temp)[name].items(): new_tile_id = tuple(list(tile_id) + list(kk)) new_cotiled_sizes[name][new_tile_id] = vv + print(new_cotiled_temp) cotiled = copy.deepcopy(new_cotiled) cotiled_sizes = copy.deepcopy(new_cotiled_sizes) @@ -311,7 +391,7 @@ def cotile_multilevel_coo(app_str, hw_config_fname, tensors, output_dir_path): if __name__ == "__main__": parser = argparse.ArgumentParser(description='Tile matrices') parser.add_argument("--input_tensor", type=str, default=None) - parser.add_argument("--gen_tensor", action="store_false") + parser.add_argument("--gen_tensor", action="store_true") parser.add_argument("--cotile", type=str, default=None) parser.add_argument("--output_dir_path", type=str, default="./tiles") parser.add_argument("--hw_config", type=str, default=None) @@ -345,7 +425,6 @@ def cotile_multilevel_coo(app_str, hw_config_fname, tensors, output_dir_path): print("TILES:") print_dict(tiles) else: - output_mtx_name = os.path.join(args.output_dir_path, args.cotile, "mtx") output_mtx_path = Path(output_mtx_name) output_mtx_path.mkdir(parents=True, exist_ok=True) @@ -363,6 +442,8 @@ def cotile_multilevel_coo(app_str, hw_config_fname, tensors, output_dir_path): # print(cotiled_tensors) names = cotiled_tensors.keys() + 
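# Hedged sketch of the structure being iterated here: cotiled_tensors maps each
# tensor name (e.g. "B") to a dict keyed by tile_id tuples such as (0, 1) --
# longer tuples like (0, 1, 2, 9) after multilevel tiling -- whose values are
# scipy.sparse DOK tiles; each tile is written under <output_dir_path>/<cotile>/mtx/
# with a name of the form tensor_B_tile_0_1_2_9, the layout that
# generate_gold_matmul_tiled.py later reads back.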
print("cotiled_tensors.keys(): ", names) + print("cotiled_tensors: ", cotiled_tensors) for name in names: for tile_id, tile in cotiled_tensors[name].items(): [str(item) for item in tile_id] diff --git a/sam/util.py b/sam/util.py index 7044e94d..f2382f16 100644 --- a/sam/util.py +++ b/sam/util.py @@ -207,12 +207,13 @@ def shiftLastMode(self, tensor): @dataclass class DoublyCompressedMatrix: - shape: (int) - seg0: [int] - crd0: [int] - seg1: [int] - crd1: [int] - data: [float] + # shape: (int) + shape = [int] + seg0 = [int] + crd0 = [int] + seg1 = [int] + crd1 = [int] + data = [float] # ScipyMatrixMarketTensorLoader loads tensors in the matrix market format diff --git a/scripts/datastructure_suitesparse.py b/scripts/datastructure_suitesparse.py index cb24ec39..6c4bace4 100644 --- a/scripts/datastructure_suitesparse.py +++ b/scripts/datastructure_suitesparse.py @@ -37,6 +37,7 @@ def write_datastructure_bench(args, tensor, out_path, tiles=None): print("Writing " + args.name + " for test " + args.benchname + "...") dirname = args.output_dir_path if args.output_dir_path is not None else os.path.join(out_path, args.name, args.benchname) + print("dirname: " + dirname) if tiles is not None: dirname = os.path.join(dirname, tiles) dirpath = Path(dirname) @@ -128,7 +129,8 @@ def write_datastructure_bench(args, tensor, out_path, tiles=None): elif "mat_mattransmul" in args.benchname: formatWriter.writeout_separate_sparse_only(coo, dirname, tensorname, format_str="ss10") - if not args.no_gen_other: + # if not args.no_gen_other: + if False: tensorname = 'd' vec = scipy.sparse.random(shape[0], 1, density=args.density, data_rvs=np.ones) vec = vec.toarray().flatten() @@ -184,7 +186,7 @@ def write_datastructure_bench(args, tensor, out_path, tiles=None): parser.add_argument('--output_dir_path', type=str, default=None) parser.add_argument('--tiles', action='store_true') parser.add_argument('--no_gen_other', action='store_true', help="Whether this" - "script should generate the randmo 'other' tensors") + "script should generate the random 'other' tensors") parser.add_argument('--seed', type=int, default=0, help='Random seed needed for gen_other') parser.add_argument('--density', type=int, default=0.25, help='If gen_other, used for density of "other" tensor') args = parser.parse_args() @@ -249,6 +251,7 @@ def write_datastructure_bench(args, tensor, out_path, tiles=None): formatWriter.writeout(trans_shifted, format_str, trans_filename) elif args.hw: if args.tiles and tensor is not None: + print("tensor lengths = ", len(tensor)) for i, ten in enumerate(tensor): tile_name = os.path.split(mtx_files[i])[1].split(".")[0] write_datastructure_tiles(args, ten, out_path, tile_name) diff --git a/scripts/datastructure_tns.py b/scripts/datastructure_tns.py index ccf60fae..16e00afc 100644 --- a/scripts/datastructure_tns.py +++ b/scripts/datastructure_tns.py @@ -1,10 +1,21 @@ import argparse import os +import shutil +import scipy.sparse +import numpy as np +import sys +import random +import shutil + from pathlib import Path from util import parse_taco_format -cwd = os.getcwd() +from util import FormatWriter, SuiteSparseTensor, InputCacheSuiteSparse +# custom_path = '/nobackup/jadivara/sam/sam/util.py' +# sys.path.append(custom_path) +# from import SUITESPARSE_FORMATTED_PATH, ScipyTensorShifter +cwd = os.getcwd() formats = ["sss012", "ss01", "dss", "dds", "ddd", "dsd", "sdd", "sds", "ssd"] @@ -21,9 +32,10 @@ help='Format filenames as in AHA SCGRA _mode_') parser.add_argument('-np', '--numpy', action='store_true', default=False, 
help='Format numpy tensors') parser.add_argument('-b', '--bench', type=str, default=None, help='Name of benchmark') +parser.add_argument('--density', type=int, default=0.25, help='If gen_other, used for density of "other" tensor') +parser.add_argument('-cast', '--cast', action='store_true', default=False, help='Safe sparsity cast to int for values') args = parser.parse_args() - if args.other: if args.suitesparse: outdir_name = os.getenv('SUITESPARSE_FORMATTED_PATH', default=os.path.join(cwd, 'mode-formats')) @@ -44,70 +56,158 @@ out_path = Path(outdir_name) out_path.mkdir(parents=True, exist_ok=True) +formatWriter = FormatWriter(args.cast) + if args.name is None: print("Please enter a tensor name") exit() - +#breakpoint() if args.format is not None: assert args.format in formats levels = args.format[:-3] - if args.other: + + if os.path.exists('sam/FROST_FORMATTED/rand_tensor*'): + shutil.rmtree('sam/FROST_FORMATTED/rand_tensor*') + + if args.bench != "tensor3_elemadd" and args.bench != "tensor3_innerprod": assert args.bench is not None + #$FROSTT_FORMATTED_TACO_PATH + taco_format_orig_filename = "/home/avb03/sam/FROST_FORMATTED_TACO" + outdir_other_name = os.path.join(outdir_name, args.name, args.bench) + # outdir_other_name = os.path.join(outdir_name, args.name, 'other', otherfile[:-4]) + outdir_orig_path = Path(outdir_other_name) + outdir_orig_path.mkdir(parents=True, exist_ok=True) + + name = None + taco_format_orig_filename = os.path.join(taco_format_dirname, args.name + "_" + levels + '.txt') + + inputCache = InputCacheSuiteSparse() - otherfileNames = [f for f in os.listdir(taco_format_dirname) if - os.path.isfile(os.path.join(taco_format_dirname, f)) and args.name in f] + if args.bench == "tensor3_ttv": + outdir_orig_name = os.path.join(outdir_name, args.name, args.bench, args.format) + outdir_orig_path = Path(outdir_orig_name) + outdir_orig_path.mkdir(parents=True, exist_ok=True) + + taco_format_orig_filename = "/home/avb03/sam/FROST_FORMATTED_TACO/" + args.name + "_" + levels + '.txt' + parse_taco_format(taco_format_orig_filename, outdir_orig_name, 'B', args.format, hw_filename=args.hw) + #Need this line? 
formatWriter.writeout_separate_sparse_only(coo, dirname, tensorname, format_str="ss10") + file_path_name = os.path.join(outdir_orig_name, "tensor_B_mode_shape") + file1 = open(file_path_name, 'r') + shape = [0]*3 + lines = file1.readlines() + count = 0 + + # Strips the newline character + for line in lines: + shape[count] = int(line) + count += 1 + # coo = inputCache.load(tensor, False) + + # formatWriter.writeout_separate_sparse_only(coo, dirname, tensorname, format_str="ss10") + tensorname = 'c' + vec = scipy.sparse.random(shape[2], 1, density=args.density, data_rvs=np.ones) + vec = vec.toarray().flatten() + tensor_out_path = os.path.join(out_path, args.name, args.bench, args.format) + formatWriter.writeout_separate_vec(vec, tensor_out_path, tensorname) + + # vec = scipy.sparse.random(shape[2], 1, data_rvs=np.ones) + # vec = vec.toarray().flatten() + # formatWriter.writeout_separate_vec(vec, out_path, tensorname) + #FormatWriter.writeout_separate_vec(vec, out_path, tensorname, tensorname) + #formatWriter.writeout_separate_sparse_only() + elif args.bench == "tensor3_ttm": + outdir_orig_name = os.path.join(outdir_name, args.name, args.bench, args.format) + outdir_orig_path = Path(outdir_orig_name) + outdir_orig_path.mkdir(parents=True, exist_ok=True) - for otherfile in otherfileNames: - taco_format_orig_filename = os.path.join(taco_format_dirname, otherfile) - outdir_other_name = os.path.join(outdir_name, args.name, args.bench) - # outdir_other_name = os.path.join(outdir_name, args.name, 'other', otherfile[:-4]) - outdir_orig_path = Path(outdir_other_name) + taco_format_orig_filename = "/home/avb03/sam/FROST_FORMATTED_TACO/" + args.name + "_" + levels + '.txt' + parse_taco_format(taco_format_orig_filename, outdir_orig_name, 'B', args.format, hw_filename=args.hw) + #Need this line? 
formatWriter.writeout_separate_sparse_only(coo, dirname, tensorname, format_str="ss10") + file_path_name = os.path.join(outdir_orig_name, "tensor_B_mode_shape") + file1 = open(file_path_name, 'r') + shape = [0]*3 + lines = file1.readlines() + count = 0 + + # Strips the newline character + for line in lines: + shape[count] = int(line) + count += 1 + # coo = inputCache.load(tensor, False) + dimension_k = random.randint(min(shape), 10) + dimension_l = shape[2] + dimension_j = shape[1] + + # formatWriter.writeout_separate_sparse_only(coo, dirname, tensorname, format_str="ss10") + tensorname = 'C' + matrix = scipy.sparse.random(dimension_k, dimension_l, density=args.density, data_rvs=np.ones).toarray() + tensor_out_path = os.path.join(out_path, args.name, args.bench, args.format) + formatWriter.writeout_separate_sparse_only(matrix, tensor_out_path, tensorname) + + # vec = scipy.sparse.random(shape[2], 1, data_rvs=np.ones) + # vec = vec.toarray().flatten() + # formatWriter.writeout_separate_vec(vec, out_path, tensorname) + #FormatWriter.writeout_separate_vec(vec, out_path, tensorname, tensorname) + #formatWriter.writeout_separate_sparse_only() + elif args.bench == "tensor3_mttkrp": + outdir_orig_name = os.path.join(outdir_name, args.name, args.bench, args.format) + outdir_orig_path = Path(outdir_orig_name) outdir_orig_path.mkdir(parents=True, exist_ok=True) - name = None - if args.bench == "mat_residual": - if "mode0" in otherfile: - name = 'b' - elif "mode1" in otherfile: - name = 'd' - else: - raise NotImplementedError - elif args.bench == "mat_mattransmul": - if "mode0" in otherfile: - name = 'd' - elif "mode1" in otherfile: - name = 'f' - else: - raise NotImplementedError - elif "mat_vecmul" in args.bench: - if "mode1" in otherfile: - name = 'c' - elif "mode0" in otherfile: - continue - else: - raise NotImplementedError - else: - raise NotImplementedError - - assert name is not None, "Other tensor name was not set properly and is None" - parse_taco_format(taco_format_orig_filename, outdir_other_name, name, args.format, hw_filename=args.hw) + taco_format_orig_filename = "/home/avb03/sam/FROST_FORMATTED_TACO/" + args.name + "_" + levels + '.txt' + parse_taco_format(taco_format_orig_filename, outdir_orig_name, 'B', args.format, hw_filename=args.hw) + + file_path_name = os.path.join(outdir_orig_name, "tensor_B_mode_shape") + file1 = open(file_path_name, 'r') + shape = [0]*3 + lines = file1.readlines() + count = 0 + + # Strips the newline character + for line in lines: + shape[count] = int(line) + count += 1 + + dimension_i = shape[0] + dimension_k = shape[1] + dimension_l = shape[2] + dimension_j = random.randint(min(shape), 10) + + # formatWriter.writeout_separate_sparse_only(coo, dirname, tensorname, format_str="ss10") + tensorname = 'C' + matrix = scipy.sparse.random(dimension_j, dimension_k, density=args.density, data_rvs=np.ones).toarray() + tensor_out_path = os.path.join(out_path, args.name, args.bench, args.format) + formatWriter.writeout_separate_sparse_only(matrix, tensor_out_path, tensorname) + + tensorname = 'D' + matrix = scipy.sparse.random(dimension_j, dimension_l, density=args.density, data_rvs=np.ones).toarray() + tensor_out_path = os.path.join(out_path, args.name, args.bench, args.format) + formatWriter.writeout_separate_sparse_only(matrix, tensor_out_path, tensorname) + else: + raise NotImplementedError + + assert tensorname is not None, "Other tensor name was not set properly and is None" + # parse_taco_format(taco_format_orig_filename, outdir_other_name, tensorname, 
args.format, hw_filename=args.hw) else: + #this code is used for: tensor3_elemadd, tensor3_innerprod taco_format_orig_filename = os.path.join(taco_format_dirname, args.name + "_" + levels + '.txt') taco_format_shift_filename = os.path.join(taco_format_dirname, args.name + '_shift_' + levels + '.txt') # Original - outdir_orig_name = os.path.join(outdir_name, args.name, 'orig', args.format) + outdir_orig_name = os.path.join(outdir_name, args.name, args.bench, args.format) outdir_orig_path = Path(outdir_orig_name) outdir_orig_path.mkdir(parents=True, exist_ok=True) + taco_format_orig_filename = "/home/avb03/sam/FROST_FORMATTED_TACO/" + args.name + "_" + levels + '.txt' parse_taco_format(taco_format_orig_filename, outdir_orig_name, 'B', args.format, hw_filename=args.hw) # Shifted if args.shift: - outdir_shift_name = os.path.join(outdir_name, args.name, 'shift', args.format) + outdir_shift_name = os.path.join(outdir_name, args.name, args.bench, args.format) outdir_shift_path = Path(outdir_shift_name) outdir_shift_path.mkdir(parents=True, exist_ok=True) + taco_format_shift_filename = "/home/avb03/sam/FROST_FORMATTED_TACO/" + args.name + "_shift_" + levels + '.txt' parse_taco_format(taco_format_shift_filename, outdir_shift_name, 'C', args.format, hw_filename=args.hw) diff --git a/scripts/datastructure_tns_old.py b/scripts/datastructure_tns_old.py new file mode 100644 index 00000000..863f1d0b --- /dev/null +++ b/scripts/datastructure_tns_old.py @@ -0,0 +1,127 @@ +import argparse +import os +from pathlib import Path +from util import parse_taco_format + +cwd = os.getcwd() + + +formats = ["sss012", "ss01", "dss", "dds", "ddd", "dsd", "sdd", "sds", "ssd"] + +parser = argparse.ArgumentParser(description="Process some Frostt tensors into per-level datastructures") +parser.add_argument('-n', '--name', metavar='fname', type=str, action='store', + help='tensor name to run format conversion on one frostt tensor') +parser.add_argument('-f', '--format', metavar='fformat', type=str, action='store', + help='The format that the tensor should be converted to') +parser.add_argument('-i', '--int', action='store_false', default=True, help='Safe sparsity cast to int for values') +parser.add_argument('-s', '--shift', action='store_false', default=True, help='Also format shifted tensor') +parser.add_argument('-o', '--other', action='store_true', default=False, help='Format other tensor') +parser.add_argument('-ss', '--suitesparse', action='store_true', default=False, help='Format suitesparse other tensor') +parser.add_argument('-hw', '--hw', action='store_true', default=False, + help='Format filenames as in AHA SCGRA _mode_') +parser.add_argument('-np', '--numpy', action='store_true', default=False, help='Format numpy tensors') +parser.add_argument('-b', '--bench', type=str, default=None, help='Name of benchmark') + +args = parser.parse_args() + +if args.other: + if args.suitesparse: + outdir_name = os.getenv('SUITESPARSE_FORMATTED_PATH', default=os.path.join(cwd, 'mode-formats')) + else: + outdir_name = os.getenv('FROSTT_FORMATTED_PATH', default=os.path.join(cwd, 'mode-formats')) + taco_format_dirname = os.getenv('TACO_TENSOR_PATH') + if taco_format_dirname is None: + print("Please set the TACO_TENSOR_PATH environment variable") + exit() + taco_format_dirname = os.path.join(taco_format_dirname, "other") + # taco_format_dirname = os.path.join(taco_format_dirname, "other-formatted-taco") +else: + outdir_name = os.getenv('FROSTT_FORMATTED_PATH', default=os.path.join(cwd, 'mode-formats')) + taco_format_dirname = 
os.getenv('FROSTT_FORMATTED_TACO_PATH') + if taco_format_dirname is None: + print("Please set the FROSTT_FORMATTED_TACO_PATH environment variable") + exit() + +out_path = Path(outdir_name) +out_path.mkdir(parents=True, exist_ok=True) + +print("args.name is ", args.name) + +if args.name is None: + print("Please enter a tensor name") + exit() + +print("\nhere after Please enter tensor name\n") + +if args.format is not None: + assert args.format in formats + levels = args.format[:-3] + if args.other: + assert args.bench is not None + + print("here to get other file names\n") + + otherfileNames = [f for f in os.listdir(taco_format_dirname) if + os.path.isfile(os.path.join(taco_format_dirname, f)) and args.name in f] + + print("have otherfileNames\n") + print(os.listdir(outdir_name)) + print("length of otherfilenames is: ", len(otherfileNames), "\n") + + for otherfile in otherfileNames: + print("iterate thru otherfileNames\n") + taco_format_orig_filename = os.path.join(taco_format_dirname, otherfile) + # outdir_other_name = os.path.join(outdir_name, args.name, args.bench) + outdir_other_name = os.path.join(outdir_name, args.name, 'other', otherfile[:-4]) + outdir_orig_path = Path(outdir_other_name) + outdir_orig_path.mkdir(parents=True, exist_ok=True) + + name = None + if args.bench == "mat_residual": + if "mode0" in otherfile: + name = 'b' + elif "mode1" in otherfile: + name = 'd' + else: + raise NotImplementedError + elif args.bench == "mat_mattransmul": + if "mode0" in otherfile: + name = 'd' + elif "mode1" in otherfile: + name = 'f' + else: + raise NotImplementedError + elif "mat_vecmul" in args.bench: + if "mode1" in otherfile: + name = 'c' + elif "mode0" in otherfile: + continue + else: + raise NotImplementedError + else: + raise NotImplementedError + + assert name is not None, "Other tensor name was not set properly and is None" + parse_taco_format(taco_format_orig_filename, outdir_other_name, name, args.format, hw_filename=args.hw) + + else: + print("in else statement\n") + taco_format_orig_filename = os.path.join(taco_format_dirname, args.name + "_" + levels + '.txt') + taco_format_shift_filename = os.path.join(taco_format_dirname, args.name + '_shift_' + levels + '.txt') + + # Original + outdir_orig_name = os.path.join(outdir_name, args.name, 'orig', args.format) + outdir_orig_path = Path(outdir_orig_name) + outdir_orig_path.mkdir(parents=True, exist_ok=True) + + print("parse taco format\n") + + parse_taco_format(taco_format_orig_filename, outdir_orig_name, 'B', args.format, hw_filename=args.hw) + + # Shifted + if args.shift: + outdir_shift_name = os.path.join(outdir_name, args.name, 'shift', args.format) + outdir_shift_path = Path(outdir_shift_name) + outdir_shift_path.mkdir(parents=True, exist_ok=True) + + parse_taco_format(taco_format_shift_filename, outdir_shift_name, 'C', args.format, hw_filename=args.hw) diff --git a/scripts/generate_gold_matmul_tiled.py b/scripts/generate_gold_matmul_tiled.py index 64ff4946..0112a4e7 100644 --- a/scripts/generate_gold_matmul_tiled.py +++ b/scripts/generate_gold_matmul_tiled.py @@ -10,6 +10,9 @@ from pathlib import Path +# app_name = "mat_elemadd" +# app_name = "mat_elemmul" +app_name = "mat_mattransmul" # FIXME: (owhsu) this should be imported from util def round_sparse(x): @@ -25,7 +28,7 @@ def round_sparse(x): def generate_gold_matmul_tiled(tile_crd_b, tile_crd_c, dirname, out_format="ss01"): # CSR - formatted_dir = "./tiles/matmul_ikj/mtx" + formatted_dir = f"./tiles/{app_name}/mtx" B_dir = "tensor_B_tile_" for a in tile_crd_b: B_dir += 
str(a) + "_" @@ -69,6 +72,7 @@ def generate_gold_matmul_tiled(tile_crd_b, tile_crd_c, dirname, out_format="ss01 itr += 1 C_scipy = C_scipy.tocsc() gold_nd = (B_scipy @ C_scipy) + # gold_nd = B_scipy.dot(C_scipy) gold_out = gold_nd.tocoo() assert tile_crd_b[1] == tile_crd_c[0] and tile_crd_b[3] == tile_crd_c[2] scipy.io.mmwrite( @@ -80,25 +84,26 @@ def generate_gold_matmul_tiled(tile_crd_b, tile_crd_c, dirname, out_format="ss01 parser = argparse.ArgumentParser(description="Generate tiled output gold") parser.add_argument("--yaml_name", type=str, default="memory_config_real.yaml") args = parser.parse_args() - outdir = "./tiles/matmul_ikj/output/" + outdir = f"./tiles/{app_name}/output/" outpath = Path(outdir) - outpath.mkdir(parents=True, exist_ok=True) + outpath.mkdir(parents=True) # generate_gold_matmul_tiled([0, 1, 2, 9], [1, 0, 9, 0], outdir) # generate_gold_matmul_tiled([0, 1, 0, 7], [1, 0, 7, 0], outdir) # quit() with open("/nobackup/rsharma3/Sparsity/simulator/old_sam/sam/tiles/matmul_ikj/tensor_sizes", "rb") as ff: - with open("./tiles/matmul_ikj/tensor_sizes", "rb") as ff: + with open(f"./tiles/{app_name}/tensor_sizes", "rb") as ff: sizes_dict_level_full = pickle.load(ff) with open("./sam/sim/src/tiling/" + args.yaml_name, "r") as stream: loop_config = yaml.safe_load(stream) + print("sizes_dict_level_full", sizes_dict_level_full) struct = { "i00": 1 + int(sizes_dict_level_full["B"][0]) // (loop_config["Glb_tile_size"] * loop_config["Mem_tile_size"]), - "k00": 1 + int(sizes_dict_level_full["B"][1]) // (loop_config["Glb_tile_size"] * loop_config["Mem_tile_size"]), - "j00": 1 + int(sizes_dict_level_full["C"][1]) // (loop_config["Glb_tile_size"] * loop_config["Mem_tile_size"]), + "k00": 1 + int(sizes_dict_level_full["c"][0]) // (loop_config["Glb_tile_size"] * loop_config["Mem_tile_size"]), + "j00": 1 + int(sizes_dict_level_full["d"][0]) // (loop_config["Glb_tile_size"] * loop_config["Mem_tile_size"]), "i0": loop_config["Glb_tile_size"], "k0": loop_config["Glb_tile_size"], "j0": loop_config["Glb_tile_size"]} print(struct) # quit() diff --git a/scripts/generate_suitesparse_formats.sh b/scripts/generate_suitesparse_formats.sh index a12d9eea..d335bd18 100755 --- a/scripts/generate_suitesparse_formats.sh +++ b/scripts/generate_suitesparse_formats.sh @@ -3,15 +3,17 @@ #SBATCH -t 360 BENCHMARKS=( - matmul_ikj - matmul_ijk - matmul_kij - mat_elemmul - mat_elemadd - mat_elemadd3 +# matmul_ikj +# matmul_ijk +# matmul_kij +# mat_elemmul +# mat_elemadd +# mat_elemadd3 mat_residual mat_mattransmul - mat_identity + mat_vecmul +# mat_identity +# mat_sddmm ) # This is a list of benchmarks that have "other" tensors that are generated @@ -32,10 +34,10 @@ for b in ${!BENCHMARKS[@]}; do sspath=${SUITESPARSE_PATH}/$name echo "Generating input format files for $name..." 
- SUITESPARSE_TENSOR_PATH=$sspath python $basedir/scripts/datastructure_suitesparse.py -n $name -hw -b $bench + SUITESPARSE_TENSOR_PATH=$sspath python3 $basedir/scripts/datastructure_suitesparse.py -n $name -hw -b $bench if [[ $OTHERBENCHES =~ "$bench" ]]; then echo "Generating format of 'other' tensor" - python $basedir/scripts/datastructure_tns.py -n $line -f ss01 --other -ss -b $bench -hw + python3 $basedir/scripts/datastructure_tns.py -n $line -f ss01 --other -ss -b $bench -hw fi done <$textfile diff --git a/scripts/prepare_files.sh b/scripts/prepare_files.sh index 4e3c82ed..5f8cb587 100755 --- a/scripts/prepare_files.sh +++ b/scripts/prepare_files.sh @@ -11,4 +11,4 @@ rm -rf $basedir/tiles/* ./scripts/tile_ext.sh $1 $2 -python3 scripts/generate_gold_matmul_tiled.py --yaml_name $2 +# python3 scripts/generate_gold_matmul_tiled.py --yaml_name $2 diff --git a/setup_tiling_mat.py b/setup_tiling_mat.py index 4d156366..fd944c71 100644 --- a/setup_tiling_mat.py +++ b/setup_tiling_mat.py @@ -11,6 +11,12 @@ # for line in data_file_lines: # data.append(line[:-1]) +if not os.path.exists("extensor_mtx"): + os.mkdir("extensor_mtx") + +if not os.path.exists("tiles_compiled"): + os.mkdir("tiles_compiled") + for datum in data: mtx_file = glob.glob(f"/nobackup/owhsu/sparse-datasets/suitesparse/{datum}.mtx")[0] shutil.copy(mtx_file,f"extensor_mtx/{datum}.mtx") diff --git a/spmv_sparsity_sweep.py b/spmv_sparsity_sweep.py new file mode 100644 index 00000000..8847e00b --- /dev/null +++ b/spmv_sparsity_sweep.py @@ -0,0 +1,42 @@ +import numpy as np +import scipy.io as sio +import scipy.sparse as sp +import os +import random + +num_rows = 10 +num_cols = 10 +density = 0.1 + +seed_value = 100 +random.seed(seed_value) +np.random.seed(seed_value) + +if not os.path.exists('spmv_sparsity_sweep'): + os.makedirs('spmv_sparsity_sweep') +else: + os.system("rm -rf spmv_sparsity_sweep/*") + +if not os.path.exists('spmv_sparsity_sweep/MAT_FILES'): + os.makedirs('spmv_sparsity_sweep/MAT_FILES') +else: + os.system("rm -rf spmv_sparsity_sweep/MAT_FILES/*") + os.makedirs('spmv_sparsity_sweep/MAT_FILES') + +if not os.path.exists('spmv_sparsity_sweep/MTX_FILES'): + os.makedirs('spmv_sparsity_sweep/MTX_FILES') +else: + os.system("rm -rf spmv_sparsity_sweep/MTX_FILES/*") + os.makedirs('spmv_sparsity_sweep/MTX_FILES') + +matrix = sp.random(num_rows, num_cols, density, data_rvs=np.ones, random_state=seed_value) +print(matrix) + +probability = 0.7 # Adjust this value to control the ratio of 1s to 0s in vector +vector = np.random.choice([0, 1], size=num_cols, p=[1 - probability, probability]) +print(vector) + +sio.mmwrite('spmv_sparsity_sweep/MTX_FILES/matrix.mtx', matrix) + +sio.savemat('spmv_sparsity_sweep/MAT_FILES/matrix.mat', {'matrix': matrix}) +sio.savemat('spmv_sparsity_sweep/MAT_FILES/vector.mat', {'vector': vector}) From 68f05676f22d069c3a2f3b054de6a835c5aa46a5 Mon Sep 17 00:00:00 2001 From: Akhilesh Varadan Balasingam Date: Thu, 27 Jul 2023 16:07:46 -0700 Subject: [PATCH 20/39] pushing all fixed merge conflicts --- sam/sim/src/tiling/tile.py | 195 ++++-------------- .../formatting/datastructure_suitesparse.py | 3 +- scripts/suitesparse_memory_model_runner.sh | 4 +- scripts/tile_ext.sh | 34 --- scripts/tiling/generate_gold_matmul_tiled.py | 1 + scripts/tiling/prepare_files.sh | 2 +- scripts/tiling/tile_ext.sh | 8 +- setup_tiling_mat.py | 11 +- 8 files changed, 52 insertions(+), 206 deletions(-) delete mode 100755 scripts/tile_ext.sh diff --git a/sam/sim/src/tiling/tile.py b/sam/sim/src/tiling/tile.py index 
971bfbbb..b4982c7c 100644 --- a/sam/sim/src/tiling/tile.py +++ b/sam/sim/src/tiling/tile.py @@ -1,50 +1,35 @@ +import numpy as np +import scipy.sparse +import os import argparse import ast +import yaml import copy -import os import pickle import random -import sys -<<<<<<< HEAD +import sparse from itertools import compress from pathlib import Path -custom_path = '/home/avb03/sam/scripts' -sys.path.append(custom_path) - -from util import FormatWriter, SuiteSparseTensor, InputCacheSuiteSparse - -======= -from pathlib import Path - -import numpy as np -import scipy.sparse -import sparse -import yaml - -from sam.util import SUITESPARSE_PATH, SuiteSparseTensor, InputCacheSuiteSparse, ScipyTensorShifter, \ - FROSTT_PATH, FrosttTensor, PydataSparseTensorDumper, InputCacheTensor -from sam.sim.src.tiling.process_expr import parse_all - -# FIXME: This should not be here... Set your SAM_HOME directory ->>>>>>> 1b45a01e952656b52034005dfab5e2361850773f +import sys custom_path = '/home/avb03/sam' sys.path.append(custom_path) -SAM_STRS = {"matmul_kij": "X(i,j)=B(i,k)*C(k,j) -f=X:ss -f=B:ss:1,0 -f=C:ss -s=reorder(k,i,j)", +from sam.util import SuiteSparseTensor, InputCacheSuiteSparse, ScipyTensorShifter, PydataSparseTensorDumper, SUITESPARSE_PATH, FROSTT_PATH +from sam.sim.src.tiling.process_expr import parse_all, update_dict + +SAM_STRS = {"matmul_kij": "X(i,j)=B(i,k)*C(k,j) -f=X:ss -f=B:ss:1,0 -f=C:ss -s=reorder(k,i,j)", "matmul_ikj": "X(i,j)=B(i,k)*C(k,j) -f=X:ss -f=B:ss -f=C:ss -s=reorder(i,k,j)", "matmul_ijk": "X(i,j)=B(i,k)*C(k,j) -f=X:ss -f=B:ss -f=C:ss:1,0 -s=reorder(i,j,k)", "mat_elemadd": "X(i,j)=B(i,j)+C(i,j) -f=X:ss -f=B:ss -f=C:ss:1,0 -s=reorder(i,j,k)", "mat_elemmul": "X(i,j)=B(i,j)*C(i,j) -f=X:ss -f=B:ss -f=C:ss:1,0 -s=reorder(i,j,k)", "mat_mattransmul": "X(i,j)=B(j,i)*c(j)+d(i) -f=X:ss -f=B:ss -f=c:ss:0 -f=d:ss:0 -s=reorder(i,j)"} - def print_dict(dd): for k, v in dd.items(): print(k, ":", v) - def print_ast(node): for child in ast.iter_child_nodes(node): print_ast(child) @@ -104,7 +89,6 @@ def parse_sam_input(string): ivars = [ivars[tensor] for tensor in tensors] return tensors, permutations, ivars - # Outputs Pydata/sparse tensor tiles, given a pydata/sparse tensor (DOK or COO) # ASSUME: tensor is a scipy.sparse.coo_matrix # TODO: new_ivar_order right now is assumed to be one fixed order @@ -171,6 +155,7 @@ def tile_tensor(tensor, ivar_map, split_map, new_ivar_order=None): return tiles, tile_sizes + # Outputs COO tiles, given a COO tensor # ASSUME: tensor is a scipy.sparse.coo_matrix # TODO: new_ivar_order right now is assumed to be one fixed order @@ -235,68 +220,6 @@ def tile_coo(tensor, ivar_map, split_map, new_ivar_order=None): return tiles, tile_sizes -def tile_coo_tensor_named(tensor_name, tensor, ivar_map, split_map, new_ivar_order=None): - human_readable = False - - tiles = dict() - tile_sizes = dict() - order = len(tensor.shape) - # if tensor_name == 'd' or tensor_name == 'c': - # order = 1 - - tensor_coo = scipy.sparse.coo_matrix(tensor) - tensor_points = tensor_coo.todok() - - print("ivar_map: ", ivar_map) - print("split_map: ", split_map) - print("order: ", order) - - new_shape = [] - for lvl in range(order): - ivar = ivar_map[lvl] - sf = split_map[ivar] - new_shape.append(sf) - - print("new_shape: ", new_shape) - for crds, val in tensor_points.items(): - point = list(crds) - - new_point = [] - tile_id = [] - for lvl in range(order): - ivar = ivar_map[lvl] - sf = split_map[ivar] - - new_point.append(point[lvl] % sf) - tile_id.append(int(point[lvl] / sf)) - - # Add in value to 
the new_point as well - new_point.append(val) - tile_id = tuple(tile_id) - - if tile_id in tiles: - tiles[tile_id].append(new_point) - else: - tiles[tile_id] = [new_point] - - # sort the new coo lists - for key, val in tiles.items(): - if human_readable: - dok = sorted(val) - else: - dok = scipy.sparse.dok_matrix(tuple(new_shape)) - for point in val: - dok[tuple(point[0:-1])] = point[-1] - - tiles[key] = dok - - for tile_id, tile_dok in tiles.items(): - tile = tile_dok.tocoo() - nonempty_rows = tile.getnnz(axis=1) - nonempty_row_ind = np.where(nonempty_rows > 0)[0] - tile_sizes[tile_id] = tile.nnz * 2 + 2 * len(nonempty_row_ind) + 3 - - return tiles, tile_sizes # tensor_names: list of tensor names [B,C,D] (from SAM) # tensors: list of scipy.sparse.coo_matrix following tensor_names (from SAM) @@ -312,28 +235,21 @@ def cotile_coo(tensor_names, tensors, permutation_strs, ivar_strs, split_map, hi tensor_format = permutation_strs[i] ivar_map = dict() order = len(tensor.shape) - # if tensor_name == 'd' or tensor_name == 'c': - # order = 1 - print("tensor name = ", tensor_name) - print("order in cotile_coo = ", order) + print("order is ", order) for dim in range(order): print("tensor format: ", tensor_format) print("dim is ", dim) - print("tensor_format[dim:dim+1] is ", tensor_format[dim:dim + 1]) + print("tensor_format[dim:dim+1] is ", tensor_format[dim:dim+1]) lvl_permutation = tensor_format[dim:dim + 1][0] ivar = ivar_strs[i][dim] ivar_map[lvl_permutation] = ivar print("ivar_map is ", ivar_map) -<<<<<<< HEAD - tiles, tile_sizes = tile_coo_tensor_named(tensor_name, tensor, ivar_map, split_map) -======= if higher_order: tiles, tile_sizes = tile_tensor(tensor, ivar_map, split_map) else: tiles, tile_sizes = tile_coo(tensor, ivar_map, split_map) - ->>>>>>> 1b45a01e952656b52034005dfab5e2361850773f + tiled_tensors[tensor_name] = tiles tiled_tensor_sizes[tensor_name] = tile_sizes @@ -344,8 +260,6 @@ def get_other_tensors(app_str, tensor): tensors = [] tensors.append(tensor) - # formatWriter = FormatWriter(cast_int=True) - if "matmul" in app_str: print("Writing shifted...") shifted = ScipyTensorShifter().shiftLastMode(tensor) @@ -369,25 +283,15 @@ def get_other_tensors(app_str, tensor): pass elif "mat_mattransmul" in app_str: print("Writing other tensors...") -<<<<<<< HEAD rows, cols = tensor.shape # i,j - tensor_c = scipy.sparse.random(cols, 1, data_rvs=np.ones).toarray().flatten() - tensor_d = scipy.sparse.random(rows, 1, data_rvs=np.ones).toarray().flatten() -======= - rows, cols = tensor.shape # i,j tensor_c = scipy.sparse.random(cols, 1).toarray().flatten() tensor_d = scipy.sparse.random(rows, 1).toarray().flatten() ->>>>>>> 1b45a01e952656b52034005dfab5e2361850773f - - # # import pdb; pdb.set_trace() - # tensor_c_coo = formatWriter.convert_format(tensor_c, "coo") - # tensor_d_coo = formatWriter.convert_format(tensor_d, "coo") tensors.append(tensor_c) tensors.append(tensor_d) elif "mat_residual" in app_str: - pass + pass elif "mat_vecmul" in app_str: pass else: @@ -403,14 +307,7 @@ def cotile_multilevel_coo(app_str, hw_config_fname, tensors, output_dir_path, hi names, format_permutations, ivars = parse_sam_input(args.cotile) -<<<<<<< HEAD - print("cotile_multilevel_coo tensors: ", tensors) - - # import pdb; pdb.set_trace(); -======= - import pdb - pdb.set_trace() ->>>>>>> 1b45a01e952656b52034005dfab5e2361850773f + import pdb; pdb.set_trace(); sizes_dict = {} for i, name in enumerate(names): tensor = tensors[i] @@ -448,19 +345,13 @@ def cotile_multilevel_coo(app_str, hw_config_fname, tensors, 
output_dir_path, hi # First iteration of tiling print("tensor shapes: ", tensors[0].shape, " ", tensors[1].shape, " ", tensors[2].shape) print("format_permutations: ", format_permutations) -<<<<<<< HEAD - cotiled, cotiled_sizes = cotile_coo(names, tensors, format_permutations, ivars, split_map) - print("cotiled is ", cotiled) -======= - cotiled, cotiled_sizes = cotile_coo(names, tensors, format_permutations, ivars, split_map, - higher_order) ->>>>>>> 1b45a01e952656b52034005dfab5e2361850773f + cotiled, cotiled_sizes = cotile_coo(names, tensors, format_permutations, ivars, split_map, higher_order) else: # recursively tile the blocks new_cotiled = {} new_cotiled_sizes = {} for i, name in enumerate(names): - # import pdb; pdb.set_trace() + new_cotiled[name] = {} new_cotiled_sizes[name] = {} for tile_id, tile in cotiled[name].items(): @@ -479,7 +370,6 @@ def cotile_multilevel_coo(app_str, hw_config_fname, tensors, output_dir_path, hi for kk, vv in copy.deepcopy(new_cotiled_sizes_temp)[name].items(): new_tile_id = tuple(list(tile_id) + list(kk)) new_cotiled_sizes[name][new_tile_id] = vv - print(new_cotiled_temp) cotiled = copy.deepcopy(new_cotiled) cotiled_sizes = copy.deepcopy(new_cotiled_sizes) @@ -492,20 +382,8 @@ def cotile_multilevel_coo(app_str, hw_config_fname, tensors, output_dir_path, hi inputCache = InputCacheSuiteSparse() -inputCacheTensor = InputCacheTensor() if __name__ == "__main__": -<<<<<<< HEAD - parser = argparse.ArgumentParser(description='Tile matrices') - parser.add_argument("--input_tensor", type=str, default=None) - parser.add_argument("--gen_tensor", action="store_true") - parser.add_argument("--cotile", type=str, default=None) - parser.add_argument("--output_dir_path", type=str, default="./tiles") - parser.add_argument("--hw_config", type=str, default=None) - parser.add_argument("--multilevel", action="store_true") - parser.add_argument("--input_path", type=str, default=None) - parser.add_argument("--extensor", action="store_true") -======= parser = argparse.ArgumentParser(description='script that tiles tensors') parser.add_argument("--tensor_type", choices=['ex', 'gen', 'file', 'ss', 'frostt'], help='The \ tiles, tile_sizes = tile_coo(tensor, ivar_map, split_map) \ @@ -520,13 +398,13 @@ def cotile_multilevel_coo(app_str, hw_config_fname, tensors, output_dir_path, hi \n 'frostt' should always have 'higher_order' set as true.") parser.add_argument("--input_tensor", type=str, default=None, - help="Input tensor NAME if tensor_type is set to 'file'. \ + help="Input tensor NAME if tensor_type is set to 'file'. 
\ This is for use with SuiteSparse or FROSTT") parser.add_argument("--input_path", type=str, default=None, help="Input tensor path") parser.add_argument("--output_dir_path", type=str, default="./tiles", - help='Output path, directory where tiles get written to') + help='Output path, directory where tiles get written to') parser.add_argument("--hw_config", type=str, default=None, - help='Path to the hardware config yaml') + help='Path to the hardware config yaml') parser.add_argument("--cotile", type=str, default=None, help='If \ this is true cotile multiple tensors, else tile one tensor only') @@ -534,7 +412,6 @@ def cotile_multilevel_coo(app_str, hw_config_fname, tensors, output_dir_path, hi multilevel is true there will exist more than one level of tiles, \ else only tile once') parser.add_argument("--seed", type=int, default=0, help="Random seed") ->>>>>>> 1b45a01e952656b52034005dfab5e2361850773f args = parser.parse_args() @@ -551,22 +428,22 @@ def cotile_multilevel_coo(app_str, hw_config_fname, tensors, output_dir_path, hi elif args.tensor_type == "ex": tensor = scipy.io.mmread(args.input_path) elif args.tensor_type == "ss": - assert args.input_tensor is not None - tensor_path = os.path.join(SUITESPARSE_PATH, args.input_tensor + ".mtx") - ss_tensor = SuiteSparseTensor(tensor_path) - tensor = inputCache.load(ss_tensor, False) + assert args.input_tensor is not None + tensor_path = os.path.join(SUITESPARSE_PATH, args.input_tensor + ".mtx") + ss_tensor = SuiteSparseTensor(tensor_path) + tensor = inputCache.load(ss_tensor, False) elif args.tensor_type == "frostt": - assert args.input_tensor is not None - assert args.higher_order + assert args.input_tensor is not None + assert args.higher_order - tensor_path = os.path.join(FROSTT_PATH, args.input_tensor + ".tns") + tensor_path = os.path.join(FROSTT_PATH, args.input_tensor + ".tns") - # FIXME: This is broken - frostt_tensor = FrosttTensor(tensor_path) - tensor = inputCacheTensor.load(frostt_tensor, False) + # FIXME: This is broken + frostt_tensor = FrosttTensor(tensor_path) + tensor = inputCache.load(ss_tensor, False) else: - raise ValueError("This choice of 'tensor_type' is unreachable") + raise ValueError("This choice of 'tensor_type' is unreachable") split_map = {"i": 16, "j": 16, "k": 16} @@ -587,19 +464,17 @@ def cotile_multilevel_coo(app_str, hw_config_fname, tensors, output_dir_path, hi assert args.cotile is not None cotiled_tensors = cotile_multilevel_coo(args.cotile, args.hw_config, [tensor], os.path.join(args.output_dir_path, - args.cotile), + args.cotile), args.higher_order) elif args.cotile is not None: tensor2 = scipy.sparse.random(tensor.shape[0], tensor.shape[1]) names, format_permutations, ivars = parse_sam_input(args.cotile) cotiled_tensors = cotile_coo(names, [tensor, tensor2], - format_permutations, ivars, split_map, args.higher_order) + format_permutations, ivars, split_map, args.higher_order) # print(cotiled_tensors) names = cotiled_tensors.keys() - print("cotiled_tensors.keys(): ", names) - print("cotiled_tensors: ", cotiled_tensors) for name in names: for tile_id, tile in cotiled_tensors[name].items(): [str(item) for item in tile_id] @@ -614,4 +489,4 @@ def cotile_multilevel_coo(app_str, hw_config_fname, tensors, output_dir_path, hi print(tile.shape) tns_dumper.dump(tile, mtx_path_name) else: - scipy.io.mmwrite(mtx_path_name, tile) + scipy.io.mmwrite(mtx_path_name, tile) \ No newline at end of file diff --git a/scripts/formatting/datastructure_suitesparse.py b/scripts/formatting/datastructure_suitesparse.py index 
f9aeb3ae..823e8e13 100644 --- a/scripts/formatting/datastructure_suitesparse.py +++ b/scripts/formatting/datastructure_suitesparse.py @@ -219,7 +219,8 @@ def write_datastructure_bench(args, tensor, out_path, tiles=None): mtx_files = None if args.tiles: # get all mtx tile files from args.input_path - mtx_files = [os.path.join(args.input_path, fname) for fname in os.listdir(args.input_path) if fname.endswith(".mtx")] + # mtx_files = [os.path.join(args.input_path, fname) for fname in os.listdir(args.input_path) if fname.endswith(".mtx")] + mtx_files = [os.path.join(args.input_path, fname) for fname in os.listdir(args.input_path)] tensor = [SuiteSparseTensor(mtx_file) for mtx_file in mtx_files] elif args.input_path is not None: diff --git a/scripts/suitesparse_memory_model_runner.sh b/scripts/suitesparse_memory_model_runner.sh index ccea10e6..d2873266 100755 --- a/scripts/suitesparse_memory_model_runner.sh +++ b/scripts/suitesparse_memory_model_runner.sh @@ -31,7 +31,7 @@ mkdir -p $path mkdir -p $basedir/tiles/ rm -rf $basedir/tiles/* -./scripts/prepare_files.sh $fname.mtx $yaml_fname +./scripts/tiling/prepare_files.sh $fname.mtx $yaml_fname cd $basedir/sam/sim # python3 -m pytest test/advanced-simulator/test_$bench.py --ssname $fname -s --check-gold --skip-empty --nbuffer --yaml_name=$yaml_fname --benchmark-json=$path/mem_model_$fname.json @@ -39,6 +39,6 @@ cd $basedir/sam/sim # python3 $basedir/scripts/converter.py --json_name $path/mem_model_$fname.json -python3 $basedir/scripts/bench_csv_aggregator.py $path $basedir/$benchout/$bench.csv +python3 $basedir/scripts/util/bench_csv_aggregator.py $path $basedir/$benchout/$bench.csv popd diff --git a/scripts/tile_ext.sh b/scripts/tile_ext.sh deleted file mode 100755 index 1df28a71..00000000 --- a/scripts/tile_ext.sh +++ /dev/null @@ -1,34 +0,0 @@ -#!/bin/bash - -BENCHMARKS=( -# matmul_ijk -# mat_elemadd - # mat_elemmul - mat_mattransmul -) - -# THIS FILE MUST BE RUN FROM sam/ location -sspath=$SUITESPARSE_PATH - -basedir=$(pwd) - -ext_path=$basedir/extensor_mtx/$1 - -echo "$ext_path" - -for b in ${!BENCHMARKS[@]}; do - bench=${BENCHMARKS[$b]} - path=$basedir/$benchout/$bench - mkdir -p $basedir/$benchout/$bench - echo "Testing $bench..." - - rm -rf $basedir/tiles/* - - echo "Tiling mtx file" - python3 $basedir/sam/sim/src/tiling/tile.py --tensor_type dataset --input_path $ext_path --cotile $bench --multilevel --hw_config $basedir/sam/sim/src/tiling/$2 - - echo "Generating input format files for $ext_path..." 
- python3 $basedir/scripts/datastructure_suitesparse.py -n temp -hw -b $bench --input $basedir/tiles/$bench/mtx/ --output_dir_path $basedir/tiles/$bench/formatted --tiles - -done - diff --git a/scripts/tiling/generate_gold_matmul_tiled.py b/scripts/tiling/generate_gold_matmul_tiled.py index cd90cb33..2a069a0c 100644 --- a/scripts/tiling/generate_gold_matmul_tiled.py +++ b/scripts/tiling/generate_gold_matmul_tiled.py @@ -11,6 +11,7 @@ from pathlib import Path from scripts.util.util import round_sparse +app_name = "mat_mattransmul" def generate_gold_matmul_tiled(tile_crd_b, tile_crd_c, dirname, out_format="ss01"): # CSR diff --git a/scripts/tiling/prepare_files.sh b/scripts/tiling/prepare_files.sh index 5aece84f..3d24d037 100755 --- a/scripts/tiling/prepare_files.sh +++ b/scripts/tiling/prepare_files.sh @@ -12,4 +12,4 @@ rm -rf $basedir/tiles/* ./scripts/tiling/tile_ext.sh $1 memory_config_extensor_17M_llb.yaml -python scripts/tiling/generate_gold_matmul_tiled.py --yaml_name memory_config_extensor_17M_llb.yaml +python3 scripts/tiling/generate_gold_matmul_tiled.py --yaml_name memory_config_extensor_17M_llb.yaml diff --git a/scripts/tiling/tile_ext.sh b/scripts/tiling/tile_ext.sh index 4548f466..16126e12 100755 --- a/scripts/tiling/tile_ext.sh +++ b/scripts/tiling/tile_ext.sh @@ -4,7 +4,8 @@ # ./scripts/tiling/tile_ext.sh BENCHMARKS=( - matmul_ikj +# matmul_ikj + mat_mattransmul ) sspath=$SUITESPARSE_PATH @@ -24,10 +25,11 @@ for b in ${!BENCHMARKS[@]}; do rm -rf $basedir/tiles/* echo "Tiling mtx file" - python $basedir/sam/sim/src/tiling/tile.py --extensor --input_path $tiles_path --cotile $bench --multilevel --hw_config $basedir/sam/sim/src/tiling/$2 + # python $basedir/sam/sim/src/tiling/tile.py --extensor --input_path $tiles_path --cotile $bench --multilevel --hw_config $basedir/sam/sim/src/tiling/$2 + python3 ./sam/sim/src/tiling/tile.py --tensor_type ss --input_tensor rel5 --cotile mat_mattransmul --multilevel --hw_config ./sam/sim/src/tiling/memory_config_onyx.yaml --higher_order echo "Generating input format files for $tiles_path..." 
- python $basedir/scripts/formatting/datastructure_suitesparse.py -n temp -hw -b $bench --input $basedir/tiles/$bench/mtx/ --output_dir_path $basedir/tiles/$bench/formatted --tiles + python3 $basedir/scripts/formatting/datastructure_suitesparse.py -n temp -hw -b $bench --input $basedir/tiles/$bench/mtx/ --output_dir_path $basedir/tiles/$bench/formatted --tiles done diff --git a/setup_tiling_mat.py b/setup_tiling_mat.py index d20cb49a..e7195c99 100644 --- a/setup_tiling_mat.py +++ b/setup_tiling_mat.py @@ -13,11 +13,12 @@ # for line in data_file_lines: # data.append(line[:-1]) -if not os.path.exists("extensor_mtx"): - os.mkdir("extensor_mtx") - -if not os.path.exists("tiles_compiled"): - os.mkdir("tiles_compiled") +os.environ["SUITESPARSE_PATH"] = "/nobackup/owhsu/sparse-datasets/suitesparse/" +os.environ["FROSTT_PATH"] = "/nobackup/owhsu/sparse-datasets/frostt/" +os.environ["SUITESPARSE_FORMATTED_PATH"] = "/home/avb03/sam/SUITESPARSE_FORMATTED" +os.environ["FROSTT_FORMATTED_TACO_PATH"] = "/home/avb03/sam/FROST_FORMATTED_TACO" +os.environ["FROSTT_FORMATTED_PATH"] = "/home/avb03/sam/FROST_FORMATTED" +os.environ["TACO_TENSOR_PATH"] = "/home/avb03/sam/TACO_TENSOR" for datum in data: mtx_file = glob.glob(f"{SUITESPARSE_PATH}/{datum}.mtx")[0] From 09fb1b0bf7ac2c249c949335b0755eb008bcd753 Mon Sep 17 00:00:00 2001 From: Olivia Hsu Date: Thu, 27 Jul 2023 16:32:26 -0700 Subject: [PATCH 21/39] Add in fixes to tile.py that broke during merge --- sam/sim/src/tiling/tile.py | 97 +++++++++++++++++++++++--------------- 1 file changed, 58 insertions(+), 39 deletions(-) diff --git a/sam/sim/src/tiling/tile.py b/sam/sim/src/tiling/tile.py index b4982c7c..bae03922 100644 --- a/sam/sim/src/tiling/tile.py +++ b/sam/sim/src/tiling/tile.py @@ -1,5 +1,6 @@ import numpy as np import scipy.sparse +import scipy.io import os import argparse import ast @@ -8,28 +9,31 @@ import pickle import random import sparse +import sys -from itertools import compress from pathlib import Path -import sys +from sam.util import SUITESPARSE_PATH, SuiteSparseTensor, InputCacheSuiteSparse, ScipyTensorShifter, \ + FROSTT_PATH, FrosttTensor, PydataSparseTensorDumper, InputCacheTensor +from sam.sim.src.tiling.process_expr import parse_all + +# FIXME: This should not be here... 
Set your SAM_HOME directory custom_path = '/home/avb03/sam' sys.path.append(custom_path) -from sam.util import SuiteSparseTensor, InputCacheSuiteSparse, ScipyTensorShifter, PydataSparseTensorDumper, SUITESPARSE_PATH, FROSTT_PATH -from sam.sim.src.tiling.process_expr import parse_all, update_dict - -SAM_STRS = {"matmul_kij": "X(i,j)=B(i,k)*C(k,j) -f=X:ss -f=B:ss:1,0 -f=C:ss -s=reorder(k,i,j)", +SAM_STRS = {"matmul_kij": "X(i,j)=B(i,k)*C(k,j) -f=X:ss -f=B:ss:1,0 -f=C:ss -s=reorder(k,i,j)", "matmul_ikj": "X(i,j)=B(i,k)*C(k,j) -f=X:ss -f=B:ss -f=C:ss -s=reorder(i,k,j)", "matmul_ijk": "X(i,j)=B(i,k)*C(k,j) -f=X:ss -f=B:ss -f=C:ss:1,0 -s=reorder(i,j,k)", "mat_elemadd": "X(i,j)=B(i,j)+C(i,j) -f=X:ss -f=B:ss -f=C:ss:1,0 -s=reorder(i,j,k)", "mat_elemmul": "X(i,j)=B(i,j)*C(i,j) -f=X:ss -f=B:ss -f=C:ss:1,0 -s=reorder(i,j,k)", "mat_mattransmul": "X(i,j)=B(j,i)*c(j)+d(i) -f=X:ss -f=B:ss -f=c:ss:0 -f=d:ss:0 -s=reorder(i,j)"} + def print_dict(dd): for k, v in dd.items(): print(k, ":", v) + def print_ast(node): for child in ast.iter_child_nodes(node): print_ast(child) @@ -87,13 +91,16 @@ def parse_sam_input(string): permutations = [list(map(int, dictionary[tensor]["perm"])) for tensor in tensors] ivars = get_ivars(tensors, str_arr[0]) ivars = [ivars[tensor] for tensor in tensors] + + print("PARSE SAM INPUTS", tensors) return tensors, permutations, ivars + # Outputs Pydata/sparse tensor tiles, given a pydata/sparse tensor (DOK or COO) # ASSUME: tensor is a scipy.sparse.coo_matrix # TODO: new_ivar_order right now is assumed to be one fixed order # In the future, will have to take into acocunt all reorderings -def tile_tensor(tensor, ivar_map, split_map, new_ivar_order=None): +def tile_tensor(tensor, ivar_map, split_map, new_ivar_order=None, tensor_name=""): human_readable = False tiles = dict() @@ -155,12 +162,11 @@ def tile_tensor(tensor, ivar_map, split_map, new_ivar_order=None): return tiles, tile_sizes - # Outputs COO tiles, given a COO tensor # ASSUME: tensor is a scipy.sparse.coo_matrix # TODO: new_ivar_order right now is assumed to be one fixed order # In the future, will have to take into acocunt all reorderings -def tile_coo(tensor, ivar_map, split_map, new_ivar_order=None): +def tile_coo(tensor, ivar_map, split_map, new_ivar_order=None, tensor_name=""): human_readable = False tiles = dict() @@ -239,26 +245,25 @@ def cotile_coo(tensor_names, tensors, permutation_strs, ivar_strs, split_map, hi for dim in range(order): print("tensor format: ", tensor_format) print("dim is ", dim) - print("tensor_format[dim:dim+1] is ", tensor_format[dim:dim+1]) + print("tensor_format[dim:dim+1] is ", tensor_format[dim:dim + 1]) lvl_permutation = tensor_format[dim:dim + 1][0] ivar = ivar_strs[i][dim] ivar_map[lvl_permutation] = ivar print("ivar_map is ", ivar_map) if higher_order: - tiles, tile_sizes = tile_tensor(tensor, ivar_map, split_map) + tiles, tile_sizes = tile_tensor(tensor, ivar_map, split_map, tensor_name=tensor_name) else: - tiles, tile_sizes = tile_coo(tensor, ivar_map, split_map) - + tiles, tile_sizes = tile_coo(tensor, tensor_name, ivar_map, split_map, tensor_name=tensor_name) + tiled_tensors[tensor_name] = tiles tiled_tensor_sizes[tensor_name] = tile_sizes return tiled_tensors, tiled_tensor_sizes -def get_other_tensors(app_str, tensor): - tensors = [] - tensors.append(tensor) +def get_other_tensors(app_str, tensor, other_nonempty=True): + tensors = [tensor] if "matmul" in app_str: print("Writing shifted...") @@ -283,15 +288,21 @@ def get_other_tensors(app_str, tensor): pass elif "mat_mattransmul" in 
app_str: print("Writing other tensors...") - rows, cols = tensor.shape # i,j - tensor_c = scipy.sparse.random(cols, 1).toarray().flatten() - tensor_d = scipy.sparse.random(rows, 1).toarray().flatten() + rows, cols = tensor.shape # i,j + tensor_c = scipy.sparse.random(cols, 1, data_rvs=np.ones).toarray().flatten() + tensor_d = scipy.sparse.random(rows, 1, data_rvs=np.ones).toarray().flatten() + + if other_nonempty: + tensor_c[0] = 1 + tensor_d[0] = 1 + + # import pdb; pdb.set_trace() tensors.append(tensor_c) tensors.append(tensor_d) elif "mat_residual" in app_str: - pass + pass elif "mat_vecmul" in app_str: pass else: @@ -307,7 +318,11 @@ def cotile_multilevel_coo(app_str, hw_config_fname, tensors, output_dir_path, hi names, format_permutations, ivars = parse_sam_input(args.cotile) - import pdb; pdb.set_trace(); + print("cotile_multilevel_coo tensors: ", names, "\n", tensors) + + # import pdb + # pdb.set_trace() + sizes_dict = {} for i, name in enumerate(names): tensor = tensors[i] @@ -345,7 +360,9 @@ def cotile_multilevel_coo(app_str, hw_config_fname, tensors, output_dir_path, hi # First iteration of tiling print("tensor shapes: ", tensors[0].shape, " ", tensors[1].shape, " ", tensors[2].shape) print("format_permutations: ", format_permutations) - cotiled, cotiled_sizes = cotile_coo(names, tensors, format_permutations, ivars, split_map, higher_order) + cotiled, cotiled_sizes = cotile_coo(names, tensors, format_permutations, ivars, split_map, + higher_order) + print("cotiled is ", cotiled) else: # recursively tile the blocks new_cotiled = {} @@ -398,13 +415,13 @@ def cotile_multilevel_coo(app_str, hw_config_fname, tensors, output_dir_path, hi \n 'frostt' should always have 'higher_order' set as true.") parser.add_argument("--input_tensor", type=str, default=None, - help="Input tensor NAME if tensor_type is set to 'file'. \ + help="Input tensor NAME if tensor_type is set to 'file'. 
\ This is for use with SuiteSparse or FROSTT") parser.add_argument("--input_path", type=str, default=None, help="Input tensor path") parser.add_argument("--output_dir_path", type=str, default="./tiles", - help='Output path, directory where tiles get written to') + help='Output path, directory where tiles get written to') parser.add_argument("--hw_config", type=str, default=None, - help='Path to the hardware config yaml') + help='Path to the hardware config yaml') parser.add_argument("--cotile", type=str, default=None, help='If \ this is true cotile multiple tensors, else tile one tensor only') @@ -412,6 +429,8 @@ def cotile_multilevel_coo(app_str, hw_config_fname, tensors, output_dir_path, hi multilevel is true there will exist more than one level of tiles, \ else only tile once') parser.add_argument("--seed", type=int, default=0, help="Random seed") + parser.add_argument("--other_nonempty", action="store_true", + help="If this is enabled, the 'other' tensors will have at least one nonzero value") args = parser.parse_args() @@ -428,22 +447,22 @@ def cotile_multilevel_coo(app_str, hw_config_fname, tensors, output_dir_path, hi elif args.tensor_type == "ex": tensor = scipy.io.mmread(args.input_path) elif args.tensor_type == "ss": - assert args.input_tensor is not None - tensor_path = os.path.join(SUITESPARSE_PATH, args.input_tensor + ".mtx") - ss_tensor = SuiteSparseTensor(tensor_path) - tensor = inputCache.load(ss_tensor, False) + assert args.input_tensor is not None + tensor_path = os.path.join(SUITESPARSE_PATH, args.input_tensor + ".mtx") + ss_tensor = SuiteSparseTensor(tensor_path) + tensor = inputCache.load(ss_tensor, False) elif args.tensor_type == "frostt": - assert args.input_tensor is not None - assert args.higher_order + assert args.input_tensor is not None + assert args.higher_order - tensor_path = os.path.join(FROSTT_PATH, args.input_tensor + ".tns") + tensor_path = os.path.join(FROSTT_PATH, args.input_tensor + ".tns") - # FIXME: This is broken - frostt_tensor = FrosttTensor(tensor_path) - tensor = inputCache.load(ss_tensor, False) + # FIXME: This is broken + frostt_tensor = FrosttTensor(tensor_path) + tensor = inputCache.load(frostt_tensor, False) else: - raise ValueError("This choice of 'tensor_type' is unreachable") + raise ValueError("This choice of 'tensor_type' is unreachable") split_map = {"i": 16, "j": 16, "k": 16} @@ -464,14 +483,14 @@ def cotile_multilevel_coo(app_str, hw_config_fname, tensors, output_dir_path, hi assert args.cotile is not None cotiled_tensors = cotile_multilevel_coo(args.cotile, args.hw_config, [tensor], os.path.join(args.output_dir_path, - args.cotile), + args.cotile), args.higher_order) elif args.cotile is not None: tensor2 = scipy.sparse.random(tensor.shape[0], tensor.shape[1]) names, format_permutations, ivars = parse_sam_input(args.cotile) cotiled_tensors = cotile_coo(names, [tensor, tensor2], - format_permutations, ivars, split_map, args.higher_order) + format_permutations, ivars, split_map, args.higher_order) # print(cotiled_tensors) names = cotiled_tensors.keys() @@ -489,4 +508,4 @@ def cotile_multilevel_coo(app_str, hw_config_fname, tensors, output_dir_path, hi print(tile.shape) tns_dumper.dump(tile, mtx_path_name) else: - scipy.io.mmwrite(mtx_path_name, tile) \ No newline at end of file + scipy.io.mmwrite(mtx_path_name, tile) From 479a566cf0abb4e80396c010172833c5810269ab Mon Sep 17 00:00:00 2001 From: Akhilesh Varadan Balasingam Date: Sat, 29 Jul 2023 10:54:44 -0700 Subject: [PATCH 22/39] formatting scripts mod --- sam/sim/src/tiling/tile.py | 
8 +++++--- scripts/tiling/tile_ext.sh | 2 ++ 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/sam/sim/src/tiling/tile.py b/sam/sim/src/tiling/tile.py index bae03922..3f259de6 100644 --- a/sam/sim/src/tiling/tile.py +++ b/sam/sim/src/tiling/tile.py @@ -504,8 +504,10 @@ def cotile_multilevel_coo(app_str, hw_config_fname, tensors, output_dir_path, hi print("Output path:", mtx_path_name) if args.higher_order: - tns_dumper = PydataSparseTensorDumper() + # tns_dumper = PydataSparseTensorDumper() print(tile.shape) - tns_dumper.dump(tile, mtx_path_name) + # tns_dumper.dump(tile, mtx_path_name) + # print(np.array(tile.todense()).reshape(1,-1)) + scipy.io.mmwrite(mtx_path_name, np.array(tile.todense()).reshape(1,-1)) else: - scipy.io.mmwrite(mtx_path_name, tile) + scipy.io.mmwrite(mtx_path_name, tile) \ No newline at end of file diff --git a/scripts/tiling/tile_ext.sh b/scripts/tiling/tile_ext.sh index 16126e12..36ce32ee 100755 --- a/scripts/tiling/tile_ext.sh +++ b/scripts/tiling/tile_ext.sh @@ -31,5 +31,7 @@ for b in ${!BENCHMARKS[@]}; do echo "Generating input format files for $tiles_path..." python3 $basedir/scripts/formatting/datastructure_suitesparse.py -n temp -hw -b $bench --input $basedir/tiles/$bench/mtx/ --output_dir_path $basedir/tiles/$bench/formatted --tiles + # $basedir/compiler/taco/build/bin/taco-test sam.pack_ss01 + # python3 $basedir/scripts/formatting/datastructure_tns.py -n rel5 -f ss01 -b $bench -hw done From 8846e91316b1e6f14bbad4b6c676e0d6a61a82bf Mon Sep 17 00:00:00 2001 From: Akhilesh Varadan Balasingam Date: Sat, 29 Jul 2023 11:05:39 -0700 Subject: [PATCH 23/39] mtx vs tns file for vector tiling --- sam/sim/src/tiling/tile.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sam/sim/src/tiling/tile.py b/sam/sim/src/tiling/tile.py index 3f259de6..bd964ab0 100644 --- a/sam/sim/src/tiling/tile.py +++ b/sam/sim/src/tiling/tile.py @@ -498,7 +498,8 @@ def cotile_multilevel_coo(app_str, hw_config_fname, tensors, output_dir_path, hi for tile_id, tile in cotiled_tensors[name].items(): [str(item) for item in tile_id] filename = "tensor_" + name + "_tile_" + "_".join([str(item) for item in tile_id]) - filename += ".tns" if args.higher_order else ".mtx" + # filename += ".tns" if args.higher_order else ".mtx" + filename += ".mtx" mtx_path_name = os.path.join(output_mtx_name, filename) print(tile) print("Output path:", mtx_path_name) From 5a0998f6c1acbd1915d5cd20f53dd73739f51501 Mon Sep 17 00:00:00 2001 From: Akhilesh Varadan Balasingam Date: Sat, 29 Jul 2023 11:47:41 -0700 Subject: [PATCH 24/39] correct tile.py --- sam/sim/src/tiling/tile.py | 230 ++++++++----------------------------- 1 file changed, 45 insertions(+), 185 deletions(-) diff --git a/sam/sim/src/tiling/tile.py b/sam/sim/src/tiling/tile.py index bd964ab0..88be95f2 100644 --- a/sam/sim/src/tiling/tile.py +++ b/sam/sim/src/tiling/tile.py @@ -1,6 +1,5 @@ import numpy as np import scipy.sparse -import scipy.io import os import argparse import ast @@ -8,32 +7,28 @@ import copy import pickle import random -import sparse -import sys +from itertools import compress from pathlib import Path -from sam.util import SUITESPARSE_PATH, SuiteSparseTensor, InputCacheSuiteSparse, ScipyTensorShifter, \ - FROSTT_PATH, FrosttTensor, PydataSparseTensorDumper, InputCacheTensor -from sam.sim.src.tiling.process_expr import parse_all - -# FIXME: This should not be here... 
Set your SAM_HOME directory +import sys custom_path = '/home/avb03/sam' sys.path.append(custom_path) -SAM_STRS = {"matmul_kij": "X(i,j)=B(i,k)*C(k,j) -f=X:ss -f=B:ss:1,0 -f=C:ss -s=reorder(k,i,j)", +from sam.util import SuiteSparseTensor, InputCacheSuiteSparse, ScipyTensorShifter +from sam.sim.src.tiling.process_expr import parse_all, update_dict + +SAM_STRS = {"matmul_kij": "X(i,j)=B(i,k)*C(k,j) -f=X:ss -f=B:ss:1,0 -f=C:ss -s=reorder(k,i,j)", "matmul_ikj": "X(i,j)=B(i,k)*C(k,j) -f=X:ss -f=B:ss -f=C:ss -s=reorder(i,k,j)", "matmul_ijk": "X(i,j)=B(i,k)*C(k,j) -f=X:ss -f=B:ss -f=C:ss:1,0 -s=reorder(i,j,k)", "mat_elemadd": "X(i,j)=B(i,j)+C(i,j) -f=X:ss -f=B:ss -f=C:ss:1,0 -s=reorder(i,j,k)", "mat_elemmul": "X(i,j)=B(i,j)*C(i,j) -f=X:ss -f=B:ss -f=C:ss:1,0 -s=reorder(i,j,k)", "mat_mattransmul": "X(i,j)=B(j,i)*c(j)+d(i) -f=X:ss -f=B:ss -f=c:ss:0 -f=d:ss:0 -s=reorder(i,j)"} - def print_dict(dd): for k, v in dd.items(): print(k, ":", v) - def print_ast(node): for child in ast.iter_child_nodes(node): print_ast(child) @@ -91,82 +86,14 @@ def parse_sam_input(string): permutations = [list(map(int, dictionary[tensor]["perm"])) for tensor in tensors] ivars = get_ivars(tensors, str_arr[0]) ivars = [ivars[tensor] for tensor in tensors] - - print("PARSE SAM INPUTS", tensors) return tensors, permutations, ivars -# Outputs Pydata/sparse tensor tiles, given a pydata/sparse tensor (DOK or COO) -# ASSUME: tensor is a scipy.sparse.coo_matrix -# TODO: new_ivar_order right now is assumed to be one fixed order -# In the future, will have to take into acocunt all reorderings -def tile_tensor(tensor, ivar_map, split_map, new_ivar_order=None, tensor_name=""): - human_readable = False - - tiles = dict() - tile_sizes = dict() - order = len(tensor.shape) - - tensor_coo = sparse.COO(tensor) - tensor_points = sparse.DOK.from_coo(tensor_coo) - - print("ivar_map: ", ivar_map) - print("split_map: ", split_map) - print("order = ", order) - - new_shape = [] - for lvl in range(order): - ivar = ivar_map[lvl] - sf = split_map[ivar] - new_shape.append(sf) - - for crds, val in tensor_points.data.items(): - point = list(crds) - - new_point = [] - tile_id = [] - for lvl in range(order): - ivar = ivar_map[lvl] - sf = split_map[ivar] - - new_point.append(point[lvl] % sf) - tile_id.append(int(point[lvl] / sf)) - - # Add in value to the new_point as well - new_point.append(val) - tile_id = tuple(tile_id) - - if tile_id in tiles: - tiles[tile_id].append(new_point) - else: - tiles[tile_id] = [new_point] - - # sort the new coo lists - for key, val in tiles.items(): - if human_readable: - dok = sorted(val) - else: - dok = sparse.DOK(tuple(new_shape)) - for point in val: - dok[tuple(point[0:-1])] = point[-1] - - tiles[key] = dok - - for tile_id, tile_dok in tiles.items(): - tile = tile_dok.to_coo() - # FIXME: This size number isn't correct for tensor tiles - nonempty_rows = tile.nnz - nonempty_row_ind = np.where(nonempty_rows > 0)[0] - tile_sizes[tile_id] = tile.nnz * 2 + 2 * len(nonempty_row_ind) + 3 - - return tiles, tile_sizes - - # Outputs COO tiles, given a COO tensor # ASSUME: tensor is a scipy.sparse.coo_matrix # TODO: new_ivar_order right now is assumed to be one fixed order # In the future, will have to take into acocunt all reorderings -def tile_coo(tensor, ivar_map, split_map, new_ivar_order=None, tensor_name=""): +def tile_coo(tensor, ivar_map, split_map, new_ivar_order=None): human_readable = False tiles = dict() @@ -232,7 +159,7 @@ def tile_coo(tensor, ivar_map, split_map, new_ivar_order=None, tensor_name=""): # permutation_strs: 
list of permutation_strs [ss01, ss10] following tensor_names (from SAM) # ivar_strs: list of ivar_strs ["ik", "kj"] following tensor_names (from SAM) # split_map: dictionary of split factors (from hardware) -def cotile_coo(tensor_names, tensors, permutation_strs, ivar_strs, split_map, higher_order=False): +def cotile_coo(tensor_names, tensors, permutation_strs, ivar_strs, split_map): tiled_tensors = dict() tiled_tensor_sizes = dict() @@ -245,25 +172,22 @@ def cotile_coo(tensor_names, tensors, permutation_strs, ivar_strs, split_map, hi for dim in range(order): print("tensor format: ", tensor_format) print("dim is ", dim) - print("tensor_format[dim:dim+1] is ", tensor_format[dim:dim + 1]) + print("tensor_format[dim:dim+1] is ", tensor_format[dim:dim+1]) lvl_permutation = tensor_format[dim:dim + 1][0] ivar = ivar_strs[i][dim] ivar_map[lvl_permutation] = ivar print("ivar_map is ", ivar_map) - if higher_order: - tiles, tile_sizes = tile_tensor(tensor, ivar_map, split_map, tensor_name=tensor_name) - else: - tiles, tile_sizes = tile_coo(tensor, tensor_name, ivar_map, split_map, tensor_name=tensor_name) - + tiles, tile_sizes = tile_coo(tensor, ivar_map, split_map) tiled_tensors[tensor_name] = tiles tiled_tensor_sizes[tensor_name] = tile_sizes return tiled_tensors, tiled_tensor_sizes -def get_other_tensors(app_str, tensor, other_nonempty=True): - tensors = [tensor] +def get_other_tensors(app_str, tensor): + tensors = [] + tensors.append(tensor) if "matmul" in app_str: print("Writing shifted...") @@ -288,21 +212,15 @@ def get_other_tensors(app_str, tensor, other_nonempty=True): pass elif "mat_mattransmul" in app_str: print("Writing other tensors...") - rows, cols = tensor.shape # i,j - tensor_c = scipy.sparse.random(cols, 1, data_rvs=np.ones).toarray().flatten() - tensor_d = scipy.sparse.random(rows, 1, data_rvs=np.ones).toarray().flatten() - - if other_nonempty: - tensor_c[0] = 1 - tensor_d[0] = 1 - - # import pdb; pdb.set_trace() + rows, cols = tensor.shape # i,j + tensor_c = scipy.sparse.random(cols, 1).toarray().flatten() + tensor_d = scipy.sparse.random(rows, 1).toarray().flatten() tensors.append(tensor_c) tensors.append(tensor_d) elif "mat_residual" in app_str: - pass + pass elif "mat_vecmul" in app_str: pass else: @@ -313,16 +231,12 @@ def get_other_tensors(app_str, tensor, other_nonempty=True): return tensors -def cotile_multilevel_coo(app_str, hw_config_fname, tensors, output_dir_path, higher_order=False): +def cotile_multilevel_coo(app_str, hw_config_fname, tensors, output_dir_path): tensors = get_other_tensors(app_str, tensors[0]) names, format_permutations, ivars = parse_sam_input(args.cotile) - print("cotile_multilevel_coo tensors: ", names, "\n", tensors) - - # import pdb - # pdb.set_trace() - + import pdb; pdb.set_trace(); sizes_dict = {} for i, name in enumerate(names): tensor = tensors[i] @@ -360,9 +274,7 @@ def cotile_multilevel_coo(app_str, hw_config_fname, tensors, output_dir_path, hi # First iteration of tiling print("tensor shapes: ", tensors[0].shape, " ", tensors[1].shape, " ", tensors[2].shape) print("format_permutations: ", format_permutations) - cotiled, cotiled_sizes = cotile_coo(names, tensors, format_permutations, ivars, split_map, - higher_order) - print("cotiled is ", cotiled) + cotiled, cotiled_sizes = cotile_coo(names, tensors, format_permutations, ivars, split_map) else: # recursively tile the blocks new_cotiled = {} @@ -372,13 +284,9 @@ def cotile_multilevel_coo(app_str, hw_config_fname, tensors, output_dir_path, hi new_cotiled[name] = {} new_cotiled_sizes[name] = 
{} for tile_id, tile in cotiled[name].items(): - if higher_order: - tile_in_coo = tile.to_coo() - else: - tile_in_coo = tile.tocoo() - new_cotiled_temp, new_cotiled_sizes_temp = cotile_coo(name, [tile_in_coo], + new_cotiled_temp, new_cotiled_sizes_temp = cotile_coo(name, [tile.tocoo()], [format_permutations[i]], [ivars[i]], - split_map, higher_order) + split_map) for kk, vv in copy.deepcopy(new_cotiled_temp)[name].items(): new_tile_id = tuple(list(tile_id) + list(kk)) @@ -401,68 +309,31 @@ def cotile_multilevel_coo(app_str, hw_config_fname, tensors, output_dir_path, hi inputCache = InputCacheSuiteSparse() if __name__ == "__main__": - parser = argparse.ArgumentParser(description='script that tiles tensors') - parser.add_argument("--tensor_type", choices=['ex', 'gen', 'file', 'ss', 'frostt'], help='The \ - tiles, tile_sizes = tile_coo(tensor, ivar_map, split_map) \ - type of tensor to tile: extensor(ex), generated (gen), \ - SuiteSparse (ss), FROSTT (frostt), or input file (file)') - parser.add_argument("--higher_order", action="store_true", help="If \ - true then we want to process a higher-order tensor. With higher-order set to true, if \ - 'tensor_type' is: \ - \n 'gen' then a 3-tensor is generated instead of matrix. \ - \n 'file' then a .tns file is read instead of a .mtx file. \ - \n 'ss' then other matrices used with SuiteSparse are .tns instead of .mtx files. \ - \n 'frostt' should always have 'higher_order' set as true.") - - parser.add_argument("--input_tensor", type=str, default=None, - help="Input tensor NAME if tensor_type is set to 'file'. \ - This is for use with SuiteSparse or FROSTT") - parser.add_argument("--input_path", type=str, default=None, help="Input tensor path") - parser.add_argument("--output_dir_path", type=str, default="./tiles", - help='Output path, directory where tiles get written to') - parser.add_argument("--hw_config", type=str, default=None, - help='Path to the hardware config yaml') - - parser.add_argument("--cotile", type=str, default=None, help='If \ - this is true cotile multiple tensors, else tile one tensor only') - parser.add_argument("--multilevel", action="store_true", help='If \ - multilevel is true there will exist more than one level of tiles, \ - else only tile once') - parser.add_argument("--seed", type=int, default=0, help="Random seed") - parser.add_argument("--other_nonempty", action="store_true", - help="If this is enabled, the 'other' tensors will have at least one nonzero value") + parser = argparse.ArgumentParser(description='Tile matrices') + parser.add_argument("--input_tensor", type=str, default=None) + parser.add_argument("--gen_tensor", action="store_false") + parser.add_argument("--cotile", type=str, default=None) + parser.add_argument("--output_dir_path", type=str, default="./tiles") + parser.add_argument("--hw_config", type=str, default=None) + parser.add_argument("--multilevel", action="store_true") + parser.add_argument("--input_path", type=str, default=None) + parser.add_argument("--extensor", action="store_true") args = parser.parse_args() - random.seed(args.seed) - np.random.seed(args.seed) - tensor = None cwd = os.getcwd() - if args.tensor_type == "gen": - if args.higher_order: - tensor = sparse.COO(sparse.random((16, 16, 16))) - else: - tensor = scipy.sparse.random(16, 16) - elif args.tensor_type == "ex": + if args.gen_tensor: + tensor = scipy.sparse.random(16, 16) + elif args.extensor: tensor = scipy.io.mmread(args.input_path) - elif args.tensor_type == "ss": + else: assert args.input_tensor is not None - tensor_path = 
os.path.join(SUITESPARSE_PATH, args.input_tensor + ".mtx") + SS_PATH = os.getenv('SUITESPARSE_PATH', default=os.path.join(cwd, 'suitesparse')) + # print("PATH:", SS_PATH) + tensor_path = os.path.join(SS_PATH, args.input_tensor + ".mtx") ss_tensor = SuiteSparseTensor(tensor_path) tensor = inputCache.load(ss_tensor, False) - elif args.tensor_type == "frostt": - assert args.input_tensor is not None - assert args.higher_order - - tensor_path = os.path.join(FROSTT_PATH, args.input_tensor + ".tns") - - # FIXME: This is broken - frostt_tensor = FrosttTensor(tensor_path) - tensor = inputCache.load(frostt_tensor, False) - - else: - raise ValueError("This choice of 'tensor_type' is unreachable") split_map = {"i": 16, "j": 16, "k": 16} @@ -474,6 +345,7 @@ def cotile_multilevel_coo(app_str, hw_config_fname, tensors, output_dir_path, hi print("TILES:") print_dict(tiles) else: + output_mtx_name = os.path.join(args.output_dir_path, args.cotile, "mtx") output_mtx_path = Path(output_mtx_name) output_mtx_path.mkdir(parents=True, exist_ok=True) @@ -482,33 +354,21 @@ def cotile_multilevel_coo(app_str, hw_config_fname, tensors, output_dir_path, hi if args.multilevel: assert args.cotile is not None cotiled_tensors = cotile_multilevel_coo(args.cotile, args.hw_config, [tensor], - os.path.join(args.output_dir_path, - args.cotile), - args.higher_order) + os.path.join(args.output_dir_path, args.cotile)) elif args.cotile is not None: tensor2 = scipy.sparse.random(tensor.shape[0], tensor.shape[1]) names, format_permutations, ivars = parse_sam_input(args.cotile) - cotiled_tensors = cotile_coo(names, [tensor, tensor2], - format_permutations, ivars, split_map, args.higher_order) + cotiled_tensors = cotile_coo(names, [tensor, tensor2], format_permutations, ivars, split_map) # print(cotiled_tensors) names = cotiled_tensors.keys() for name in names: for tile_id, tile in cotiled_tensors[name].items(): [str(item) for item in tile_id] - filename = "tensor_" + name + "_tile_" + "_".join([str(item) for item in tile_id]) - # filename += ".tns" if args.higher_order else ".mtx" - filename += ".mtx" + filename = "tensor_" + name + "_tile_" + "_".join([str(item) for item in tile_id]) + ".mtx" mtx_path_name = os.path.join(output_mtx_name, filename) print(tile) - print("Output path:", mtx_path_name) - - if args.higher_order: - # tns_dumper = PydataSparseTensorDumper() - print(tile.shape) - # tns_dumper.dump(tile, mtx_path_name) - # print(np.array(tile.todense()).reshape(1,-1)) - scipy.io.mmwrite(mtx_path_name, np.array(tile.todense()).reshape(1,-1)) - else: - scipy.io.mmwrite(mtx_path_name, tile) \ No newline at end of file + print(mtx_path_name, cwd) + scipy.io.mmwrite(mtx_path_name, tile) + print(os.path.exists(mtx_path_name)) \ No newline at end of file From 91ad37f88f71a0633e641192d4e87e9392f94498 Mon Sep 17 00:00:00 2001 From: Akhilesh Varadan Balasingam Date: Mon, 31 Jul 2023 13:37:25 -0700 Subject: [PATCH 25/39] orig tile files --- sam/sim/src/tiling/tile.py | 252 +++++++++++++++++++----- scripts/formatting/datastructure_tns.py | 2 +- 2 files changed, 204 insertions(+), 50 deletions(-) diff --git a/sam/sim/src/tiling/tile.py b/sam/sim/src/tiling/tile.py index 88be95f2..10cf469c 100644 --- a/sam/sim/src/tiling/tile.py +++ b/sam/sim/src/tiling/tile.py @@ -1,5 +1,6 @@ import numpy as np import scipy.sparse +import scipy.io import os import argparse import ast @@ -7,28 +8,33 @@ import copy import pickle import random +import sparse +import sys -from itertools import compress from pathlib import Path -import sys +from sam.util import 
SUITESPARSE_PATH, SuiteSparseTensor, InputCacheSuiteSparse, ScipyTensorShifter, \ + FROSTT_PATH, FrosttTensor, PydataSparseTensorDumper, InputCacheTensor +from sam.sim.src.tiling.process_expr import parse_all + +# FIXME: This should not be here... Set your SAM_HOME directory custom_path = '/home/avb03/sam' sys.path.append(custom_path) -from sam.util import SuiteSparseTensor, InputCacheSuiteSparse, ScipyTensorShifter -from sam.sim.src.tiling.process_expr import parse_all, update_dict - -SAM_STRS = {"matmul_kij": "X(i,j)=B(i,k)*C(k,j) -f=X:ss -f=B:ss:1,0 -f=C:ss -s=reorder(k,i,j)", +SAM_STRS = {"matmul_kij": "X(i,j)=B(i,k)*C(k,j) -f=X:ss -f=B:ss:1,0 -f=C:ss -s=reorder(k,i,j)", "matmul_ikj": "X(i,j)=B(i,k)*C(k,j) -f=X:ss -f=B:ss -f=C:ss -s=reorder(i,k,j)", "matmul_ijk": "X(i,j)=B(i,k)*C(k,j) -f=X:ss -f=B:ss -f=C:ss:1,0 -s=reorder(i,j,k)", "mat_elemadd": "X(i,j)=B(i,j)+C(i,j) -f=X:ss -f=B:ss -f=C:ss:1,0 -s=reorder(i,j,k)", "mat_elemmul": "X(i,j)=B(i,j)*C(i,j) -f=X:ss -f=B:ss -f=C:ss:1,0 -s=reorder(i,j,k)", - "mat_mattransmul": "X(i,j)=B(j,i)*c(j)+d(i) -f=X:ss -f=B:ss -f=c:ss:0 -f=d:ss:0 -s=reorder(i,j)"} + "mat_mattransmul": "X(i,j)=C(j,i)*c(j)+d(i) -f=X:ss -f=B:ss -f=c:ss:0 -f=d:ss:0 -s=reorder(i,j)", + "mat_vecmul_ij" : "X(i,j)=B(i,j)*c(j) -f=X:ss -f=B:ss -f=c:ss:0 -s=reorder(i,j)"} + def print_dict(dd): for k, v in dd.items(): print(k, ":", v) + def print_ast(node): for child in ast.iter_child_nodes(node): print_ast(child) @@ -86,14 +92,82 @@ def parse_sam_input(string): permutations = [list(map(int, dictionary[tensor]["perm"])) for tensor in tensors] ivars = get_ivars(tensors, str_arr[0]) ivars = [ivars[tensor] for tensor in tensors] + + print("PARSE SAM INPUTS", tensors) return tensors, permutations, ivars +# Outputs Pydata/sparse tensor tiles, given a pydata/sparse tensor (DOK or COO) +# ASSUME: tensor is a scipy.sparse.coo_matrix +# TODO: new_ivar_order right now is assumed to be one fixed order +# In the future, will have to take into acocunt all reorderings +def tile_tensor(tensor, ivar_map, split_map, new_ivar_order=None, tensor_name=""): + human_readable = False + + tiles = dict() + tile_sizes = dict() + order = len(tensor.shape) + + tensor_coo = sparse.COO(tensor) + tensor_points = sparse.DOK.from_coo(tensor_coo) + + print("ivar_map: ", ivar_map) + print("split_map: ", split_map) + print("order = ", order) + + new_shape = [] + for lvl in range(order): + ivar = ivar_map[lvl] + sf = split_map[ivar] + new_shape.append(sf) + + for crds, val in tensor_points.data.items(): + point = list(crds) + + new_point = [] + tile_id = [] + for lvl in range(order): + ivar = ivar_map[lvl] + sf = split_map[ivar] + + new_point.append(point[lvl] % sf) + tile_id.append(int(point[lvl] / sf)) + + # Add in value to the new_point as well + new_point.append(val) + tile_id = tuple(tile_id) + + if tile_id in tiles: + tiles[tile_id].append(new_point) + else: + tiles[tile_id] = [new_point] + + # sort the new coo lists + for key, val in tiles.items(): + if human_readable: + dok = sorted(val) + else: + dok = sparse.DOK(tuple(new_shape)) + for point in val: + dok[tuple(point[0:-1])] = point[-1] + + tiles[key] = dok + + for tile_id, tile_dok in tiles.items(): + tile = tile_dok.to_coo() + # FIXME: This size number isn't correct for tensor tiles + nonempty_rows = tile.nnz + nonempty_row_ind = np.where(nonempty_rows > 0)[0] + tile_sizes[tile_id] = tile.nnz * 2 + 2 * len(nonempty_row_ind) + 3 + + return tiles, tile_sizes + + # Outputs COO tiles, given a COO tensor # ASSUME: tensor is a scipy.sparse.coo_matrix # TODO: 
new_ivar_order right now is assumed to be one fixed order # In the future, will have to take into acocunt all reorderings -def tile_coo(tensor, ivar_map, split_map, new_ivar_order=None): +def tile_coo(tensor, ivar_map, split_map, new_ivar_order=None, tensor_name=""): human_readable = False tiles = dict() @@ -159,7 +233,7 @@ def tile_coo(tensor, ivar_map, split_map, new_ivar_order=None): # permutation_strs: list of permutation_strs [ss01, ss10] following tensor_names (from SAM) # ivar_strs: list of ivar_strs ["ik", "kj"] following tensor_names (from SAM) # split_map: dictionary of split factors (from hardware) -def cotile_coo(tensor_names, tensors, permutation_strs, ivar_strs, split_map): +def cotile_coo(tensor_names, tensors, permutation_strs, ivar_strs, split_map, higher_order=False): tiled_tensors = dict() tiled_tensor_sizes = dict() @@ -172,22 +246,25 @@ def cotile_coo(tensor_names, tensors, permutation_strs, ivar_strs, split_map): for dim in range(order): print("tensor format: ", tensor_format) print("dim is ", dim) - print("tensor_format[dim:dim+1] is ", tensor_format[dim:dim+1]) + print("tensor_format[dim:dim+1] is ", tensor_format[dim:dim + 1]) lvl_permutation = tensor_format[dim:dim + 1][0] ivar = ivar_strs[i][dim] ivar_map[lvl_permutation] = ivar print("ivar_map is ", ivar_map) - tiles, tile_sizes = tile_coo(tensor, ivar_map, split_map) + if higher_order: + tiles, tile_sizes = tile_tensor(tensor, ivar_map, split_map, tensor_name=tensor_name) + else: + tiles, tile_sizes = tile_coo(tensor, tensor_name, ivar_map, split_map, tensor_name=tensor_name) + tiled_tensors[tensor_name] = tiles tiled_tensor_sizes[tensor_name] = tile_sizes return tiled_tensors, tiled_tensor_sizes -def get_other_tensors(app_str, tensor): - tensors = [] - tensors.append(tensor) +def get_other_tensors(app_str, tensor, other_nonempty=True): + tensors = [tensor] if "matmul" in app_str: print("Writing shifted...") @@ -212,17 +289,32 @@ def get_other_tensors(app_str, tensor): pass elif "mat_mattransmul" in app_str: print("Writing other tensors...") - rows, cols = tensor.shape # i,j - tensor_c = scipy.sparse.random(cols, 1).toarray().flatten() - tensor_d = scipy.sparse.random(rows, 1).toarray().flatten() + rows, cols = tensor.shape # i,j + tensor_c = scipy.sparse.random(cols, 1, data_rvs=np.ones).toarray().flatten() + # tensor_d = scipy.sparse.random(rows, 1, density=1.0, data_rvs=np.ones).toarray().flatten() + tensor_d = scipy.sparse.random(rows, 1, data_rvs=np.ones).toarray().flatten() + + if other_nonempty: + tensor_c[0] = 1 + tensor_d[0] = 1 + + # import pdb; pdb.set_trace() tensors.append(tensor_c) tensors.append(tensor_d) elif "mat_residual" in app_str: - pass - elif "mat_vecmul" in app_str: pass + elif "mat_vecmul" in app_str: + print("Writing other tensors...") + rows, cols = tensor.shape + tensor_c = scipy.sparse.random(cols, 1, data_rvs=np.ones).toarray().flatten() + + if other_nonempty: + tensor_c[0] = 1 + + tensors.append(tensor_c) + else: tensor2 = scipy.sparse.random(tensor.shape[0], tensor.shape[1]) tensors.append(tensor2) @@ -231,12 +323,16 @@ def get_other_tensors(app_str, tensor): return tensors -def cotile_multilevel_coo(app_str, hw_config_fname, tensors, output_dir_path): +def cotile_multilevel_coo(app_str, hw_config_fname, tensors, output_dir_path, higher_order=False): tensors = get_other_tensors(app_str, tensors[0]) names, format_permutations, ivars = parse_sam_input(args.cotile) - import pdb; pdb.set_trace(); + print("cotile_multilevel_coo tensors: ", names, "\n", tensors) + + # import pdb + # 
pdb.set_trace() + sizes_dict = {} for i, name in enumerate(names): tensor = tensors[i] @@ -272,9 +368,8 @@ def cotile_multilevel_coo(app_str, hw_config_fname, tensors, output_dir_path): if cotiled is None: # First iteration of tiling - print("tensor shapes: ", tensors[0].shape, " ", tensors[1].shape, " ", tensors[2].shape) - print("format_permutations: ", format_permutations) - cotiled, cotiled_sizes = cotile_coo(names, tensors, format_permutations, ivars, split_map) + cotiled, cotiled_sizes = cotile_coo(names, tensors, format_permutations, ivars, split_map, + higher_order) else: # recursively tile the blocks new_cotiled = {} @@ -284,9 +379,13 @@ def cotile_multilevel_coo(app_str, hw_config_fname, tensors, output_dir_path): new_cotiled[name] = {} new_cotiled_sizes[name] = {} for tile_id, tile in cotiled[name].items(): - new_cotiled_temp, new_cotiled_sizes_temp = cotile_coo(name, [tile.tocoo()], + if higher_order: + tile_in_coo = tile.to_coo() + else: + tile_in_coo = tile.tocoo() + new_cotiled_temp, new_cotiled_sizes_temp = cotile_coo(name, [tile_in_coo], [format_permutations[i]], [ivars[i]], - split_map) + split_map, higher_order) for kk, vv in copy.deepcopy(new_cotiled_temp)[name].items(): new_tile_id = tuple(list(tile_id) + list(kk)) @@ -309,31 +408,68 @@ def cotile_multilevel_coo(app_str, hw_config_fname, tensors, output_dir_path): inputCache = InputCacheSuiteSparse() if __name__ == "__main__": - parser = argparse.ArgumentParser(description='Tile matrices') - parser.add_argument("--input_tensor", type=str, default=None) - parser.add_argument("--gen_tensor", action="store_false") - parser.add_argument("--cotile", type=str, default=None) - parser.add_argument("--output_dir_path", type=str, default="./tiles") - parser.add_argument("--hw_config", type=str, default=None) - parser.add_argument("--multilevel", action="store_true") - parser.add_argument("--input_path", type=str, default=None) - parser.add_argument("--extensor", action="store_true") + parser = argparse.ArgumentParser(description='script that tiles tensors') + parser.add_argument("--tensor_type", choices=['ex', 'gen', 'file', 'ss', 'frostt'], help='The \ + tiles, tile_sizes = tile_coo(tensor, ivar_map, split_map) \ + type of tensor to tile: extensor(ex), generated (gen), \ + SuiteSparse (ss), FROSTT (frostt), or input file (file)') + parser.add_argument("--higher_order", action="store_true", help="If \ + true then we want to process a higher-order tensor. With higher-order set to true, if \ + 'tensor_type' is: \ + \n 'gen' then a 3-tensor is generated instead of matrix. \ + \n 'file' then a .tns file is read instead of a .mtx file. \ + \n 'ss' then other matrices used with SuiteSparse are .tns instead of .mtx files. \ + \n 'frostt' should always have 'higher_order' set as true.") + + parser.add_argument("--input_tensor", type=str, default=None, + help="Input tensor NAME if tensor_type is set to 'file'. 
\ + This is for use with SuiteSparse or FROSTT") + parser.add_argument("--input_path", type=str, default=None, help="Input tensor path") + parser.add_argument("--output_dir_path", type=str, default="./tiles", + help='Output path, directory where tiles get written to') + parser.add_argument("--hw_config", type=str, default=None, + help='Path to the hardware config yaml') + + parser.add_argument("--cotile", type=str, default=None, help='If \ + this is true cotile multiple tensors, else tile one tensor only') + parser.add_argument("--multilevel", action="store_true", help='If \ + multilevel is true there will exist more than one level of tiles, \ + else only tile once') + parser.add_argument("--seed", type=int, default=0, help="Random seed") + parser.add_argument("--other_nonempty", action="store_true", + help="If this is enabled, the 'other' tensors will have at least one nonzero value") args = parser.parse_args() + random.seed(args.seed) + np.random.seed(args.seed) + tensor = None cwd = os.getcwd() - if args.gen_tensor: - tensor = scipy.sparse.random(16, 16) - elif args.extensor: + if args.tensor_type == "gen": + if args.higher_order: + tensor = sparse.COO(sparse.random((16, 16, 16))) + else: + tensor = scipy.sparse.random(16, 16) + elif args.tensor_type == "ex": tensor = scipy.io.mmread(args.input_path) - else: + elif args.tensor_type == "ss": assert args.input_tensor is not None - SS_PATH = os.getenv('SUITESPARSE_PATH', default=os.path.join(cwd, 'suitesparse')) - # print("PATH:", SS_PATH) - tensor_path = os.path.join(SS_PATH, args.input_tensor + ".mtx") + tensor_path = os.path.join(SUITESPARSE_PATH, args.input_tensor + ".mtx") ss_tensor = SuiteSparseTensor(tensor_path) tensor = inputCache.load(ss_tensor, False) + elif args.tensor_type == "frostt": + assert args.input_tensor is not None + assert args.higher_order + + tensor_path = os.path.join(FROSTT_PATH, args.input_tensor + ".tns") + + # FIXME: This is broken + frostt_tensor = FrosttTensor(tensor_path) + tensor = inputCache.load(frostt_tensor, False) + + else: + raise ValueError("This choice of 'tensor_type' is unreachable") split_map = {"i": 16, "j": 16, "k": 16} @@ -345,7 +481,6 @@ def cotile_multilevel_coo(app_str, hw_config_fname, tensors, output_dir_path): print("TILES:") print_dict(tiles) else: - output_mtx_name = os.path.join(args.output_dir_path, args.cotile, "mtx") output_mtx_path = Path(output_mtx_name) output_mtx_path.mkdir(parents=True, exist_ok=True) @@ -354,21 +489,40 @@ def cotile_multilevel_coo(app_str, hw_config_fname, tensors, output_dir_path): if args.multilevel: assert args.cotile is not None cotiled_tensors = cotile_multilevel_coo(args.cotile, args.hw_config, [tensor], - os.path.join(args.output_dir_path, args.cotile)) + os.path.join(args.output_dir_path, + args.cotile), + args.higher_order) elif args.cotile is not None: tensor2 = scipy.sparse.random(tensor.shape[0], tensor.shape[1]) names, format_permutations, ivars = parse_sam_input(args.cotile) - cotiled_tensors = cotile_coo(names, [tensor, tensor2], format_permutations, ivars, split_map) + cotiled_tensors = cotile_coo(names, [tensor, tensor2], + format_permutations, ivars, split_map, args.higher_order) # print(cotiled_tensors) names = cotiled_tensors.keys() for name in names: for tile_id, tile in cotiled_tensors[name].items(): [str(item) for item in tile_id] - filename = "tensor_" + name + "_tile_" + "_".join([str(item) for item in tile_id]) + ".mtx" + filename = "tensor_" + name + "_tile_" + "_".join([str(item) for item in tile_id]) + # filename += ".tns" if 
args.higher_order else ".mtx" + filename += ".mtx" mtx_path_name = os.path.join(output_mtx_name, filename) print(tile) - print(mtx_path_name, cwd) - scipy.io.mmwrite(mtx_path_name, tile) - print(os.path.exists(mtx_path_name)) \ No newline at end of file + print("Output path:", mtx_path_name) + + if args.higher_order: + # tns_dumper = PydataSparseTensorDumper() + print(tile.shape) + # print(tile) + # tns_dumper.dump(tile, mtx_path_name) + + if len(tile.shape) == 1: + # print(np.array(tile.todense()).reshape(1,-1)) + scipy.io.mmwrite(mtx_path_name, scipy.sparse.coo_matrix(tile.todense())) + else: + # print(tile.todense()) + scipy.io.mmwrite(mtx_path_name, scipy.sparse.coo_matrix(tile.todense())) + + else: + scipy.io.mmwrite(mtx_path_name, tile) \ No newline at end of file diff --git a/scripts/formatting/datastructure_tns.py b/scripts/formatting/datastructure_tns.py index be0fa6c8..db9e1951 100644 --- a/scripts/formatting/datastructure_tns.py +++ b/scripts/formatting/datastructure_tns.py @@ -10,7 +10,7 @@ from pathlib import Path from sam.util import parse_taco_format -from util import FormatWriter, SuiteSparseTensor, InputCacheSuiteSparse +from scripts.util.util import FormatWriter, SuiteSparseTensor, InputCacheSuiteSparse # custom_path = '/nobackup/jadivara/sam/sam/util.py' # sys.path.append(custom_path) # from import SUITESPARSE_FORMATTED_PATH, ScipyTensorShifter From 3947620230754b7a5c1fd20e546a9efd20e88d07 Mon Sep 17 00:00:00 2001 From: Olivia Hsu Date: Mon, 31 Jul 2023 14:39:39 -0700 Subject: [PATCH 26/39] Add in fixes to get tensor apps working --- sam/sim/src/tiling/tile.py | 49 +++++++++++++++++++++++++++----------- sam/util.py | 2 +- 2 files changed, 36 insertions(+), 15 deletions(-) diff --git a/sam/sim/src/tiling/tile.py b/sam/sim/src/tiling/tile.py index 10cf469c..ad430463 100644 --- a/sam/sim/src/tiling/tile.py +++ b/sam/sim/src/tiling/tile.py @@ -27,7 +27,8 @@ "mat_elemadd": "X(i,j)=B(i,j)+C(i,j) -f=X:ss -f=B:ss -f=C:ss:1,0 -s=reorder(i,j,k)", "mat_elemmul": "X(i,j)=B(i,j)*C(i,j) -f=X:ss -f=B:ss -f=C:ss:1,0 -s=reorder(i,j,k)", "mat_mattransmul": "X(i,j)=C(j,i)*c(j)+d(i) -f=X:ss -f=B:ss -f=c:ss:0 -f=d:ss:0 -s=reorder(i,j)", - "mat_vecmul_ij" : "X(i,j)=B(i,j)*c(j) -f=X:ss -f=B:ss -f=c:ss:0 -s=reorder(i,j)"} + "mat_vecmul_ij": "X(i,j)=B(i,j)*c(j) -f=X:ss -f=B:ss -f=c:ss:0 -s=reorder(i,j)", + "tensor3_ttv": "X(i,j)=B(i,j,k)*c(k) -f=X:ss -f=B:sss -f=c:s"} def print_dict(dd): @@ -229,7 +230,7 @@ def tile_coo(tensor, ivar_map, split_map, new_ivar_order=None, tensor_name=""): # tensor_names: list of tensor names [B,C,D] (from SAM) -# tensors: list of scipy.sparse.coo_matrix following tensor_names (from SAM) +# tensors: list of sparse COO tensors (either Scipy or Pydata/Sparse) following tensor_names (from SAM) # permutation_strs: list of permutation_strs [ss01, ss10] following tensor_names (from SAM) # ivar_strs: list of ivar_strs ["ik", "kj"] following tensor_names (from SAM) # split_map: dictionary of split factors (from hardware) @@ -237,6 +238,7 @@ def cotile_coo(tensor_names, tensors, permutation_strs, ivar_strs, split_map, hi tiled_tensors = dict() tiled_tensor_sizes = dict() + print(tensor_names, tensors, permutation_strs, ivar_strs, split_map) for i, tensor in enumerate(tensors): tensor_name = tensor_names[i] tensor_format = permutation_strs[i] @@ -245,6 +247,7 @@ def cotile_coo(tensor_names, tensors, permutation_strs, ivar_strs, split_map, hi print("order is ", order) for dim in range(order): print("tensor format: ", tensor_format) + print("dim is ", dim) 
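# A minimal worked example of the split-factor arithmetic that tile_coo and
# tile_tensor apply to every nonzero; the coordinate values below are assumed
# purely for illustration, and only the point[lvl] % sf / int(point[lvl] / sf)
# mapping mirrors those functions.
#   sf = 16                          # split_map gives 16 for each index variable here
#   point = (35, 7)                  # one nonzero coordinate of a matrix
#   tile_id = (35 // 16, 7 // 16)    # -> (2, 0): which tile the nonzero falls in
#   local   = (35 % 16, 7 % 16)      # -> (3, 7): its coordinate inside that tile
# Nonzeros that share the same tile_id are collected into the same tile, so each
# tile is at most 16x16 under this split_map.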
print("tensor_format[dim:dim+1] is ", tensor_format[dim:dim + 1]) lvl_permutation = tensor_format[dim:dim + 1][0] @@ -266,6 +269,7 @@ def cotile_coo(tensor_names, tensors, permutation_strs, ivar_strs, split_map, hi def get_other_tensors(app_str, tensor, other_nonempty=True): tensors = [tensor] + if "matmul" in app_str: print("Writing shifted...") shifted = ScipyTensorShifter().shiftLastMode(tensor) @@ -312,13 +316,22 @@ def get_other_tensors(app_str, tensor, other_nonempty=True): if other_nonempty: tensor_c[0] = 1 - + tensors.append(tensor_c) + elif "tensor3_ttv" in app_str: + print("Writing other tensors...") + size_i, size_j, size_k = tensor.shape # i,j,k + tensor_c = scipy.sparse.random(size_k, 1, data_rvs=np.ones).toarray().flatten() + + if other_nonempty: + tensor_c[0] = 1 + + tensors.append(tensor_c) else: - tensor2 = scipy.sparse.random(tensor.shape[0], tensor.shape[1]) - tensors.append(tensor2) - # raise NotImplementedError + # tensor2 = scipy.sparse.random(tensor.shape[0], tensor.shape[1]) + # tensors.append(tensor2) + raise NotImplementedError return tensors @@ -405,7 +418,8 @@ def cotile_multilevel_coo(app_str, hw_config_fname, tensors, output_dir_path, hi print(exc) -inputCache = InputCacheSuiteSparse() +inputCacheSuiteSparse = InputCacheSuiteSparse() +inputCacheTensor = InputCacheTensor() if __name__ == "__main__": parser = argparse.ArgumentParser(description='script that tiles tensors') @@ -446,6 +460,8 @@ def cotile_multilevel_coo(app_str, hw_config_fname, tensors, output_dir_path, hi tensor = None cwd = os.getcwd() + inputCache = None + if args.tensor_type == "gen": if args.higher_order: tensor = sparse.COO(sparse.random((16, 16, 16))) @@ -455,6 +471,8 @@ def cotile_multilevel_coo(app_str, hw_config_fname, tensors, output_dir_path, hi tensor = scipy.io.mmread(args.input_path) elif args.tensor_type == "ss": assert args.input_tensor is not None + + inputCache = inputCacheSuiteSparse tensor_path = os.path.join(SUITESPARSE_PATH, args.input_tensor + ".mtx") ss_tensor = SuiteSparseTensor(tensor_path) tensor = inputCache.load(ss_tensor, False) @@ -462,6 +480,7 @@ def cotile_multilevel_coo(app_str, hw_config_fname, tensors, output_dir_path, hi assert args.input_tensor is not None assert args.higher_order + inputCache = inputCacheTensor tensor_path = os.path.join(FROSTT_PATH, args.input_tensor + ".tns") # FIXME: This is broken @@ -512,12 +531,14 @@ def cotile_multilevel_coo(app_str, hw_config_fname, tensors, output_dir_path, hi print("Output path:", mtx_path_name) if args.higher_order: - # tns_dumper = PydataSparseTensorDumper() - print(tile.shape) - # print(tile) - # tns_dumper.dump(tile, mtx_path_name) - - if len(tile.shape) == 1: + if args.tensor_type == "frostt": + tns_dumper = PydataSparseTensorDumper() + print(tile.shape) + print(tile) + tns_dumper.dump(tile, mtx_path_name) + + # FIXME: (owhsu) Why did avb03 add this in? 
+ elif len(tile.shape) == 1: # print(np.array(tile.todense()).reshape(1,-1)) scipy.io.mmwrite(mtx_path_name, scipy.sparse.coo_matrix(tile.todense())) else: @@ -525,4 +546,4 @@ def cotile_multilevel_coo(app_str, hw_config_fname, tensors, output_dir_path, hi scipy.io.mmwrite(mtx_path_name, scipy.sparse.coo_matrix(tile.todense())) else: - scipy.io.mmwrite(mtx_path_name, tile) \ No newline at end of file + scipy.io.mmwrite(mtx_path_name, tile) diff --git a/sam/util.py b/sam/util.py index b92f8052..fbe308c9 100644 --- a/sam/util.py +++ b/sam/util.py @@ -534,7 +534,7 @@ def __init__(self): self.lastName = None self.tensor = None - def load(self, tensor, suiteSparse, cast, format_str): + def load(self, tensor, cast): if self.lastName == str(tensor): return self.tensor else: From 870205dcbaa3d7216ccefc26070a05782ea4c507 Mon Sep 17 00:00:00 2001 From: Olivia Hsu Date: Wed, 9 Aug 2023 13:59:14 -0700 Subject: [PATCH 27/39] Change suitesparse file formatting --- scripts/datastructure_suitesparse.py | 281 ++++++++++++++++++ .../formatting/datastructure_suitesparse.py | 2 +- 2 files changed, 282 insertions(+), 1 deletion(-) create mode 100644 scripts/datastructure_suitesparse.py diff --git a/scripts/datastructure_suitesparse.py b/scripts/datastructure_suitesparse.py new file mode 100644 index 00000000..cb24ec39 --- /dev/null +++ b/scripts/datastructure_suitesparse.py @@ -0,0 +1,281 @@ +import argparse +import os +import shutil +import scipy.sparse +import numpy as np + +from pathlib import Path + +from util import FormatWriter, SuiteSparseTensor, InputCacheSuiteSparse +from sam.util import SUITESPARSE_FORMATTED_PATH, ScipyTensorShifter + +all_formats = ["coo", "cooT", "csr", "dcsr", "dcsc", "csc", "dense", "denseT"] +formats = ["coo", "cooT", "csr", "dcsr", "dcsc", "csc", "dense"] +scipy_formats = ["coo", "csr", "csc"] + + +def write_datastructure_tiles(args, tensor, out_path, tile_name): + print("Writing " + args.name + " for test " + args.benchname + "...") + + dirname = args.output_dir_path if args.output_dir_path is not None else os.path.join(out_path, args.name, args.benchname) + dirname = os.path.join(dirname, tile_name) + dirpath = Path(dirname) + if os.path.exists(dirpath): + shutil.rmtree(dirpath) + dirpath.mkdir(parents=True, exist_ok=True, mode=0o777) + + print(tile_name) + tensorname = tile_name.split("_")[1] + + coo = inputCache.load(tensor, False) + formatWriter.writeout_separate_sparse_only(coo, dirname, tensorname, format_str="ss01", hw=False) + + +def write_datastructure_bench(args, tensor, out_path, tiles=None): + shifter = ScipyTensorShifter() + + print("Writing " + args.name + " for test " + args.benchname + "...") + + dirname = args.output_dir_path if args.output_dir_path is not None else os.path.join(out_path, args.name, args.benchname) + if tiles is not None: + dirname = os.path.join(dirname, tiles) + dirpath = Path(dirname) + if os.path.exists(dirpath): + shutil.rmtree(dirpath) + dirpath.mkdir(parents=True, exist_ok=True, mode=0o777) + + if "mat_mattransmul" in args.benchname or "mat_residual" in args.benchname: + tensorname = "C" + else: + tensorname = "B" + + coo = inputCache.load(tensor, False) + shape = coo.shape + + # These benchmarks need format_str == "ss10" + if args.benchname not in ["matmul_kij", "matmul_kji", "matmul_jki", "mat_vecmul", "mat_vecmul_ji", "mat_mattransmul"]: + formatWriter.writeout_separate_sparse_only(coo, dirname, tensorname, format_str="ss01") + + if "matmul_ijk" in args.benchname: + shifted = shifter.shiftLastMode(coo) + + print("Writing " + 
args.name + " shifted and transposed...") + tensorname = "C" + trans_shifted = shifted.transpose() + formatWriter.writeout_separate_sparse_only(trans_shifted, dirname, tensorname, format_str="ss10") + + elif "matmul_jik" in args.benchname: + shifted = shifter.shiftLastMode(coo) + + print("Writing " + args.name + " shifted and transposed...") + tensorname = "C" + trans_shifted = shifted.transpose() + formatWriter.writeout_separate_sparse_only(trans_shifted, dirname, tensorname, format_str="ss10") + elif "matmul_ikj" in args.benchname: + shifted = shifter.shiftLastMode(coo) + + print("Writing " + args.name + " shifted and transposed...") + tensorname = "C" + trans_shifted = shifted.transpose() + formatWriter.writeout_separate_sparse_only(trans_shifted, dirname, tensorname, format_str="ss01") + + elif "matmul_jki" in args.benchname: + formatWriter.writeout_separate_sparse_only(coo, dirname, tensorname, format_str="ss10") + + shifted = shifter.shiftLastMode(coo) + + print("Writing " + args.name + " shifted and transposed...") + tensorname = "C" + trans_shifted = shifted.transpose() + formatWriter.writeout_separate_sparse_only(trans_shifted, dirname, tensorname, format_str="ss10") + + elif "matmul_kij" in args.benchname: + formatWriter.writeout_separate_sparse_only(coo, dirname, tensorname, format_str="ss10") + + shifted = shifter.shiftLastMode(coo) + + print("Writing " + args.name + " shifted and transposed...") + tensorname = "C" + trans_shifted = shifted.transpose() + formatWriter.writeout_separate_sparse_only(trans_shifted, dirname, tensorname, format_str="ss01") + + elif "matmul_kji" in args.benchname: + formatWriter.writeout_separate_sparse_only(coo, dirname, tensorname, format_str="ss10") + + shifted = shifter.shiftLastMode(coo) + + print("Writing " + args.name + " shifted and transposed...") + tensorname = "C" + trans_shifted = shifted.transpose() + formatWriter.writeout_separate_sparse_only(trans_shifted, dirname, tensorname, format_str="ss01") + + elif "mat_elemadd3" in args.benchname: + print("Writing " + args.name + " shifted...") + tensorname = "C" + shifted = shifter.shiftLastMode(coo) + formatWriter.writeout_separate_sparse_only(shifted, dirname, tensorname, format_str="ss01") + + print("Writing " + args.name + " shifted2...") + tensorname = "D" + shifted2 = shifter.shiftLastMode(shifted) + formatWriter.writeout_separate_sparse_only(shifted2, dirname, tensorname, format_str="ss01") + + elif "mat_elemadd" in args.benchname or "mat_elemmul" in args.benchname: + print("Writing " + args.name + " shifted...") + tensorname = "C" + shifted = shifter.shiftLastMode(coo) + formatWriter.writeout_separate_sparse_only(shifted, dirname, tensorname, format_str="ss01") + + elif "mat_mattransmul" in args.benchname: + formatWriter.writeout_separate_sparse_only(coo, dirname, tensorname, format_str="ss10") + if not args.no_gen_other: + tensorname = 'd' + vec = scipy.sparse.random(shape[0], 1, density=args.density, data_rvs=np.ones) + vec = vec.toarray().flatten() + formatWriter.writeout_separate_vec(vec, dirname, tensorname) + + tensorname = 'f' + vec = scipy.sparse.random(shape[1], 1, density=args.density, data_rvs=np.ones) + vec = vec.toarray().flatten() + formatWriter.writeout_separate_vec(vec, dirname, tensorname) + elif "mat_vecmul" == args.benchname or "mat_vecmul_ji" in args.benchname: + formatWriter.writeout_separate_sparse_only(coo, dirname, tensorname, format_str="ss10") + if not args.no_gen_other: + tensorname = 'c' + vec = scipy.sparse.random(shape[1], 1, density=args.density, 
data_rvs=np.ones)
+            vec = vec.toarray().flatten()
+            formatWriter.writeout_separate_vec(vec, dirname, tensorname)
+    elif "mat_vecmul_ij" in args.benchname:
+        pass
+    elif "mat_sddmm" in args.benchname:
+        pass
+    elif "mat_residual" in args.benchname:
+        if not args.no_gen_other:
+            tensorname = 'b'
+            vec = scipy.sparse.random(shape[0], 1, density=args.density, data_rvs=np.ones)
+            vec = vec.toarray().flatten()
+            formatWriter.writeout_separate_vec(vec, dirname, tensorname)
+
+            tensorname = 'd'
+            vec = scipy.sparse.random(shape[1], 1, density=args.density, data_rvs=np.ones)
+            vec = vec.toarray().flatten()
+            formatWriter.writeout_separate_vec(vec, dirname, tensorname)
+    elif "mat_identity" in args.benchname:
+        pass
+    else:
+        raise NotImplementedError
+
+
+parser = argparse.ArgumentParser(description="Process some suitesparse matrices into per-level datastructures")
+parser.add_argument('-n', '--name', metavar='ssname', type=str, action='store', help='tensor name to run format '
+                                                                                     'conversion on one SS tensor')
+parser.add_argument('-f', '--format', metavar='ssformat', type=str, action='store', help='The format that the tensor '
+                                                                                         'should be converted to')
+parser.add_argument('-comb', '--combined', action='store_true', default=False, help='Whether the formatted datastructures '
+                                                                                    'should be in separate files')
+parser.add_argument('-o', '--omit-dense', action='store_true', default=False, help='Do not create fully dense format')
+parser.add_argument('-cast', '--cast', action='store_true', default=False, help='Safe sparsity cast to int for values')
+parser.add_argument('-hw', '--hw', action='store_true', default=False,
+                    help='Only generate formats used for hardware testing (all sparse '
+                         'levels, concordant)')
+parser.add_argument('-b', '--benchname', type=str, default=None, help='test name to run format '
+                                                                      'conversion on')
+parser.add_argument('--input_path', type=str, default=None)
+parser.add_argument('--output_dir_path', type=str, default=None)
+parser.add_argument('--tiles', action='store_true')
+parser.add_argument('--no_gen_other', action='store_true', help="Whether this "
+                                                                "script should generate the random 'other' tensors")
+parser.add_argument('--seed', type=int, default=0, help='Random seed needed for gen_other')
+parser.add_argument('--density', type=float, default=0.25, help='If gen_other, used for density of "other" tensor')
+args = parser.parse_args()
+
+np.random.seed(args.seed)
+
+inputCache = InputCacheSuiteSparse()
+formatWriter = FormatWriter(args.cast)
+
+cwd = os.getcwd()
+if args.output_dir_path is None:
+    out_dirname = SUITESPARSE_FORMATTED_PATH
+else:
+    out_dirname = args.output_dir_path
+
+out_path = Path(out_dirname)
+out_path.mkdir(parents=True, exist_ok=True, mode=0o777)
+
+if args.name is None:
+    print("Please enter a matrix name")
+    exit()
+
+if args.input_path is None:
+    SS_PATH = os.getenv('SUITESPARSE_TENSOR_PATH', default=os.path.join(cwd, 'suitesparse'))
+
+else:
+    SS_PATH = args.input_path
+
+tensor = None
+mtx_files = None
+if args.tiles:
+    # get all mtx tile files from args.input_path
+    mtx_files = [os.path.join(args.input_path, fname) for fname in os.listdir(args.input_path) if fname.endswith(".mtx")]
+
+    tensor = [SuiteSparseTensor(mtx_file) for mtx_file in mtx_files]
+elif args.input_path is not None:
+    tensor = SuiteSparseTensor(args.input_path)
+else:
+    print(SS_PATH)
+    tensor = SuiteSparseTensor(SS_PATH)
+
+if args.format is not None:
+    assert args.format in formats
+    filename = os.path.join(out_path, args.name + "_" + args.format + ".txt")
+
+    coo = inputCache.load(tensor, False)
+    formatWriter.writeout(coo, args.format, filename)
+elif args.combined:
+    for format_str in formats:
+        filename = os.path.join(out_path, args.name + "_" + format_str + ".txt")
+        print("Writing " + args.name + " " + format_str + "...")
+
+        coo = inputCache.load(tensor, False)
+        formatWriter.writeout(coo, format_str, filename)
+
+        shifted_filename = os.path.join(out_path, args.name + "_shifted_" + format_str + ".txt")
+        shifted = ScipyTensorShifter().shiftLastMode(coo)
+        formatWriter.writeout(shifted, format_str, shifted_filename)
+
+        trans_filename = os.path.join(out_path, args.name + "_trans_shifted_" + format_str + ".txt")
+        trans_shifted = shifted.transpose()
+        formatWriter.writeout(trans_shifted, format_str, trans_filename)
+elif args.hw:
+    if args.tiles and tensor is not None:
+        for i, ten in enumerate(tensor):
+            tile_name = os.path.split(mtx_files[i])[1].split(".")[0]
+            write_datastructure_tiles(args, ten, out_path, tile_name)
+    else:
+        write_datastructure_bench(args, tensor, out_path)
+
+else:
+    print("Writing " + args.name + " original...")
+    dirname = os.path.join(out_path, args.name, "orig")
+    dirpath = Path(dirname)
+    dirpath.mkdir(parents=True, exist_ok=True, mode=0o777)
+    tensorname = "B"
+    coo = inputCache.load(tensor, False)
+    formatWriter.writeout_separate(coo, dirname, tensorname, omit_dense=args.omit_dense)
+
+    print("Writing " + args.name + " shifted...")
+    dirname = os.path.join(out_path, args.name, "shift")
+    dirpath = Path(dirname)
+    dirpath.mkdir(parents=True, exist_ok=True, mode=0o777)
+    tensorname = "C"
+    shifted = ScipyTensorShifter().shiftLastMode(coo)
+    formatWriter.writeout_separate(shifted, dirname, tensorname, omit_dense=args.omit_dense)
+
+    print("Writing " + args.name + " shifted and transposed...")
+    dirname = os.path.join(out_path, args.name, "shift-trans")
+    dirpath = Path(dirname)
+    dirpath.mkdir(parents=True, exist_ok=True, mode=0o777)
+    tensorname = "C"
+    trans_shifted = shifted.transpose()
+    formatWriter.writeout_separate(trans_shifted, dirname, tensorname, omit_dense=args.omit_dense)
diff --git a/scripts/formatting/datastructure_suitesparse.py b/scripts/formatting/datastructure_suitesparse.py
index 823e8e13..9a703034 100644
--- a/scripts/formatting/datastructure_suitesparse.py
+++ b/scripts/formatting/datastructure_suitesparse.py
@@ -28,7 +28,7 @@ def write_datastructure_tiles(args, tensor, out_path, tile_name):
     tensorname = tile_name.split("_")[1]
 
     coo = inputCache.load(tensor, False)
-    formatWriter.writeout_separate_sparse_only(coo, dirname, tensorname, format_str="ss01", hw=False)
+    formatWriter.writeout_separate_sparse_only(coo, dirname, tensorname, format_str="ss01", hw=args.hw)
 
 
 def write_datastructure_bench(args, tensor, out_path, tiles=None):

From c3eb5382bc76d36d30b6d03ba884b77f3bb377cc Mon Sep 17 00:00:00 2001
From: Akhilesh Varadan Balasingam
Date: Fri, 18 Aug 2023 18:14:44 -0700
Subject: [PATCH 28/39] current matrix tiling flow

---
 count_nnz_tiling.py                          |  42 ++
 find_max_tilesize.py                         |  75 +++
 sam/sim/src/tiling/tile.py                   |  36 +-
 .../formatting/datastructure_suitesparse.py  |   2 +
 scripts/formatting/datastructure_tns.py      |  10 +-
 scripts/generate_frostt_formats_onyx.sh      |  59 +++
 scripts/suitesparse_memory_model_runner.sh   |   2 +-
 scripts/tiling/generate_gold_mattransmul.py  | 174 +++++++
 scripts/tiling/prepare_files.sh              |   6 +-
 scripts/tiling/tile_ext.sh                   |   8 +-
 setup_tiling_mat.py                          | 175 ++++++-
 setup_tiling_tensors.py                      |  23 +
 tile_pairing.py                              | 437 ++++++++++++++++++
 13 files changed, 1020 insertions(+), 29 deletions(-)
 create
mode 100644 count_nnz_tiling.py create mode 100644 find_max_tilesize.py create mode 100755 scripts/generate_frostt_formats_onyx.sh create mode 100644 scripts/tiling/generate_gold_mattransmul.py create mode 100644 setup_tiling_tensors.py create mode 100644 tile_pairing.py diff --git a/count_nnz_tiling.py b/count_nnz_tiling.py new file mode 100644 index 00000000..c98bdd3a --- /dev/null +++ b/count_nnz_tiling.py @@ -0,0 +1,42 @@ +import glob +def count_nonzeros(matrix_values_file): + with open(matrix_values_file, 'r') as values_file: + matrix_values = [float(val) for val in values_file.readlines()] + + nonzeros = sum(1 for val in matrix_values if val != 0) + + return nonzeros + + +tile_dirs = glob.glob("SPARSE_TESTS/MAT_TMP_DIR/tile*") +num_tiles = len(tile_dirs) +print("there are ", num_tiles, "tiles") +limit = 1000 + +tot_num_nonzeros = 0 +for tile_num in range(0,num_tiles): + tensor_C_values_file = f'SPARSE_TESTS/MAT_TMP_DIR/tile{tile_num}/tensor_C_mode_vals' + + num_nonzeros = count_nonzeros(tensor_C_values_file) + if num_nonzeros >= limit: + print("error! too many nonzeros in tensorC, tile", tile_num) + # raise Exception + +# tot_num_nonzeros += num_nonzeros + +# average_num_nonzeros = tot_num_nonzeros / 9 +# print("for matrix C, the average number of non-zero values is", average_num_nonzeros) + +tot_num_nonzeros = 0 + +for tile_num in range(0,num_tiles): + tensor_C_values_file = f'SPARSE_TESTS/MAT_TMP_DIR/tile{tile_num}/tensor_B_mode_vals' + + num_nonzeros = count_nonzeros(tensor_C_values_file) + if num_nonzeros >= limit: + print("error! too many nonzeros in tensorB, tile", tile_num) + # raise Exception +# tot_num_nonzeros += num_nonzeros + +# average_num_nonzeros = tot_num_nonzeros / 6 +# print("for matrix B, the average number of non-zero values is", average_num_nonzeros) diff --git a/find_max_tilesize.py b/find_max_tilesize.py new file mode 100644 index 00000000..8eb96038 --- /dev/null +++ b/find_max_tilesize.py @@ -0,0 +1,75 @@ +import os +import sys +import glob + +def write_to_line(file_path, line_number, new_content): + with open(file_path, 'r') as file: + lines = file.readlines() + + if line_number > len(lines) or line_number < 1: + # Line number is out of range + return + + lines[line_number - 1] = new_content + '\n' + + with open(file_path, 'w') as file: + file.writelines(lines) + +def check_keyword_in_output(command, keyword): + # Run the command and redirect the output to a file + os.system(f'{command} > output.txt') + + # Read the contents of the file + with open('output.txt', 'r') as file: + output = file.read() + + # Check if the keyword is present in the output + if keyword in output: + # Optionally, you can delete the output file + os.remove('output.txt') + return True + else: + # Optionally, you can delete the output file + os.remove('output.txt') + return False + + +tile_size = 450 +step = 10 + +for _ in range(20): + print("********************") + print("tile size: ", tile_size) + print("step: ", step) + + yaml_file = "sam/sim/src/tiling/memory_config_onyx.yaml" + mem_tile_line = f"Mem_tile_size: {tile_size}" + print(mem_tile_line) + write_to_line(yaml_file, 19, mem_tile_line) + + run_setup_script = "python3 setup_tiling_mat.py > temp.txt" + os.system(run_setup_script) + print(run_setup_script) + + run_tile_pairing = "python3 tile_pairing.py > temp.txt" + os.system(run_tile_pairing) + print(run_tile_pairing) + + run_count = "python3 count_nnz_tiling.py" + print(run_count) + + if (check_keyword_in_output(run_count, "error")) == False: + tile_size += step + step *= 2 + 
else: + print("****************Tile broken!") + tile_size -= step + step //= 2 + + if step == 0: + if _ >= 15: + step = 10 + else: + break + +print("max tile size: ", tile_size) diff --git a/sam/sim/src/tiling/tile.py b/sam/sim/src/tiling/tile.py index ad430463..76dc1dde 100644 --- a/sam/sim/src/tiling/tile.py +++ b/sam/sim/src/tiling/tile.py @@ -26,9 +26,10 @@ "matmul_ijk": "X(i,j)=B(i,k)*C(k,j) -f=X:ss -f=B:ss -f=C:ss:1,0 -s=reorder(i,j,k)", "mat_elemadd": "X(i,j)=B(i,j)+C(i,j) -f=X:ss -f=B:ss -f=C:ss:1,0 -s=reorder(i,j,k)", "mat_elemmul": "X(i,j)=B(i,j)*C(i,j) -f=X:ss -f=B:ss -f=C:ss:1,0 -s=reorder(i,j,k)", - "mat_mattransmul": "X(i,j)=C(j,i)*c(j)+d(i) -f=X:ss -f=B:ss -f=c:ss:0 -f=d:ss:0 -s=reorder(i,j)", - "mat_vecmul_ij": "X(i,j)=B(i,j)*c(j) -f=X:ss -f=B:ss -f=c:ss:0 -s=reorder(i,j)", - "tensor3_ttv": "X(i,j)=B(i,j,k)*c(k) -f=X:ss -f=B:sss -f=c:s"} + "mat_mattransmul": "X(i,j)=B(j,i)*c(j)+d(i) -f=X:ss -f=B:ss -f=c:ss:0 -f=d:ss:0 -s=reorder(i,j)", + "mat_vecmul_ij" : "X(i,j)=B(i,j)*c(j) -f=X:ss -f=B:ss -f=c:ss:0 -s=reorder(i,j)", + "mat_residual": "X(i,j)=b(i)-C(i,j)*d(j) -f=X:ss -f=C:ss -f=b:ss:0 -f=d:ss:0 -s=reorder(i,j)", + "mat_sddmm": "X(i,j)=B(i,j)*C(i,k)*D(k,j) -f=X:ss -f=B:ss -f=C:ss -f=D:ss -s=reorder(i,j,k)"} def print_dict(dd): @@ -87,9 +88,11 @@ def parse_sam_input(string): str_arr = sam_str.split(" ") dictionary = parse_all(str_arr, has_quotes=False) + print("dictionary is: ", dictionary) # Assume there are no repeat tensors... tensors = dictionary["rhs_tensors"] + print("tensors are: ", tensors) permutations = [list(map(int, dictionary[tensor]["perm"])) for tensor in tensors] ivars = get_ivars(tensors, str_arr[0]) ivars = [ivars[tensor] for tensor in tensors] @@ -250,6 +253,7 @@ def cotile_coo(tensor_names, tensors, permutation_strs, ivar_strs, split_map, hi print("dim is ", dim) print("tensor_format[dim:dim+1] is ", tensor_format[dim:dim + 1]) + print("tensor name is ", tensor_name) lvl_permutation = tensor_format[dim:dim + 1][0] ivar = ivar_strs[i][dim] ivar_map[lvl_permutation] = ivar @@ -290,7 +294,15 @@ def get_other_tensors(app_str, tensor, other_nonempty=True): tensors.append(shifted) elif "mat_sddmm" in app_str: - pass + print("Writing other tensors, shifted...") + print("Writing shifted...") + shifted = ScipyTensorShifter().shiftLastMode(tensor) + tensors.append(shifted) + + print("Writing shifted2...") + shifted2 = ScipyTensorShifter().shiftLastMode(shifted) + tensors.append(shifted2) + elif "mat_mattransmul" in app_str: print("Writing other tensors...") rows, cols = tensor.shape # i,j @@ -308,7 +320,18 @@ def get_other_tensors(app_str, tensor, other_nonempty=True): tensors.append(tensor_d) elif "mat_residual" in app_str: - pass + print("Writing other tensors...") + rows, cols = tensor.shape + tensor_b = scipy.sparse.random(rows, 1, data_rvs=np.ones).toarray().flatten() + tensor_d = scipy.sparse.random(cols, 1, data_rvs=np.ones).toarray().flatten() + + if other_nonempty: + tensor_b[0] = 1 + tensor_d[0] = 1 + + tensors.insert(0, tensor_b) + tensors.append(tensor_d) + elif "mat_vecmul" in app_str: print("Writing other tensors...") rows, cols = tensor.shape @@ -536,10 +559,11 @@ def cotile_multilevel_coo(app_str, hw_config_fname, tensors, output_dir_path, hi print(tile.shape) print(tile) tns_dumper.dump(tile, mtx_path_name) - # FIXME: (owhsu) Why did avb03 add this in? 
elif len(tile.shape) == 1: + real_shape = tile.shape[0] # print(np.array(tile.todense()).reshape(1,-1)) + # scipy.io.mmwrite(mtx_path_name, scipy.sparse.coo_matrix(tile.todense()).reshape((real_shape,1))) scipy.io.mmwrite(mtx_path_name, scipy.sparse.coo_matrix(tile.todense())) else: # print(tile.todense()) diff --git a/scripts/formatting/datastructure_suitesparse.py b/scripts/formatting/datastructure_suitesparse.py index 823e8e13..373bf81e 100644 --- a/scripts/formatting/datastructure_suitesparse.py +++ b/scripts/formatting/datastructure_suitesparse.py @@ -29,6 +29,8 @@ def write_datastructure_tiles(args, tensor, out_path, tile_name): coo = inputCache.load(tensor, False) formatWriter.writeout_separate_sparse_only(coo, dirname, tensorname, format_str="ss01", hw=False) + # formatWriter.writeout_separate_sparse_only(coo, dirname, tensorname, format_str="ss01", hw=args.hw) + def write_datastructure_bench(args, tensor, out_path, tiles=None): diff --git a/scripts/formatting/datastructure_tns.py b/scripts/formatting/datastructure_tns.py index db9e1951..cb335f9b 100644 --- a/scripts/formatting/datastructure_tns.py +++ b/scripts/formatting/datastructure_tns.py @@ -90,7 +90,7 @@ outdir_orig_path.mkdir(parents=True, exist_ok=True) taco_format_orig_filename = "/home/avb03/sam/FROST_FORMATTED_TACO/" + args.name + "_" + levels + '.txt' - parse_taco_format(taco_format_orig_filename, outdir_orig_name, 'B', args.format, hw_filename=args.hw) + parse_taco_format(taco_format_orig_filename, outdir_orig_name, 'B', args.format) #Need this line? formatWriter.writeout_separate_sparse_only(coo, dirname, tensorname, format_str="ss10") file_path_name = os.path.join(outdir_orig_name, "tensor_B_mode_shape") file1 = open(file_path_name, 'r') @@ -122,7 +122,7 @@ outdir_orig_path.mkdir(parents=True, exist_ok=True) taco_format_orig_filename = "/home/avb03/sam/FROST_FORMATTED_TACO/" + args.name + "_" + levels + '.txt' - parse_taco_format(taco_format_orig_filename, outdir_orig_name, 'B', args.format, hw_filename=args.hw) + parse_taco_format(taco_format_orig_filename, outdir_orig_name, 'B', args.format) #Need this line? 
formatWriter.writeout_separate_sparse_only(coo, dirname, tensorname, format_str="ss10") file_path_name = os.path.join(outdir_orig_name, "tensor_B_mode_shape") file1 = open(file_path_name, 'r') @@ -156,7 +156,7 @@ outdir_orig_path.mkdir(parents=True, exist_ok=True) taco_format_orig_filename = "/home/avb03/sam/FROST_FORMATTED_TACO/" + args.name + "_" + levels + '.txt' - parse_taco_format(taco_format_orig_filename, outdir_orig_name, 'B', args.format, hw_filename=args.hw) + parse_taco_format(taco_format_orig_filename, outdir_orig_name, 'B', args.format) file_path_name = os.path.join(outdir_orig_name, "tensor_B_mode_shape") file1 = open(file_path_name, 'r') @@ -201,7 +201,7 @@ outdir_orig_path.mkdir(parents=True, exist_ok=True) taco_format_orig_filename = "/home/avb03/sam/FROST_FORMATTED_TACO/" + args.name + "_" + levels + '.txt' - parse_taco_format(taco_format_orig_filename, outdir_orig_name, 'B', args.format, hw_filename=args.hw) + parse_taco_format(taco_format_orig_filename, outdir_orig_name, 'B', args.format) # Shifted if args.shift: @@ -210,4 +210,4 @@ outdir_shift_path.mkdir(parents=True, exist_ok=True) taco_format_shift_filename = "/home/avb03/sam/FROST_FORMATTED_TACO/" + args.name + "_shift_" + levels + '.txt' - parse_taco_format(taco_format_shift_filename, outdir_shift_name, 'C', args.format, hw_filename=args.hw) + parse_taco_format(taco_format_shift_filename, outdir_shift_name, 'C', args.format) diff --git a/scripts/generate_frostt_formats_onyx.sh b/scripts/generate_frostt_formats_onyx.sh new file mode 100755 index 00000000..9a84bde2 --- /dev/null +++ b/scripts/generate_frostt_formats_onyx.sh @@ -0,0 +1,59 @@ +#!/bin/bash +#SBATCH -N 1 +#SBATCH -t 360 + +# ./scripts/generate_frostt_formats_onyx.sh + +FORMATS=( + sss012 +) + +BENCHMARKS=( + #using all tensor apps except elemmul here** + # tensor3_elemadd + # tensor3_innerprod + tensor3_ttv + # tensor3_elemmul + # tensor3_mttkrp + # tensor3_ttm + # using tensor3_ttm +) + +# OTHERBENCHES='["tensor3_ttv"]' +# export SUITESPARSE_PATH=/nobackup/owhsu/sparse-datasets/suitesparse/ +# export FROSTT_PATH=/nobackup/owhsu/sparse-datasets/frostt/ +# export SUITESPARSE_FORMATTED_PATH=/nobackup/owhsu/sparse-datasets/suitesparse-formatted +# export FROSTT_FORMATTED_TACO_PATH=/nobackup/owhsu/sparse-datasets/frostt-formatted/taco-tensor +# export FROSTT_FORMATTED_PATH=/nobackup/owhsu/sparse-datasets/frostt-formatted + +export SUITESPARSE_PATH=/nobackup/owhsu/sparse-datasets/suitesparse/ +export FROSTT_PATH=/home/avb03/sparse-datasets/tensors +export SUITESPARSE_FORMATTED_PATH=/home/avb03/sam/SUITESPARSE_FORMATTED +export FROSTT_FORMATTED_TACO_PATH=/home/avb03/sam/FROST_FORMATTED_TACO +export FROSTT_FORMATTED_PATH=/home/avb03/sam/FROST_FORMATTED +export TACO_TENSOR_PATH=/home/avb03/sam/TACO_TENSOR + +basedir=$(pwd) + +for i in ${!FORMATS[@]}; do + format=${FORMATS[@]}; + echo "Generating files for format $format..." + + $basedir/compiler/taco/build/bin/taco-test sam.pack_$format + $basedir/compiler/taco/build/bin/taco-test sam.pack_other_frostt + for b in ${!BENCHMARKS[@]}; do + bench=${BENCHMARKS[$b]} + while read line; do + + name=$line + echo "Generating input format files for $name..." 
+ python3 $basedir/scripts/formatting/datastructure_tns.py -n $name -f $format -b $bench -hw + python3 $basedir/scripts/formatting/datastructure_tns.py -n $name -f $format --other -b $bench -hw + # if [[ $OTHERBENCHES =~ "$bench" ]]; then + # echo "Generating format of 'other' tensor" + # python3 $basedir/scripts/datastructure_tns_old.py -n $line -f ss01 --other -ss -b $bench -hw + # fi + chmod -R 775 $FROSTT_FORMATTED_PATH + done <$1 + done +done diff --git a/scripts/suitesparse_memory_model_runner.sh b/scripts/suitesparse_memory_model_runner.sh index d2873266..e715a508 100755 --- a/scripts/suitesparse_memory_model_runner.sh +++ b/scripts/suitesparse_memory_model_runner.sh @@ -31,7 +31,7 @@ mkdir -p $path mkdir -p $basedir/tiles/ rm -rf $basedir/tiles/* -./scripts/tiling/prepare_files.sh $fname.mtx $yaml_fname +./scripts/tiling/prepare_files.sh $fname.mtx $yaml_fname $fname cd $basedir/sam/sim # python3 -m pytest test/advanced-simulator/test_$bench.py --ssname $fname -s --check-gold --skip-empty --nbuffer --yaml_name=$yaml_fname --benchmark-json=$path/mem_model_$fname.json diff --git a/scripts/tiling/generate_gold_mattransmul.py b/scripts/tiling/generate_gold_mattransmul.py new file mode 100644 index 00000000..b044a949 --- /dev/null +++ b/scripts/tiling/generate_gold_mattransmul.py @@ -0,0 +1,174 @@ +import scipy +import scipy.sparse +import os +import scipy.io +import numpy as np +import yaml +import math +import pickle +import argparse + +from pathlib import Path +from scripts.util.util import round_sparse + +def generate_gold_mattransmul_tiled(tile_crd_b, tile_crd_c, tile_crd_d, dirname, out_format="ss01"): + # CSR + formatted_dir = f"./tiles/mat_mattransmul/mtx" + + B_dir = "tensor_B_tile_" + for a in tile_crd_b: + B_dir += str(a) + "_" + C_dir = "tensor_c_tile_" + for a in tile_crd_c: + C_dir += str(a) + "_" + d_dir = "tensor_d_tile_" + for a in tile_crd_d: + d_dir += str(a) + "_" + + B_dir = B_dir[0:-1] + ".mtx" + C_dir = C_dir[0:-1] + ".mtx" + d_dir = d_dir[0:-1] + ".mtx" + # print(B_dir, " ", C_dir) + B_filename = os.path.join(formatted_dir, B_dir) + C_filename = os.path.join(formatted_dir, C_dir) + d_filename = os.path.join(formatted_dir, d_dir) + # print() + # print(B_filename) + # print(C_filename) + # print(d_filename) + # print() + if os.path.exists(B_filename) and os.path.exists(C_filename) and os.path.exists(d_filename): + B_scipy = scipy.io.mmread(B_filename) + itr = 0 + # print("\nB_scipy: ", B_scipy) + for i, j, v in zip(B_scipy.row, B_scipy.col, B_scipy.data): + # print(B_scipy.data) + # print(i, " ", j, " ", v) + B_scipy.data[itr] = round_sparse(B_scipy.data[itr]) + # if B_scipy.data[itr] < 1 and B_scipy.data[itr] > 0: + # B_scipy.data[itr] = 1 + # elif B_scipy.data[itr] < 0 and B_scipy.data[itr] > -1: + # B_scipy.data[itr] = -1 + # else: + # B_scipy.data[itr] = int(B_scipy.data[itr]) + itr += 1 + B_scipy = B_scipy.tocsr() + + + C_scipy = scipy.io.mmread(C_filename) + # print(C_filename) + # print("\nC_scipy: ", C_scipy) + # print("___________________") + # print(B_scipy) + itr = 0 + for i, j, v in zip(C_scipy.row, C_scipy.col, C_scipy.data): + C_scipy.data[itr] = round_sparse(C_scipy.data[itr]) + itr += 1 + C_scipy = C_scipy.tocsr() + C_scipy = np.transpose(C_scipy) + + d_scipy = scipy.io.mmread(d_filename) + # print("\nd_scipy: ", d_scipy) + + itr = 0 + for i, j, v in zip(d_scipy.row, d_scipy.col, d_scipy.data): + d_scipy.data[itr] = round_sparse(d_scipy.data[itr]) + + itr += 1 + d_scipy = d_scipy.tocsr() + d_scipy = np.transpose(d_scipy) + + # gold_nd = (B_scipy @ 
C_scipy) + # gold_nd = B_scipy.dot(C_scipy) + + #constants + alpha = 2 + beta = 2 + + print("B_scipy.shape: ", B_scipy.shape) + print("C_scipy.shape: ", C_scipy.shape) + print("d_scipy.shape: ", d_scipy.shape) + + gold_nd = alpha*(B_scipy @ C_scipy) + beta * d_scipy + # print(gold_nd) + + gold_out = gold_nd.tocoo() + assert tile_crd_b[1] == tile_crd_c[0] and tile_crd_b[3] == tile_crd_c[1] and tile_crd_b[0] == tile_crd_d[0] and tile_crd_b[2] == tile_crd_d[1] + # assert tile_crd_b[1] == tile_crd_c[0] and tile_crd_b[3] == tile_crd_c[2] + scipy.io.mmwrite( + dirname + "out_" + str(tile_crd_b[0]) + "_" + str(tile_crd_b[1]) + "_" + str(tile_crd_b[3]) + "_" + str(tile_crd_b[2]) + "_" + str( + tile_crd_c[0]) + "_" + str(tile_crd_c[1]) + "_" + str(tile_crd_d[0]) + "_" + str(tile_crd_d[1]) + ".mtx", gold_out) + elif os.path.exists(d_filename): + d_scipy = scipy.io.mmread(d_filename) + # print("\nd_scipy: ", d_scipy) + + itr = 0 + for i, j, v in zip(d_scipy.row, d_scipy.col, d_scipy.data): + d_scipy.data[itr] = d_scipy.data[itr] + + itr += 1 + d_scipy = d_scipy.tocsr() + # d_scipy = np.transpose(d_scipy) + + # gold_nd = (B_scipy @ C_scipy) + # gold_nd = B_scipy.dot(C_scipy) + + #constants + alpha = 2 + beta = 2 + + # print(d_scipy.todense()) + gold_nd = beta * d_scipy + # print(gold_nd) + if(np.count_nonzero(gold_nd.todense()) == 0): + print("output is all zero") + return + + gold_out = gold_nd.tocoo() + # assert tile_crd_b[1] == tile_crd_c[0] and tile_crd_b[3] == tile_crd_c[1] and tile_crd_b[0] == tile_crd_d[0] and tile_crd_b[2] == tile_crd_d[1] + # assert tile_crd_b[1] == tile_crd_c[0] and tile_crd_b[3] == tile_crd_c[2] + scipy.io.mmwrite( + dirname + "out_" + str(tile_crd_b[0]) + "_" + str(tile_crd_b[1]) + "_" + str(tile_crd_b[3]) + "_" + str(tile_crd_b[2]) + "_" + str( + tile_crd_c[0]) + "_" + str(tile_crd_c[1]) + "_" + str(tile_crd_d[0]) + "_" + str(tile_crd_d[1]) + ".mtx", gold_out) + + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Generate tiled output gold") + parser.add_argument("--yaml_name", type=str, default="memory_config_real.yaml") + args = parser.parse_args() + outdir = f"./tiles/mat_mattransmul/output/" + outpath = Path(outdir) + outpath.mkdir(parents=True) + + # generate_gold_matmul_tiled([0, 1, 2, 9], [1, 0, 9, 0], outdir) + + # generate_gold_matmul_tiled([0, 1, 0, 7], [1, 0, 7, 0], outdir) + # quit() with open("/nobackup/rsharma3/Sparsity/simulator/old_sam/sam/tiles/matmul_ikj/tensor_sizes", "rb") as ff: + + with open(f"./tiles/mat_mattransmul/tensor_sizes", "rb") as ff: + sizes_dict_level_full = pickle.load(ff) + + with open("./sam/sim/src/tiling/" + args.yaml_name, "r") as stream: + loop_config = yaml.safe_load(stream) + + print() + print("sizes_dict_level_full", sizes_dict_level_full) + print() + print("loop_config", loop_config) + + struct = { + "j00": 1 + int(sizes_dict_level_full["B"][0]) // (loop_config["Glb_tile_size"] * loop_config["Mem_tile_size"]), + "i00": 1 + int(sizes_dict_level_full["c"][0]) // (loop_config["Glb_tile_size"] * loop_config["Mem_tile_size"]), + "i0": loop_config["Glb_tile_size"], "j0": loop_config["Glb_tile_size"]} + + print() + print(struct) + + # print(struct) + # # quit() + for i00 in range(struct["i00"]): + for j00 in range(struct["j00"]): + for i0 in range(struct["i0"]): + for j0 in range(struct["j0"]): + generate_gold_mattransmul_tiled([j00, i00, j0, i0], [i00, i0], [j00, j0], outdir) diff --git a/scripts/tiling/prepare_files.sh b/scripts/tiling/prepare_files.sh index 3d24d037..d255f317 100755 --- 
a/scripts/tiling/prepare_files.sh +++ b/scripts/tiling/prepare_files.sh @@ -6,10 +6,12 @@ # ./scripts/tiling/prepare_files.sh extensor__.mtx +appname=$3 + basedir=$(pwd) rm -rf $basedir/tiles/* -./scripts/tiling/tile_ext.sh $1 memory_config_extensor_17M_llb.yaml +./scripts/tiling/tile_ext.sh $1 memory_config_extensor_17M_llb.yaml $appname -python3 scripts/tiling/generate_gold_matmul_tiled.py --yaml_name memory_config_extensor_17M_llb.yaml +# python3 scripts/tiling/generate_gold_matmul_tiled.py --yaml_name memory_config_extensor_17M_llb.yaml diff --git a/scripts/tiling/tile_ext.sh b/scripts/tiling/tile_ext.sh index 36ce32ee..5acaeab7 100755 --- a/scripts/tiling/tile_ext.sh +++ b/scripts/tiling/tile_ext.sh @@ -5,9 +5,13 @@ BENCHMARKS=( # matmul_ikj - mat_mattransmul + # mat_mattransmul + # mat_vecmul_ij + mat_residual ) +appname=$3 + sspath=$SUITESPARSE_PATH basedir=$(pwd) @@ -26,7 +30,7 @@ for b in ${!BENCHMARKS[@]}; do echo "Tiling mtx file" # python $basedir/sam/sim/src/tiling/tile.py --extensor --input_path $tiles_path --cotile $bench --multilevel --hw_config $basedir/sam/sim/src/tiling/$2 - python3 ./sam/sim/src/tiling/tile.py --tensor_type ss --input_tensor rel5 --cotile mat_mattransmul --multilevel --hw_config ./sam/sim/src/tiling/memory_config_onyx.yaml --higher_order + python3 ./sam/sim/src/tiling/tile.py --tensor_type ss --input_tensor $appname --cotile $bench --multilevel --hw_config ./sam/sim/src/tiling/memory_config_onyx.yaml --higher_order echo "Generating input format files for $tiles_path..." python3 $basedir/scripts/formatting/datastructure_suitesparse.py -n temp -hw -b $bench --input $basedir/tiles/$bench/mtx/ --output_dir_path $basedir/tiles/$bench/formatted --tiles diff --git a/setup_tiling_mat.py b/setup_tiling_mat.py index e7195c99..bd12c015 100644 --- a/setup_tiling_mat.py +++ b/setup_tiling_mat.py @@ -2,16 +2,35 @@ import glob import shutil import os +import re from sam.util import SUITESPARSE_PATH +## PARAMS ###################################################################### + +# 'rel5', 'mk9-b1', data = ['rel5'] # app_name = "mat_elemadd" -app_name = "mat_mattransmul" +# app_name = "mat_elemmul" +app_name = "mat_sddmm" +# app_name = "matmul_ijk" +# app_name = "mat_elemmul" +# app_name = "mat_vecmul_ij" +# app_name = "mat_residual" # data_file = open("scripts/tensor_names/suitesparse_valid_mid50.txt") # data_file_lines = data_file.readlines() # for line in data_file_lines: # data.append(line[:-1]) +mode_to_exclude = 0 +addition_vector_name = "d" #mattransmul (d) and residual (b) only + +other_tensors = ["c"] +samples_directory = f"samples/{app_name}" +docker_path = f"avb03-sparse-tiling" +use_dataset_files = False +matrix_app=True + +############################################################################### os.environ["SUITESPARSE_PATH"] = "/nobackup/owhsu/sparse-datasets/suitesparse/" os.environ["FROSTT_PATH"] = "/nobackup/owhsu/sparse-datasets/frostt/" @@ -20,17 +39,147 @@ os.environ["FROSTT_FORMATTED_PATH"] = "/home/avb03/sam/FROST_FORMATTED" os.environ["TACO_TENSOR_PATH"] = "/home/avb03/sam/TACO_TENSOR" -for datum in data: - mtx_file = glob.glob(f"{SUITESPARSE_PATH}/{datum}.mtx")[0] - os.makedirs("extensor_mtx", exist_ok=True) - shutil.copy(mtx_file,f"extensor_mtx/{datum}.mtx") - - command = f"./scripts/suitesparse_memory_model_runner.sh {datum} {app_name}" - os.system(command) +def replace_ones_with_zeros(mtx_file): + with open(mtx_file, 'r') as file: + lines = file.readlines() + + new_lines = [] + for line in lines: + values = line.split() + if 
len(values) >= 3: + values[2] = '0' + new_lines.append(' '.join(values)) + + with open(mtx_file, 'w') as file: + file.writelines(new_lines) + + +if(matrix_app): + for datum in data: + rmdir = f"rm -rf tiles/{app_name}" + os.system(rmdir) + + mtx_file = glob.glob(f"{SUITESPARSE_PATH}/{datum}.mtx")[0] + os.makedirs("extensor_mtx", exist_ok=True) + shutil.copy(mtx_file,f"extensor_mtx/{datum}.mtx") + + command = f"./scripts/suitesparse_memory_model_runner.sh {datum} {app_name}" + os.system(command) + + directories = glob.glob(f'tiles/{app_name}/formatted/tensor_[a-z]*') + + #for vectors, do cleanup + for directory in directories: + print(directory) + match = re.search(r'tensor_([a-z])', directory) + if match: + lowercase_letter = match.group(1) + + crd_file = os.path.join(directory, f"{lowercase_letter}{mode_to_exclude}_crd.txt") + seg_file = os.path.join(directory, f"{lowercase_letter}{mode_to_exclude}_seg.txt") + + # if os.path.exists(crd_file): + # os.remove(crd_file) + + # if os.path.exists(seg_file): + # os.remove(seg_file) + + samples_with_addition_vector = None + + # dense tile replacement for addition + if app_name == "mat_mattransmul" or app_name == "mat_residual": + # samples_with_addition_vector = glob.glob(f"{samples_directory}/*[{addition_vector_name}]*") + # samples_with_addition_vector = glob.glob(f"{samples_directory}/mtm_w_0_1/tensor_d_tile_0_0") + samples_with_addition_vector = glob.glob(f"{samples_directory}/mtm_w_0_1_BAK") + + + print(samples_with_addition_vector) + #fill in missing tiles with blanks + for sample in samples_with_addition_vector: + file_path = os.path.join(sample, f"{addition_vector_name}_vals.txt") + + with open(file_path, "r") as file: + file_contents = file.read() + + file_contents = file_contents.replace("1", "0") - os.makedirs("tiles_compiled", exist_ok=True) - copy_rename = f"cp -r tiles/{app_name} tiles_compiled/{app_name}_{datum}" - os.system(copy_rename) + with open(file_path, "w") as file: + file.write(file_contents) - docker_copy_command = f"docker cp tiles_compiled/{app_name}_{datum} avb03-sparse-tiling:/aha/garnet/tiles_{app_name}_{datum}" - os.system(docker_copy_command) + tile_range = [(0,i) for i in range(8)] + [(1,i) for i in range(4)] + + for i,j in tile_range: + tile_dir = f"tiles/{app_name}/formatted/tensor_{addition_vector_name}_tile_{i}_{j}" + + if not os.path.exists(tile_dir): + # replace_ones_with_zeros("samples/mat_mattransmul/tensor_d_dense_mtx.mtx") + + # copy_over_to_mtx_dir = f"cp samples/mat_mattransmul/tensor_d_dense_gold_stash.mtx tiles/{app_name}/mtx/tensor_{addition_vector_name}_tile_{i}_{j}.mtx" + # os.system(copy_over_to_mtx_dir) + + sample_tile_dir = samples_with_addition_vector[0] + + if os.path.exists(sample_tile_dir): + shutil.copytree(sample_tile_dir, tile_dir) + + if(use_dataset_files): + assert os.path.exists("SUITESPARSE_FORMATTED") + + temp_name = app_name + if app_name == "mat_vecmul_ij": + temp_name = "mat_vecmul" + + app_path_additional = f"SUITESPARSE_FORMATTED/{datum}/{temp_name}/" + + for tens in other_tensors: + valid_dirs = glob.glob(f"tiles/{app_name}/formatted/tensor_{tens}*") + for d in valid_dirs: + remove_tens = f"rm {d}/*" + print(remove_tens) + os.system(remove_tens) + + files_to_cp = glob.glob(f"{app_path_additional}tensor_{tens}*") + + for file in files_to_cp: + if "mode_0_crd" in file: + copy_rename = f"cp {file} {d}/{tens}0_crd.txt" + print(copy_rename) + os.system(copy_rename) + elif "mode_1_crd" in file: + copy_rename = f"cp {file} {d}/{tens}1_crd.txt" + print(copy_rename) + 
os.system(copy_rename) + elif "mode_0_seg" in file: + copy_rename = f"cp {file} {d}/{tens}0_seg.txt" + print(copy_rename) + os.system(copy_rename) + elif "mode_1_seg" in file: + copy_rename = f"cp {file} {d}/{tens}1_seg.txt" + print(copy_rename) + os.system(copy_rename) + elif "vals" in file: + copy_rename = f"cp {file} {d}/{tens}_vals.txt" + print(copy_rename) + os.system(copy_rename) + elif "shape" in file: + copy_rename = f"cp {file} {d}/{tens}_shape.txt" + print(copy_rename) + os.system(copy_rename) + + + dump_gold_tiles = f"python3 scripts/tiling/generate_gold_mattransmul.py --yaml_name memory_config_extensor_17M_llb.yaml" + os.system(dump_gold_tiles) + + # os.makedirs("tiles_compiled", exist_ok=True) + # copy_rename = f"cp -r tiles/{app_name} tiles_compiled/{app_name}_{datum}" + # print(copy_rename) + # os.system(copy_rename) + + docker_clean = f"docker exec {docker_path} rm -r /aha/garnet/tiles_{app_name}_{datum}" + print(docker_clean) + os.system(docker_clean) + + docker_copy_command = f"docker cp tiles {docker_path}:/aha/garnet/tiles_{app_name}_{datum}" + print(docker_copy_command) + os.system(docker_copy_command) + diff --git a/setup_tiling_tensors.py b/setup_tiling_tensors.py new file mode 100644 index 00000000..15d73c98 --- /dev/null +++ b/setup_tiling_tensors.py @@ -0,0 +1,23 @@ +import numpy as np +import os +import glob +import shutil +from scripts.util.util import FormatWriter, InputCacheSuiteSparse + +#### PARAMS #### +tile = True +app_name = "tensor3_ttv" +vector_names = ['c'] +############## + +tiled_tensors = glob.glob(f"tiles/{app_name}/mtx/*.tns") +formatwriter = FormatWriter() +inputCache = InputCacheSuiteSparse() + +for tensor in tiled_tensors: + if any(x in tensor for x in vector_names): + #vector + inputCache.load(tensor) + formatwriter.writeout_separate_sparse_only() + else: + print("regular 3d tensors can be packed and tiled") \ No newline at end of file diff --git a/tile_pairing.py b/tile_pairing.py new file mode 100644 index 00000000..5e171464 --- /dev/null +++ b/tile_pairing.py @@ -0,0 +1,437 @@ +import shutil +import glob +import subprocess +import os +import json + +# test = "bcsstm26" +# test = "rel5" +test = "qiulp" +# test = "adder_dcop_30" +# test = "n4c6-b1" +# app_name = "mat_residual" +# app_name = "matmul_ijk" +# app_name = "matmul_ijk" +# app_name = "mat_mattransmul" +app_name = "mat_elemmul" +const_val = 2 # only for mat_mattransmul + + +tiles_accumulation = {} + +b_tensors = glob.glob(f"/home/avb03/sam/tiles/{app_name}/formatted/tensor_B*") +c_tensors = glob.glob(f"/home/avb03/sam/tiles/{app_name}/formatted/tensor_C*") +d_tensors = glob.glob(f"/aha/garnet/tiles_{app_name}_{test}/{app_name}/formatted/tensor_D*") + +print("b_tensors: ", b_tensors) +print("c_tensors: ", c_tensors) +print("d_tensors: ", d_tensors) + +# b_tensors = glob.glob(f"/aha/garnet/tiles_{app_name}_{test}/formatted/tensor_B*") +# c_tensors = glob.glob(f"/aha/garnet/tiles_{app_name}_{test}/formatted/tensor_C*") + +b_vec_tensors = glob.glob(f"/aha/garnet/tiles_{app_name}_{test}/{app_name}/formatted/tensor_b*") +c_vec_tensors = glob.glob(f"/aha/garnet/tiles_{app_name}_{test}/{app_name}/formatted/tensor_c*") +d_vec_tensors = glob.glob(f"/aha/garnet/tiles_{app_name}_{test}/{app_name}/formatted/tensor_d*") + +d_loc_paired = [] +b_loc_paired = [] + +if not os.path.exists("SPARSE_TESTS/MAT_TMP_DIR"): + os.makedirs("SPARSE_TESTS/MAT_TMP_DIR") + +os.system(f"rm -rf SPARSE_TESTS/{app_name}*") +os.system(f"rm -rf SPARSE_TESTS/MAT_TMP_DIR/tile*") + +tile = 0 + +os.chdir("SPARSE_TESTS") + 
+if app_name == "matmul_ijk": + for b in b_tensors: + for c in c_tensors: + tile_str = "tile" + str(tile) + b_loc = b[-7:] + c_loc = c[-7:] + b_loc = b_loc.split("_") + c_loc = c_loc.split("_") + if(b_loc[1] == c_loc[0] and b_loc[3] == c_loc[2]): + print(b, c) + + if b_loc[2] not in tiles_accumulation: + tiles_accumulation[b_loc[2]] = [] + + tiles_accumulation[b_loc[2]].append(tile_str) + + if not os.path.exists(f"./MAT_TMP_DIR/{tile_str}"): + os.mkdir(f"./MAT_TMP_DIR/{tile_str}") + + shutil.copy(f"{b}/B0_crd.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_B_mode_0_crd") + shutil.copy(f"{b}/B0_seg.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_B_mode_0_seg") + + shutil.copy(f"{b}/B1_crd.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_B_mode_1_crd") + shutil.copy(f"{b}/B1_seg.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_B_mode_1_seg") + + shutil.copy(f"{b}/B_vals.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_B_mode_vals") + + shutil.copy(f"{b}/B_shape.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_B_mode_shape") + + shutil.copy(f"{c}/C0_crd.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_C_mode_1_crd") + shutil.copy(f"{c}/C0_seg.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_C_mode_1_seg") + + shutil.copy(f"{c}/C1_crd.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_C_mode_0_crd") + shutil.copy(f"{c}/C1_seg.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_C_mode_0_seg") + + shutil.copy(f"{c}/C_vals.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_C_mode_vals") + + shutil.copy(f"{c}/C_shape.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_C_mode_shape") + + tile = tile + 1 +elif app_name == "mat_elemadd" or app_name == "mat_elemmul": + for b in b_tensors: + for c in c_tensors: + tile_str = "tile" + str(tile) + b_loc = b[-7:] + c_loc = c[-7:] + b_loc = b_loc.split("_") + c_loc = c_loc.split("_") + if(b_loc == c_loc): + print(b, c) + if not os.path.exists(f"./MAT_TMP_DIR/{tile_str}"): + os.mkdir(f"./MAT_TMP_DIR/{tile_str}") + shutil.copy(f"{b}/B0_crd.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_B_mode_0_crd") + shutil.copy(f"{b}/B0_seg.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_B_mode_0_seg") + + shutil.copy(f"{b}/B1_crd.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_B_mode_1_crd") + shutil.copy(f"{b}/B1_seg.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_B_mode_1_seg") + + shutil.copy(f"{b}/B_vals.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_B_mode_vals") + + shutil.copy(f"{b}/B_shape.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_B_mode_shape") + + shutil.copy(f"{c}/C0_crd.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_C_mode_0_crd") + shutil.copy(f"{c}/C0_seg.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_C_mode_0_seg") + + shutil.copy(f"{c}/C1_crd.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_C_mode_1_crd") + shutil.copy(f"{c}/C1_seg.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_C_mode_1_seg") + + shutil.copy(f"{c}/C_vals.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_C_mode_vals") + + shutil.copy(f"{c}/C_shape.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_C_mode_shape") + + # subprocess.call(["aha", + # "regress", + # "fast"], + # text=True) + + # shutil.copy("/aha/garnet/SPARSE_TESTS/GLB_DIR/matmul_ijk_combined_seed_tile1/output_gold.npy", "/aha/garnet/SPARSE_TESTS/GLB_DIR/matmul_ijk_combined_seed_tile1/bin") + # shutil.copytree("/aha/garnet/SPARSE_TESTS/GLB_DIR/matmul_ijk_combined_seed_tile1/bin", f"/aha/garnet/SPARSE_TESTS/{tile_str}") + tile = tile + 1 + # print("we are on tile ", tile) +elif app_name == "mat_mattransmul": + for b in b_tensors: + for c in c_vec_tensors: + for d in d_vec_tensors: + tile_str = "tile" + str(tile) + b_loc = b[-7:] + c_loc = c[-3:] + d_loc = d[-3:] + + b_loc = b_loc.split("_") + c_loc = c_loc.split("_") + d_loc = 
d_loc.split("_") + + if(b_loc[1] == c_loc[0] and b_loc[3] == c_loc[1] and b_loc[0] == d_loc[0] and b_loc[2] == d_loc[1]): + # if(b_loc[1] == d_loc[0] and b_loc[3] == d_loc[1] and b_loc[0] == c_loc[0] and b_loc[2] == c_loc[1]): + d_loc_paired.append(d_loc) + + print(f"\n ----- TILE {tile} ----- \n") + print("B is: ", b) #in build_tb, B == C, c == d, d == f. (#FIXME: change build_tb) + print("C is: ", c) + print("d is: ", d) + print(f"\n ----- TILE {tile} ----- \n") + if not os.path.exists(f"./MAT_TMP_DIR/{tile_str}"): + os.mkdir(f"./MAT_TMP_DIR/{tile_str}") + + shutil.copy(f"{b}/B0_crd.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_C_mode_1_crd") + shutil.copy(f"{b}/B0_seg.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_C_mode_1_seg") + + shutil.copy(f"{b}/B1_crd.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_C_mode_0_crd") + shutil.copy(f"{b}/B1_seg.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_C_mode_0_seg") + + shutil.copy(f"{b}/B_vals.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_C_mode_vals") + + shutil.copy(f"{b}/B_shape.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_C_mode_shape") + + shutil.copy(f"{c}/c1_crd.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_d_mode_1_crd") + shutil.copy(f"{c}/c1_seg.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_d_mode_1_seg") + + shutil.copy(f"{c}/c0_crd.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_d_mode_0_crd") + shutil.copy(f"{c}/c0_seg.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_d_mode_0_seg") + + shutil.copy(f"{d}/d1_crd.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_f_mode_1_crd") + shutil.copy(f"{d}/d1_seg.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_f_mode_1_seg") + + shutil.copy(f"{d}/d0_crd.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_f_mode_0_crd") + shutil.copy(f"{d}/d0_seg.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_f_mode_0_seg") + + shutil.copy(f"{c}/c_vals.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_d_mode_vals") + shutil.copy(f"{c}/c_shape.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_d_mode_shape") + + shutil.copy(f"{d}/d_vals.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_f_mode_vals") + shutil.copy(f"{d}/d_shape.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_f_mode_shape") + + with open(f"./MAT_TMP_DIR/{tile_str}/tensor_b_mode_vals", 'w') as file: + file.write(str(const_val)) + + with open(f"./MAT_TMP_DIR/{tile_str}/tensor_e_mode_vals", 'w') as file: + file.write(str(const_val)) + + tile = tile + 1 + elif d_loc not in d_loc_paired: + # case: B and c tiles are zero but d is nonzero. We have all d tiles. Just take a B and c tile, copy it and make it zero.' + d_loc_paired.append(d_loc) + print(f"\n ----- TILE D-unpaired {tile} ----- \n") + print("B (zero tile) is: ", b) #in build_tb, B == C, c == d, d == f. 
(#FIXME: change build_tb) + print("C (zero tile) is: ", c) + print("d is: ", d) + print(f"\n ----- TILE D-unpaired {tile} ----- \n") + + if not os.path.exists(f"./MAT_TMP_DIR/{tile_str}"): + os.mkdir(f"./MAT_TMP_DIR/{tile_str}") + + shutil.copy(f"{b}/B0_crd.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_C_mode_1_crd") + shutil.copy(f"{b}/B0_seg.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_C_mode_1_seg") + + shutil.copy(f"{b}/B1_crd.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_C_mode_0_crd") + shutil.copy(f"{b}/B1_seg.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_C_mode_0_seg") + + shutil.copy(f"{b}/B_vals.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_C_mode_vals") + + # clear out C vals + with open(f"./MAT_TMP_DIR/{tile_str}/tensor_C_mode_vals", 'r+') as file: + contents = file.read() + contents = contents.replace(contents, str(0)) + file.seek(0) + file.write(contents) + + shutil.copy(f"{b}/B_shape.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_C_mode_shape") + + shutil.copy(f"{c}/c1_crd.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_d_mode_1_crd") + shutil.copy(f"{c}/c1_seg.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_d_mode_1_seg") + + shutil.copy(f"{c}/c0_crd.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_d_mode_0_crd") + shutil.copy(f"{c}/c0_seg.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_d_mode_0_seg") + + shutil.copy(f"{d}/d1_crd.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_f_mode_1_crd") + shutil.copy(f"{d}/d1_seg.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_f_mode_1_seg") + + shutil.copy(f"{d}/d0_crd.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_f_mode_0_crd") + shutil.copy(f"{d}/d0_seg.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_f_mode_0_seg") + + shutil.copy(f"{c}/c_vals.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_d_mode_vals") + + # clear out d vals + with open(f"./MAT_TMP_DIR/{tile_str}/tensor_d_mode_vals", 'r+') as file: + contents = file.read() + contents = contents.replace(contents, str(0)) + file.seek(0) + file.write(contents) + + shutil.copy(f"{c}/c_shape.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_d_mode_shape") + + shutil.copy(f"{d}/d_vals.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_f_mode_vals") + shutil.copy(f"{d}/d_shape.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_f_mode_shape") + + with open(f"./MAT_TMP_DIR/{tile_str}/tensor_b_mode_vals", 'w') as file: + file.write(str(const_val)) + + with open(f"./MAT_TMP_DIR/{tile_str}/tensor_e_mode_vals", 'w') as file: + file.write(str(const_val)) + + tile = tile + 1 + print("d_loc_paired: ", d_loc_paired) +elif app_name == "mat_vecmul_ij": + for b in b_tensors: + for c in c_vec_tensors: + tile_str = "tile" + str(tile) + b_loc = b[-7:] + c_loc = c[-3:] + + b_loc = b_loc.split("_") + c_loc = c_loc.split("_") + + # if(b_loc[1] == c_loc[0] and b_loc[3] == c_loc[1] and b_loc[0] == d_loc[0] and b_loc[2] == d_loc[1]): + if(b_loc[1] == c_loc[0] and b_loc[3] == c_loc[1]): + print(b,c) + if not os.path.exists(f"./MAT_TMP_DIR/{tile_str}"): + os.mkdir(f"./MAT_TMP_DIR/{tile_str}") + + shutil.copy(f"{b}/B0_crd.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_B_mode_0_crd") + shutil.copy(f"{b}/B0_seg.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_B_mode_0_seg") + + shutil.copy(f"{b}/B1_crd.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_B_mode_1_crd") + shutil.copy(f"{b}/B1_seg.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_B_mode_1_seg") + + shutil.copy(f"{b}/B_vals.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_B_mode_vals") + + shutil.copy(f"{b}/B_shape.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_B_mode_shape") + + # shutil.copy(f"{c}/c1_crd.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_c_mode_1_crd") + # shutil.copy(f"{c}/c1_seg.txt", 
f"./MAT_TMP_DIR/{tile_str}/tensor_c_mode_1_seg") + + shutil.copy(f"{c}/c0_crd.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_c_mode_0_crd") + shutil.copy(f"{c}/c0_seg.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_c_mode_0_seg") + + shutil.copy(f"{c}/c_vals.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_c_mode_vals") + shutil.copy(f"{c}/c_shape.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_c_mode_shape") + + tile = tile + 1 +elif app_name == "mat_residual": + for b in b_vec_tensors: + for c in c_tensors: + for d in d_vec_tensors: + tile_str = "tile" + str(tile) + b_loc = b[-3:] + c_loc = c[-7:] + d_loc = d[-3:] + + b_loc = b_loc.split("_") + c_loc = c_loc.split("_") + d_loc = d_loc.split("_") + + # if(b_loc[1] == c_loc[0] and b_loc[3] == c_loc[1] and b_loc[0] == d_loc[0] and b_loc[2] == d_loc[1]): + if(c_loc[0] == b_loc[0] and c_loc[2] == b_loc[1] and c_loc[1] == d_loc[0] and c_loc[3] == d_loc[1]): + print(b, c, d) + b_loc_paired.append(b_loc) + + if not os.path.exists(f"./MAT_TMP_DIR/{tile_str}"): + os.mkdir(f"./MAT_TMP_DIR/{tile_str}") + + shutil.copy(f"{c}/C0_crd.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_C_mode_0_crd") + shutil.copy(f"{c}/C0_seg.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_C_mode_0_seg") + + shutil.copy(f"{c}/C1_crd.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_C_mode_1_crd") + shutil.copy(f"{c}/C1_seg.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_C_mode_1_seg") + + shutil.copy(f"{c}/C_vals.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_C_mode_vals") + + shutil.copy(f"{c}/C_shape.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_C_mode_shape") + + shutil.copy(f"{b}/b1_crd.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_b_mode_1_crd") + shutil.copy(f"{b}/b1_seg.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_b_mode_1_seg") + + shutil.copy(f"{b}/b0_crd.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_b_mode_0_crd") + shutil.copy(f"{b}/b0_seg.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_b_mode_0_seg") + + shutil.copy(f"{d}/d1_crd.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_d_mode_1_crd") + shutil.copy(f"{d}/d1_seg.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_d_mode_1_seg") + + shutil.copy(f"{d}/d0_crd.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_d_mode_0_crd") + shutil.copy(f"{d}/d0_seg.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_d_mode_0_seg") + + shutil.copy(f"{b}/b_vals.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_b_mode_vals") + shutil.copy(f"{b}/b_shape.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_b_mode_shape") + + shutil.copy(f"{d}/d_vals.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_d_mode_vals") + shutil.copy(f"{d}/d_shape.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_d_mode_shape") + + tile = tile + 1 + elif b_loc not in b_loc_paired: + b_loc_paired.append(b_loc) + + if not os.path.exists(f"./MAT_TMP_DIR/{tile_str}"): + os.mkdir(f"./MAT_TMP_DIR/{tile_str}") + + shutil.copy(f"{c}/C0_crd.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_C_mode_0_crd") + shutil.copy(f"{c}/C0_seg.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_C_mode_0_seg") + + shutil.copy(f"{c}/C1_crd.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_C_mode_1_crd") + shutil.copy(f"{c}/C1_seg.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_C_mode_1_seg") + + shutil.copy(f"{c}/C_vals.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_C_mode_vals") + + # clear out C vals + with open(f"./MAT_TMP_DIR/{tile_str}/tensor_C_mode_vals", 'r+') as file: + contents = file.read() + contents = contents.replace(contents, str(0)) + file.seek(0) + file.write(contents) + + shutil.copy(f"{c}/C_shape.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_C_mode_shape") + + shutil.copy(f"{b}/b1_crd.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_b_mode_1_crd") + shutil.copy(f"{b}/b1_seg.txt", 
f"./MAT_TMP_DIR/{tile_str}/tensor_b_mode_1_seg") + + shutil.copy(f"{b}/b0_crd.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_b_mode_0_crd") + shutil.copy(f"{b}/b0_seg.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_b_mode_0_seg") + + shutil.copy(f"{d}/d1_crd.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_d_mode_1_crd") + shutil.copy(f"{d}/d1_seg.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_d_mode_1_seg") + + shutil.copy(f"{d}/d0_crd.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_d_mode_0_crd") + shutil.copy(f"{d}/d0_seg.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_d_mode_0_seg") + + shutil.copy(f"{b}/b_vals.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_b_mode_vals") + shutil.copy(f"{b}/b_shape.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_b_mode_shape") + + shutil.copy(f"{d}/d_vals.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_d_mode_vals") + shutil.copy(f"{d}/d_shape.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_d_mode_shape") + + # clear out d vals + with open(f"./MAT_TMP_DIR/{tile_str}/tensor_d_mode_vals", 'r+') as file: + contents = file.read() + contents = contents.replace(contents, str(0)) + file.seek(0) + file.write(contents) + + tile = tile + 1 + +elif app_name == "mat_sddmm": + for b in b_tensors: + for c in c_tensors: + for d in d_tensors: + tile_str = "tile" + str(tile) + + b_loc = b[-7:] + c_loc = c[-7:] + d_loc = d[-7:] + + b_loc = b_loc.split("_") + c_loc = c_loc.split("_") + d_loc = d_loc.split("_") + + # first j, then i (k is a free coordinate) + if(b_loc[0] == d_loc[1] and b_loc[2] == d_loc[3] and b_loc[1] == c_loc[0] and b_loc[3] == c_loc[2]): + print(b, c, d) + if not os.path.exists(f"./MAT_TMP_DIR/{tile_str}"): + os.mkdir(f"./MAT_TMP_DIR/{tile_str}") + + shutil.copy(f"{b}/B0_crd.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_B_mode_0_crd") + shutil.copy(f"{b}/B0_seg.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_B_mode_0_seg") + + shutil.copy(f"{b}/B1_crd.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_B_mode_1_crd") + shutil.copy(f"{b}/B1_seg.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_B_mode_1_seg") + + shutil.copy(f"{b}/B_vals.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_B_mode_vals") + + shutil.copy(f"{b}/B_shape.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_B_mode_shape") + + shutil.copy(f"{c}/C0_crd.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_C_mode_1_crd") + shutil.copy(f"{c}/C0_seg.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_C_mode_1_seg") + + shutil.copy(f"{c}/C1_crd.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_C_mode_0_crd") + shutil.copy(f"{c}/C1_seg.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_C_mode_0_seg") + + shutil.copy(f"{c}/C_vals.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_C_mode_vals") + + shutil.copy(f"{c}/C_shape.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_C_mode_shape") + + tile = tile + 1 + +print("tiles_accumulation: ", tiles_accumulation) + +with open("../tiles_accumulation.json", "w") as file: + json.dump(tiles_accumulation, file) + +print("there are ", tile_str, " tiles") \ No newline at end of file From 9c6443322baca43e466c68e1f186a1e397196171 Mon Sep 17 00:00:00 2001 From: Akhilesh Varadan Balasingam Date: Sun, 20 Aug 2023 10:33:22 -0700 Subject: [PATCH 29/39] updated matrix tiling flow --- sam/sim/src/tiling/tile.py | 2 +- scripts/tiling/tile_ext.sh | 4 ++-- tile_pairing.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sam/sim/src/tiling/tile.py b/sam/sim/src/tiling/tile.py index 76dc1dde..637aa71d 100644 --- a/sam/sim/src/tiling/tile.py +++ b/sam/sim/src/tiling/tile.py @@ -29,7 +29,7 @@ "mat_mattransmul": "X(i,j)=B(j,i)*c(j)+d(i) -f=X:ss -f=B:ss -f=c:ss:0 -f=d:ss:0 -s=reorder(i,j)", "mat_vecmul_ij" : "X(i,j)=B(i,j)*c(j) -f=X:ss -f=B:ss 
-f=c:ss:0 -s=reorder(i,j)", "mat_residual": "X(i,j)=b(i)-C(i,j)*d(j) -f=X:ss -f=C:ss -f=b:ss:0 -f=d:ss:0 -s=reorder(i,j)", - "mat_sddmm": "X(i,j)=B(i,j)*C(i,k)*D(k,j) -f=X:ss -f=B:ss -f=C:ss -f=D:ss -s=reorder(i,j,k)"} + "mat_sddmm": "X(i,j)=B(i,j)*C(i,k)*D(k,j) -f=X:ss -f=B:ss -f=C:dd -f=D:dd:1,0 -s=reorder(i,j,k)"} def print_dict(dd): diff --git a/scripts/tiling/tile_ext.sh b/scripts/tiling/tile_ext.sh index 5acaeab7..e43b9c87 100755 --- a/scripts/tiling/tile_ext.sh +++ b/scripts/tiling/tile_ext.sh @@ -6,8 +6,8 @@ BENCHMARKS=( # matmul_ikj # mat_mattransmul - # mat_vecmul_ij - mat_residual + mat_sddmm + # mat_residual ) appname=$3 diff --git a/tile_pairing.py b/tile_pairing.py index 5e171464..e5e1dc5d 100644 --- a/tile_pairing.py +++ b/tile_pairing.py @@ -13,7 +13,7 @@ # app_name = "matmul_ijk" # app_name = "matmul_ijk" # app_name = "mat_mattransmul" -app_name = "mat_elemmul" +app_name = "mat_vecmul_ij" const_val = 2 # only for mat_mattransmul @@ -434,4 +434,4 @@ with open("../tiles_accumulation.json", "w") as file: json.dump(tiles_accumulation, file) -print("there are ", tile_str, " tiles") \ No newline at end of file +print("there are ", tile, " tiles") \ No newline at end of file From 5be8b8f8b3e25ba31a086f25a9e217d0ef532369 Mon Sep 17 00:00:00 2001 From: Akhilesh Varadan Balasingam Date: Sun, 20 Aug 2023 11:54:21 -0700 Subject: [PATCH 30/39] minor matrix tiling updates --- scripts/tiling/tile_ext.sh | 3 ++- setup_tiling_mat.py | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/scripts/tiling/tile_ext.sh b/scripts/tiling/tile_ext.sh index e43b9c87..e9d7b146 100755 --- a/scripts/tiling/tile_ext.sh +++ b/scripts/tiling/tile_ext.sh @@ -6,7 +6,8 @@ BENCHMARKS=( # matmul_ikj # mat_mattransmul - mat_sddmm + # mat_sddmm + mat_vecmul_ij # mat_residual ) diff --git a/setup_tiling_mat.py b/setup_tiling_mat.py index bd12c015..e9739d24 100644 --- a/setup_tiling_mat.py +++ b/setup_tiling_mat.py @@ -9,13 +9,13 @@ ## PARAMS ###################################################################### # 'rel5', 'mk9-b1', -data = ['rel5'] +data = ['west2021'] # app_name = "mat_elemadd" # app_name = "mat_elemmul" -app_name = "mat_sddmm" +# app_name = "mat_sddmm" # app_name = "matmul_ijk" # app_name = "mat_elemmul" -# app_name = "mat_vecmul_ij" +app_name = "mat_vecmul_ij" # app_name = "mat_residual" # data_file = open("scripts/tensor_names/suitesparse_valid_mid50.txt") # data_file_lines = data_file.readlines() From 887b0a57db99fbfcd59aaafb8e0ce87d260e2683 Mon Sep 17 00:00:00 2001 From: Akhilesh Varadan Balasingam Date: Mon, 21 Aug 2023 18:10:27 -0700 Subject: [PATCH 31/39] all matrix apps tiling flow --- find_max_tilesize.py | 5 ++++- sam/sim/src/tiling/tile.py | 3 ++- scripts/tiling/tile_ext.sh | 1 + setup_tiling_mat.py | 2 +- 4 files changed, 8 insertions(+), 3 deletions(-) diff --git a/find_max_tilesize.py b/find_max_tilesize.py index 8eb96038..bab4549a 100644 --- a/find_max_tilesize.py +++ b/find_max_tilesize.py @@ -34,7 +34,7 @@ def check_keyword_in_output(command, keyword): return False -tile_size = 450 +tile_size = 300 step = 10 for _ in range(20): @@ -66,6 +66,9 @@ def check_keyword_in_output(command, keyword): tile_size -= step step //= 2 + if tile_size == 450: + break + if step == 0: if _ >= 15: step = 10 diff --git a/sam/sim/src/tiling/tile.py b/sam/sim/src/tiling/tile.py index 637aa71d..35822d5e 100644 --- a/sam/sim/src/tiling/tile.py +++ b/sam/sim/src/tiling/tile.py @@ -29,7 +29,8 @@ "mat_mattransmul": "X(i,j)=B(j,i)*c(j)+d(i) -f=X:ss -f=B:ss -f=c:ss:0 -f=d:ss:0 
-s=reorder(i,j)", "mat_vecmul_ij" : "X(i,j)=B(i,j)*c(j) -f=X:ss -f=B:ss -f=c:ss:0 -s=reorder(i,j)", "mat_residual": "X(i,j)=b(i)-C(i,j)*d(j) -f=X:ss -f=C:ss -f=b:ss:0 -f=d:ss:0 -s=reorder(i,j)", - "mat_sddmm": "X(i,j)=B(i,j)*C(i,k)*D(k,j) -f=X:ss -f=B:ss -f=C:dd -f=D:dd:1,0 -s=reorder(i,j,k)"} + "mat_sddmm": "X(i,j)=B(i,j)*C(i,k)*D(k,j) -f=X:ss -f=B:ss -f=C:dd -f=D:dd:1,0 -s=reorder(i,j,k)", + "mat_elemadd3": "X(i,j)=B(i,j)+C(i,j)+D(i,j) -f=X:ss -f=B:ss -f=C:ss -f=D:ss"} def print_dict(dd): diff --git a/scripts/tiling/tile_ext.sh b/scripts/tiling/tile_ext.sh index e9d7b146..234e588f 100755 --- a/scripts/tiling/tile_ext.sh +++ b/scripts/tiling/tile_ext.sh @@ -7,6 +7,7 @@ BENCHMARKS=( # matmul_ikj # mat_mattransmul # mat_sddmm + # mat_vecmul_ij mat_vecmul_ij # mat_residual ) diff --git a/setup_tiling_mat.py b/setup_tiling_mat.py index e9739d24..6fa553dc 100644 --- a/setup_tiling_mat.py +++ b/setup_tiling_mat.py @@ -9,7 +9,7 @@ ## PARAMS ###################################################################### # 'rel5', 'mk9-b1', -data = ['west2021'] +data = ['adder_trans_02'] # app_name = "mat_elemadd" # app_name = "mat_elemmul" # app_name = "mat_sddmm" From 0f0b87efc95e80858ff850b447f4c5de2b4846c7 Mon Sep 17 00:00:00 2001 From: Akhilesh Varadan Balasingam Date: Wed, 23 Aug 2023 20:27:32 -0700 Subject: [PATCH 32/39] loading in same vectors as CPU --- count_nnz_tiling.py | 34 +++--- sam/sim/src/tiling/tile.py | 17 ++- sam/util.py | 7 ++ scripts/tiling/tile_ext.sh | 4 +- setup_tiling_mat.py | 244 +++++++++++++++++-------------------- tile_pairing.py | 70 ++++++++++- 6 files changed, 217 insertions(+), 159 deletions(-) diff --git a/count_nnz_tiling.py b/count_nnz_tiling.py index c98bdd3a..c05b054b 100644 --- a/count_nnz_tiling.py +++ b/count_nnz_tiling.py @@ -10,33 +10,29 @@ def count_nonzeros(matrix_values_file): tile_dirs = glob.glob("SPARSE_TESTS/MAT_TMP_DIR/tile*") num_tiles = len(tile_dirs) +limit = 900 print("there are ", num_tiles, "tiles") -limit = 1000 -tot_num_nonzeros = 0 + for tile_num in range(0,num_tiles): + tot_num_nonzeros = 0 + tensor_C_values_file = f'SPARSE_TESTS/MAT_TMP_DIR/tile{tile_num}/tensor_C_mode_vals' num_nonzeros = count_nonzeros(tensor_C_values_file) - if num_nonzeros >= limit: - print("error! too many nonzeros in tensorC, tile", tile_num) - # raise Exception - -# tot_num_nonzeros += num_nonzeros - -# average_num_nonzeros = tot_num_nonzeros / 9 -# print("for matrix C, the average number of non-zero values is", average_num_nonzeros) + tot_num_nonzeros += num_nonzeros -tot_num_nonzeros = 0 - -for tile_num in range(0,num_tiles): tensor_C_values_file = f'SPARSE_TESTS/MAT_TMP_DIR/tile{tile_num}/tensor_B_mode_vals' num_nonzeros = count_nonzeros(tensor_C_values_file) - if num_nonzeros >= limit: - print("error! too many nonzeros in tensorB, tile", tile_num) - # raise Exception -# tot_num_nonzeros += num_nonzeros + tot_num_nonzeros += num_nonzeros -# average_num_nonzeros = tot_num_nonzeros / 6 -# print("for matrix B, the average number of non-zero values is", average_num_nonzeros) + tensor_C_values_file = f'SPARSE_TESTS/MAT_TMP_DIR/tile{tile_num}/tensor_D_mode_vals' + + num_nonzeros = count_nonzeros(tensor_C_values_file) + tot_num_nonzeros += tot_num_nonzeros + + if tot_num_nonzeros >= limit: + print("tot_num_nonzeros: ", tot_num_nonzeros) + print("error! 
too many nonzeros in matrices") + raise Exception \ No newline at end of file diff --git a/sam/sim/src/tiling/tile.py b/sam/sim/src/tiling/tile.py index 35822d5e..bf71a7bb 100644 --- a/sam/sim/src/tiling/tile.py +++ b/sam/sim/src/tiling/tile.py @@ -14,7 +14,7 @@ from pathlib import Path from sam.util import SUITESPARSE_PATH, SuiteSparseTensor, InputCacheSuiteSparse, ScipyTensorShifter, \ - FROSTT_PATH, FrosttTensor, PydataSparseTensorDumper, InputCacheTensor + FROSTT_PATH, FrosttTensor, PydataSparseTensorDumper, InputCacheTensor, constructOtherMatKey, constructOtherVecKey from sam.sim.src.tiling.process_expr import parse_all # FIXME: This should not be here... Set your SAM_HOME directory @@ -274,7 +274,6 @@ def cotile_coo(tensor_names, tensors, permutation_strs, ivar_strs, split_map, hi def get_other_tensors(app_str, tensor, other_nonempty=True): tensors = [tensor] - if "matmul" in app_str: print("Writing shifted...") shifted = ScipyTensorShifter().shiftLastMode(tensor) @@ -335,8 +334,18 @@ def get_other_tensors(app_str, tensor, other_nonempty=True): elif "mat_vecmul" in app_str: print("Writing other tensors...") - rows, cols = tensor.shape - tensor_c = scipy.sparse.random(cols, 1, data_rvs=np.ones).toarray().flatten() + tensorName = args.input_tensor + # c(j) use mode1 + variant = "mode1" + path = constructOtherVecKey(tensorName,variant) + tensor_c_from_path = FrosttTensor(path) + tensor_c = tensor_c_from_path.load().todense() + + print("TENSOR SHAPE: ", tensor.shape) + print("TENSOR_C SHAPE: ", tensor_c.shape) + + # rows, cols = tensor.shape + # tensor_c = scipy.sparse.random(cols, 1, data_rvs=np.ones).toarray().flatten() if other_nonempty: tensor_c[0] = 1 diff --git a/sam/util.py b/sam/util.py index fbe308c9..85ab59a3 100644 --- a/sam/util.py +++ b/sam/util.py @@ -35,6 +35,13 @@ def safeCastScipyTensorToInts(tensor): data[i] = round_sparse(tensor.data[i]) return scipy.sparse.coo_matrix(tensor.coords, data, tensor.shape) +def constructOtherVecKey(tensorName, variant, sparsity=0.001): + path = os.getenv('TACO_TENSOR_PATH') + return f"{path}/{tensorName}-vec_{variant}-{sparsity}.tns" + +def constructOtherMatKey(tensorName, variant, sparsity=0.001): + path = os.getenv('TACO_TENSOR_PATH') + return f"{path}/../suitesparse/{tensorName}_{variant}.mtx" # ScipyTensorShifter shifts all elements in the last mode # of the input scipy/sparse tensor by one. 
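For reference, a minimal standalone sketch of how the two path helpers added to sam/util.py above resolve dataset locations. The TACO_TENSOR_PATH value and the tensor/variant names below are illustrative placeholders rather than values taken from the patch; tile.py in this same commit calls constructOtherVecKey with the "mode1" variant when loading the c(j) vector.

import os

# Assumption: TACO_TENSOR_PATH points at the formatted-tensor root directory.
os.environ.setdefault("TACO_TENSOR_PATH", "/data/taco-tensors")

def constructOtherVecKey(tensorName, variant, sparsity=0.001):
    # mirrors the vector-key helper added to sam/util.py in this patch
    path = os.getenv("TACO_TENSOR_PATH")
    return f"{path}/{tensorName}-vec_{variant}-{sparsity}.tns"

def constructOtherMatKey(tensorName, variant, sparsity=0.001):
    # mirrors the matrix-key helper added to sam/util.py in this patch
    path = os.getenv("TACO_TENSOR_PATH")
    return f"{path}/../suitesparse/{tensorName}_{variant}.mtx"

print(constructOtherVecKey("qiulp", "mode1"))
# -> /data/taco-tensors/qiulp-vec_mode1-0.001.tns
print(constructOtherMatKey("qiulp", "shifted"))   # "shifted" is a hypothetical variant name
# -> /data/taco-tensors/../suitesparse/qiulp_shifted.mtx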
diff --git a/scripts/tiling/tile_ext.sh b/scripts/tiling/tile_ext.sh index 234e588f..e0dba099 100755 --- a/scripts/tiling/tile_ext.sh +++ b/scripts/tiling/tile_ext.sh @@ -8,8 +8,10 @@ BENCHMARKS=( # mat_mattransmul # mat_sddmm # mat_vecmul_ij - mat_vecmul_ij + # mat_vecmul_ij # mat_residual + mat_elemadd3 + # matmul_ijk ) appname=$3 diff --git a/setup_tiling_mat.py b/setup_tiling_mat.py index 6fa553dc..c68b3fb3 100644 --- a/setup_tiling_mat.py +++ b/setup_tiling_mat.py @@ -8,19 +8,31 @@ ## PARAMS ###################################################################### -# 'rel5', 'mk9-b1', +# data = ['rajat12'] + data = ['adder_trans_02'] +tilesizes = [30] # app_name = "mat_elemadd" # app_name = "mat_elemmul" # app_name = "mat_sddmm" -# app_name = "matmul_ijk" +# app_name = "matmul_ijk" +app_name = "mat_elemadd3" # app_name = "mat_elemmul" -app_name = "mat_vecmul_ij" # app_name = "mat_residual" -# data_file = open("scripts/tensor_names/suitesparse_valid_mid50.txt") + +# data = [] +# data_file = open("onyx_final_eval_mid50_tensor_names.txt") # data_file_lines = data_file.readlines() # for line in data_file_lines: # data.append(line[:-1]) + +# with open('matmul_tilesize_list.txt', 'r') as file: +# lines = file.readlines() + +# tilesizes = [int(line.strip()) for line in lines] +print("TILESIZES: ", tilesizes) +print("DATA: ", data) + mode_to_exclude = 0 addition_vector_name = "d" #mattransmul (d) and residual (b) only @@ -28,16 +40,21 @@ samples_directory = f"samples/{app_name}" docker_path = f"avb03-sparse-tiling" use_dataset_files = False -matrix_app=True ############################################################################### -os.environ["SUITESPARSE_PATH"] = "/nobackup/owhsu/sparse-datasets/suitesparse/" -os.environ["FROSTT_PATH"] = "/nobackup/owhsu/sparse-datasets/frostt/" -os.environ["SUITESPARSE_FORMATTED_PATH"] = "/home/avb03/sam/SUITESPARSE_FORMATTED" -os.environ["FROSTT_FORMATTED_TACO_PATH"] = "/home/avb03/sam/FROST_FORMATTED_TACO" -os.environ["FROSTT_FORMATTED_PATH"] = "/home/avb03/sam/FROST_FORMATTED" -os.environ["TACO_TENSOR_PATH"] = "/home/avb03/sam/TACO_TENSOR" +def write_to_line(file_path, line_number, new_content): + with open(file_path, 'r') as file: + lines = file.readlines() + + if line_number > len(lines) or line_number < 1: + # Line number is out of range + return + + lines[line_number - 1] = new_content + '\n' + + with open(file_path, 'w') as file: + file.writelines(lines) def replace_ones_with_zeros(mtx_file): with open(mtx_file, 'r') as file: @@ -54,132 +71,95 @@ def replace_ones_with_zeros(mtx_file): file.writelines(new_lines) -if(matrix_app): - for datum in data: - rmdir = f"rm -rf tiles/{app_name}" - os.system(rmdir) +i = 0 +for datum in data: + tilesize = tilesizes[i] + + yaml_file = "sam/sim/src/tiling/memory_config_onyx.yaml" + mem_tile_line = f"Mem_tile_size: {tilesize}" + print(mem_tile_line) + write_to_line(yaml_file, 19, mem_tile_line) + + rmdir = f"rm -rf tiles/{app_name}" + os.system(rmdir) - mtx_file = glob.glob(f"{SUITESPARSE_PATH}/{datum}.mtx")[0] - os.makedirs("extensor_mtx", exist_ok=True) - shutil.copy(mtx_file,f"extensor_mtx/{datum}.mtx") + mtx_file = glob.glob(f"{SUITESPARSE_PATH}/{datum}.mtx")[0] + os.makedirs("extensor_mtx", exist_ok=True) + shutil.copy(mtx_file,f"extensor_mtx/{datum}.mtx") + + command = f"./scripts/suitesparse_memory_model_runner.sh {datum} {app_name}" + os.system(command) + + directories = glob.glob(f'tiles/{app_name}/formatted/tensor_[a-z]*') + + #for vectors, do cleanup + for directory in directories: + print(directory) + 
match = re.search(r'tensor_([a-z])', directory) + if match: + lowercase_letter = match.group(1) - command = f"./scripts/suitesparse_memory_model_runner.sh {datum} {app_name}" - os.system(command) + crd_file = os.path.join(directory, f"{lowercase_letter}{mode_to_exclude}_crd.txt") + seg_file = os.path.join(directory, f"{lowercase_letter}{mode_to_exclude}_seg.txt") + + # if os.path.exists(crd_file): + # os.remove(crd_file) - directories = glob.glob(f'tiles/{app_name}/formatted/tensor_[a-z]*') + # if os.path.exists(seg_file): + # os.remove(seg_file) - #for vectors, do cleanup - for directory in directories: - print(directory) - match = re.search(r'tensor_([a-z])', directory) - if match: - lowercase_letter = match.group(1) + samples_with_addition_vector = None + + # dense tile replacement for addition + if app_name == "mat_mattransmul" or app_name == "mat_residual": + # samples_with_addition_vector = glob.glob(f"{samples_directory}/*[{addition_vector_name}]*") + # samples_with_addition_vector = glob.glob(f"{samples_directory}/mtm_w_0_1/tensor_d_tile_0_0") + samples_with_addition_vector = glob.glob(f"{samples_directory}/mtm_w_0_1_BAK") + + + print(samples_with_addition_vector) + #fill in missing tiles with blanks + for sample in samples_with_addition_vector: + file_path = os.path.join(sample, f"{addition_vector_name}_vals.txt") + + with open(file_path, "r") as file: + file_contents = file.read() - crd_file = os.path.join(directory, f"{lowercase_letter}{mode_to_exclude}_crd.txt") - seg_file = os.path.join(directory, f"{lowercase_letter}{mode_to_exclude}_seg.txt") + file_contents = file_contents.replace("1", "0") - # if os.path.exists(crd_file): - # os.remove(crd_file) + with open(file_path, "w") as file: + file.write(file_contents) - # if os.path.exists(seg_file): - # os.remove(seg_file) + tile_range = [(0,i) for i in range(8)] + [(1,i) for i in range(4)] - samples_with_addition_vector = None - - # dense tile replacement for addition - if app_name == "mat_mattransmul" or app_name == "mat_residual": - # samples_with_addition_vector = glob.glob(f"{samples_directory}/*[{addition_vector_name}]*") - # samples_with_addition_vector = glob.glob(f"{samples_directory}/mtm_w_0_1/tensor_d_tile_0_0") - samples_with_addition_vector = glob.glob(f"{samples_directory}/mtm_w_0_1_BAK") - - - print(samples_with_addition_vector) - #fill in missing tiles with blanks - for sample in samples_with_addition_vector: - file_path = os.path.join(sample, f"{addition_vector_name}_vals.txt") - - with open(file_path, "r") as file: - file_contents = file.read() - - file_contents = file_contents.replace("1", "0") - - with open(file_path, "w") as file: - file.write(file_contents) - - tile_range = [(0,i) for i in range(8)] + [(1,i) for i in range(4)] - - for i,j in tile_range: - tile_dir = f"tiles/{app_name}/formatted/tensor_{addition_vector_name}_tile_{i}_{j}" - - if not os.path.exists(tile_dir): - # replace_ones_with_zeros("samples/mat_mattransmul/tensor_d_dense_mtx.mtx") - - # copy_over_to_mtx_dir = f"cp samples/mat_mattransmul/tensor_d_dense_gold_stash.mtx tiles/{app_name}/mtx/tensor_{addition_vector_name}_tile_{i}_{j}.mtx" - # os.system(copy_over_to_mtx_dir) - - sample_tile_dir = samples_with_addition_vector[0] - - if os.path.exists(sample_tile_dir): - shutil.copytree(sample_tile_dir, tile_dir) - - if(use_dataset_files): - assert os.path.exists("SUITESPARSE_FORMATTED") - - temp_name = app_name - if app_name == "mat_vecmul_ij": - temp_name = "mat_vecmul" - - app_path_additional = f"SUITESPARSE_FORMATTED/{datum}/{temp_name}/" - - 
for tens in other_tensors: - valid_dirs = glob.glob(f"tiles/{app_name}/formatted/tensor_{tens}*") - for d in valid_dirs: - remove_tens = f"rm {d}/*" - print(remove_tens) - os.system(remove_tens) - - files_to_cp = glob.glob(f"{app_path_additional}tensor_{tens}*") - - for file in files_to_cp: - if "mode_0_crd" in file: - copy_rename = f"cp {file} {d}/{tens}0_crd.txt" - print(copy_rename) - os.system(copy_rename) - elif "mode_1_crd" in file: - copy_rename = f"cp {file} {d}/{tens}1_crd.txt" - print(copy_rename) - os.system(copy_rename) - elif "mode_0_seg" in file: - copy_rename = f"cp {file} {d}/{tens}0_seg.txt" - print(copy_rename) - os.system(copy_rename) - elif "mode_1_seg" in file: - copy_rename = f"cp {file} {d}/{tens}1_seg.txt" - print(copy_rename) - os.system(copy_rename) - elif "vals" in file: - copy_rename = f"cp {file} {d}/{tens}_vals.txt" - print(copy_rename) - os.system(copy_rename) - elif "shape" in file: - copy_rename = f"cp {file} {d}/{tens}_shape.txt" - print(copy_rename) - os.system(copy_rename) - - - dump_gold_tiles = f"python3 scripts/tiling/generate_gold_mattransmul.py --yaml_name memory_config_extensor_17M_llb.yaml" - os.system(dump_gold_tiles) - - # os.makedirs("tiles_compiled", exist_ok=True) - # copy_rename = f"cp -r tiles/{app_name} tiles_compiled/{app_name}_{datum}" - # print(copy_rename) - # os.system(copy_rename) - - docker_clean = f"docker exec {docker_path} rm -r /aha/garnet/tiles_{app_name}_{datum}" - print(docker_clean) - os.system(docker_clean) - - docker_copy_command = f"docker cp tiles {docker_path}:/aha/garnet/tiles_{app_name}_{datum}" - print(docker_copy_command) - os.system(docker_copy_command) - + for i,j in tile_range: + tile_dir = f"tiles/{app_name}/formatted/tensor_{addition_vector_name}_tile_{i}_{j}" + + if not os.path.exists(tile_dir): + # replace_ones_with_zeros("samples/mat_mattransmul/tensor_d_dense_mtx.mtx") + + # copy_over_to_mtx_dir = f"cp samples/mat_mattransmul/tensor_d_dense_gold_stash.mtx tiles/{app_name}/mtx/tensor_{addition_vector_name}_tile_{i}_{j}.mtx" + # os.system(copy_over_to_mtx_dir) + + sample_tile_dir = samples_with_addition_vector[0] + + if os.path.exists(sample_tile_dir): + shutil.copytree(sample_tile_dir, tile_dir) + + dump_gold_tiles = f"python3 scripts/tiling/generate_gold_mattransmul.py --yaml_name memory_config_extensor_17M_llb.yaml" + os.system(dump_gold_tiles) + + # os.makedirs("tiles_compiled", exist_ok=True) + # copy_rename = f"cp -r tiles/{app_name} tiles_compiled/{app_name}_{datum}" + # print(copy_rename) + # os.system(copy_rename) + + docker_clean = f"docker exec {docker_path} rm -r /aha/garnet/tiles_{app_name}_{datum}" + print(docker_clean) + os.system(docker_clean) + + docker_copy_command = f"docker cp tiles {docker_path}:/aha/garnet/tiles_{app_name}_{datum}" + print(docker_copy_command) + os.system(docker_copy_command) + + i = i+1 diff --git a/tile_pairing.py b/tile_pairing.py index e5e1dc5d..638df2da 100644 --- a/tile_pairing.py +++ b/tile_pairing.py @@ -13,7 +13,7 @@ # app_name = "matmul_ijk" # app_name = "matmul_ijk" # app_name = "mat_mattransmul" -app_name = "mat_vecmul_ij" +app_name = "mat_elemadd3" const_val = 2 # only for mat_mattransmul @@ -21,7 +21,7 @@ b_tensors = glob.glob(f"/home/avb03/sam/tiles/{app_name}/formatted/tensor_B*") c_tensors = glob.glob(f"/home/avb03/sam/tiles/{app_name}/formatted/tensor_C*") -d_tensors = glob.glob(f"/aha/garnet/tiles_{app_name}_{test}/{app_name}/formatted/tensor_D*") +d_tensors = glob.glob(f"/home/avb03/sam/tiles/{app_name}/formatted/tensor_D*") print("b_tensors: ", 
b_tensors) print("c_tensors: ", c_tensors) @@ -31,7 +31,8 @@ # c_tensors = glob.glob(f"/aha/garnet/tiles_{app_name}_{test}/formatted/tensor_C*") b_vec_tensors = glob.glob(f"/aha/garnet/tiles_{app_name}_{test}/{app_name}/formatted/tensor_b*") -c_vec_tensors = glob.glob(f"/aha/garnet/tiles_{app_name}_{test}/{app_name}/formatted/tensor_c*") +c_vec_tensors = glob.glob(f"/home/avb03/sam/tiles/{app_name}/formatted/tensor_c*") +print("c_vec_tensors: ", c_vec_tensors) d_vec_tensors = glob.glob(f"/aha/garnet/tiles_{app_name}_{test}/{app_name}/formatted/tensor_d*") d_loc_paired = [] @@ -429,6 +430,69 @@ tile = tile + 1 +elif app_name == "mat_elemadd3": + for b in b_tensors: + for c in c_tensors: + for d in d_tensors: + tile_str = "tile" + str(tile) + b_loc = b[-7:] + c_loc = c[-7:] + b_loc = b_loc.split("_") + c_loc = c_loc.split("_") + d_loc = d[-7:] + d_loc = d_loc.split("_") + + # if(b_loc == c_loc and b_loc != d_loc): + # b_equal_c_no_d += 1 + # if(c_loc == d_loc and b_loc != c_loc): + # c_equal_d_no_b += 1 + # if(b_loc == d_loc and b_loc != c_loc): + # b_equal_d_no_c += 1 + + if(b_loc == c_loc and b_loc == d_loc): + print(b, c, d) + if not os.path.exists(f"./MAT_TMP_DIR/{tile_str}"): + os.mkdir(f"./MAT_TMP_DIR/{tile_str}") + shutil.copy(f"{b}/B0_crd.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_B_mode_0_crd") + shutil.copy(f"{b}/B0_seg.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_B_mode_0_seg") + + shutil.copy(f"{b}/B1_crd.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_B_mode_1_crd") + shutil.copy(f"{b}/B1_seg.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_B_mode_1_seg") + + shutil.copy(f"{b}/B_vals.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_B_mode_vals") + + shutil.copy(f"{b}/B_shape.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_B_mode_shape") + + shutil.copy(f"{c}/C0_crd.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_C_mode_0_crd") + shutil.copy(f"{c}/C0_seg.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_C_mode_0_seg") + + shutil.copy(f"{c}/C1_crd.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_C_mode_1_crd") + shutil.copy(f"{c}/C1_seg.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_C_mode_1_seg") + + shutil.copy(f"{c}/C_vals.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_C_mode_vals") + + shutil.copy(f"{c}/C_shape.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_C_mode_shape") + + shutil.copy(f"{d}/D0_crd.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_D_mode_0_crd") + shutil.copy(f"{d}/D0_seg.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_D_mode_0_seg") + + shutil.copy(f"{d}/D1_crd.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_D_mode_1_crd") + shutil.copy(f"{d}/D1_seg.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_D_mode_1_seg") + + shutil.copy(f"{d}/D_vals.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_D_mode_vals") + + shutil.copy(f"{d}/D_shape.txt", f"./MAT_TMP_DIR/{tile_str}/tensor_D_mode_shape") + + # subprocess.call(["aha", + # "regress", + # "fast"], + # text=True) + + # shutil.copy("/aha/garnet/SPARSE_TESTS/GLB_DIR/matmul_ijk_combined_seed_tile1/output_gold.npy", "/aha/garnet/SPARSE_TESTS/GLB_DIR/matmul_ijk_combined_seed_tile1/bin") + # shutil.copytree("/aha/garnet/SPARSE_TESTS/GLB_DIR/matmul_ijk_combined_seed_tile1/bin", f"/aha/garnet/SPARSE_TESTS/{tile_str}") + tile = tile + 1 + # print("we are on tile ", tile) + print("tiles_accumulation: ", tiles_accumulation) with open("../tiles_accumulation.json", "w") as file: From 60f4ad7bb7c26ea490e71ecfd634621f4fb6e655 Mon Sep 17 00:00:00 2001 From: Akhilesh Varadan Balasingam Date: Fri, 1 Sep 2023 20:07:07 -0700 Subject: [PATCH 33/39] input arg setup --- setup_tiling_mat.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git 
a/setup_tiling_mat.py b/setup_tiling_mat.py index c68b3fb3..0ae6cde8 100644 --- a/setup_tiling_mat.py +++ b/setup_tiling_mat.py @@ -3,6 +3,7 @@ import shutil import os import re +import sys from sam.util import SUITESPARSE_PATH @@ -10,13 +11,13 @@ # data = ['rajat12'] -data = ['adder_trans_02'] -tilesizes = [30] +data = [sys.argv[2]] +tilesizes = [int(sys.argv[3])] # app_name = "mat_elemadd" # app_name = "mat_elemmul" # app_name = "mat_sddmm" # app_name = "matmul_ijk" -app_name = "mat_elemadd3" +app_name = sys.argv[1] # app_name = "mat_elemmul" # app_name = "mat_residual" From 19080a3f2e9a1c6b8a42d34ce4537889089c39e1 Mon Sep 17 00:00:00 2001 From: Akhilesh Varadan Balasingam Date: Sat, 2 Sep 2023 13:29:22 -0700 Subject: [PATCH 34/39] latest update tiling --- maximum_tiling.py | 105 +++++++++++++++++++++++++++++++------ scripts/tiling/tile_ext.sh | 5 +- setup_tiling_mat.py | 4 +- 3 files changed, 94 insertions(+), 20 deletions(-) diff --git a/maximum_tiling.py b/maximum_tiling.py index 3be52642..317ea607 100644 --- a/maximum_tiling.py +++ b/maximum_tiling.py @@ -41,6 +41,40 @@ def pair_tiles(app_name): if (b_loc[1] == c_loc[0] and b_loc[3] == c_loc[2]): tile_pairing[tile] = [b, c] tile += 1 + elif "elemmul" in app_name: + operands = ["B", "C"] + operand_files = get_files_from_dir(path, operands) + b_tensors = operand_files["B"] + c_tensors = operand_files["C"] + + tile = 0 + for b in b_tensors: + for c in c_tensors: + b_loc = get_tile_id(b) + c_loc = get_tile_id(c) + if (b_loc == c_loc): + tile_pairing[tile] = [b, c] + tile += 1 + elif "elemadd3" in app_name: + operands = ["B", "C", "D"] + operand_files = get_files_from_dir(path, operands) + b_tensors = operand_files["B"] + c_tensors = operand_files["C"] + d_tensors = operand_files["D"] + + tile = 0 + for b in b_tensors: + for c in c_tensors: + b_loc = get_tile_id(b) + c_loc = get_tile_id(c) + if (b_loc != c_loc): + continue + + for d in d_tensors: + d_loc = get_tile_id(d) + if (b_loc == c_loc and c_loc == d_loc): + tile_pairing[tile] = [b, c, d] + tile += 1 return tile_pairing @@ -56,16 +90,44 @@ def compute_outputs(tile_pairing, app_name, limit=900): C_mat = read_mtx(value[1]) C_mat = np.transpose(C_mat) out = np.matmul(B_mat, C_mat) - # if np.count_nonzero(out) > limit: - if np.any(out): + if np.count_nonzero(out) > limit or np.count_nonzero(B_mat) > limit or np.count_nonzero(C_mat) > limit: + # if np.any(out): print("tile = ", key) print("B_tile_ID = ", value[0]) print("C_tile_ID = ", value[1]) print("out = ", out) print("count = ", np.count_nonzero(out)) - # return EarlyReturn() - breakpoint() - break + return EarlyReturn() + elif "elemmul" in app_name: + B_mat = read_mtx(value[0]) + C_mat = read_mtx(value[1]) + out = np.multiply(B_mat, C_mat) + # if np.any(out): + if np.count_nonzero(out) > limit or np.count_nonzero(B_mat) > limit or np.count_nonzero(C_mat) > limit: + # if np.count_nonzero(out) > limit or (np.count_nonzero(B_mat) + np.count_nonzero(C_mat)) > limit: + print("tile = ", key) + print("B_tile_ID = ", value[0]) + print("C_tile_ID = ", value[1]) + print("out = ", out) + print("count = ", np.count_nonzero(out)) + return EarlyReturn() + elif "elemadd3" in app_name: + B_mat = read_mtx(value[0]) + C_mat = read_mtx(value[1]) + D_mat = read_mtx(value[2]) + + out = np.add(np.add(B_mat, C_mat), D_mat) + # if np.any(out): + if np.count_nonzero(out) > limit or np.count_nonzero(B_mat) > limit or np.count_nonzero(C_mat) > limit or np.count_nonzero(D_mat) > limit: + # if np.count_nonzero(out) > limit or (np.count_nonzero(B_mat) + 
np.count_nonzero(C_mat)) > limit: + print("tile = ", key) + print("B_tile_ID = ", value[0]) + print("C_tile_ID = ", value[1]) + print("D_tile_ID = ", value[2]) + print("out = ", out) + print("count = ", np.count_nonzero(out)) + return EarlyReturn() + return None def find_optimal_tilesize(app_name, datum, initial=30, step_size=10): @@ -73,7 +135,8 @@ def find_optimal_tilesize(app_name, datum, initial=30, step_size=10): max_tile_size = initial prev_tile_pairing = None - for _ in range(10): + # while True: + for _ in range(50): call_tiling = f"python3 setup_tiling_mat.py {app_name} {datum} {tile_size} > temp.txt" os.system(call_tiling) print(call_tiling) @@ -90,18 +153,28 @@ def find_optimal_tilesize(app_name, datum, initial=30, step_size=10): print("***********************") prev_tile_pairing = tile_pairing - return None + return tile_size, prev_tile_pairing if __name__ == "__main__": - app_name = "matmul_ijk" - datum = "qiulp" + max_list = {} + for i in range(1, 11): + app_name = "matmul_ijk" + datum = f"matrix_sp80_sm_{i}" - tile_pairing = pair_tiles(app_name) - compute_outputs(tile_pairing, app_name) + # tile_pairing = pair_tiles(app_name) + # compute_outputs(tile_pairing, app_name) + + max_tile_size, tile_pairing = find_optimal_tilesize(app_name, datum, initial=30, step_size=10) + print("-"*20) + print(f"MAX TILESIZE for {app_name}, {datum}: {max_tile_size}") + print(f"NUMBER OF TILES: {len(tile_pairing.keys())}") + print("-"*20) + + max_list[datum] = [max_tile_size, len(tile_pairing.keys())] + + call_tiling = f"python3 setup_tiling_mat.py {app_name} {datum} {max_tile_size} > temp.txt" + os.system(call_tiling) + print(call_tiling) - # max_tile_size, tile_pairing = find_optimal_tilesize(app_name, datum) - # print("-"*20) - # print(f"MAX TILESIZE for {app_name}, {datum}: {max_tile_size}") - # print(f"NUMBER OF TILES: {len(tile_pairing.keys())}") - # print("-"*20) \ No newline at end of file + print(max_list) \ No newline at end of file diff --git a/scripts/tiling/tile_ext.sh b/scripts/tiling/tile_ext.sh index e0dba099..3483df89 100755 --- a/scripts/tiling/tile_ext.sh +++ b/scripts/tiling/tile_ext.sh @@ -10,8 +10,9 @@ BENCHMARKS=( # mat_vecmul_ij # mat_vecmul_ij # mat_residual - mat_elemadd3 - # matmul_ijk + # mat_elemadd3 + matmul_ijk + # mat_elemmul ) appname=$3 diff --git a/setup_tiling_mat.py b/setup_tiling_mat.py index 0ae6cde8..84ff2051 100644 --- a/setup_tiling_mat.py +++ b/setup_tiling_mat.py @@ -31,8 +31,8 @@ # lines = file.readlines() # tilesizes = [int(line.strip()) for line in lines] -print("TILESIZES: ", tilesizes) -print("DATA: ", data) +# print("TILESIZES: ", tilesizes) +# print("DATA: ", data) mode_to_exclude = 0 addition_vector_name = "d" #mattransmul (d) and residual (b) only From a80b449bd503649942e8bbacf269aaf42e247227 Mon Sep 17 00:00:00 2001 From: Akhilesh Varadan Balasingam Date: Sat, 2 Sep 2023 13:30:17 -0700 Subject: [PATCH 35/39] helper scripts (need cleanup) --- count_nnz_tiling.py | 26 ++++++++++++++++++++------ tile_pairing.py | 4 ++-- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/count_nnz_tiling.py b/count_nnz_tiling.py index c05b054b..c013fa4f 100644 --- a/count_nnz_tiling.py +++ b/count_nnz_tiling.py @@ -1,4 +1,5 @@ import glob +import sys def count_nonzeros(matrix_values_file): with open(matrix_values_file, 'r') as values_file: matrix_values = [float(val) for val in values_file.readlines()] @@ -13,7 +14,10 @@ def count_nonzeros(matrix_values_file): limit = 900 print("there are ", num_tiles, "tiles") - +sparsity_B = 0 +sparsity_C = 0 +# 
tilesize=int(sys.argv[1])**2 +tot_num_nonzeros = 0 for tile_num in range(0,num_tiles): tot_num_nonzeros = 0 @@ -21,18 +25,28 @@ def count_nonzeros(matrix_values_file): num_nonzeros = count_nonzeros(tensor_C_values_file) tot_num_nonzeros += num_nonzeros + if num_nonzeros >= limit: + print("num_nonzeros: ", num_nonzeros) + print("error! too many nonzeros in INPUT matrices") + raise Exception tensor_C_values_file = f'SPARSE_TESTS/MAT_TMP_DIR/tile{tile_num}/tensor_B_mode_vals' num_nonzeros = count_nonzeros(tensor_C_values_file) tot_num_nonzeros += num_nonzeros + if num_nonzeros >= limit: + print("num_nonzeros: ", num_nonzeros) + print("error! too many nonzeros in INPUT matrices") + raise Exception - tensor_C_values_file = f'SPARSE_TESTS/MAT_TMP_DIR/tile{tile_num}/tensor_D_mode_vals' - - num_nonzeros = count_nonzeros(tensor_C_values_file) - tot_num_nonzeros += tot_num_nonzeros if tot_num_nonzeros >= limit: print("tot_num_nonzeros: ", tot_num_nonzeros) print("error! too many nonzeros in matrices") - raise Exception \ No newline at end of file + raise Exception + +sparsity_B /= num_tiles +sparsity_C /= num_tiles + +print("sparsity_B: ", sparsity_B) +print("sparsity_C: ", sparsity_C) \ No newline at end of file diff --git a/tile_pairing.py b/tile_pairing.py index 638df2da..8af7000b 100644 --- a/tile_pairing.py +++ b/tile_pairing.py @@ -12,8 +12,8 @@ # app_name = "mat_residual" # app_name = "matmul_ijk" # app_name = "matmul_ijk" -# app_name = "mat_mattransmul" -app_name = "mat_elemadd3" +# app_name = "mat_mattrpython3ansmul" +app_name = "mat_elemmul" const_val = 2 # only for mat_mattransmul From 9f73f16c07b243444b8facab14b7138b4a9efc72 Mon Sep 17 00:00:00 2001 From: Akhilesh Varadan Balasingam Date: Sat, 2 Sep 2023 23:08:37 -0700 Subject: [PATCH 36/39] square matrices for vecmul iter --- spmv_iter_matrices.txt | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 spmv_iter_matrices.txt diff --git a/spmv_iter_matrices.txt b/spmv_iter_matrices.txt new file mode 100644 index 00000000..f113329a --- /dev/null +++ b/spmv_iter_matrices.txt @@ -0,0 +1,9 @@ +bcsstm26 +tols2000 +west2021 +adder_dcop_30 +adder_trans_02 +watt_2 +rajat12 +G42 +G30 From a1925123acadee034c7505fb03855f0cca8cbcbd Mon Sep 17 00:00:00 2001 From: Akhilesh Varadan Balasingam Date: Sun, 3 Sep 2023 00:05:19 -0700 Subject: [PATCH 37/39] moved to right place --- scripts/tensor_names/spmv_iter_matrices.txt | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 scripts/tensor_names/spmv_iter_matrices.txt diff --git a/scripts/tensor_names/spmv_iter_matrices.txt b/scripts/tensor_names/spmv_iter_matrices.txt new file mode 100644 index 00000000..f113329a --- /dev/null +++ b/scripts/tensor_names/spmv_iter_matrices.txt @@ -0,0 +1,9 @@ +bcsstm26 +tols2000 +west2021 +adder_dcop_30 +adder_trans_02 +watt_2 +rajat12 +G42 +G30 From 6c9dbd730bfdf301718afbb3f5e207f46184b174 Mon Sep 17 00:00:00 2001 From: Akhilesh Varadan Balasingam Date: Sun, 3 Sep 2023 12:26:59 -0700 Subject: [PATCH 38/39] changes for new complex apps --- maximum_tiling.py | 122 ++++++++++++++++++--- sam/sim/src/tiling/memory_config_onyx.yaml | 2 +- sam/sim/src/tiling/tile.py | 28 ++++- scripts/tiling/tile_ext.sh | 4 +- setup_tiling_mat.py | 24 ++-- 5 files changed, 149 insertions(+), 31 deletions(-) diff --git a/maximum_tiling.py b/maximum_tiling.py index 317ea607..e9a41415 100644 --- a/maximum_tiling.py +++ b/maximum_tiling.py @@ -76,6 +76,62 @@ def pair_tiles(app_name): tile_pairing[tile] = [b, c, d] tile += 1 + elif "mat_mask_tri" in app_name: + operands = ["B", 
"C", "D"] + operand_files = get_files_from_dir(path, operands) + b_tensors = operand_files["B"] + c_tensors = operand_files["C"] + d_tensors = operand_files["D"] + + tile = 0 + for b in b_tensors: + for c in c_tensors: + b_loc = get_tile_id(b) + c_loc = get_tile_id(c) + if not (b_loc[0] == c_loc[0] and b_loc[2] == c_loc[2]): + continue + + for d in d_tensors: + d_loc = get_tile_id(d) + if(c_loc[1] == d_loc[0] and c_loc[3] == d_loc[2] and b_loc[1] == d_loc[1] and b_loc[3] == d_loc[3] and b_loc[0] == c_loc[0] and b_loc[2] == c_loc[2]): + tile_pairing[tile] = [b, c, d] + tile += 1 + elif "mat_vecmul_iter" in app_name: + operands = ["B", "C", "D", "E", "f"] + operand_files = get_files_from_dir(path, operands) + b_tensors = operand_files["B"] + c_tensors = operand_files["C"] + d_tensors = operand_files["D"] + e_tensors = operand_files["E"] + f_tensors = operand_files["f"] + + tile = 0 + + for b in b_tensors: + for c in c_tensors: + b_loc = get_tile_id(b) + c_loc = get_tile_id(c) + if not (b_loc[1] == c_loc[0] and b_loc[3] == c_loc[2]): + continue + for d in d_tensors: + d_loc = get_tile_id(d) + # check k coord + if not (c_loc[1] == d_loc[0] and c_loc[3] == d_loc[2]): + continue + for e in e_tensors: + e_loc = get_tile_id(e) + # check l coord + if not (d_loc[1] == e_loc[0] and d_loc[3] == e_loc[2]): + continue + for f in f_tensors: + f_loc = get_tile_id(f) + if (d_loc[1] == e_loc[0] and d_loc[3] == e_loc[2] and c_loc[1] == d_loc[0] and c_loc[3] == d_loc[2] and b_loc[1] == c_loc[0] and b_loc[3] == c_loc[2] and e_loc[1] == f_loc[0] and e_loc[3] == f_loc[1]): + tile_pairing[tile] = [b, c, d, e, f] + tile += 1 + + + + return tile_pairing def read_mtx(mtx_path): @@ -127,7 +183,41 @@ def compute_outputs(tile_pairing, app_name, limit=900): print("out = ", out) print("count = ", np.count_nonzero(out)) return EarlyReturn() - + elif "mat_mask_tri" in app_name: + B_mat = read_mtx(value[0]) + C_mat = read_mtx(value[1]) + D_mat = read_mtx(value[2]) + D_mat = np.transpose(D_mat) + out = np.sum(np.multiply(np.matmul(C_mat, D_mat), B_mat)) + if np.count_nonzero(out) > limit or np.count_nonzero(B_mat) > limit or np.count_nonzero(C_mat) > limit or np.count_nonzero(D_mat) > limit: + print("tile = ", key) + print("B_tile_ID = ", value[0]) + print("C_tile_ID = ", value[1]) + print("D_tile_ID = ", value[2]) + print("out = ", out) + print("count = ", np.count_nonzero(out)) + return EarlyReturn() + elif "mat_vecmul_iter" in app_name: + B_mat = read_mtx(value[0]) + C_mat = read_mtx(value[1]) + D_mat = read_mtx(value[2]) + E_mat = read_mtx(value[3]) + f_mat = read_mtx(value[4]) + # we transpose bc we swap in copy formatted + f_mat = np.transpose(f_mat) + out = np.matmul(np.matmul(np.matmul(np.matmul(B_mat, C_mat), D_mat), E_mat), f_mat) + if np.any(out): + # if np.count_nonzero(out) > limit or np.count_nonzero(B_mat) > limit or np.count_nonzero(C_mat) > limit or np.count_nonzero(D_mat) > limit or np.count_nonzero(E_mat) > limit or np.count_nonzero(f_mat) > limit: + print("tile = ", key) + print("B_tile_ID = ", value[0]) + print("C_tile_ID = ", value[1]) + print("D_tile_ID = ", value[2]) + print("E_tile_ID = ", value[3]) + print("f_tile_ID = ", value[4]) + print("out = ", out) + print("count = ", np.count_nonzero(out)) + breakpoint() + return EarlyReturn() return None def find_optimal_tilesize(app_name, datum, initial=30, step_size=10): @@ -158,23 +248,23 @@ def find_optimal_tilesize(app_name, datum, initial=30, step_size=10): if __name__ == "__main__": max_list = {} - for i in range(1, 11): - app_name = "matmul_ijk" - 
datum = f"matrix_sp80_sm_{i}" + # for i in range(1, 11): + app_name = "mat_vecmul_iter" + datum = "qiulp" - # tile_pairing = pair_tiles(app_name) - # compute_outputs(tile_pairing, app_name) + tile_pairing = pair_tiles(app_name) + compute_outputs(tile_pairing, app_name) - max_tile_size, tile_pairing = find_optimal_tilesize(app_name, datum, initial=30, step_size=10) - print("-"*20) - print(f"MAX TILESIZE for {app_name}, {datum}: {max_tile_size}") - print(f"NUMBER OF TILES: {len(tile_pairing.keys())}") - print("-"*20) + # max_tile_size, tile_pairing = find_optimal_tilesize(app_name, datum, initial=20, step_size=10) + # print("-"*20) + # print(f"MAX TILESIZE for {app_name}, {datum}: {max_tile_size}") + # print(f"NUMBER OF TILES: {len(tile_pairing.keys())}") + # print("-"*20) - max_list[datum] = [max_tile_size, len(tile_pairing.keys())] + # # max_list[datum] = [max_tile_size, len(tile_pairing.keys())] - call_tiling = f"python3 setup_tiling_mat.py {app_name} {datum} {max_tile_size} > temp.txt" - os.system(call_tiling) - print(call_tiling) + # call_tiling = f"python3 setup_tiling_mat.py {app_name} {datum} {max_tile_size} > temp.txt" + # os.system(call_tiling) + # print(call_tiling) - print(max_list) \ No newline at end of file + # print(max_list) \ No newline at end of file diff --git a/sam/sim/src/tiling/memory_config_onyx.yaml b/sam/sim/src/tiling/memory_config_onyx.yaml index b565bce0..a6fa9d35 100644 --- a/sam/sim/src/tiling/memory_config_onyx.yaml +++ b/sam/sim/src/tiling/memory_config_onyx.yaml @@ -16,4 +16,4 @@ n_levels: 3 level_names: ["Main", "Glb", "Mem"] Main_tile_size: None Glb_tile_size: 8 # 8 = (8x8) = 64 elements -Mem_tile_size: 30 #45 # Size of one dense dimension. 45 = (45*45) = 2025 +Mem_tile_size: 30 diff --git a/sam/sim/src/tiling/tile.py b/sam/sim/src/tiling/tile.py index bf71a7bb..c6ef86b7 100644 --- a/sam/sim/src/tiling/tile.py +++ b/sam/sim/src/tiling/tile.py @@ -30,7 +30,9 @@ "mat_vecmul_ij" : "X(i,j)=B(i,j)*c(j) -f=X:ss -f=B:ss -f=c:ss:0 -s=reorder(i,j)", "mat_residual": "X(i,j)=b(i)-C(i,j)*d(j) -f=X:ss -f=C:ss -f=b:ss:0 -f=d:ss:0 -s=reorder(i,j)", "mat_sddmm": "X(i,j)=B(i,j)*C(i,k)*D(k,j) -f=X:ss -f=B:ss -f=C:dd -f=D:dd:1,0 -s=reorder(i,j,k)", - "mat_elemadd3": "X(i,j)=B(i,j)+C(i,j)+D(i,j) -f=X:ss -f=B:ss -f=C:ss -f=D:ss"} + "mat_elemadd3": "X(i,j)=B(i,j)+C(i,j)+D(i,j) -f=X:ss -f=B:ss -f=C:ss -f=D:ss", + "mat_mask_tri": "X(i,j)=B(i,j)*C(i,k)*D(k,j) -f=X:ss -f=B:ss -f=C:ss -f=D:ss:1,0 -s=reorder(i,j,k)", + "mat_vecmul_iter": "X(i,j)=B(i,j)*C(j,k)*D(k,l)*E(l,m)*f(m) -f=X:ss -f=B:ss -f=C:ss -f=D:ss -f=E:ss -f=f:s -s=reorder(i,j,k,l,m)"} def print_dict(dd): @@ -303,6 +305,30 @@ def get_other_tensors(app_str, tensor, other_nonempty=True): shifted2 = ScipyTensorShifter().shiftLastMode(shifted) tensors.append(shifted2) + elif "mat_mask_tri" in app_str: + print("Writing other tensor 1...") + shifted = ScipyTensorShifter().shiftLastMode(tensor) + tensors.append(shifted) + + print("Writing shifted2...") + shifted2 = ScipyTensorShifter().shiftLastMode(shifted) + tensors.append(shifted2) + elif "mat_vecmul_iter" in app_str: + print("Writing other tensor 1...") + tensors.append(tensor) + tensors.append(tensor) + tensors.append(tensor) + + print("writing other vector...") + tensorName = args.input_tensor + variant = "mode1" + path = constructOtherVecKey(tensorName,variant) + tensor_c_from_path = FrosttTensor(path) + tensor_c = tensor_c_from_path.load().todense() + + # breakpoint() + tensors.append(tensor_c) + elif "mat_mattransmul" in app_str: print("Writing other tensors...") rows, 
cols = tensor.shape # i,j diff --git a/scripts/tiling/tile_ext.sh b/scripts/tiling/tile_ext.sh index 3483df89..7fc23251 100755 --- a/scripts/tiling/tile_ext.sh +++ b/scripts/tiling/tile_ext.sh @@ -11,7 +11,9 @@ BENCHMARKS=( # mat_vecmul_ij # mat_residual # mat_elemadd3 - matmul_ijk + # matmul_ijk + # mat_mask_tri + mat_vecmul_iter # mat_elemmul ) diff --git a/setup_tiling_mat.py b/setup_tiling_mat.py index 84ff2051..1202ac81 100644 --- a/setup_tiling_mat.py +++ b/setup_tiling_mat.py @@ -11,8 +11,8 @@ # data = ['rajat12'] -data = [sys.argv[2]] -tilesizes = [int(sys.argv[3])] +# data = [sys.argv[2]] +# tilesizes = [int(sys.argv[3])] # app_name = "mat_elemadd" # app_name = "mat_elemmul" # app_name = "mat_sddmm" @@ -21,18 +21,18 @@ # app_name = "mat_elemmul" # app_name = "mat_residual" -# data = [] -# data_file = open("onyx_final_eval_mid50_tensor_names.txt") -# data_file_lines = data_file.readlines() -# for line in data_file_lines: -# data.append(line[:-1]) +data = [] +data_file = open("onyx_final_eval_mid50_tensor_names.txt") +data_file_lines = data_file.readlines() +for line in data_file_lines: + data.append(line[:-1]) -# with open('matmul_tilesize_list.txt', 'r') as file: -# lines = file.readlines() +with open('matmul_tilesize_list.txt', 'r') as file: + lines = file.readlines() -# tilesizes = [int(line.strip()) for line in lines] -# print("TILESIZES: ", tilesizes) -# print("DATA: ", data) +tilesizes = [int(line.strip()) for line in lines] +print("TILESIZES: ", tilesizes) +print("DATA: ", data) mode_to_exclude = 0 addition_vector_name = "d" #mattransmul (d) and residual (b) only From a1440cd5d78d0f6be8962ac2b4b19b65208d823f Mon Sep 17 00:00:00 2001 From: Akhilesh Varadan Balasingam Date: Mon, 9 Oct 2023 11:41:39 -0700 Subject: [PATCH 39/39] syn mat --- maximum_tiling.py | 26 ++++++++-------- sam/sim/src/tiling/tile.py | 17 ++++++----- .../formatting/datastructure_suitesparse.py | 6 +++- .../generate_suitesparse_formats.sh | 14 ++++----- scripts/tiling/tile_ext.sh | 4 +-- setup_tiling_mat.py | 30 ++++++++++++------- 6 files changed, 55 insertions(+), 42 deletions(-) diff --git a/maximum_tiling.py b/maximum_tiling.py index e9a41415..2390ed21 100644 --- a/maximum_tiling.py +++ b/maximum_tiling.py @@ -249,22 +249,22 @@ def find_optimal_tilesize(app_name, datum, initial=30, step_size=10): if __name__ == "__main__": max_list = {} # for i in range(1, 11): - app_name = "mat_vecmul_iter" - datum = "qiulp" + app_name = "matmul_ijk" + datum = "N_biocarta" - tile_pairing = pair_tiles(app_name) - compute_outputs(tile_pairing, app_name) + # tile_pairing = pair_tiles(app_name) + # compute_outputs(tile_pairing, app_name) - # max_tile_size, tile_pairing = find_optimal_tilesize(app_name, datum, initial=20, step_size=10) - # print("-"*20) - # print(f"MAX TILESIZE for {app_name}, {datum}: {max_tile_size}") - # print(f"NUMBER OF TILES: {len(tile_pairing.keys())}") - # print("-"*20) + max_tile_size, tile_pairing = find_optimal_tilesize(app_name, datum, initial=40, step_size=10) + print("-"*20) + print(f"MAX TILESIZE for {app_name}, {datum}: {max_tile_size}") + print(f"NUMBER OF TILES: {len(tile_pairing.keys())}") + print("-"*20) - # # max_list[datum] = [max_tile_size, len(tile_pairing.keys())] + max_list[datum] = [max_tile_size, len(tile_pairing.keys())] - # call_tiling = f"python3 setup_tiling_mat.py {app_name} {datum} {max_tile_size} > temp.txt" - # os.system(call_tiling) - # print(call_tiling) + call_tiling = f"python3 setup_tiling_mat.py {app_name} {datum} {max_tile_size} > temp.txt" + os.system(call_tiling) 
+ print(call_tiling) # print(max_list) \ No newline at end of file diff --git a/sam/sim/src/tiling/tile.py b/sam/sim/src/tiling/tile.py index c6ef86b7..47cf15a0 100644 --- a/sam/sim/src/tiling/tile.py +++ b/sam/sim/src/tiling/tile.py @@ -362,16 +362,17 @@ def get_other_tensors(app_str, tensor, other_nonempty=True): print("Writing other tensors...") tensorName = args.input_tensor # c(j) use mode1 - variant = "mode1" - path = constructOtherVecKey(tensorName,variant) - tensor_c_from_path = FrosttTensor(path) - tensor_c = tensor_c_from_path.load().todense() + + # variant = "mode1" + # path = constructOtherVecKey(tensorName,variant) + # tensor_c_from_path = FrosttTensor(path) + # tensor_c = tensor_c_from_path.load().todense() - print("TENSOR SHAPE: ", tensor.shape) - print("TENSOR_C SHAPE: ", tensor_c.shape) + # print("TENSOR SHAPE: ", tensor.shape) + # print("TENSOR_C SHAPE: ", tensor_c.shape) - # rows, cols = tensor.shape - # tensor_c = scipy.sparse.random(cols, 1, data_rvs=np.ones).toarray().flatten() + rows, cols = tensor.shape + tensor_c = scipy.sparse.random(cols, 1, data_rvs=np.ones).toarray().flatten() if other_nonempty: tensor_c[0] = 1 diff --git a/scripts/formatting/datastructure_suitesparse.py b/scripts/formatting/datastructure_suitesparse.py index 373bf81e..e2d9e496 100644 --- a/scripts/formatting/datastructure_suitesparse.py +++ b/scripts/formatting/datastructure_suitesparse.py @@ -6,7 +6,11 @@ from pathlib import Path -from scripts.util.util import FormatWriter, SuiteSparseTensor, InputCacheSuiteSparse +import sys +# the mock-0.3.1 dir contains testcase.py, testutils.py & mock.py +sys.path.append('/home/avb03/sam/scripts') + +from util.util import FormatWriter, SuiteSparseTensor, InputCacheSuiteSparse from sam.util import SUITESPARSE_FORMATTED_PATH, ScipyTensorShifter all_formats = ["coo", "cooT", "csr", "dcsr", "dcsc", "csc", "dense", "denseT"] diff --git a/scripts/formatting/generate_suitesparse_formats.sh b/scripts/formatting/generate_suitesparse_formats.sh index dd9822c3..6763f12b 100755 --- a/scripts/formatting/generate_suitesparse_formats.sh +++ b/scripts/formatting/generate_suitesparse_formats.sh @@ -6,16 +6,16 @@ BENCHMARKS=( # matmul_ikj -# matmul_ijk + matmul_ijk # matmul_kij -# mat_elemmul -# mat_elemadd -# mat_elemadd3 + mat_elemmul + mat_elemadd + mat_elemadd3 mat_residual mat_mattransmul mat_vecmul # mat_identity -# mat_sddmm + mat_sddmm ) # This is a list of benchmarks that have "other" tensors that are generated @@ -36,10 +36,10 @@ for b in ${!BENCHMARKS[@]}; do sspath=${SUITESPARSE_PATH}/$name echo "Generating input format files for $name..." 
- SUITESPARSE_TENSOR_PATH=$sspath python $basedir/scripts/formatting/datastructure_suitesparse.py -n $name -hw -b $bench + SUITESPARSE_TENSOR_PATH=$sspath python3 $basedir/scripts/formatting/datastructure_suitesparse.py -n $name -hw -b $bench if [[ $OTHERBENCHES =~ "$bench" ]]; then echo "Generating format of 'other' tensor" - python $basedir/scripts/formatting/datastructure_tns.py -n $line -f ss01 --other -ss -b $bench -hw + python3 $basedir/scripts/formatting/datastructure_tns.py -n $line -f ss01 --other -ss -b $bench -hw fi done <$textfile diff --git a/scripts/tiling/tile_ext.sh b/scripts/tiling/tile_ext.sh index 7fc23251..082c1c38 100755 --- a/scripts/tiling/tile_ext.sh +++ b/scripts/tiling/tile_ext.sh @@ -13,8 +13,8 @@ BENCHMARKS=( # mat_elemadd3 # matmul_ijk # mat_mask_tri - mat_vecmul_iter - # mat_elemmul + # mat_vecmul_iter + mat_elemadd ) appname=$3 diff --git a/setup_tiling_mat.py b/setup_tiling_mat.py index 1202ac81..b153eec0 100644 --- a/setup_tiling_mat.py +++ b/setup_tiling_mat.py @@ -11,8 +11,8 @@ # data = ['rajat12'] -# data = [sys.argv[2]] -# tilesizes = [int(sys.argv[3])] +data = [sys.argv[2]] +tilesizes = [int(sys.argv[3])] # app_name = "mat_elemadd" # app_name = "mat_elemmul" # app_name = "mat_sddmm" @@ -21,16 +21,23 @@ # app_name = "mat_elemmul" # app_name = "mat_residual" -data = [] -data_file = open("onyx_final_eval_mid50_tensor_names.txt") -data_file_lines = data_file.readlines() -for line in data_file_lines: - data.append(line[:-1]) +# data = [] +# tilesizes = [] +# sparsities = [60, 80, 90, 95, 98] +# for sparsity in sparsities: +# for i in range(5): +# data.append(f"matrix_sp{str(sparsity)}_sm_{i+1}") +# tilesizes.append(30) -with open('matmul_tilesize_list.txt', 'r') as file: - lines = file.readlines() +# data_file = open("onyx_final_eval_mid50_tensor_names.txt") +# data_file_lines = data_file.readlines() +# for line in data_file_lines: +# data.append(line[:-1]) -tilesizes = [int(line.strip()) for line in lines] +# with open('matmul_tilesize_list.txt', 'r') as file: +# lines = file.readlines() + +# tilesizes = [int(line.strip()) for line in lines] print("TILESIZES: ", tilesizes) print("DATA: ", data) @@ -83,7 +90,8 @@ def replace_ones_with_zeros(mtx_file): rmdir = f"rm -rf tiles/{app_name}" os.system(rmdir) - + + print(f"{SUITESPARSE_PATH}/{datum}.mtx") mtx_file = glob.glob(f"{SUITESPARSE_PATH}/{datum}.mtx")[0] os.makedirs("extensor_mtx", exist_ok=True) shutil.copy(mtx_file,f"extensor_mtx/{datum}.mtx")
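For readers following the pairing logic that recurs across tile_pairing.py and maximum_tiling.py in this series, here is a minimal standalone sketch of the matmul_ijk check. The 7-character suffix convention follows tile_pairing.py's b[-7:].split("_"); reading the four fields as row/column tile IDs at the GLB and MEM levels is an assumption, and the directory names below are hypothetical.

def get_tile_id(path):
    # e.g. ".../tensor_B_tile_0_1_0_2" -> ["0", "1", "0", "2"]
    return path[-7:].split("_")

def matmul_pairable(b_dir, c_dir):
    b_loc = get_tile_id(b_dir)
    c_loc = get_tile_id(c_dir)
    # Same condition as pair_tiles() uses for matmul: B's second and fourth
    # ID fields must equal C's first and third, i.e. the shared dimension
    # must line up at both tiling levels.
    return b_loc[1] == c_loc[0] and b_loc[3] == c_loc[2]

print(matmul_pairable("tiles/matmul_ijk/formatted/tensor_B_tile_0_1_0_2",
                      "tiles/matmul_ijk/formatted/tensor_C_tile_1_3_2_0"))  # True
print(matmul_pairable("tiles/matmul_ijk/formatted/tensor_B_tile_0_1_0_2",
                      "tiles/matmul_ijk/formatted/tensor_C_tile_0_3_2_0"))  # False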