
Commit

Update shard_irpa_file function
Signed-off-by: aviator19941 <[email protected]>
aviator19941 committed Oct 31, 2024
1 parent d74395c commit 1ac1e0d
Showing 2 changed files with 30 additions and 21 deletions.
20 changes: 12 additions & 8 deletions sharktank/sharktank/utils/export_artifacts.py
@@ -123,29 +123,33 @@ def wrapper(*args, **kwargs):
     def shard_irpa_file(
         self,
         *,
-        output_file: str,
+        gguf_file: str,
+        output_irpa: str,
     ):
         shard_irpa_args = [
             "python3",
             "-m",
-            "sharktank.models.llama.tools.shard_llama",
-            "--irpa-file",
-            self.irpa_path,
-            "--output-file",
-            output_file,
-            "--shard_count",
+            "sharktank.examples.sharding.shard_llm_dataset",
+            "--gguf-file",
+            gguf_file,
+            "--output-irpa",
+            output_irpa,
+            "--tensor-parallelism-size",
             str(self.tensor_parallelism_size),
         ]
 
         cwd = self.sharktank_dir
         cmd = subprocess.list2cmdline(shard_irpa_args)
+        import pdb
+
+        pdb.set_trace()
 
         logger.info(f"Sharding irpa file:\n" f"cd {cwd} && {cmd}")
 
         proc = subprocess.run(cmd, shell=True, capture_output=True, cwd=cwd, text=True)
         if proc.returncode != 0:
             logger.error(
-                f"Error sharding irpa file with shard_llama.py\n"
+                f"Error sharding irpa file with shard_llm_dataset.py\n"
                 f"{proc.stdout+proc.stderr}"
             )
         else:
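For orientation, once subprocess.list2cmdline joins the argument list above, the helper logs and runs a command of roughly this shape. This is a sketch only: the working directory and output path are illustrative placeholders, while the module name, flags, 405B gguf path, and tensor-parallelism size of 8 come from the diffs in this commit.

    cd <sharktank_dir> && python3 -m sharktank.examples.sharding.shard_llm_dataset \
        --gguf-file /data/llama-3.1/405b/llama3_405b_full_combined.gguf \
        --output-irpa /tmp/llama405b_f16_tp8_sharded.irpa \
        --tensor-parallelism-size 8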
31 changes: 18 additions & 13 deletions sharktank/tests/models/llama/benchmark_amdgpu_test.py
@@ -57,6 +57,7 @@ def setUp(self):
         super().setUp()
         # TODO: add numpy files to Azure and download from it
         self.artifacts_dir = Path("/data/llama-3.1/8b")
+        self.gguf_path = self.artifacts_dir / "llama8b_f16.gguf"
         self.irpa_path = self.artifacts_dir / "llama8b_f16.irpa"
         self.irpa_path_fp8 = self.artifacts_dir / "llama8b_fp8.irpa"
         self.tensor_parallelism_size = 1
@@ -151,7 +152,7 @@ def testBenchmark8B_f16_Decomposed(self):
         )
         # shard_irpa file
         shard_return_code = self.llama8b_f16_decomposed_artifacts.shard_irpa_file(
-            output_file=output_shard_file_name
+            gguf_file=self.gguf_path, output_irpa=output_shard_file_name
         )
         if shard_return_code == 0:
             self.irpa_path = output_shard_file_name
@@ -201,7 +202,7 @@ def testBenchmark8B_f16_Decodeposed(self):
         )
         # shard_irpa file
         shard_return_code = self.llama8b_f16_decodeposed_artifacts.shard_irpa_file(
-            output_file=output_shard_file_name
+            gguf_file=self.gguf_path, output_irpa=output_shard_file_name
         )
         if shard_return_code == 0:
             self.irpa_path = output_shard_file_name
@@ -252,7 +253,7 @@ def testBenchmark8B_fp8_Decomposed(self):
         )
         # shard_irpa file
         shard_return_code = self.llama8b_fp8_decomposed_artifacts.shard_irpa_file(
-            output_file=output_shard_file_name
+            gguf_file=self.gguf_path, output_irpa=output_shard_file_name
         )
         if shard_return_code == 0:
             self.irpa_path = output_shard_file_name
@@ -303,7 +304,7 @@ def testBenchmark8B_fp8_Decodeposed(self):
         )
         # shard_irpa file
         shard_return_code = self.llama8b_fp8_decodeposed_artifacts.shard_irpa_file(
-            output_file=output_shard_file_name
+            gguf_file=self.gguf_path, output_irpa=output_shard_file_name
         )
         if shard_return_code == 0:
             self.irpa_path = output_shard_file_name
@@ -342,6 +343,7 @@ def setUp(self):
         super().setUp()
         # TODO: add numpy files to Azure and download from it
         self.artifacts_dir = Path("/data/llama-3.1/70b")
+        self.gguf_path = self.artifacts_dir / "llama70b_f16.gguf"
         self.irpa_path = self.artifacts_dir / "llama70b_f16.irpa"
         self.irpa_path_fp8 = self.artifacts_dir / "llama70b_fp8.irpa"
         self.tensor_parallelism_size = 1
@@ -439,7 +441,7 @@ def testBenchmark70B_f16_Decomposed(self):
         )
         # shard_irpa file
         shard_return_code = self.llama70b_f16_decomposed_artifacts.shard_irpa_file(
-            output_file=output_shard_file_name
+            gguf_file=self.gguf_path, output_irpa=output_shard_file_name
         )
         if shard_return_code == 0:
             self.irpa_path = output_shard_file_name
@@ -471,7 +473,9 @@ def testBenchmark70B_f16_Decomposed(self):
         )
 
     @pytest.mark.xfail(
-        reason="Test not yet implemented", strict=True, raises=ExportMlirException
+        reason="'tm_tensor.attention' op query and mask batch dimension mismatch",
+        strict=True,
+        raises=IreeCompileException,
     )
     def testBenchmark70B_f16_Decodeposed(self):
         output_file_name = self.dir_path_70b / "f16_torch"
@@ -490,7 +494,7 @@ def testBenchmark70B_f16_Decodeposed(self):
         )
         # shard_irpa file
         shard_return_code = self.llama70b_f16_decodeposed_artifacts.shard_irpa_file(
-            output_file=output_shard_file_name
+            gguf_file=self.gguf_path, output_irpa=output_shard_file_name
         )
         if shard_return_code == 0:
             self.irpa_path = output_shard_file_name
@@ -541,7 +545,7 @@ def testBenchmark70B_fp8_Decomposed(self):
         )
         # shard_irpa file
         shard_return_code = self.llama70b_fp8_decomposed_artifacts.shard_irpa_file(
-            output_file=output_shard_file_name
+            gguf_file=self.gguf_path, output_irpa=output_shard_file_name
         )
         if shard_return_code == 0:
             self.irpa_path = output_shard_file_name
@@ -592,7 +596,7 @@ def testBenchmark70B_fp8_Decodeposed(self):
         )
         # shard_irpa file
         shard_return_code = self.llama70b_fp8_decodeposed_artifacts.shard_irpa_file(
-            output_file=output_shard_file_name
+            gguf_file=self.gguf_path, output_irpa=output_shard_file_name
         )
         if shard_return_code == 0:
             self.irpa_path = output_shard_file_name
@@ -632,6 +636,7 @@ def setUp(self):
         # TODO: add numpy files to Azure and download from it
         self.artifacts_dir = Path("/data/llama-3.1/405b")
         self.irpa_path = self.artifacts_dir / "llama405b_f16.irpa"
+        self.gguf_path = self.artifacts_dir / "llama3_405b_full_combined.gguf"
         self.irpa_path_fp8 = self.artifacts_dir / "llama405b_fp8.irpa"
         self.tensor_parallelism_size = 8
         self.dir_path_405b = self.dir_path / "llama-405b"
@@ -728,7 +733,7 @@ def testBenchmark405B_f16_Decomposed(self):
         )
         # shard_irpa file
         shard_return_code = self.llama405b_f16_decomposed_artifacts.shard_irpa_file(
-            output_file=output_shard_file_name
+            gguf_file=self.gguf_path, output_irpa=output_shard_file_name
         )
         if shard_return_code == 0:
             self.irpa_path = output_shard_file_name
@@ -779,7 +784,7 @@ def testBenchmark405B_f16_Decodeposed(self):
         )
         # shard_irpa file
         shard_return_code = self.llama405b_f16_decodeposed_artifacts.shard_irpa_file(
-            output_file=output_shard_file_name
+            gguf_file=self.gguf_path, output_irpa=output_shard_file_name
         )
         if shard_return_code == 0:
             self.irpa_path = output_shard_file_name
@@ -830,7 +835,7 @@ def testBenchmark405B_fp8_Decomposed(self):
         )
         # shard_irpa file
         shard_return_code = self.llama405b_fp8_decomposed_artifacts.shard_irpa_file(
-            output_file=output_shard_file_name
+            gguf_file=self.gguf_path, output_irpa=output_shard_file_name
        )
         if shard_return_code == 0:
             self.irpa_path = output_shard_file_name
@@ -881,7 +886,7 @@ def testBenchmark405B_fp8_Decodeposed(self):
         )
         # shard_irpa file
         shard_return_code = self.llama405b_fp8_decodeposed_artifacts.shard_irpa_file(
-            output_file=output_shard_file_name
+            gguf_file=self.gguf_path, output_irpa=output_shard_file_name
         )
         if shard_return_code == 0:
             self.irpa_path = output_shard_file_name
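Taken together, every benchmark test in this file now follows the same shard-then-reuse pattern shown in the hunks above. A condensed sketch of that flow follows; the output_shard_file_name value is hypothetical, since its definition lies outside the diff context shown here.

        # Shard the gguf weights into a tensor-parallel irpa file before benchmarking.
        output_shard_file_name = self.artifacts_dir / "llama8b_f16_tp1_sharded.irpa"  # hypothetical path, not from the commit
        shard_return_code = self.llama8b_f16_decomposed_artifacts.shard_irpa_file(
            gguf_file=self.gguf_path, output_irpa=output_shard_file_name
        )
        if shard_return_code == 0:
            # Reuse the sharded parameters for the subsequent export, compile, and benchmark steps.
            self.irpa_path = output_shard_file_name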

