From e3fdc4a3648d2e5b35a80337a710fd4e1dce2cca Mon Sep 17 00:00:00 2001 From: Mergen Nachin Date: Tue, 16 Apr 2024 16:16:13 -0400 Subject: [PATCH] Create top-level torchat.py CLI binary (#215) --- .github/workflows/test_torchchat_commands.yml | 57 +++++++++++++++++++ build/builder.py | 16 +++--- cli.py | 40 ++++++------- eval.py | 54 +++++++----------- export.py | 34 +++++------ generate.py | 23 ++++---- torchat.py | 39 ------------- torchchat.py | 54 ++++++++++++++++++ 8 files changed, 179 insertions(+), 138 deletions(-) create mode 100644 .github/workflows/test_torchchat_commands.yml delete mode 100644 torchat.py create mode 100644 torchchat.py diff --git a/.github/workflows/test_torchchat_commands.yml b/.github/workflows/test_torchchat_commands.yml new file mode 100644 index 000000000..86c4d8224 --- /dev/null +++ b/.github/workflows/test_torchchat_commands.yml @@ -0,0 +1,57 @@ +name: Run torchchat command tests + +on: + push: + branches: + - main + pull_request: + workflow_dispatch: + +jobs: + torchchat-command-load-test: + strategy: + matrix: + runner: [macos-14] + runs-on: ${{matrix.runner}} + steps: + - name: Checkout repo + uses: actions/checkout@v2 + - name: Setup Python + uses: actions/setup-python@v2 + with: + python-version: 3.11 + - name: Print machine info + run: | + uname -a + if [ $(uname -s) == Darwin ]; then + sysctl machdep.cpu.brand_string + sysctl machdep.cpu.core_count + fi + - name: Install requirements + run: | + echo "Installing pip packages" + pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cpu + pip install -r requirements.txt + + - name: Download Stories files + run: | + + mkdir -p checkpoints/stories15M + pushd checkpoints/stories15M + curl -fsSL -O https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.pt + curl -fsSL -O https://github.com/karpathy/llama2.c/raw/master/tokenizer.model + popd + + - name: Test generate + run: | + + export MODEL_PATH=checkpoints/stories15M/stories15M.pt + export MODEL_NAME=stories15M + export MODEL_DIR=/tmp + + python generate.py --device cpu --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager1 + python torchchat.py generate --device cpu --checkpoint-path ${MODEL_PATH} --temperature 0 > ./output_eager2 + cat ./output_eager1 + cat ./output_eager2 + echo "Tests complete." + \ No newline at end of file diff --git a/build/builder.py b/build/builder.py index f558989b7..8c156a911 100644 --- a/build/builder.py +++ b/build/builder.py @@ -3,19 +3,19 @@ # This source code is licensed under the license found in the # LICENSE file in the root directory of this source tree. -import itertools + +import os import sys import time from dataclasses import dataclass from pathlib import Path -from typing import Optional, Tuple, Union +from typing import Optional, Union import torch import torch._dynamo.config import torch._inductor.config -from cli import cli_args -from quantize import get_precision, name_to_dtype, quantize_model, set_precision +from quantize import name_to_dtype, quantize_model from sentencepiece import SentencePieceProcessor @@ -110,7 +110,7 @@ def from_args(cls, args): # -> TokenizerArgs: elif args.checkpoint_dir: tokenizer_path = args.checkpoint_dir / "tokenizer.model" else: - raise RuntimeError(f"cannot find tokenizer model") + raise RuntimeError("cannot find tokenizer model") if not tokenizer_path.is_file(): raise RuntimeError(f"did not find tokenizer at {tokenizer_path}") @@ -243,7 +243,7 @@ def _initialize_model( # assert model_dtype == "float32", f"dtype setting not valid for a DSO model. Specify dtype during export." assert ( quantize is None or quantize == "{ }" - ), f"quantize not valid for exported DSO model. Specify quantization during export." + ), "quantize not valid for exported DSO model. Specify quantization during export." try: model = model_ # Replace model forward with the AOT-compiled forward @@ -262,12 +262,12 @@ def _initialize_model( # assert model_dtype == "float32", f"dtype setting not valid for a DSO model. Specify dtype during export." assert ( quantize is None or quantize == "{ }" - ), f"quantize not valid for exported PTE model. Specify quantization during export." + ), "quantize not valid for exported PTE model. Specify quantization during export." try: from build.model_et import PTEModel model = PTEModel(model_.config, builder_args.pte_path) - except Exception as e: + except Exception: raise RuntimeError(f"Failed to load ET compiled {builder_args.pte_path}") else: model = model_ diff --git a/cli.py b/cli.py index 3d6873b78..af2d81fb4 100644 --- a/cli.py +++ b/cli.py @@ -4,12 +4,11 @@ # This source code is licensed under the license found in the # LICENSE file in the root directory of this source tree. -import os -import time +import json from pathlib import Path import torch -import torch.nn as nn + default_device = "cpu" # 'cuda' if torch.cuda.is_available() else 'cpu' @@ -41,11 +40,19 @@ def check_args(args, command_name: str): print(f"Warning: {text}") -def cli_args(): - import argparse +def add_arguments_for_generate(parser): + _add_arguments_common(parser) + + +def add_arguments_for_eval(parser): + _add_arguments_common(parser) + + +def add_arguments_for_export(parser): + _add_arguments_common(parser) - parser = argparse.ArgumentParser(description="Your CLI description.") +def _add_arguments_common(parser): parser.add_argument( "--seed", type=int, @@ -60,21 +67,6 @@ def cli_args(): action="store_true", help="Whether to use tiktoken tokenizer.", ) - parser.add_argument( - "--export", - action="store_true", - help="Use torchchat to export a model.", - ) - parser.add_argument( - "--eval", - action="store_true", - help="Use torchchat to eval a model.", - ) - parser.add_argument( - "--generate", - action="store_true", - help="Use torchchat to generate a sequence using a model.", - ) parser.add_argument( "--chat", action="store_true", @@ -162,10 +154,10 @@ def cli_args(): parser.add_argument( "--quantize", type=str, default="{ }", help="Quantization options." ) + parser.add_argument("--params-table", type=str, default=None, help="Device to use") parser.add_argument( "--device", type=str, default=default_device, help="Device to use" ) - parser.add_argument("--params-table", type=str, default=None, help="Device to use") parser.add_argument( "--tasks", nargs="+", @@ -183,7 +175,8 @@ def cli_args(): help="maximum length sequence to evaluate", ) - args = parser.parse_args() + +def arg_init(args): if Path(args.quantize).is_file(): with open(args.quantize, "r") as f: @@ -191,5 +184,4 @@ def cli_args(): if args.seed: torch.manual_seed(args.seed) - return args diff --git a/eval.py b/eval.py index 9ebd5337d..6d719d460 100644 --- a/eval.py +++ b/eval.py @@ -3,24 +3,33 @@ # This source code is licensed under the license found in the # LICENSE file in the root directory of this source tree. -import sys +import argparse import time -from pathlib import Path from typing import Optional import torch import torch._dynamo.config import torch._inductor.config +from build.builder import ( + _initialize_model, + _initialize_tokenizer, + BuilderArgs, + TokenizerArgs, +) + +from build.model import Transformer +from cli import add_arguments_for_eval, arg_init +from generate import encode_tokens, model_forward + +from quantize import set_precision + torch._dynamo.config.automatic_dynamic_shapes = True torch._inductor.config.triton.unique_kernel_names = True torch._inductor.config.epilogue_fusion = False torch._inductor.config.triton.cudagraphs = True torch._dynamo.config.cache_size_limit = 100000 -from build.model import Transformer -from cli import cli_args -from quantize import name_to_dtype, set_precision try: import lm_eval @@ -29,13 +38,6 @@ except: lm_eval_available = False -from build.builder import ( - _initialize_model, - _initialize_tokenizer, - BuilderArgs, - TokenizerArgs, -) -from generate import encode_tokens, model_forward if lm_eval_available: try: # lm_eval version 0.4 @@ -218,30 +220,19 @@ def main(args) -> None: builder_args = BuilderArgs.from_args(args) tokenizer_args = TokenizerArgs.from_args(args) - - checkpoint_path = args.checkpoint_path - checkpoint_dir = args.checkpoint_dir - params_path = args.params_path - params_table = args.params_table - gguf_path = args.gguf_path - tokenizer_path = args.tokenizer_path - dso_path = args.dso_path - pte_path = args.pte_path quantize = args.quantize device = args.device - model_dtype = args.dtype tasks = args.tasks limit = args.limit max_seq_length = args.max_seq_length - use_tiktoken = args.tiktoken print(f"Using device={device}") - set_precision(buildeer_args.precision) + set_precision(builder_args.precision) tokenizer = _initialize_tokenizer(tokenizer_args) builder_args.setup_caches = False model = _initialize_model( - buildeer_args, + builder_args, quantize, ) @@ -280,11 +271,8 @@ def main(args) -> None: if __name__ == "__main__": - - def cli(): - args = cli_args() - main(args) - - -if __name__ == "__main__": - cli() + parser = argparse.ArgumentParser(description="Export specific CLI.") + add_arguments_for_eval(parser) + args = parser.parse_args() + args = arg_init(args) + main(args) diff --git a/export.py b/export.py index f31af803a..45c61dbb4 100644 --- a/export.py +++ b/export.py @@ -4,16 +4,16 @@ # This source code is licensed under the license found in the # LICENSE file in the root directory of this source tree. +import argparse import os -import time -from pathlib import Path import torch -import torch.nn as nn -from cli import cli_args -from quantize import get_precision, name_to_dtype, quantize_model, set_precision -from torch.export import Dim, export +from build.builder import _initialize_model, BuilderArgs +from cli import add_arguments_for_export, arg_init, check_args +from export_aoti import export_model as export_model_aoti + +from quantize import set_precision try: executorch_export_available = True @@ -22,13 +22,6 @@ executorch_exception = f"ET EXPORT EXCEPTION: {e}" executorch_export_available = False -from build.builder import _initialize_model, BuilderArgs, TokenizerArgs - -from build.model import Transformer -from export_aoti import export_model as export_model_aoti -from generate import decode_one_token -from quantize import name_to_dtype, quantize_model -from torch._export import capture_pre_autograd_graph default_device = "cpu" # 'cuda' if torch.cuda.is_available() else 'cpu' @@ -44,7 +37,6 @@ def device_sync(device): def main(args): builder_args = BuilderArgs.from_args(args) - tokenizer_args = TokenizerArgs.from_args(args) quantize = args.quantize print(f"Using device={builder_args.device}") @@ -70,7 +62,7 @@ def main(args): export_model_et(model, builder_args.device, args.output_pte_path, args) else: print( - f"Export with executorch requested but Executorch could not be loaded" + "Export with executorch requested but Executorch could not be loaded" ) print(executorch_exception) if output_dso_path: @@ -79,10 +71,10 @@ def main(args): export_model_aoti(model, builder_args.device, output_dso_path, args) -def cli(): - args = cli_args() - main(args) - - if __name__ == "__main__": - cli() + parser = argparse.ArgumentParser(description="Export specific CLI.") + add_arguments_for_export(parser) + args = parser.parse_args() + check_args(args, "export") + args = arg_init(args) + main(args) diff --git a/generate.py b/generate.py index d4b26a75b..4d52b4c8b 100644 --- a/generate.py +++ b/generate.py @@ -3,6 +3,7 @@ # This source code is licensed under the license found in the # LICENSE file in the root directory of this source tree. +import argparse import itertools import os import sys @@ -23,8 +24,8 @@ TokenizerArgs, ) from build.model import Transformer -from cli import cli_args -from quantize import get_precision, name_to_dtype, quantize_model, set_precision +from cli import add_arguments_for_generate, arg_init, check_args +from quantize import set_precision @dataclass @@ -137,7 +138,7 @@ def decode_n_tokens( **sampling_kwargs, ): new_tokens, new_probs = [], [] - for i in range(num_new_tokens): + for _ in range(num_new_tokens): with torch.backends.cuda.sdp_kernel( enable_flash=False, enable_mem_efficient=False, enable_math=True ): # Actually better for Inductor to codegen attention here @@ -356,8 +357,6 @@ def _main( # will add a version of _initialize_model in future # (need additional args) if is_speculative: - from builder import _load_model - speculative_builder_args = builder_args draft_model = _load_model( @@ -496,8 +495,6 @@ def main(args): builder_args = BuilderArgs.from_args(args) speculative_builder_args = BuilderArgs.from_speculative_args(args) tokenizer_args = TokenizerArgs.from_args(args) - generator_args = GeneratorArgs.from_args(args) - _main( builder_args, speculative_builder_args, @@ -516,10 +513,10 @@ def main(args): ) -def cli(): - args = cli_args() - main(args) - - if __name__ == "__main__": - cli() + parser = argparse.ArgumentParser(description="Generate specific CLI.") + add_arguments_for_generate(parser) + args = parser.parse_args() + check_args(args, "generate") + args = arg_init(args) + main(args) diff --git a/torchat.py b/torchat.py deleted file mode 100644 index d01b12f88..000000000 --- a/torchat.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. - -# This source code is licensed under the license found in the -# LICENSE file in the root directory of this source tree. - -import os -import time -from pathlib import Path - -import torch -import torch.nn as nn -from cli import check_args, cli_args -from eval import main as eval_main - -from export import main as export_main -from generate import main as generate_main -from torch.export import Dim, export - -default_device = "cpu" # 'cuda' if torch.cuda.is_available() else 'cpu' - - -def cli(): - args = cli_args() - - if args.generate or args.chat: - check_args(args, "generate") - generate_main(args) - elif args.eval: - eval_main(args) - elif args.export: - check_args(args, "export") - export_main(args) - else: - raise RuntimeError("must specify either --generate or --export") - - -if __name__ == "__main__": - cli() diff --git a/torchchat.py b/torchchat.py new file mode 100644 index 000000000..8b5067cd0 --- /dev/null +++ b/torchchat.py @@ -0,0 +1,54 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. + +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. + +import argparse + +from cli import ( + add_arguments_for_eval, + add_arguments_for_export, + add_arguments_for_generate, + arg_init, + check_args, +) + +default_device = "cpu" # 'cuda' if torch.cuda.is_available() else 'cpu' + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Top-level command") + subparsers = parser.add_subparsers( + dest="subcommand", + help="Use `generate`, `eval` or `export` followed by subcommand specific options.", + ) + + parser_generate = subparsers.add_parser("generate") + add_arguments_for_generate(parser_generate) + + parser_eval = subparsers.add_parser("eval") + add_arguments_for_eval(parser_eval) + + parser_export = subparsers.add_parser("export") + add_arguments_for_export(parser_export) + + args = parser.parse_args() + args = arg_init(args) + + if args.subcommand == "generate": + check_args(args, "generate") + from generate import main as generate_main + + generate_main(args) + elif args.subcommand == "eval": + from eval import main as eval_main + + eval_main(args) + elif args.subcommand == "export": + check_args(args, "export") + from export import main as export_main + + export_main(args) + else: + raise RuntimeError("Must specify valid subcommands: generate, export, eval")