Fixes & packaging #4

Merged: 4 commits, Sep 26, 2024
27 changes: 27 additions & 0 deletions .github/workflows/ci.yml
@@ -0,0 +1,27 @@
+name: CI
+
+on:
+  push:
+    branches:
+      - master
+  pull_request:
+    branches:
+      - master
+
+jobs:
+  Lint:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.x"
+      - uses: pre-commit/[email protected]
+
+  Build:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+      - uses: astral-sh/setup-uv@v2
+      - run: uv build .
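Note that the Build job only verifies packaging: with the `pyproject.toml` added below, `uv build .` should produce an sdist and a wheel under `dist/`; no tests are run.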
4 changes: 3 additions & 1 deletion .gitignore
@@ -1,2 +1,4 @@
+*.egg-info
 *.py[cod]
-/models
+/models
+uv.lock
20 changes: 20 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,20 @@
+repos:
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.6.8
+    hooks:
+      - id: ruff
+        args:
+          - --fix
+      - id: ruff-format
+
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.6.0
+    hooks:
+      - id: debug-statements
+      - id: end-of-file-fixer
+      - id: trailing-whitespace
+
+  - repo: https://github.com/pre-commit/mirrors-prettier
+    rev: v3.1.0
+    hooks:
+      - id: prettier
2 changes: 1 addition & 1 deletion README.md
@@ -6,7 +6,7 @@ A small tool that downloads models from [the Huggingface Hub](https://huggingfac
 
 - Download and compile `llama.cpp`.
 - Set up a virtualenv using the requirements from `llama.cpp`.
-- Install the requirements from this repo in that virtualenv.
+- Install this package in that virtualenv (e.g. `pip install -e .`).
 - Run e.g. `python ggify.py databricks/dolly-v2-12b` (nb.: I haven't tried with that particular repo)
 - You'll end up with GGML models under `models/...`.
109 changes: 73 additions & 36 deletions ggify.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import argparse
 import os
 import re
@@ -36,6 +38,27 @@ def get_llama_cpp_dir():
     return dir
 
 
+class ToolNotFoundError(RuntimeError):
+    pass
+
+
+def find_tool(
+    dir: str,
+    tool_name: str,
+    candidate_names: list[str] | None = None,
+    raise_on_missing=True,
+) -> str | None:
+    for candidate_name in candidate_names or [tool_name]:
+        candidate_path = os.path.join(dir, candidate_name)
+        if os.path.isfile(candidate_path):
+            return candidate_path
+    if raise_on_missing:
+        raise ToolNotFoundError(
+            f"Could not find {tool_name} in {dir} (set LLAMA_CPP_DIR (currently {get_llama_cpp_dir()}?))",
+        )
+    return None
+
+
 PYTHON_EXE = os.environ.get("PYTHON_EXE", sys.executable)
 GG_MODEL_EXTENSION = ".gguf"

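A quick sketch of how the new `find_tool` helper is meant to be called; the directory path and the extra candidate name below are hypothetical illustrations, not values this PR ships:

```python
# Hypothetical usage of find_tool; "/opt/llama.cpp" and "llama-quantize"
# are illustrative stand-ins, not values from this PR.
quantize_cmd = find_tool(
    "/opt/llama.cpp",
    "quantize",
    candidate_names=["quantize", "llama-quantize"],  # checked in order
)
# With raise_on_missing=True (the default), a miss raises ToolNotFoundError;
# pass raise_on_missing=False to get None back instead.
```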
@@ -62,13 +85,7 @@ def quantize(
     if not os.path.isfile(q_model_path):
         if not nonq_model_path:
             raise ValueError(f"Could not find nonquantized model at {nonq_model_path}")
-        quantize_cmd = os.path.join(get_llama_cpp_dir(), "quantize")
-
-        if not os.path.isfile(quantize_cmd):
-            raise RuntimeError(
-                f"Could not find quantize executable at {quantize_cmd} "
-                f"(set LLAMA_CPP_DIR (currently {get_llama_cpp_dir()}?))"
-            )
+        quantize_cmd = find_tool(get_llama_cpp_dir(), "quantize")
         concurrency = str(os.cpu_count() + 2)
         print_and_check_call([quantize_cmd, nonq_model_path, dest_type, concurrency])
     return q_model_path
@@ -90,7 +107,7 @@ def convert_pth(
     *,
     convert_type: str,
     vocab_type: str,
-    use_convert_hf_to_gguf=False,
+    converter: str,
 ):
     model_path = get_ggml_model_path(dirname, convert_type)
     try:
@@ -99,48 +116,61 @@
         print(f"Not believing a {stat.st_size:d}-byte model is valid, reconverting")
         raise FileNotFoundError()
     except FileNotFoundError:
-        if use_convert_hf_to_gguf:
-            convert_using_hf_to_gguf(dirname, convert_type=convert_type)
+        converters = {
+            "convert-hf-to-gguf": lambda: convert_using_hf_to_gguf(dirname, convert_type=convert_type),
+            "convert": lambda: convert_using_convert_py(
+                dirname,
+                convert_type=convert_type,
+                vocab_type=vocab_type,
+            ),
+        }
+        if converter == "auto":
+            for con, func in converters.items():
+                try:
+                    func()
+                    break
+                except ToolNotFoundError:
+                    pass
+            else:
+                raise ToolNotFoundError("Could not find a converter")
+        elif converter in converters:
+            converters[converter]()
         else:
-            convert_using_convert(
-                dirname, convert_type=convert_type, vocab_type=vocab_type
-            )
+            raise ValueError(f"Unknown converter {converter!r}")
 
     return model_path
 
 
-def convert_using_convert(dirname, *, convert_type, vocab_type):
-    convert_hf_to_gguf_py = os.path.join(get_llama_cpp_dir(), "convert.py")
-    if not os.path.isfile(convert_hf_to_gguf_py):
-        raise RuntimeError(
-            f"Could not find convert.py at {convert_hf_to_gguf_py} "
-            f"(set LLAMA_CPP_DIR (currently {get_llama_cpp_dir()}?))"
-        )
+def convert_using_convert_py(dirname, *, convert_type, vocab_type):
+    convert_py = find_tool(get_llama_cpp_dir(), "convert.py")
     print_and_check_call(
         [
             PYTHON_EXE,
-            convert_hf_to_gguf_py,
+            convert_py,
             dirname,
             f"--outtype={convert_type}",
             f"--vocab-type={vocab_type}",
-        ]
+        ],
     )
 
 
 def convert_using_hf_to_gguf(dirname, *, convert_type):
-    convert_hf_to_gguf_py = os.path.join(get_llama_cpp_dir(), "convert-hf-to-gguf.py")
-    if not os.path.isfile(convert_hf_to_gguf_py):
-        raise RuntimeError(
-            f"Could not find convert.py at {convert_hf_to_gguf_py} "
-            f"(set LLAMA_CPP_DIR (currently {get_llama_cpp_dir()}?))"
-        )
+    convert_hf_to_gguf_py = find_tool(
+        get_llama_cpp_dir(),
+        "convert-hf-to-gguf.py",
+        [
+            "convert-hf-to-gguf.py",
+            "convert_hf_to_gguf.py",
+        ],
+    )
     print_and_check_call(
         [
             PYTHON_EXE,
             convert_hf_to_gguf_py,
             dirname,
             f"--outtype={convert_type}",
             "--verbose",
-        ]
+        ],
     )


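The `"auto"` branch above relies on Python's `for`/`else`: the `else` suite runs only when the loop finishes without a `break`, i.e. when every converter raised `ToolNotFoundError`. A self-contained sketch of the same pattern (the two converter functions here are stand-ins, not code from this PR):

```python
class ToolNotFoundError(RuntimeError):
    pass


def use_convert():
    # Stand-in for a converter whose script is missing.
    raise ToolNotFoundError("convert.py not found")


def use_hf_to_gguf():
    # Stand-in for a converter that works.
    print("converted with convert-hf-to-gguf.py")


for func in (use_convert, use_hf_to_gguf):
    try:
        func()
        break  # a converter worked; the else suite is skipped
    except ToolNotFoundError:
        pass  # try the next candidate
else:
    # Reached only when no break happened, i.e. every candidate raised.
    raise ToolNotFoundError("Could not find a converter")
```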
@@ -151,7 +181,7 @@ def convert_pth_to_types(
     remove_nonquantized_model=False,
     nonquantized_type: str,
     vocab_type: str,
-    use_convert_hf_to_gguf=False,
+    converter: str = "auto",
 ):
     # If f32 is requested, or a quantized type is requested, convert to fp32 GGML
     nonquantized_path = None
@@ -160,7 +190,7 @@ def convert_pth_to_types(
             dirname,
             convert_type=nonquantized_type,
             vocab_type=vocab_type,
-            use_convert_hf_to_gguf=use_convert_hf_to_gguf,
+            converter=converter,
         )
     # Other types
     for type in types:
@@ -186,10 +216,10 @@ def convert_pth_to_types(
 
 
 def download_repo(repo, dirname):
-    files = list(huggingface_hub.list_repo_tree(repo, token=hf_token))
+    files = [fi for fi in huggingface_hub.list_repo_tree(repo, token=hf_token) if isinstance(fi, RepoFile)]
    if not any(fi.rfilename.startswith("pytorch_model") for fi in files):
         print(
-            f"Repo {repo} does not seem to contain a PyTorch model, but continuing anyway"
+            f"Repo {repo} does not seem to contain a PyTorch model, but continuing anyway",
         )
 
     with tqdm.tqdm(files, unit="file", desc="Downloading files...") as pbar:
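The switch from `list(...)` to a comprehension filters on `isinstance(fi, RepoFile)` because `list_repo_tree` can yield directory entries as well as files, and only real files carry the attributes used below (e.g. `rfilename`). A rough sketch, assuming `RepoFile` is imported from `huggingface_hub` elsewhere in the file:

```python
import huggingface_hub
from huggingface_hub.hf_api import RepoFile  # assumed import path

# "gpt2" is just an example public repo.
entries = huggingface_hub.list_repo_tree("gpt2")
files = [fi for fi in entries if isinstance(fi, RepoFile)]  # drop folder entries
```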
@@ -249,9 +279,16 @@ def main():
     ap.add_argument(
         "--use-convert-hf-to-gguf",
         action="store_true",
-        help="Use convert_hf_to_gguf.py instead of convert.py",
+        help="Use convert_hf_to_gguf.py instead of convert.py (deprecated; use `--converter`)",
     )
+    ap.add_argument(
+        "--converter",
+        default="auto",
+        choices=("auto", "convert", "convert-hf-to-gguf"),
+    )
     args = ap.parse_args()
+    if args.use_convert_hf_to_gguf:
+        args.converter = "convert-hf-to-gguf"
     if args.llama_cpp_dir:
         os.environ["LLAMA_CPP_DIR"] = args.llama_cpp_dir
     repo = args.repo
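The two-line shim after `parse_args()` keeps old invocations working: the deprecated boolean flag simply pre-selects the new option, so e.g. `ggify some-org/some-model --use-convert-hf-to-gguf` (repo name hypothetical) behaves like passing `--converter convert-hf-to-gguf`.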
@@ -265,8 +302,8 @@ def main():
             remove_nonquantized_model=not args.keep_nonquantized,
             nonquantized_type=args.nonquantized_type,
             vocab_type=args.vocab_type,
-            use_convert_hf_to_gguf=args.use_convert_hf_to_gguf,
-        )
+            converter=args.converter,
+        ),
     )
     for output_path in output_paths:
         print(output_path)
23 changes: 23 additions & 0 deletions pyproject.toml
@@ -0,0 +1,23 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project]
+name = "ggify"
+version = "0.1.0"
+description = "Download and convert models from Huggingface Hub to GGML"
+readme = "README.md"
+requires-python = ">=3.8"
+dependencies = [
+    "huggingface-hub~=0.23.0",
+    "tqdm~=4.66.5",
+]
+
+[project.scripts]
+ggify = "ggify:main"
+
+[tool.ruff]
+line-length = 110
+
+[tool.ruff.lint]
+select = ["COM812", "E", "F", "W", "I"]
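Two notes on the new packaging config: `COM812` is Ruff's missing-trailing-comma rule, which accounts for the `]` to `],` churn in `ggify.py` above, and the `[project.scripts]` entry expects a `ggify` module exposing a callable `main`. Roughly, the installed `ggify` command amounts to this sketch:

```python
# Approximately what the generated console script does after `pip install .`:
# import the entry point and exit with its return value.
from ggify import main

if __name__ == "__main__":
    raise SystemExit(main())
```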
2 changes: 0 additions & 2 deletions requirements.txt

This file was deleted.
