-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: Publish/load pre-quantized models (#34)
- Loading branch information
Showing
7 changed files
with
187 additions
and
55 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"from transformers import AutoModelForCausalLM, BitsAndBytesConfig, AutoTokenizer\n", | ||
"import torch\n", | ||
"import huggingface_hub\n", | ||
"\n", | ||
"print(torch.cuda.is_available())" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"model_path = \"meta-llama/Llama-2-7b-chat-hf\"\n", | ||
"print(model_path)\n", | ||
"target_model_path = \"autora-doc/Llama-2-7b-chat-hf-nf4\"" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Load the model in 4bit quantization for faster inference on smaller GPUs\n", | ||
"conf = BitsAndBytesConfig(\n", | ||
" load_in_4bit=True,\n", | ||
" bnb_4bit_use_double_quant=True,\n", | ||
" bnb_4bit_quant_type=\"nf4\",\n", | ||
" bnb_4bit_compute_dtype=torch.bfloat16,\n", | ||
")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Load the tokenizer and model\n", | ||
"tokenizer = AutoTokenizer.from_pretrained(model_path)\n", | ||
"model = AutoModelForCausalLM.from_pretrained(model_path, quantization_config=conf, device_map=\"auto\")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# This will work when running from a Jupyter notebook or Colab.\n", | ||
"# For other authentication methods, see https://huggingface.co/docs/huggingface_hub/main/en/quick-start#authentication\n", | ||
"huggingface_hub.notebook_login(new_session=False, write_permission=True)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"tokenizer.push_to_hub(target_model_path)\n", | ||
"model.push_to_hub(target_model_path)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Alternatvely, upload to Azure Blob Storage (currently not used)\n", | ||
"from azureml.core import Workspace\n", | ||
"\n", | ||
"# save locally first\n", | ||
"tokenizer.save_pretrained(f\"./models/{model_path}\")\n", | ||
"model.save_pretrained(f\"./models/{model_path}\")\n", | ||
"\n", | ||
"# If all goes well, upload to blob storage:\n", | ||
"workspace = Workspace.from_config()\n", | ||
"ds = workspace.get_default_datastore()\n", | ||
"ds.upload(f\"./models/{model_path}\", f\"./base_models/{target_model_path}\", show_progress=True, overwrite=True)" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": ".env", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.11.5" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 4 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,8 @@ | ||
[project] | ||
name = "autora-doc" | ||
license = {file = "LICENSE"} | ||
license = { file = "LICENSE" } | ||
readme = "README.md" | ||
authors = [ | ||
{ name = "Carlos Garcia Jurado Suarez", email = "[email protected]" } | ||
] | ||
authors = [{ name = "Carlos Garcia Jurado Suarez", email = "[email protected]" }] | ||
requires-python = ">=3.8" | ||
classifiers = [ | ||
"Development Status :: 4 - Beta", | ||
|
@@ -20,7 +18,7 @@ dependencies = [ | |
"scipy", | ||
"# This works, while installing pytorch and cuda from conda does not",  ||
"torch==2.0.1", | ||
"transformers>=4.35.2", | ||
"transformers>=4.37.2", | ||
"nltk", | ||
] | ||
|
||
|
@@ -29,36 +27,26 @@ description = "Automatic documentation generator from AutoRA code" | |
[project.optional-dependencies] | ||
dev = [ | ||
"pytest", | ||
"pytest-cov", # Used to report total code coverage | ||
"pre-commit", # Used to run checks before finalizing a git commit | ||
"sphinx", # Used to automatically generate documentation | ||
"pytest-cov", # Used to report total code coverage | ||
"pre-commit", # Used to run checks before finalizing a git commit | ||
"sphinx", # Used to automatically generate documentation | ||
"sphinx-rtd-theme", # Used to render documentation | ||
"sphinx-autoapi", # Used to automatically generate api documentation | ||
"black", # Used for static linting of files | ||
"mypy", # Used for static type checking of files | ||
"sphinx-autoapi", # Used to automatically generate api documentation | ||
"black", # Used for static linting of files | ||
"mypy", # Used for static type checking of files | ||
# if you add dependencies here while experimenting in a notebook and you | ||
# want that notebook to render in your documentation, please add the | ||
# dependencies to ./docs/requirements.txt as well. | ||
"nbconvert", # Needed for pre-commit check to clear output from Python notebooks | ||
"nbsphinx", # Used to integrate Python notebooks into Sphinx documentation | ||
"ipython", # Also used in building notebooks into Sphinx | ||
"matplotlib", # Used in sample notebook intro_notebook.ipynb | ||
"nbconvert", # Needed for pre-commit check to clear output from Python notebooks | ||
"nbsphinx", # Used to integrate Python notebooks into Sphinx documentation | ||
"ipython", # Also used in building notebooks into Sphinx | ||
"matplotlib", # Used in sample notebook intro_notebook.ipynb | ||
"ipykernel", | ||
"hf_transfer", | ||
] | ||
train = [ | ||
"jsonlines", | ||
"mlflow", | ||
] | ||
azure = [ | ||
"azureml-core", | ||
"azureml-mlflow", | ||
] | ||
cuda = [ | ||
"bitsandbytes>=0.41.2.post2", | ||
"accelerate>=0.24.1", | ||
"xformers", | ||
] | ||
train = ["jsonlines", "mlflow"] | ||
azure = ["azureml-core", "azureml-mlflow"] | ||
cuda = ["bitsandbytes>=0.42.0", "accelerate>=0.24.1", "xformers"] | ||
|
||
[project.urls] | ||
Homepage = "https://github.com/AutoResearch/autodoc" | ||
|
@@ -68,9 +56,7 @@ requires = ["hatchling", "hatch-vcs"] | |
build-backend = "hatchling.build" | ||
|
||
[tool.pytest.ini_options] | ||
testpaths = [ | ||
"tests", | ||
] | ||
testpaths = ["tests"] | ||
|
||
[tool.black] | ||
line-length = 110 | ||
|
@@ -81,7 +67,7 @@ profile = "black" | |
line_length = 110 | ||
|
||
[tool.coverage.run] | ||
omit=["src/autora/doc/_version.py"] | ||
omit = ["src/autora/doc/_version.py"] | ||
|
||
[tool.hatch] | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters