Refactor model-qa #253

Merged (15 commits) on Apr 11, 2024
4 changes: 3 additions & 1 deletion .gitignore
@@ -22,4 +22,6 @@ examples/python/genai_models
 examples/python/hf_cache
 
 !test/test_models/hf-internal-testing/
-!test/test_models/hf-internal-testing/tiny-random-gpt2*/*.onnx
+!test/test_models/hf-internal-testing/tiny-random-gpt2*/*.onnx
+
+.ipynb_checkpoints/
299 changes: 299 additions & 0 deletions examples/python/assistant.ipynb
@@ -0,0 +1,299 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "2cdce572-02eb-421e-a1c2-d169a4c85e2b",
"metadata": {},
"outputs": [],
"source": [
"!pip install onnxruntime-genai\n",
"!pip install olive-ai"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "51ea1db1-4460-49ed-8da5-918a1adb460d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.\n",
"Token is valid (permission: read).\n",
"Your token has been saved to C:\\Users\\nakersha\\.cache\\huggingface\\token\n",
"Login successful\n"
]
}
],
"source": [
"!huggingface-cli login --token <TOKEN>"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "e86b3453-0e9f-47fb-ab63-193e0e06c583",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[2024-04-09 13:09:13,203] [INFO] [run.py:246:run] Loading Olive module configuration from: C:\\Users\\nakersha\\AppData\\Local\\miniconda3\\envs\\phi2\\lib\\site-packages\\olive\\olive_config.json\n",
"[2024-04-09 13:09:13,207] [INFO] [run.py:252:run] Loading run configuration from: olive.json\n",
"[2024-04-09 13:09:13,211] [INFO] [config.py:196:validate_evaluate_input_model] No evaluator is specified, skip to evaluate model\n",
"[2024-04-09 13:09:13,281] [DEBUG] [accelerator.py:245:normalize_accelerators] The accelerator device and execution providers are specified, skipping deduce.\n",
"[2024-04-09 13:09:13,283] [DEBUG] [accelerator.py:275:normalize_accelerators] Supported execution providers for device cpu: ['CPUExecutionProvider']\n",
"[2024-04-09 13:09:13,285] [DEBUG] [accelerator.py:302:create_accelerators] Initial accelerators and execution providers: {'CPU': ['CPUExecutionProvider']}\n",
"[2024-04-09 13:09:13,286] [INFO] [accelerator.py:324:create_accelerators] Running workflow on accelerator specs: cpu-cpu\n",
"[2024-04-09 13:09:13,287] [INFO] [run.py:199:run_engine] Importing pass module GenAIModelExporter\n",
"[2024-04-09 13:09:13,295] [INFO] [engine.py:115:initialize] Using cache directory: cache\n",
"[2024-04-09 13:09:13,298] [INFO] [engine.py:271:run] Running Olive on accelerator: cpu-cpu\n",
"[2024-04-09 13:09:13,300] [DEBUG] [engine.py:1079:create_system] create native OliveSystem SystemType.Local\n",
"[2024-04-09 13:09:13,302] [DEBUG] [engine.py:1079:create_system] create native OliveSystem SystemType.Local\n",
"[2024-04-09 13:09:13,307] [DEBUG] [engine.py:717:_cache_model] Cached model 1473a6e460df1ddcd4cf088ff0019b1e to cache\\models\\1473a6e460df1ddcd4cf088ff0019b1e.json\n",
"[2024-04-09 13:09:13,310] [DEBUG] [engine.py:344:run_accelerator] Running Olive in no-search mode ...\n",
"[2024-04-09 13:09:13,311] [DEBUG] [engine.py:436:run_no_search] Running ['genai_exporter'] with no search ...\n",
"[2024-04-09 13:09:13,313] [INFO] [engine.py:873:_run_pass] Running pass genai_exporter:GenAIModelExporter\n",
"[2024-04-09 13:09:14,689] [ERROR] [engine.py:955:_run_pass] Pass run failed.\n",
"Traceback (most recent call last):\n",
" File \"C:\\Users\\nakersha\\AppData\\Local\\miniconda3\\envs\\phi2\\lib\\site-packages\\olive\\engine\\engine.py\", line 943, in _run_pass\n",
" output_model_config = host.run_pass(p, input_model_config, data_root, output_model_path, pass_search_point)\n",
" File \"C:\\Users\\nakersha\\AppData\\Local\\miniconda3\\envs\\phi2\\lib\\site-packages\\olive\\systems\\local.py\", line 31, in run_pass\n",
" output_model = the_pass.run(model, data_root, output_model_path, point)\n",
" File \"C:\\Users\\nakersha\\AppData\\Local\\miniconda3\\envs\\phi2\\lib\\site-packages\\olive\\passes\\olive_pass.py\", line 216, in run\n",
" output_model = self._run_for_config(model, data_root, config, output_model_path)\n",
" File \"C:\\Users\\nakersha\\AppData\\Local\\miniconda3\\envs\\phi2\\lib\\site-packages\\olive\\passes\\onnx\\genai_model_exporter.py\", line 100, in _run_for_config\n",
" create_model(\n",
" File \"C:\\Users\\nakersha\\AppData\\Local\\miniconda3\\envs\\phi2\\lib\\site-packages\\onnxruntime_genai\\models\\builder.py\", line 1561, in create_model\n",
" config = AutoConfig.from_pretrained(hf_name, **extra_kwargs)\n",
" File \"C:\\Users\\nakersha\\AppData\\Local\\miniconda3\\envs\\phi2\\lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py\", line 1100, in from_pretrained\n",
" config_dict, unused_kwargs = PretrainedConfig.get_config_dict(pretrained_model_name_or_path, **kwargs)\n",
" File \"C:\\Users\\nakersha\\AppData\\Local\\miniconda3\\envs\\phi2\\lib\\site-packages\\transformers\\configuration_utils.py\", line 634, in get_config_dict\n",
" config_dict, kwargs = cls._get_config_dict(pretrained_model_name_or_path, **kwargs)\n",
" File \"C:\\Users\\nakersha\\AppData\\Local\\miniconda3\\envs\\phi2\\lib\\site-packages\\transformers\\configuration_utils.py\", line 689, in _get_config_dict\n",
" resolved_config_file = cached_file(\n",
" File \"C:\\Users\\nakersha\\AppData\\Local\\miniconda3\\envs\\phi2\\lib\\site-packages\\transformers\\utils\\hub.py\", line 385, in cached_file\n",
" resolved_file = hf_hub_download(\n",
" File \"C:\\Users\\nakersha\\AppData\\Local\\miniconda3\\envs\\phi2\\lib\\site-packages\\huggingface_hub\\utils\\_validators.py\", line 118, in _inner_fn\n",
" return fn(*args, **kwargs)\n",
" File \"C:\\Users\\nakersha\\AppData\\Local\\miniconda3\\envs\\phi2\\lib\\site-packages\\huggingface_hub\\file_download.py\", line 1384, in hf_hub_download\n",
" os.makedirs(os.path.dirname(pointer_path), exist_ok=True)\n",
" File \"C:\\Users\\nakersha\\AppData\\Local\\miniconda3\\envs\\phi2\\lib\\os.py\", line 225, in makedirs\n",
" mkdir(name, mode)\n",
"FileNotFoundError: [WinError 3] The system cannot find the path specified: 'C:\\\\Users\\\\nakersha\\\\Develop\\\\code\\\\microsoft\\\\onnxruntime-genai\\\\examples\\\\python\\\\cache\\\\models\\\\4_GenAIModelExporter-1473a6e460df1ddcd4cf088ff0019b1e-fe48ab55cdf4d03b843ede7c3c3be27b-cpu-cpu\\\\output_model\\\\genai_cache_dir\\\\models--microsoft--phi-2\\\\snapshots\\\\b10c3eba545ad279e7208ee3a5d644566f001670'\n",
"[2024-04-09 13:09:14,729] [WARNING] [engine.py:366:run_accelerator] Failed to run Olive on cpu-cpu.\n",
"Traceback (most recent call last):\n",
" File \"C:\\Users\\nakersha\\AppData\\Local\\miniconda3\\envs\\phi2\\lib\\site-packages\\olive\\engine\\engine.py\", line 345, in run_accelerator\n",
" output_footprint = self.run_no_search(\n",
" File \"C:\\Users\\nakersha\\AppData\\Local\\miniconda3\\envs\\phi2\\lib\\site-packages\\olive\\engine\\engine.py\", line 437, in run_no_search\n",
" should_prune, signal, model_ids = self._run_passes(\n",
" File \"C:\\Users\\nakersha\\AppData\\Local\\miniconda3\\envs\\phi2\\lib\\site-packages\\olive\\engine\\engine.py\", line 835, in _run_passes\n",
" model_config, model_id = self._run_pass(\n",
" File \"C:\\Users\\nakersha\\AppData\\Local\\miniconda3\\envs\\phi2\\lib\\site-packages\\olive\\engine\\engine.py\", line 943, in _run_pass\n",
" output_model_config = host.run_pass(p, input_model_config, data_root, output_model_path, pass_search_point)\n",
" File \"C:\\Users\\nakersha\\AppData\\Local\\miniconda3\\envs\\phi2\\lib\\site-packages\\olive\\systems\\local.py\", line 31, in run_pass\n",
" output_model = the_pass.run(model, data_root, output_model_path, point)\n",
" File \"C:\\Users\\nakersha\\AppData\\Local\\miniconda3\\envs\\phi2\\lib\\site-packages\\olive\\passes\\olive_pass.py\", line 216, in run\n",
" output_model = self._run_for_config(model, data_root, config, output_model_path)\n",
" File \"C:\\Users\\nakersha\\AppData\\Local\\miniconda3\\envs\\phi2\\lib\\site-packages\\olive\\passes\\onnx\\genai_model_exporter.py\", line 100, in _run_for_config\n",
" create_model(\n",
" File \"C:\\Users\\nakersha\\AppData\\Local\\miniconda3\\envs\\phi2\\lib\\site-packages\\onnxruntime_genai\\models\\builder.py\", line 1561, in create_model\n",
" config = AutoConfig.from_pretrained(hf_name, **extra_kwargs)\n",
" File \"C:\\Users\\nakersha\\AppData\\Local\\miniconda3\\envs\\phi2\\lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py\", line 1100, in from_pretrained\n",
" config_dict, unused_kwargs = PretrainedConfig.get_config_dict(pretrained_model_name_or_path, **kwargs)\n",
" File \"C:\\Users\\nakersha\\AppData\\Local\\miniconda3\\envs\\phi2\\lib\\site-packages\\transformers\\configuration_utils.py\", line 634, in get_config_dict\n",
" config_dict, kwargs = cls._get_config_dict(pretrained_model_name_or_path, **kwargs)\n",
" File \"C:\\Users\\nakersha\\AppData\\Local\\miniconda3\\envs\\phi2\\lib\\site-packages\\transformers\\configuration_utils.py\", line 689, in _get_config_dict\n",
" resolved_config_file = cached_file(\n",
" File \"C:\\Users\\nakersha\\AppData\\Local\\miniconda3\\envs\\phi2\\lib\\site-packages\\transformers\\utils\\hub.py\", line 385, in cached_file\n",
" resolved_file = hf_hub_download(\n",
" File \"C:\\Users\\nakersha\\AppData\\Local\\miniconda3\\envs\\phi2\\lib\\site-packages\\huggingface_hub\\utils\\_validators.py\", line 118, in _inner_fn\n",
" return fn(*args, **kwargs)\n",
" File \"C:\\Users\\nakersha\\AppData\\Local\\miniconda3\\envs\\phi2\\lib\\site-packages\\huggingface_hub\\file_download.py\", line 1384, in hf_hub_download\n",
" os.makedirs(os.path.dirname(pointer_path), exist_ok=True)\n",
" File \"C:\\Users\\nakersha\\AppData\\Local\\miniconda3\\envs\\phi2\\lib\\os.py\", line 225, in makedirs\n",
" mkdir(name, mode)\n",
"FileNotFoundError: [WinError 3] The system cannot find the path specified: 'C:\\\\Users\\\\nakersha\\\\Develop\\\\code\\\\microsoft\\\\onnxruntime-genai\\\\examples\\\\python\\\\cache\\\\models\\\\4_GenAIModelExporter-1473a6e460df1ddcd4cf088ff0019b1e-fe48ab55cdf4d03b843ede7c3c3be27b-cpu-cpu\\\\output_model\\\\genai_cache_dir\\\\models--microsoft--phi-2\\\\snapshots\\\\b10c3eba545ad279e7208ee3a5d644566f001670'\n",
"[2024-04-09 13:09:14,741] [INFO] [engine.py:288:run] Run history for cpu-cpu:\n",
"[2024-04-09 13:09:14,746] [INFO] [engine.py:578:dump_run_history] Please install tabulate for better run history output\n",
"[2024-04-09 13:09:14,751] [INFO] [engine.py:303:run] No packaging config provided, skip packaging artifacts\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\nakersha\\AppData\\Local\\miniconda3\\envs\\phi2\\lib\\site-packages\\transformers\\models\\auto\\configuration_auto.py:1085: FutureWarning: The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.\n",
" warnings.warn(\n"
]
},
{
"data": {
"text/plain": [
"{}"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import olive.workflows\n",
"\n",
"olive.workflows.run(\"olive.json\")"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "569d2618-ff32-466a-8bec-eeb967ee364b",
"metadata": {},
"outputs": [],
"source": [
"import onnxruntime_genai as og\n",
"import time"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "add45ace-14be-4ab3-a68c-303aebeea18c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loading model...\n",
"Model loaded in 41.10 seconds\n"
]
}
],
"source": [
"print(\"Loading model...\")\n",
"app_started_timestamp = time.time()\n",
"\n",
"model = og.Model(f'example-models\\phi2-int4-cpu')\n",
"model_loaded_timestamp = time.time()\n",
"\n",
"print(\"Model loaded in {:.2f} seconds\".format(model_loaded_timestamp - app_started_timestamp))\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "79513969-40bc-4588-a10c-8c482d224fdb",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loading tokenizer...\n",
"Tokenizer created\n"
]
}
],
"source": [
"print(\"Loading tokenizer...\")\n",
"tokenizer = og.Tokenizer(model)\n",
"tokenizer_stream = tokenizer.create_stream()\n",
"\n",
"print(\"Tokenizer created\")\n",
"\n",
"\n",
"system_prompt = \"You are a helpful assistant. Answer in one sentence.\"\n",
"text = \"What is Dilithium?\"\n",
"\n",
"input_tokens = tokenizer.encode(system_prompt + text)\n",
"\n",
"prompt_length = len(input_tokens)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "9dcf8cc3-d5d2-42b1-8ad1-76d6629667b1",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Creating generator ...\n",
"Generator created\n",
"\n",
"A: Dilithium is a fictional substance in the Star Trek universe that is used as a propellant and a power source for spaceships.\n",
"\n",
"Prompt tokens: 17, New tokens: 32, Time to first: 1.32s, New tokens per second: 4.29 tps\n"
]
}
],
"source": [
"started_timestamp = time.time()\n",
"\n",
"print(\"Creating generator ...\")\n",
"params = og.GeneratorParams(model)\n",
"params.set_search_options({\"do_sample\": False, \"max_length\": 2028, \"min_length\": 0, \"top_p\": 0.9, \"top_k\": 40, \"temperature\": 1.0, \"repetition_penalty\": 1.0})\n",
"params.input_ids = input_tokens\n",
"generator = og.Generator(model, params)\n",
"print(\"Generator created\")\n",
"\n",
"first = True\n",
"new_tokens = []\n",
"\n",
"while not generator.is_done():\n",
" generator.compute_logits()\n",
" generator.generate_next_token()\n",
" if first:\n",
" first_token_timestamp = time.time()\n",
" first = False\n",
"\n",
" new_token = generator.get_next_tokens()[0]\n",
" print(tokenizer_stream.decode(new_token), end=\"\")\n",
" new_tokens.append(new_token)\n",
"\n",
"print()\n",
"run_time = time.time() - started_timestamp\n",
"print(f\"Prompt tokens: {len(input_tokens)}, New tokens: {len(new_tokens)}, Time to first: {(first_token_timestamp - started_timestamp):.2f}s, New tokens per second: {len(new_tokens)/run_time:.2f} tps\")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dfd4e897-1316-4f80-8fe1-0088341be5b9",
"metadata": {},
"outputs": [],
"source": [
"# Compare with llama.cpp.\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.18"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
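A note on the Olive failure captured in the output of the `olive.workflows.run` cell above: the `FileNotFoundError: [WinError 3]` is raised from `os.makedirs`, and the destination directory printed in the traceback is 287 characters long, past the default 260-character `MAX_PATH` limit on Windows. Path length, not a genuinely missing directory, is therefore the likely cause; the sketch below only verifies the measurement, and the remedies (enabling the `LongPathsEnabled` registry setting, or running from a shorter working directory so the Olive cache path shrinks) are suggestions, not something this notebook exercises.

```python
# Sketch: measure the directory path from the traceback above. The string is copied
# verbatim from the FileNotFoundError; the MAX_PATH diagnosis is an inference.
failing_dir = (
    r"C:\Users\nakersha\Develop\code\microsoft\onnxruntime-genai\examples\python"
    r"\cache\models\4_GenAIModelExporter-1473a6e460df1ddcd4cf088ff0019b1e"
    r"-fe48ab55cdf4d03b843ede7c3c3be27b-cpu-cpu\output_model\genai_cache_dir"
    r"\models--microsoft--phi-2\snapshots\b10c3eba545ad279e7208ee3a5d644566f001670"
)
print(len(failing_dir))  # 287 -- over the 260-character default before any
                         # file name inside the snapshot is even appended.
```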
46 changes: 0 additions & 46 deletions examples/python/model-chat.py

This file was deleted.
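
The deleted `model-chat.py` is not reproduced in the diff, so its 46 lines stay elided here. For orientation, a minimal standalone sketch of the generate-and-stream flow the notebook builds up cell by cell is shown below; it reuses only the `onnxruntime_genai` calls the notebook itself makes, the model path and prompt are placeholders, and it is a reconstruction for illustration, not the contents of the deleted file.

```python
# Minimal sketch of the notebook's flow as a single script, assuming a model already
# exported to MODEL_PATH. Every og.* call mirrors one used in assistant.ipynb above.
import time

import onnxruntime_genai as og

MODEL_PATH = "example-models/phi2-int4-cpu"  # placeholder: any exported model folder

model = og.Model(MODEL_PATH)
tokenizer = og.Tokenizer(model)
tokenizer_stream = tokenizer.create_stream()  # incremental decoder for streaming

system_prompt = "You are a helpful assistant. Answer in one sentence."
input_tokens = tokenizer.encode(system_prompt + "What is Dilithium?")

params = og.GeneratorParams(model)
params.set_search_options({"do_sample": False, "max_length": 2048})
params.input_ids = input_tokens
generator = og.Generator(model, params)

started = time.time()
new_tokens = []
while not generator.is_done():
    generator.compute_logits()       # score the next position
    generator.generate_next_token()  # select a token under the search options
    token = generator.get_next_tokens()[0]
    print(tokenizer_stream.decode(token), end="", flush=True)
    new_tokens.append(token)

print(f"\n{len(new_tokens)} new tokens in {time.time() - started:.2f}s")
```

Streaming through `tokenizer.create_stream()` rather than re-decoding the whole token sequence each step is what lets the notebook print tokens as they are produced.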
