From 67cba76a130b05e40802e8cc59d3213e062b7f1f Mon Sep 17 00:00:00 2001 From: Rajhans Jadhao Date: Tue, 26 Nov 2024 17:22:44 +0530 Subject: [PATCH] using gemma2 2B model for js llm inference --- examples/llm_inference/js/README.md | 2 +- examples/llm_inference/js/index.js | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/llm_inference/js/README.md b/examples/llm_inference/js/README.md index e1ac8447..91731f77 100644 --- a/examples/llm_inference/js/README.md +++ b/examples/llm_inference/js/README.md @@ -13,7 +13,7 @@ This web sample demonstrates how to use the LLM Inference API to run common text Follow the following instructions to run the sample on your device: 1. Make a folder for the task, named as `llm_task`, and copy the [index.html](https://github.com/googlesamples/mediapipe/blob/main/examples/llm_inference/js/index.html) and [index.js](https://github.com/googlesamples/mediapipe/blob/main/examples/llm_inference/js/index.js) files into your `llm_task` folder. -2. Download [Gemma 2B](https://www.kaggle.com/models/google/gemma/frameworks/tfLite/variations/gemma-2b-it-gpu-int4) (TensorFlow Lite 2b-it-gpu-int4 or 2b-it-gpu-int8) or convert an external LLM (Phi-2, Falcon, or StableLM) following the [guide](https://developers.google.com/mediapipe/solutions/genai/llm_inference/web_js#convert-model) (only gpu backend is currently supported), into the `llm_task` folder. +2. Download [Gemma2 2B](https://www.kaggle.com/models/google/gemma-2/tfLite/gemma2-2b-it-gpu-int8) (TensorFlow Lite 2b-it-gpu-int8; a 2b-it-cpu-int8 variant is also published, but only the GPU backend is currently supported) or convert an external LLM (Phi-2, Falcon, or StableLM) following the [guide](https://developers.google.com/mediapipe/solutions/genai/llm_inference/web_js#convert-model) (only the GPU backend is currently supported), into the `llm_task` folder. 3. In your `index.js` file, update [`modelFileName`](https://github.com/googlesamples/mediapipe/blob/main/examples/llm_inference/js/index.js#L23) with your model file's name. 4. 
Run `python3 -m http.server 8000` under the `llm_task` folder to host the three files (or `python -m SimpleHTTPServer 8000` for older python versions). 5. Open `localhost:8000` in Chrome. Then the button on the webpage will be enabled when the task is ready (~10 seconds). diff --git a/examples/llm_inference/js/index.js b/examples/llm_inference/js/index.js index bf57902b..df1791f6 100644 --- a/examples/llm_inference/js/index.js +++ b/examples/llm_inference/js/index.js @@ -20,7 +20,7 @@ const input = document.getElementById('input'); const output = document.getElementById('output'); const submit = document.getElementById('submit'); -const modelFileName = 'gemma-2b-it-gpu-int4.bin'; /* Update the file name */ +const modelFileName = 'gemma2-2b-it-gpu-int8.bin'; /* Update the file name */ /** * Display newly generated partial results to the output text box.