From a78c747705828f71cdf8ccf391c9326a32478b06 Mon Sep 17 00:00:00 2001
From: Michael OBrien
Date: Mon, 26 Feb 2024 18:01:03 -0500
Subject: [PATCH] #13 - gemma cpu or gpu

---
 environments/windows/src/google-gemma/gemma-gpu.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/environments/windows/src/google-gemma/gemma-gpu.py b/environments/windows/src/google-gemma/gemma-gpu.py
index 9f7d22c..027e5c6 100644
--- a/environments/windows/src/google-gemma/gemma-gpu.py
+++ b/environments/windows/src/google-gemma/gemma-gpu.py
@@ -1,16 +1,23 @@
 from transformers import AutoTokenizer, AutoModelForCausalLM
 from datetime import datetime
-access_token='hf_cfTP...XCQqH'
+#access_token='hf_cfTP...XCQqH'
 
-tokenizer = AutoTokenizer.from_pretrained("google/gemma-7b", token=access_token)
-model = AutoModelForCausalLM.from_pretrained("google/gemma-7b", device_map="auto", token=access_token)
+
+tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b", token=access_token)
+# GPU
+model = AutoModelForCausalLM.from_pretrained("google/gemma-2b", device_map="auto", token=access_token)
+# CPU
+#model = AutoModelForCausalLM.from_pretrained("google/gemma-2b",token=access_token)
 
 
 input_text = "how is gold made in collapsing neutron stars - specifically what is the ratio created during the beta and r process."
 time_start = datetime.now().strftime("%H:%M:%S")
 print("genarate start: ", datetime.now().strftime("%H:%M:%S"))
 
+# GPU
 input_ids = tokenizer(input_text, return_tensors="pt").to("cuda")
+# CPU
+#input_ids = tokenizer(input_text, return_tensors="pt")
 
 outputs = model.generate(**input_ids, max_new_tokens=10000)
 print(tokenizer.decode(outputs[0]))
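
Note on the patched script: the commit comments out the access_token
assignment but leaves token=access_token in both from_pretrained calls, so
the file raises a NameError as committed, and switching between CPU and GPU
still requires hand-commenting lines. Below is a minimal sketch of an
equivalent script that defines the token and selects the device at runtime.
It assumes torch is installed alongside transformers; the "hf_..." value is
a hypothetical placeholder for a real Hugging Face token, not a working
credential.

    import torch
    from datetime import datetime
    from transformers import AutoTokenizer, AutoModelForCausalLM

    # Placeholder only: substitute a real Hugging Face access token.
    access_token = "hf_..."

    # Select the device once instead of commenting lines in and out.
    device = "cuda" if torch.cuda.is_available() else "cpu"

    tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b", token=access_token)
    # device_map="auto" places the model on the GPU when one is present;
    # on CPU-only hosts the default (CPU) placement is used.
    model = AutoModelForCausalLM.from_pretrained(
        "google/gemma-2b",
        device_map="auto" if device == "cuda" else None,
        token=access_token,
    )

    input_text = ("how is gold made in collapsing neutron stars - specifically "
                  "what is the ratio created during the beta and r process.")
    print("generate start: ", datetime.now().strftime("%H:%M:%S"))

    # Move the tokenized inputs to the same device as the model.
    input_ids = tokenizer(input_text, return_tensors="pt").to(device)
    outputs = model.generate(**input_ids, max_new_tokens=10000)
    print(tokenizer.decode(outputs[0]))
    print("generate end:   ", datetime.now().strftime("%H:%M:%S"))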