#13 - gemma cpu or gpu

ObrienlabsDev · Feb 26, 2024 · a78c747 · a78c747
1 parent cdba0c4
commit a78c747
Showing 1 changed file with 15 additions and 4 deletions.
diff --git a/environments/windows/src/google-gemma/gemma-gpu.py b/environments/windows/src/google-gemma/gemma-gpu.py
@@ -1,16 +1,27 @@
+# Load google/gemma-2b with Hugging Face transformers, generate a completion for one prompt and print it.
 from transformers import AutoTokenizer, AutoModelForCausalLM
 from datetime import datetime
+import os
 
-access_token='hf_cfTP...XCQqH'
+# Read the Hugging Face token from the environment so that no secret is kept
+# in source control. When HF_TOKEN is unset this is None and from_pretrained is
+# called with token=None (per the transformers docs it then uses the token saved by `huggingface-cli login`, if any).
+access_token = os.environ.get("HF_TOKEN")
 
-tokenizer = AutoTokenizer.from_pretrained("google/gemma-7b", token=access_token)
-model = AutoModelForCausalLM.from_pretrained("google/gemma-7b", device_map="auto", token=access_token)
+
+tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b", token=access_token)
+# GPU
+model = AutoModelForCausalLM.from_pretrained("google/gemma-2b", device_map="auto", token=access_token)
+# CPU
+#model = AutoModelForCausalLM.from_pretrained("google/gemma-2b",token=access_token)
 
 input_text = "how is gold made in collapsing neutron stars - specifically what is the ratio created during the beta and r process."
 time_start = datetime.now().strftime("%H:%M:%S")
 print("genarate start: ", datetime.now().strftime("%H:%M:%S"))
 
-input_ids = tokenizer(input_text, return_tensors="pt").to("cuda")
+# Send the inputs to the device the model was loaded on, so the same line
+# works for both the GPU and the CPU variant of the model load above.
+input_ids = tokenizer(input_text, return_tensors="pt").to(model.device)
 outputs = model.generate(**input_ids, 
                          max_new_tokens=10000)
 print(tokenizer.decode(outputs[0]))