#13 - add timing - multi-GPU depends on PCIe or NVLink = 2x

ObrienlabsDev · Feb 24, 2024 · cdba0c4 · cdba0c4
1 parent 1324c15
commit cdba0c4
Showing 1 changed file with 9 additions and 2 deletions.
diff --git a/environments/windows/src/google-gemma/gemma-gpu.py b/environments/windows/src/google-gemma/gemma-gpu.py
@@ -1,12 +1,19 @@
 from transformers import AutoTokenizer, AutoModelForCausalLM
+from datetime import datetime  # wall-clock timestamps taken before and after generation
 
 access_token='hf_cfTP...XCQqH'
 
 tokenizer = AutoTokenizer.from_pretrained("google/gemma-7b", token=access_token)
 model = AutoModelForCausalLM.from_pretrained("google/gemma-7b", device_map="auto", token=access_token)
 
 input_text = "how is gold made in collapsing neutron stars - specifically what is the ratio created during the beta and r process."
+time_start = datetime.now()  # keep the datetime object (not a string) so a duration can be computed
+print("generate start: ", time_start.strftime("%H:%M:%S"))
+
 input_ids = tokenizer(input_text, return_tensors="pt").to("cuda")
+outputs = model.generate(**input_ids,
+                         max_new_tokens=10000)
+print(tokenizer.decode(outputs[0]))
 
-outputs = model.generate(**input_ids, max_new_tokens=10000)
-print(tokenizer.decode(outputs[0]))
+time_end = datetime.now()  # one timestamp reused for the printed end time and the elapsed time
+print("end", time_end.strftime("%H:%M:%S"), "elapsed:", time_end - time_start)