Skip to content

Commit

Permalink
#13 - add timing - multigpu depends on pcie or nvlink = 2x
Browse files Browse the repository at this point in the history
  • Loading branch information
obriensystems committed Feb 24, 2024
1 parent 1324c15 commit cdba0c4
Showing 1 changed file with 9 additions and 2 deletions.
11 changes: 9 additions & 2 deletions environments/windows/src/google-gemma/gemma-gpu.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,19 @@
import os
from datetime import datetime

from transformers import AutoTokenizer, AutoModelForCausalLM

# Timed single-prompt generation with google/gemma-7b.
#
# Loads the tokenizer and model, generates a completion for one prompt,
# prints the decoded text, and reports wall-clock start, end and elapsed time.

# The Hugging Face access token is read from the HF_TOKEN environment variable
# so the secret is not committed with the script. When the variable is unset,
# None is passed and the library's own credential lookup applies.
access_token = os.environ.get("HF_TOKEN")

tokenizer = AutoTokenizer.from_pretrained("google/gemma-7b", token=access_token)
model = AutoModelForCausalLM.from_pretrained("google/gemma-7b", device_map="auto", token=access_token)

input_text = "how is gold made in collapsing neutron stars - specifically what is the ratio created during the beta and r process."

# Capture one datetime per endpoint: the printed value and the stored value
# then always agree, and the difference is a real duration (HH:MM:SS strings
# cannot be subtracted and wrap at midnight).
started_at = datetime.now()
time_start = started_at.strftime("%H:%M:%S")
print("genarate start: ", time_start)

input_ids = tokenizer(input_text, return_tensors="pt").to("cuda")

# Generate exactly once; running the same generation twice doubles the
# runtime being measured and prints the answer twice.
# NOTE(review): max_new_tokens=10000 may exceed the model's context window -
# confirm against the model card.
outputs = model.generate(**input_ids, max_new_tokens=10000)
print(tokenizer.decode(outputs[0]))

finished_at = datetime.now()
time_end = finished_at.strftime("%H:%M:%S")
print("end", time_end)
print("elapsed: ", finished_at - started_at)

0 comments on commit cdba0c4

Please sign in to comment.