Commit 1328a34 (1 parent: 3fb7172): 1 changed file with 52 additions and 0 deletions.
@@ -0,0 +1,52 @@
# Michael O'Brien 20241204
import os, torch
# default dual GPU - either PCIe or NVLink bus - slowdowns
#os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
# specific GPU - model must fit entirely in memory: RTX 3500 Ada = 12G, A4000 = 16G, A4500 = 20G, A6000 = 48G, RTX 4000 Ada = 20G, RTX 5000 Ada = 32G, RTX 6000 Ada = 48G
#os.environ["CUDA_VISIBLE_DEVICES"] = "0"
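# optional sanity check that the selected model fits in VRAM - a sketch using
# torch's CUDA introspection (device 0 assumed; not part of the original script):
#if torch.cuda.is_available():
#    props = torch.cuda.get_device_properties(0)
#    print(props.name, round(props.total_memory / 1024**3, 1), "GiB")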

#from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import pipeline
from datetime import datetime

#access_token = 'hf....CQqH'
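# Gemma is gated on Hugging Face: accept the license on the model page, then
# authenticate. A sketch, assuming access_token holds a valid token:
#from huggingface_hub import login
#login(token=access_token)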

# https://huggingface.co/google/gemma-2-2b
# ~11G
amodel = "google/gemma-2-2b"
# https://huggingface.co/google/gemma-2-9b
# ~36G
#amodel = "google/gemma-2-9b"
# https://huggingface.co/google/gemma-2-27b
#amodel = "google/gemma-2-27b"

#tokenizer = AutoTokenizer.from_pretrained(amodel, token=access_token)
# GPU
pipe = pipeline(
    "text-generation",
    model=amodel,
    device="cuda",  # replace with "mps" to run on a Mac device
)
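# optional: load in half precision to roughly halve GPU memory - a sketch,
# assuming a CUDA device with fp16 support (not in the original script):
#pipe = pipeline(
#    "text-generation",
#    model=amodel,
#    device="cuda",
#    torch_dtype=torch.float16,
#)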
#model = AutoModelForCausalLM.from_pretrained(amodel, device_map="auto",
#                                             torch_dtype=torch.float16,
#                                             token=access_token)
# CPU
#model = AutoModelForCausalLM.from_pretrained(amodel, token=access_token)

input_text = "how is gold made in collapsing neutron stars - specifically what is the ratio created during the beta and r process."
time_start = datetime.now().strftime("%H:%M:%S")
print("generate start: ", time_start)

# GPU
#input_ids = tokenizer(input_text, return_tensors="pt").to("cuda")
# CPU
#input_ids = tokenizer(input_text, return_tensors="pt")
#outputs = model.generate(**input_ids, max_new_tokens=10000)
#print(tokenizer.decode(outputs[0]))

outputs = pipe(input_text, max_new_tokens=256)
response = outputs[0]["generated_text"]
print(response)
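# note: "generated_text" echoes the prompt before the completion; to print
# only the new tokens, a sketch using the pipeline's return_full_text flag:
#outputs = pipe(input_text, max_new_tokens=256, return_full_text=False)
#print(outputs[0]["generated_text"])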

time_end = datetime.now().strftime("%H:%M:%S")
print("end", time_end)
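# optional: compute elapsed wall time directly - a sketch that keeps datetime
# objects instead of the "%H:%M:%S" strings above:
#t0 = datetime.now()
#outputs = pipe(input_text, max_new_tokens=256)
#print("elapsed:", datetime.now() - t0)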