Add attention export functionality to experiments (#134)
* Add attention export functionality

* Update experiments/attention_export.py

Co-authored-by: Anush <[email protected]>

---------

Co-authored-by: Anush <[email protected]>
NirantK and Anush008 authored Mar 4, 2024
1 parent 1e298a0 commit 74062e8
Showing 2 changed files with 48 additions and 0 deletions.
18 changes: 18 additions & 0 deletions experiments/attention_export.py
@@ -0,0 +1,18 @@
import os

from optimum.exporters.onnx import main_export
from transformers import AutoTokenizer

model_id = "sentence-transformers/paraphrase-MiniLM-L6-v2"
output_dir = f"models/{model_id.replace('/', '_')}"
# Ask the exported graph to expose per-layer attention tensors.
model_kwargs = {"output_attentions": True, "return_dict": True}
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Export only if the ONNX model does not already exist.
if os.path.exists(f"{output_dir}/model.onnx"):
    print("Model already exported")
else:
    print(f"Exporting model to {output_dir}")
    main_export(model_id, output=output_dir, no_post_process=True, model_kwargs=model_kwargs)
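
A quick sanity check after the export is to list the graph's outputs: with output_attentions=True, the exporter should add one attention tensor per layer alongside last_hidden_state. A minimal sketch, assuming the export above completed (exact output names depend on the optimum version):

import onnxruntime

sess = onnxruntime.InferenceSession("models/sentence-transformers_paraphrase-MiniLM-L6-v2/model.onnx")
# Print each declared graph output and its (symbolic) shape.
for out in sess.get_outputs():
    print(out.name, out.shape)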
30 changes: 30 additions & 0 deletions experiments/try_attention_export.py
@@ -0,0 +1,30 @@
import numpy as np
import onnxruntime
from transformers import AutoTokenizer

model_id = "sentence-transformers/paraphrase-MiniLM-L6-v2"
output_dir = f"models/{model_id.replace('/', '_')}"
tokenizer = AutoTokenizer.from_pretrained(model_id)

model_path = f"{output_dir}/model.onnx"
ort_session = onnxruntime.InferenceSession(model_path)

# Tokenize a sample sentence as numpy arrays.
text = "This is a test sentence"
tokenizer_output = tokenizer(text, return_tensors="np")

# Feed only the inputs the exported graph declares, cast to int64
# (ONNX Runtime expects int64 token tensors).
input_names = {inp.name for inp in ort_session.get_inputs()}
ort_inputs = {
    name: np.asarray(array).astype(np.int64)
    for name, array in tokenizer_output.items()
    if name in input_names
}

# Run the ONNX model.
outputs = ort_session.run(None, ort_inputs)

# With output_attentions=True, the attention tensors are appended after
# last_hidden_state, so outputs[-1] is expected to be the final layer's
# attention, shaped (batch, num_heads, seq_len, seq_len).
attentions = outputs[-1]

# Print the attention weights for the first head of that layer.
print(attentions[0][0])
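
To make the raw weights easier to read, the head's (seq_len, seq_len) matrix can be paired with the decoded tokens. A minimal sketch, reusing tokenizer, ort_inputs, and attentions from above; the argmax-per-row readout is an illustration, not part of the original experiment:

# Map each token to the token it attends to most strongly in this head.
tokens = tokenizer.convert_ids_to_tokens(ort_inputs["input_ids"][0].tolist())
head = attentions[0][0]  # (seq_len, seq_len): row i attends over all columns
for i, token in enumerate(tokens):
    j = int(np.argmax(head[i]))
    print(f"{token} -> {tokens[j]} ({head[i][j]:.3f})")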
