diff --git a/experiments/attention_export.py b/experiments/attention_export.py
new file mode 100644
index 00000000..36a10698
--- /dev/null
+++ b/experiments/attention_export.py
@@ -0,0 +1,18 @@
+import numpy as np
+import onnx
+import onnxruntime
+from optimum.exporters.onnx import main_export
+from transformers import AutoTokenizer
+
+model_id = "sentence-transformers/paraphrase-MiniLM-L6-v2"
+output_dir = f"models/{model_id.replace('/', '_')}"
+model_kwargs = {"output_attentions": True, "return_dict": True}
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+
+# export only if the output model does not already exist (check disabled for now)
+# try:
+#     sess = onnxruntime.InferenceSession(f"{output_dir}/model.onnx")
+#     print("Model already exported")
+# except FileNotFoundError:
+print(f"Exporting model to {output_dir}")
+main_export(model_id, output=output_dir, no_post_process=True, model_kwargs=model_kwargs)
diff --git a/experiments/try_attention_export.py b/experiments/try_attention_export.py
new file mode 100644
index 00000000..58f78862
--- /dev/null
+++ b/experiments/try_attention_export.py
@@ -0,0 +1,30 @@
+import numpy as np
+import onnx
+import onnxruntime
+from optimum.exporters.onnx import main_export
+from transformers import AutoTokenizer
+
+model_id = "sentence-transformers/paraphrase-MiniLM-L6-v2"
+output_dir = f"models/{model_id.replace('/', '_')}"
+model_kwargs = {"output_attentions": True, "return_dict": True}
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+
+model_path = f"{output_dir}/model.onnx"
+onnx_model = onnx.load(model_path)
+ort_session = onnxruntime.InferenceSession(model_path)
+text = "This is a test sentence"
+tokenizer_output = tokenizer(text, return_tensors="np")
+input_ids = tokenizer_output["input_ids"]
+attention_mask = tokenizer_output["attention_mask"]
+print(attention_mask)
+# Prepare the input: ONNX Runtime expects int64 token ids
+input_ids = np.array(input_ids).astype(np.int64)
+
+# Run the ONNX model
+outputs = ort_session.run(None, {"input_ids": input_ids, "attention_mask": attention_mask})
+
+# Get the attention weights (assumed to be the last output of the exported graph)
+attentions = outputs[-1]
+
+# Print the attention weights for the first layer and first head
+print(attentions[0][0])
\ No newline at end of file
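
Note (not part of the diff): the script above takes outputs[-1] on the assumption that the attention tensors come last among the exported graph's outputs. A minimal sketch, reusing the ort_session created in try_attention_export.py, to confirm which outputs actually hold the attentions before indexing:

    # Sketch: list the exported graph's outputs by position, name, and shape
    # so the index used for the attention tensors can be verified.
    for i, out in enumerate(ort_session.get_outputs()):
        print(i, out.name, out.shape)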