diff --git a/models/experimental/functional_bert/README.md b/models/experimental/functional_bert/README.md new file mode 100644 index 00000000000..ef978ca2522 --- /dev/null +++ b/models/experimental/functional_bert/README.md @@ -0,0 +1,21 @@ +## functional_bert Demo +## How to Run + +Use `pytest --disable-warnings --input-path="models/experimental/functional_bert/demo/input_data.json" models/experimental/functional_bert/demo/demo.py::test_demo[models.experimental.functional_bert.tt.ttnn_functional_bert-phiyodr/bert-large-finetuned-squad2]` to run the demo. + +To run the demo for ttnn_optimized_functional_bert, use `pytest --disable-warnings --input-path="models/experimental/functional_bert/demo/input_data.json" models/experimental/functional_bert/demo/demo.py::test_demo[models.experimental.functional_bert.tt.ttnn_optimized_functional_bert-phiyodr/bert-large-finetuned-squad2]`. + +To run the demo with a different input, use `pytest --disable-warnings --input-path="<address_to_your_json_file.json>" models/experimental/functional_bert/demo/demo.py::test_demo[models.experimental.functional_bert.tt.ttnn_functional_bert-phiyodr/bert-large-finetuned-squad2]`. This file is expected to have at least 8 inputs; the demo uses the first 8. + +Our second demo runs on the SQuADv2 dataset; run it with `pytest --disable-warnings models/experimental/functional_bert/demo/demo.py::test_demo_squadv2[3-models.experimental.functional_bert.tt.ttnn_optimized_functional_bert-phiyodr/bert-large-finetuned-squad2]`. + +To run for `n_iterations` batches of samples, use `pytest --disable-warnings models/experimental/functional_bert/demo/demo.py::test_demo_squadv2[<n_iterations>-models.experimental.functional_bert.tt.ttnn_optimized_functional_bert-phiyodr/bert-large-finetuned-squad2]`. + + +# Inputs +Inputs by default are provided from `input_data.json`. If you wish to change the inputs, provide a different path to test_demo. + +We do not recommend modifying the `input_data.json` file. 
# SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.

# SPDX-License-Identifier: Apache-2.0

import json
import pytest
import torch
from loguru import logger

import transformers
import ttnn
import tt_lib
from models.utility_functions import (
    disable_compilation_reports,
    disable_persistent_kernel_cache,
    profiler,
)
from models.experimental.functional_bert.tt import ttnn_functional_bert
from models.experimental.functional_bert.tt import ttnn_optimized_functional_bert

from models.datasets.dataset_squadv2 import squadv2_1K_samples_input, squadv2_answer_decode_batch
from ttnn.model_preprocessing import (
    preprocess_model_parameters,
)

from ttnn.model_preprocessing import *
from transformers import BertForQuestionAnswering, BertTokenizer, pipeline

import evaluate


def load_inputs(input_path, batch):
    """Read the first ``batch`` context/question pairs from a JSON file.

    Args:
        input_path: Path to a JSON file holding a list of objects, each with
            ``"context"`` and ``"question"`` keys.
        batch: Number of leading entries to return.

    Returns:
        A ``(context, question)`` tuple of two lists taken, in file order,
        from the first ``batch`` entries.

    Raises:
        ValueError: If the file holds fewer than ``batch`` entries.
    """
    # Decode explicitly as UTF-8 rather than with the platform default: the
    # demo inputs contain non-ASCII characters.
    with open(input_path, encoding="utf-8") as f:
        input_data = json.load(f)

    # Raise explicitly (not `assert`) so the check survives `python -O`.
    if len(input_data) < batch:
        raise ValueError(f"Input data needs to have at least {batch} (batch size) entries.")

    entries = [input_data[i] for i in range(batch)]
    context = [entry["context"] for entry in entries]
    question = [entry["question"] for entry in entries]
    return context, question


def _tt_model_name(functional_bert, model_name):
    """Return the name handed to ``preprocess_model_parameters`` for a backend.

    Raises:
        ValueError: If ``functional_bert`` is neither of the two ttnn BERT
            implementation modules imported by this file.
    """
    if functional_bert == ttnn_functional_bert:
        return f"ttnn_{model_name}"
    if functional_bert == ttnn_optimized_functional_bert:
        return f"ttnn_{model_name}_optimized"
    raise ValueError(f"Unknown functional_bert: {functional_bert}")


def run_bert_question_and_answering_inference(
    device,
    use_program_cache,
    model_name,
    batch_size,
    sequence_size,
    functional_bert,
    model_location_generator,
    input_path,
):
    """Run one batch of question answering on device and log the answers.

    The Hugging Face model supplies the config, tokenizer and the
    question-answering pipeline used for pre/post-processing; the forward
    pass itself is ``functional_bert.bert_for_question_answering``.

    Args:
        device: Device handle forwarded to parameter and input preprocessing.
        use_program_cache: Not read in this function; accepted so the
            requesting test can pass its fixture through.
        model_name: Hugging Face model identifier.
        batch_size: Number of context/question pairs to process.
        sequence_size: Sequence length the inputs are padded/truncated to.
        functional_bert: ``ttnn_functional_bert`` or
            ``ttnn_optimized_functional_bert``.
        model_location_generator: Callable resolving ``model_name`` to the
            location passed to ``from_pretrained``.
        input_path: JSON file read by ``load_inputs``.

    Returns:
        Dict with keys ``preprocessing_parameter``, ``preprocessing_input``,
        ``inference_time`` and ``post_processing`` holding the corresponding
        ``profiler.get`` values.

    Raises:
        ValueError: If ``functional_bert`` is not a known implementation or
            the input file has fewer than ``batch_size`` entries.
    """
    disable_persistent_kernel_cache()

    model = str(model_location_generator(model_name, model_subdir="Bert"))
    hugging_face_reference_model = BertForQuestionAnswering.from_pretrained(model, torchscript=False)
    hugging_face_reference_model.eval()

    # set up tokenizer
    tokenizer_name = str(model_location_generator(model_name, model_subdir="Bert"))
    tokenizer = BertTokenizer.from_pretrained(tokenizer_name)
    config = hugging_face_reference_model.config
    nlp = pipeline("question-answering", model=hugging_face_reference_model, tokenizer=tokenizer)

    tt_model_name = _tt_model_name(functional_bert, model_name)

    profiler.start("preprocessing_parameter")
    # NOTE(review): this loads from `model_name` while the reference model
    # above loads from the resolved `model` location - confirm this is intended.
    parameters = preprocess_model_parameters(
        tt_model_name,
        initialize_model=lambda: transformers.BertForQuestionAnswering.from_pretrained(
            model_name, torchscript=False
        ).eval(),
        custom_preprocessor=functional_bert.custom_preprocessor,
        device=device,
    )
    profiler.end("preprocessing_parameter")

    context, question = load_inputs(input_path, batch_size)

    # The pipeline's own (private) helpers are used so that the device logits
    # can later be fed through the pipeline's postprocess step.
    preprocess_params, _, postprocess_params = nlp._sanitize_parameters()
    preprocess_params["max_seq_len"] = sequence_size
    inputs = nlp._args_parser({"context": context, "question": question})
    preprocessed_inputs = []
    for i in range(batch_size):
        # Only the first item yielded by preprocess is used for each example.
        model_input = next(nlp.preprocess(inputs[0][i], **preprocess_params))
        preprocessed_inputs.append(
            {
                "example": model_input["example"],
                "inputs": model_input,
            }
        )

    bert_input = tokenizer.batch_encode_plus(
        zip(question, context),
        max_length=sequence_size,
        padding="max_length",
        truncation=True,
        return_attention_mask=True,
        return_token_type_ids=True,
        return_tensors="pt",
    )
    profiler.start("preprocessing_input")
    ttnn_bert_inputs = functional_bert.preprocess_inputs(
        bert_input["input_ids"],
        bert_input["token_type_ids"],
        # Only the optimized implementation is given a mask tensor (all zeros).
        torch.zeros(1, sequence_size) if functional_bert == ttnn_optimized_functional_bert else None,
        device=device,
    )
    profiler.end("preprocessing_input")

    profiler.start("inference_time")
    tt_output = functional_bert.bert_for_question_answering(
        config,
        *ttnn_bert_inputs,
        parameters=parameters,
    )
    profiler.end("inference_time")

    tt_output = ttnn.to_torch(ttnn.from_device(tt_output)).reshape(batch_size, 1, sequence_size, -1).to(torch.float32)

    # Index 0 / 1 of the last dimension are used as start / end logits.
    tt_start_logits = tt_output[..., :, 0].squeeze(1)
    tt_end_logits = tt_output[..., :, 1].squeeze(1)

    profiler.start("post_processing_output_to_string")
    for i in range(batch_size):
        tt_res = {
            "start": tt_start_logits[i],
            "end": tt_end_logits[i],
            "example": preprocessed_inputs[i]["example"],
            **preprocessed_inputs[i]["inputs"],
        }

        tt_answer = nlp.postprocess([tt_res], **postprocess_params)

        logger.info(f"answer: {tt_answer['answer']}\n")

    profiler.end("post_processing_output_to_string")

    measurements = {
        "preprocessing_parameter": profiler.get("preprocessing_parameter"),
        "preprocessing_input": profiler.get("preprocessing_input"),
        "inference_time": profiler.get("inference_time"),
        "post_processing": profiler.get("post_processing_output_to_string"),
    }
    logger.info(f"preprocessing_parameter: {measurements['preprocessing_parameter']} s")
    logger.info(f"preprocessing_input: {measurements['preprocessing_input']} s")
    logger.info(f"inference_time: {measurements['inference_time']} s")
    logger.info(f"post_processing : {measurements['post_processing']} s")

    return measurements


def run_bert_question_and_answering_inference_squad_v2(
    device,
    use_program_cache,
    model_name,
    batch_size,
    sequence_size,
    functional_bert,
    model_location_generator,
    n_iterations,
):
    """Score the device model and the CPU reference on SQuAD v2 batches.

    For up to ``n_iterations`` batches, runs both
    ``functional_bert.bert_for_question_answering`` and the Hugging Face
    reference model, decodes both outputs with
    ``squadv2_answer_decode_batch`` and logs the ``exact`` and ``f1`` values
    of the ``squad_v2`` metric for each.

    Args:
        device: Device handle forwarded to parameter and input preprocessing.
        use_program_cache: Not read in this function; accepted so the
            requesting test can pass its fixture through.
        model_name: Hugging Face model identifier.
        batch_size: Batch size requested from ``squadv2_1K_samples_input``.
        sequence_size: Sequence length passed to the dataset helper.
        functional_bert: ``ttnn_functional_bert`` or
            ``ttnn_optimized_functional_bert``.
        model_location_generator: Callable resolving ``model_name`` to the
            location passed to ``from_pretrained``.
        n_iterations: Maximum number of batches to evaluate.

    Raises:
        ValueError: If ``functional_bert`` is not a known implementation.
    """
    disable_persistent_kernel_cache()

    model = str(model_location_generator(model_name, model_subdir="Bert"))
    hugging_face_reference_model = BertForQuestionAnswering.from_pretrained(model, torchscript=False)
    hugging_face_reference_model.eval()

    # set up tokenizer
    tokenizer_name = str(model_location_generator(model_name, model_subdir="Bert"))
    tokenizer = BertTokenizer.from_pretrained(tokenizer_name)
    config = hugging_face_reference_model.config

    tt_model_name = _tt_model_name(functional_bert, model_name)

    parameters = preprocess_model_parameters(
        tt_model_name,
        initialize_model=lambda: transformers.BertForQuestionAnswering.from_pretrained(
            model_name, torchscript=False
        ).eval(),
        custom_preprocessor=functional_bert.custom_preprocessor,
        device=device,
    )

    nlp = pipeline("question-answering", model=hugging_face_reference_model, tokenizer=tokenizer)

    attention_mask = True
    token_type_ids = True
    inputs_squadv2 = squadv2_1K_samples_input(tokenizer, sequence_size, attention_mask, token_type_ids, batch_size)
    squad_metric = evaluate.load("squad_v2")

    with torch.no_grad():
        pred_labels = []
        cpu_pred_labels = []
        true_labels = []
        for i, batch in enumerate(inputs_squadv2):
            if i >= n_iterations:
                # Stop here instead of walking the rest of the dataset while
                # doing no work.
                break

            batch_data = batch[0]
            curr_batch_size = batch_data["input_ids"].shape[0]
            ttnn_bert_inputs = functional_bert.preprocess_inputs(
                batch_data["input_ids"],
                batch_data["token_type_ids"],
                # Only the optimized implementation is given a mask tensor.
                torch.zeros(1, sequence_size) if functional_bert == ttnn_optimized_functional_bert else None,
                device=device,
            )

            tt_output = functional_bert.bert_for_question_answering(
                config,
                *ttnn_bert_inputs,
                parameters=parameters,
            )
            # NOTE(review): reshapes with `batch_size`, not `curr_batch_size`;
            # presumably every batch is full - confirm against the dataset helper.
            tt_output = (
                ttnn.to_torch(ttnn.from_device(tt_output))
                .reshape(batch_size, 1, sequence_size, -1)
                .to(torch.float32)
            )
            cpu_output = hugging_face_reference_model(**batch_data)
            references = batch[1]
            question = batch[2]
            context = batch[3]

            cpu_predictions, tt_predictions = squadv2_answer_decode_batch(
                hugging_face_reference_model,
                tokenizer,
                nlp,
                references,
                cpu_output,
                tt_output,
                curr_batch_size,
                question,
                context,
            )
            pred_labels.extend(tt_predictions)
            cpu_pred_labels.extend(cpu_predictions)
            true_labels.extend(references)

            del tt_output

        eval_score = squad_metric.compute(predictions=pred_labels, references=true_labels)
        cpu_eval_score = squad_metric.compute(predictions=cpu_pred_labels, references=true_labels)
        logger.info(f"\tTT_Eval: exact: {eval_score['exact']} -- F1: {eval_score['f1']}")
        logger.info(f"\tCPU_Eval: exact: {cpu_eval_score['exact']} -- F1: {cpu_eval_score['f1']}")


@pytest.mark.parametrize("model_name", ["phiyodr/bert-large-finetuned-squad2"])
@pytest.mark.parametrize("functional_bert", [ttnn_functional_bert, ttnn_optimized_functional_bert])
def test_demo(
    input_path,
    model_name,
    functional_bert,
    model_location_generator,
    device,
    use_program_cache,
):
    """Run the JSON-input demo with batch size 8 and sequence length 384.

    Returns the measurements dict from
    ``run_bert_question_and_answering_inference``.
    """
    disable_persistent_kernel_cache()
    disable_compilation_reports()

    tt_lib.profiler.set_profiler_location("tt_metal/tools/profiler/logs/functional_bert")
    return run_bert_question_and_answering_inference(
        device=device,
        use_program_cache=use_program_cache,
        model_name=model_name,
        batch_size=8,
        sequence_size=384,
        functional_bert=functional_bert,
        model_location_generator=model_location_generator,
        input_path=input_path,
    )


@pytest.mark.parametrize("model_name", ["phiyodr/bert-large-finetuned-squad2"])
@pytest.mark.parametrize("functional_bert", [ttnn_functional_bert, ttnn_optimized_functional_bert])
@pytest.mark.parametrize(
    "n_iterations",
    (3,),
)
def test_demo_squadv2(
    model_name,
    functional_bert,
    n_iterations,
    model_location_generator,
    device,
    use_program_cache,
):
    """Run the SQuAD v2 demo with batch size 8 and sequence length 384."""
    disable_persistent_kernel_cache()
    disable_compilation_reports()

    return run_bert_question_and_answering_inference_squad_v2(
        device=device,
        use_program_cache=use_program_cache,
        model_name=model_name,
        batch_size=8,
        sequence_size=384,
        functional_bert=functional_bert,
        model_location_generator=model_location_generator,
        n_iterations=n_iterations,
    )
The Duchy of Normandy, which they formed by treaty with the French crown, was a great fief of medieval France, and under Richard I of Normandy was forged into a cohesive and formidable principality in feudal tenure. The Normans are noted both for their culture, such as their unique Romanesque architecture and musical traditions, and for their significant military accomplishments and innovations. Norman adventurers founded the Kingdom of Sicily under Roger II after conquering southern Italy on the Saracens and Byzantines, and an expedition on behalf of their duke, William the Conqueror, led to the Norman conquest of England at the Battle of Hastings in 1066. Norman cultural and military influence spread from these new European centres to the Crusader states of the Near East, where their prince Bohemond I founded the Principality of Antioch in the Levant, to Scotland and Wales in Great Britain, to Ireland, and to the coasts of north Africa and the Canary Islands.", + "question" : "Who ruled the duchy of Normandy" + }, + { + "context" : "In many countries, there is a Gender pay gap in favor of males in the labor market. Several factors other than discrimination may contribute to this gap. On average, women are more likely than men to consider factors other than pay when looking for work, and may be less willing to travel or relocate. Thomas Sowell, in his book Knowledge and Decisions, claims that this difference is due to women not taking jobs due to marriage or pregnancy, but income studies show that that does not explain the entire difference. A U.S. Census's report stated that in US once other factors are accounted for there is still a difference in earnings between women and men. The income gap in other countries ranges from 53% in Botswana to -40% in Bahrain.", + "question" : "Who does a gender pay gap tend to favor?" 
+ }, + { + "context" : "Most of the Huguenot congregations (or individuals) in North America eventually affiliated with other Protestant denominations with more numerous members. The Huguenots adapted quickly and often married outside their immediate French communities, which led to their assimilation. Their descendants in many families continued to use French first names and surnames for their children well into the nineteenth century. Assimilated, the French made numerous contributions to United States economic life, especially as merchants and artisans in the late Colonial and early Federal periods. For example, E.I. du Pont, a former student of Lavoisier, established the Eleutherian gunpowder mills.", + "question" : "How were Huguenot settlers assimilated into North American society at large?" + }, + { + "context" : "In the laboratory, biostratigraphers analyze rock samples from outcrop and drill cores for the fossils found in them. These fossils help scientists to date the core and to understand the depositional environment in which the rock units formed. Geochronologists precisely date rocks within the stratigraphic section in order to provide better absolute bounds on the timing and rates of deposition. Magnetic stratigraphers look for signs of magnetic reversals in igneous rock units within the drill cores. Other scientists perform stable isotope studies on the rocks to gain information about past climate.", + "question" : "Who analyzes rock samples from drill cores in the lab?" + }, + { + "context" : "Neutrophils and macrophages are phagocytes that travel throughout the body in pursuit of invading pathogens. Neutrophils are normally found in the bloodstream and are the most abundant type of phagocyte, normally representing 50% to 60% of the total circulating leukocytes. 
During the acute phase of inflammation, particularly as a result of bacterial infection, neutrophils migrate toward the site of inflammation in a process called chemotaxis, and are usually the first cells to arrive at the scene of infection. Macrophages are versatile cells that reside within tissues and produce a wide array of chemicals including enzymes, complement proteins, and regulatory factors such as interleukin 1. Macrophages also act as scavengers, ridding the body of worn-out cells and other debris, and as antigen-presenting cells that activate the adaptive immune system.", + "question" : "What is the process in which neutrophils move towards the site of inflammation called?" + }, + { + "context" : "In Afghanistan, the mujahideen's victory against the Soviet Union in the 1980s did not lead to justice and prosperity, due to a vicious and destructive civil war between political and tribal warlords, making Afghanistan one of the poorest countries on earth. In 1992, the Democratic Republic of Afghanistan ruled by communist forces collapsed, and democratic Islamist elements of mujahdeen founded the Islamic State of Afghanistan. In 1996, a more conservative and anti-democratic Islamist movement known as the Taliban rose to power, defeated most of the warlords and took over roughly 80% of Afghanistan.", + "question" : "When did the Democratic Republic of Afghanistan collapse?" + }, + { + "context" : "The largest single sensory feature is the aboral organ (at the opposite end from the mouth). Its main component is a statocyst, a balance sensor consisting of a statolith, a solid particle supported on four bundles of cilia, called \"balancers\", that sense its orientation. The statocyst is protected by a transparent dome made of long, immobile cilia. A ctenophore does not automatically try to keep the statolith resting equally on all the balancers. Instead its response is determined by the animal's \"mood\", in other words the overall state of the nervous system. 
For example, if a ctenophore with trailing tentacles captures prey, it will often put some comb rows into reverse, spinning the mouth towards the prey.", + "question" : "What is the main component of the aboral organ?" + }, + { + "context": "Mark Rothko was a Latvian-born American abstract painter. He is best known for his color field paintings that depicted irregular and painterly rectangular regions of color, which he produced from 1949 to 1970. Although Rothko did not personally subscribe to any one school, he is associated with the American Abstract Expressionist movement of modern art. Originally emigrating to Portland, Oregon, from Russian Empire (Latvia) with his family, Rothko later moved to New York City where his youthful period of artistic production dealt primarily with urban scenery.", + "question": "what is Rothko best known for?" + }, + { + "context": "Malignant narcissism is a psychological syndrome that could include aspects of narcissistic personality disorder (NPD) alongside a mix of antisocial, paranoid and sadistic personality disorder traits. The importance of malignant narcissism and of projection as a defense mechanism has been confirmed in paranoia, as well as the patient's vulnerability to malignant narcissistic regression. A person with malignant narcissism exhibits paranoia in addition to the symptoms of a Narcissistic Personality Disorder. Because a malignant narcissist's personality cannot tolerate any criticism, being mocked typically causes paranoia.", + "question": "What symptoms a malignant narcissist might exhibit in addition to the symptoms of a NPD patient?" + }, + { + "context": "The 14 July Revolution, also known as the 1958 Iraqi military coup, was a coup d'état that took place on 14 July 1958 in Iraq which resulted in the toppling of King Faisal II and the overthrow of the Hashemite-led Kingdom of Iraq. 
The Iraqi Republic established in its wake ended the Hashemite Arab Federation between Iraq and Jordan that had been established just six months earlier. In July 1958, units of the Royal Iraqi Army were dispatched to Jordan in support of King Hussein. A group of Iraqi Free Officers, led by Brigadier Abd al-Karim Qasim and Colonel Abdul Salam Arif, took advantage of the opportunity and instead marched on Baghdad. On 14 July, revolutionary forces seized control of the capital and proclaimed a new republic, headed by a Revolutionary Council.", + "question": "When was the Hashemite Arab Federation formed?" + }, + { + "context": "The Tasmanian devil is a carnivorous marsupial of the family Dasyuridae. It was formerly present across mainland Australia, but became extinct there around 3,500 years ago. The size of a small dog, the Tasmanian devil became the largest carnivorous marsupial in the world following the extinction of the thylacine in 1936. It is related to quolls, and distantly related to the thylacine. It is characterised by its stocky and muscular build, black fur, pungent odour, extremely loud and disturbing screech, keen sense of smell, and ferocity when feeding. The Tasmanian devil's large head and neck allow it to generate among the strongest bites per unit body mass of any extant predatory land mammal. It hunts prey and scavenges on carrion.", + "question": "What allows Tasmanian devil to generate strong bites?" + } +]