data/terms.json

{
  "terms": [
    {
      "name": "Descriptive analytics",
      "description": "The analysis of data from the past that can help answer “What happened?” or “What is happening?”.<br><br>For example, an analysis of A&E attendance may show a decrease in visitor numbers over the last 3 months.",
      "termCode": "descriptive-analytics",
      "related": [
        "predictive-analytics",
        "prescriptive-analytics",
        "statistics"
      ]
    },
    {
      "name": "Predictive analytics",
      "description": "The analysis of data where the goal is to create predictions of the future based on the past that can help answer “What will happen?”<br><br>For example, using historical admissions data, a Trust may be able to predict or forecast the number of admissions in the next 24 hours.",
      "termCode": "predictive-analytics",
      "related": [
        "descriptive-analytics",
        "prescriptive-analytics",
        "statistics",
        "supervised",
        "machine-learning"
      ]
    },
    {
      "name": "Prescriptive analytics",
      "description": "The use of predictive analytics to recommend or automatically action an activity.<br><br>For example, using a prediction of future admissions, a Trust could recommend or automatically assign a bed to an incoming patient.",
      "termCode": "prescriptive-analytics",
      "related": [
        "descriptive-analytics",
        "predictive-analytics",
        "machine-learning",
        "ai"
      ]
    },
    {
      "name": "Statistics",
      "description": "The process of collecting, classifying and analysing data. Many statistical techniques are used in analytics and modern day machine learning.<br><br>For example, calculating the “average” number of patients arriving at a Trust to inform bed managers of capacity requirements or defining how to optimise the performance of an AI model.",
      "termCode": "statistics",
      "related": [
        "descriptive-analytics",
        "predictive-analytics",
        "prescriptive-analytics",
        "machine-learning",
        "unsupervised",
        "supervised"
      ]
    },
    {
      "name": "Artificial Intelligence",
      "acronym": "AI",
      "description": "The use of digital technology to create systems capable of performing tasks commonly thought to require human intelligence.<br><br>For example, an AI system may analyse radiography images and detect tumours in cancer patients.<br><br>In 2021, the UK government announced its <a target='_blank' href='https://www.gov.uk/government/publications/national-ai-strategy'>National AI Strategy</a>.",
      "termCode": "ai",
      "related": [
        "machine-learning",
        "supervised",
        "unsupervised",
        "reinforcement",
        "computer-vision",
        "fairness",
        "bias",
        "explainability"
      ]
    },
    {
      "name": "Algorithmic Impact Assessment",
      "acronym": "AIA",
      "description": "Algorithmic impact assessments (often abbreviated as ‘AIAs’) are tools that set out frameworks and processes for assessing possible societal impacts, both beneficial or adverse, of AI systems before the systems are in use (with ongoing monitoring often advised). An example AIA can be found <a target='_blank' href='https://www.adalovelaceinstitute.org/report/algorithmic-impact-assessment-case-study-healthcare/'>here<a>.",
      "termCode": "aia",
      "related": [
        "fairness",
        "bias"
      ]
    },
    {
      "name": "Algorithm",
      "description": "A set of instructions that can be followed by a human or computer.<br><br>For example, the NHS <a target='_blank' href='https://www.england.nhs.uk/wp-content/uploads/2014/06/psa-aki-alg.pdf'>algorithm for detecting Acute Kidney Injury</a> is a set of instructions that can be repeated for multiple patients.<br><br>In AI, machine learning algorithms use data to make predictions or recommendations which can inform decision making",
      "termCode": "algorithm",
      "related": [
        "statistics",
        "ai",
        "machine-learning",
        "gradient-descent"
      ]
    },
    {
      "name": "Data",
      "description": "Information stored in a digital way.<br><br>For example, this can be information on your physical state such as heart rate, blood pressure, or notes on your recent visit to your primary care physician.<br><br>Imaging data is a common type of healthcare data, which includes data generated from X-ray machines, CT scanners, MRI scanners, OCT systems etc.",
      "termCode": "data",
      "related": [
        "metadata",
        "linked-data",
        "synthetic-data",
        "structured",
        "unstructured",
        "feature",
        "data-cleaning",
        "database",
        "tre",
        "sql",
        "binary",
        "sequential-data",
        "model"
      ]
    },
    {
      "name": "Model",
      "description": "A model is a simplified representation of something in the real world. In AI, models are the result of an algorithm and data.<br><br>Models are by definition attempts to define real world phenomena, and can be very helpful when trying to assist in decision making.<br><br>For example, an AI model of bed management may use data on past admissions to predict how many patients will arrive at a point in time, and therefore what the best beds are for these patients taking into account future arrivals.",
      "termCode": "model",
      "related": [
        "ai",
        "machine-learning",
        "feature",
        "algorithm",
        "data",
        "mlops"
      ]
    },
    {
      "name": "Machine learning",
      "description": "An approach to building models using (normally large amounts of) data. This differs from traditional approaches to building models by defining rules by hand.<br><br>For example, a self-driving car will contain many different machine learning algorithms that have been built using data generated by expert drivers.<br><br>AI used in medical imaging leverages machine learning on images with known conditions.<br><br>In 2021, the Medicines & Healthcare products Regulatory Agency (MHRA) published guidance on <a target='blank' href='https://www.gov.uk/government/publications/good-machine-learning-practice-for-medical-device-development-guiding-principles/good-machine-learning-practice-for-medical-device-development-guiding-principles'>Good Machine Learning Practice for Medical Device Development: Guiding Principles</a>.",
      "termCode": "machine-learning",
      "related": [
        "ai",
        "supervised",
        "unsupervised",
        "reinforcement",
        "algorithm",
        "feature",
        "label",
        "gradient-descent",
        "statistics",
        "federated-learning",
        "mlops",
        "data",
        "model"
      ]
    },
    {
      "name": "Unsupervised machine learning",
      "description": "A type of machine learning where you do not know the outcome or definition of your data, and are looking for patterns. This includes clustering techniques such as k-means and principal component analysis (PCA).<br><br>For example, unsupervised machine learning can help identify different groups of hospital patients who use hospital services in different ways.",
      "termCode": "unsupervised",
      "related": [
        "clustering",
        "supervised",
        "reinforcement",
        "statistics",
        "machine-learning",
        "ai",
        "descriptive-analytics"
      ]
    },
    {
      "name": "Supervised machine learning",
      "description": "A type of machine learning where you know the outcome of the data you are looking to model. This includes regression and classification techniques.<br><br>For example, an AI to detect Covid-19 in CT scans can use supervised machine learning with a data set of CT scans where patients have a known outcome (e.g. Covid-19 or not), to predict the likelihood of a new patient having Covid-19 from their CT scan (which the model has not seen before).",
      "termCode": "supervised",
      "related": [
        "unsupervised",
        "reinforcement",
        "statistics",
        "label",
        "feature",
        "machine-learning",
        "ai",
        "predictive-analytics",
        "prescriptive-analytics"
      ]
    },
    {
      "name": "Reinforcement learning",
      "description": "A type of machine learning where you define an environment and a goal, and iteratively attempt at maximising the goal by reinforcing actions that increase the goal.<br><br>For example, Deepmind successfully used reinforcement learning to master the board game Go by defining the game parameters, the goal and suggesting the best moves to play against real human players.",
      "termCode": "reinforcement",
      "related": [
        "ai",
        "machine-learning",
        "supervised",
        "unsupervised"
      ]
    },
    {
      "name": "Semi-supervised machine learning",
      "description": "An approach to machine learning that combines known data (supervised machine learning) with unknown data to improve its ability to act on data it has not seen before.<br><br>This approach is being used to improve models where there is not a lot of data available with a known outcome, for example where you may have a rare disease with less data available than in more common diseases.",
      "termCode": "semi-supervised",
      "related": [
        "supervised",
        "unsupervised",
        "self-supervised",
        "machine-learning"
      ]
    },
    {
      "name": "Self-supervised machine learning",
      "description": "An approach to machine learning where you do not have the outcomes of your data, but use the structure of your data to help determine what these outcomes are.<br><br>For example, recent developments in understanding text and audio can use the existing structure to predict what words should come next.",
      "termCode": "self-supervised",
      "related": [
        "semi-supervised",
        "unsupervised",
        "supervised",
        "machine-learning"
      ]
    },
    {
      "name": "General artificial intelligence",
      "acronym": "General AI",
      "description": "A theoretical concept of AI that is able to generalise, or adapt, to different applications, much like a human or animal.<br><br>For example, if a human learns to drive a car, they could without too much trouble drive other vehicles.",
      "termCode": "general-ai",
      "related": [
        "ai",
        "narrow-ai"
      ]
    },
    {
      "name": "Narrow artificial intelligence",
      "acronym": "Narrow AI",
      "description": "AI focussed on solving a specific problem.<br><br>For example, an AI built to identify cancerous tumours in breast scans, would not automatically be able to detect tumours in other parts of the body without significant rework.",
      "termCode": "narrow-ai",
      "related": [
        "ai",
        "general-ai",
        "supervised",
        "reinforcement",
        "machine-learning"
      ]
    },
    {
      "name": "Machine learning operations",
      "acronym": "MLOps",
      "description": "The process of safely deploying, monitoring and updating machine learning models in production, or real-world, environments.<br><br>Because machine learning models are built on data, and data can change, it is important to build robustness into the system so they can adapt to a changing environment without losing performance.",
      "termCode": "mlops",
      "related": [
        "machine-learning",
        "model",
        "deployment-platform"
      ]
    },
    {
      "name": "Cloud",
      "description": "An approach to computing where resources are no longer on premise, but hosted in different locations managed by a third-party.<br><Br>For example, a hospital may store patient records on computers physically housed within the hospital, which require maintenance. By moving to the cloud (a marketplace of cloud vendors is established), the hospital can outsource the maintenance and expenditure of physically owning the computers, in return for renting space.<br><br>In 2013 the UK government released its “<a target='_blank' href='https://www.gov.uk/guidance/government-cloud-first-policy'>Cloud First policy</a>”.",
      "termCode": "cloud",
      "related": [
        "on-prem",
        "database"
      ]
    },
    {
      "name": "On-premise",
      "description": "An approach to computing where you physically locate your equipment on your premise. For example, a hospital which stores your records within the hospital buildings or nearby in hospital-owned buildings.",
      "termCode": "on-prem",
      "related": [
        "database",
        "cloud"
      ]
    },
    {
      "name": "Hybrid cloud",
      "description": "An approach to working with both cloud and on-premise computing resources, where some resources can be on-premise (e.g. data), and other resources can be in the Cloud (e.g. applications that use the data). With a secure connection between the premise and the Cloud you can implement a hybrid cloud.",
      "termCode": "hybrid",
      "related": [
        "cloud",
        "on-prem",
        "database"
      ]
    },
    {
      "name": "Python",
      "description": "A general purpose computer programming language that has become very popular for data science, machine learning and AI. It is free to learn and has a large community of developers who contribute additional features.<br><br>There is a thriving <a target='_blank' href='https://nhs-pycom.net/'>NHS Python Community for Healthcare</a>.",
      "termCode": "python",
      "related": [
        "R"
      ]
    },
    {
      "name": "R",
      "description": "A statistical computer programming language that is commonly used for data analysis and data science. R is free to learn and has a large community of developers who contribute additional features.<br><br>There is a thriving <a target='_blank' href='https://nhsrcommunity.com/'>NHS-R Community</a>.",
      "termCode": "R",
      "related": [
        "python"
      ]
    },
    {
      "name": "Application programming interface",
      "acronym": "API",
      "description": "A standardised way to share data. An API defines the mechanisms to receive and send data, which is agnostic to how the underlying data is stored.<br><br>For example, NHS Digital has a <a target='_blank' href='https://digital.nhs.uk/developer/api-catalogue'>number of APIs</a> available to help build modern healthcare technology.",
      "termCode": "api",
      "related": [
        "database",
        "cloud",
        "interoperability",
        "standard"
      ]
    },
    {
      "name": "Standard",
      "description": "An agreed set of definitions, guidelines and sometimes technical approaches for a specific area. Formal Standards may be mandated by the Government, whereas de facto standards are created and used by communities working in that space.<br><br>For example, ISO 13485 is a UK <a target='_blank' href='https://www.gov.uk/guidance/designated-standards#healthcare-engineering'>Designated Standard</a> for quality management systems for medical device, it can be used to <a target='_blank' href='https://www.gov.uk/guidance/medical-devices-conformity-assessment-and-the-ukca-mark#compliance-with-designated-standards'>demonstrate conformance</a> with parts of the medical device regulations.",
      "termCode": "standard",
      "related": [
        "interoperability",
        "rap"
      ]
    },
    {
      "name": "Pseudonymisation",
      "description": "A technique that separates data from direct identifiers (for example name, surname, NHS number) and replaces them with a pseudonym (for example, a reference number), so that identifying an individual from that data is not possible without additional information. The organisation that conducted pseudonymisation will be able to re-identify individuals if required.",
      "termCode": "pseudonymisation",
      "related": [
        "data-protection",
        "anonymisation",
        "dpia"
      ]
    },
    {
      "name": "Anonymisation",
      "description": "The process of removing all identifiable information from data in a way which makes it theoretically infeasible to identify an individual.  Anonymised data is not considered as personal data under the GDPR. This means it is not subject to the same restrictions as personal data.<br><br>For example, by removing direct identifiers such as NHS number and name, and translating e.g. age into an age range (25-40) and grouping postcodes together.<br><br>You can read more about the challenges and approaches to anonymisation in the <a target='_blank' href='https://ico.org.uk/media/1061/anonymisation-code.pdf'>ICO code of practice</a>.",
      "termCode": "anonymisation",
      "related": [
        "pseudonymisation",
        "data-protection",
        "dpia",
        "synthetic-data"
      ]
    },
    {
      "name": "Data protection",
      "description": "The principles, legislation and processes to ensure that individuals can trust an organisation to use their data fairly and safely.<br><br>There are three key pieces of legislation that protect the collection, sharing and processing of data within the health and care system: Common Law Duty of Confidentiality, the General Data Protection Regulation (GDPR) and the Data Protection Act 2018 (DPR).",
      "termCode": "data-protection",
      "related": [
        "anonymisation",
        "pseudonymisation",
        "federated-learning",
        "synthetic-data",
        "dpia"
      ]
    },
    {
      "name": "Electronic health record",
      "acronym": "EHR",
      "description": "Also known as electronic patient record (EPR) or electronic medical record (EMR), contains the personal health records of an individual in digital format, such as visits to primary and secondary care, prescriptions, diagnoses and clinical notes.",
      "termCode": "ehr",
      "related": [
        "database",
        "cloud",
        "on-prem",
        "hybrid"
      ]
    },
    {
      "name": "Database",
      "description": "A collection of information or data stored and managed electronically. There are many different types of database depending on the data stored e.g. image data, text data or numerical data.<br><br>For example, your local Trust will manage a database containing your electronic health records (EHR). This database and the software that manages it may be referred to as an EHR system.",
      "termCode": "database",
      "related": [
        "data",
        "sql",
        "linked-data",
        "tre",
        "structured",
        "cloud",
        "on-prem",
        "hybrid"
      ]
    },
    {
      "name": "Structured data",
      "description": "Information which is well structured, such as a spreadsheet of information. This means there are defined columns and you can expect additional data to follow the same or similar structure (perhaps with some missing values).<br><br>Historically, data analysis was limited to structured data as it is well defined. More recently, advances in AI mean unstructured data is now more accessible.",
      "termCode": "structured",
      "related": [
        "data",
        "unstructured",
        "database",
        "sql"
      ]
    },
    {
      "name": "Unstructured data",
      "description": "Information which may have some structure (e.g. a name or some simple attributes or metadata), but by definition could contain a range of information.<br><br>For example, images contain some structured data (image size, image format, image name) but the contents of those images can vary completely. Audio also contains some structure (length, format) but can vary considerably.<br><br>Recent developments in approaches to AI, computational power and quantity of data have led to an increase in AI in unstructured data, such as medical imaging and speech to text.",
      "termCode": "unstructured",
      "related": [
        "data",
        "structured",
        "database"
      ]
    },
    {
      "name": "Explainability",
      "acronym": "XAI",
      "description": "AI can be built on complex algorithms and data, and explainability is a measure of how understandable, or explainable, the decisions of an AI system are to humans.<br><br>For example, an AI may predict which patients are most in need of surgery, but should be able to explain why it has prioritised patients in a certain way.<br><br>XAI (\"eXplainable Artificial Intelligence\") is where humans can understand how the results of an AI model were obtained.",
      "termCode": "explainability",
      "related": [
        "ai",
        "fairness",
        "bias",
        "local-explainability",
        "general-explainability",
        "ecological-fallacy"
      ]
    },
    {
      "name": "Local explainability",
      "description": "The ability to explain why an AI prediction has been made for a specific data point.<br><br>For example, an AI predicts that you should attend a follow-up appointment in 6 months, and is able to share what factors led to this specific, individual, decision (e.g. date of your last appointment, pre-existing conditions).",
      "termCode": "local-explainability",
      "related": [
        "explainability",
        "ai",
        "fairness",
        "bias",
        "general-explainability"
      ]
    },
    {
      "name": "General explainability",
      "description": "Sometimes referred to as global explainability, general explainability is an approach to sharing what features or data points had the most influence over an AI model's predictions.<br><br>For example, in an AI system for predicting length of stay in hospital, age and location were the two most important factors.",
      "termCode": "general-explainability",
      "related": [
        "explainability",
        "feature",
        "ai",
        "fairness",
        "bias",
        "local-explainability"
      ]
    },
    {
      "name": "Inference",
      "description": "In AI, inference is the process of making a prediction from a model that has already been trained.<br><br>For example, a hospital may implement a new AI model that can suggest the best bed allocation for an incoming patient. Inference occurs when that new patient arrives, and the system is run to suggest a new allocation.",
      "termCode": "inference",
      "related": [
        "ai",
        "machine-learning",
        "predictive-analytics",
        "supervised"
      ]
    },
    {
      "name": "Training",
      "description": "Most types of AI require a training process, which uses historical data to build a model able to predict future cases.<br><br>For example, researchers have trained models to predict Covid-19 from patient X-rays using the NCCID database.",
      "termCode": "training",
      "related": [
        "machine-learning",
        "gradient-descent",
        "supervised",
        "training-data",
        "test-data",
        "validation-data",
        "underfitting",
        "overfitting"
      ]
    },
    {
      "name": "Causality",
      "description": "The influence of an event resulting, or causing, another.<br><br>For example, consuming more calories than you use up will lead to weight gain.<br><br>While many AI systems use patterns found in data to make predictions, very rarely are these patterns sufficient to determine the underlying cause of a behaviour.",
      "termCode": "causality",
      "related": [
        "correlation",
        "explainability"
      ]
    },
    {
      "name": "Correlation",
      "description": "A measure that expresses the extent to which data are related in a direct, or linear, way. It describes a relationship between data without making any statement about cause and effect (i.e. correlation does not mean causality). However, it can describe a positive or negative, also known as inverse, relationship between these data. Positive would be if one goes up the other goes up too, negative would be if one if one goes up the other one goes down.<br><br>For example, an individual's weight is correlated to their height and waiting times are correlated to the number of people waiting.",
      "termCode": "correlation",
      "related": [
        "causality",
        "unsupervised"
      ]
    },
    {
      "name": "Ecological fallacy",
      "description": "It occurs when attributing characteristics of a group to an individual part of that group. In other words, you should not make conclusions about individuals based on findings about the group they belong to.",
      "termCode": "ecological-fallacy",
      "related": [
        "explainability"
      ]
    },
    {
      "name": "Clinical trials",
      "description": "A formal experiment used to scientifically evaluate the performance of a medicine, technology or process before being approved for widespread use.<br><br>For example, many medicines will undertake a series of trials to demonstrate their effectiveness and safety.<br><br>You can find out more about clinical trials in the UK at the <a target='_blank' href='https://www.nhs.uk/conditions/clinical-trials/'>NHS website</a>.",
      "termCode": "clinical-trials",
      "related": [
        "test-data"
      ]
    },
    {
      "name": "Fairness",
      "description": "When individuals are not penalised by algorithms because they are part of a (sensitive) group.<br><br>For example, an AI algorithm used in law enforcement in the USA was shown to unfairly increase sentence recommendations based on racial background (an example of unfairness caused by discrimination bias).<br><br>Note that in <a href='https://ico.org.uk/for-organisations/guide-to-data-protection/guide-to-the-general-data-protection-regulation-gdpr/principles/lawfulness-fairness-and-transparency/#fairness' target='_blank'>data protection law</a>, fairness refers to the use of data in ways that people would reasonably expect, and that would not have unjustified adverse affects on them.",
      "termCode": "fairness",
      "related": [
        "bias",
        "explainability",
        "ai",
        "predictive-analytics",
        "algorithm",
        "data-protection"
      ]
    },
    {
      "name": "Bias",
      "description": "The disproportionate weighting in favour of, or against a specific item or individual. There are <a href='https://en.wikipedia.org/wiki/Bias' target='_blank'>many types of bias</a> that exist.<br><br>AI algorithms are often trained on historical data which can contain bias that exists in society, and are trained by humans who themselves have bias. Unless the algorithms are built with fairness in mind, they may repeat these biases in their predictions.<br><br>For example, an AI trained on CVs from engineers, who are historically predominantly male, may unfairly penalise CVs from women if the algorithms are not tested for fairness and modified to remove bias. This is an example of selection bias.",
      "termCode": "bias",
      "related": [
        "fairness",
        "explainability",
        "ai",
        "predictive-analytics",
        "algorithm"
      ]
    },
    {
      "name": "Proof of concept",
      "acronym": "PoC",
      "description": "A demonstration of the feasibility, or possibility, of a technology to be able to perform a task or solve a specific problem. A PoC is an early stage exploration, and would be followed by additional testing and engineering to ensure its viability in a real world setting.<br><br>For example, the <a target='_blank' href='https://www.nhsx.nhs.uk/ai-lab/ai-lab-programmes/skunkworks/'>NHS AI Lab Skunkworks</a> team publishes PoCs on a range of problems, such as using AI to allocate beds or predict length of stay in hospital.",
      "termCode": "poc",
      "related": [
        "trl"
      ]
    },
    {
      "name": "Technology readiness level",
      "acronym": "TRL",
      "description": "A <a target='_blank' href='https://en.wikipedia.org/wiki/Technology_readiness_level'>framework</a> developed by NASA to describe the different levels of maturity of a technology.<br><br>For example, a proof of concept may come under TRL level 4.<br><br>TRL levels start at 1 for a basic idea through to 9 for a fully deployed solution.",
      "termCode": "trl",
      "related": [
        "poc"
      ]
    },
    {
      "name": "Training data",
      "description": "The data required to train, or “teach” a machine learning algorithm when developing a model.<br><br>Good quality training data that is reflective of the population, unbiased and large enough to ensure a robust model is a key prerequisite for AI.",
      "termCode": "training-data",
      "related": [
        "test-data",
        "validation-data",
        "data-augmentation",
        "training",
        "supervised",
        "predictive-analytics",
        "machine-learning"
      ]
    },
    {
      "name": "Test data",
      "description": "Data that is not included in the training data, and used to test that a model has accurately identified the patterns in the data that result in the desired behaviour.<br><br>For example, the NCCID holds an external validation data set used to test commercial and academic models built to detect Covid-19 from chest X-rays and CT scans.",
      "termCode": "test-data",
      "related": [
        "validation-data",
        "training-data",
        "supervised",
        "training",
        "predictive-analytics",
        "machine-learning"
      ]
    },
    {
      "name": "Cross validation",
      "description": "An approach to reducing overfitting during model development, by iteratively selecting different portions of the data to train and validate a predictive (supervised) machine learning model.<br><br>Cross validation can increase the overall performance of a model, along with data augmentation techniques.",
      "termCode": "cross-validation",
      "related": [
        "training-data",
        "validation-data",
        "overfitting",
        "data-augmentation",
        "training",
        "supervised",
        "predictive-analytics",
        "machine-learning"
      ]
    },
    {
      "name": "Validation data",
      "description": "Data that is not included in the training data, but is used to check the performance of the model as it is being trained. This is separate to the test data used to check the final performance of the model.<br><br>This definition relates to the definition within AI, and not the regulatory aspects of medical devices.",
      "termCode": "validation-data",
      "related": [
        "test-data",
        "training-data",
        "training",
        "supervised",
        "predictive-analytics",
        "machine-learning"
      ]
    },
    {
      "name": "Classification",
      "description": "A type of machine learning model which can predict whether or not you belong to a specific class, or label. Common approaches include logistic regression, decision trees and random forests.<br><br>For example, a classification model may be built to identify individuals with  diabetes. The classes could be Type-I diabetes, Type-II diabetes, gestational diabetes or no diabetes. Normally, a model would return the probability that you belonged to each class, and would assign you to the class with the highest probability.",
      "termCode": "classification",
      "related": [
        "supervised",
        "label",
        "feature",
        "ai",
        "machine-learning",
        "predictive-analytics",
        "binary",
        "regression"
      ]
    },
    {
      "name": "Regression",
      "description": "A type of machine learning model that predicts a continuous value, instead of a discrete value as is the case in classification models.<br><br>For example, a regression model may predict how long you will stay in a hospital bed upon admission. This value will be a numerical value e.g. 27 hours, or 28 hours.",
      "termCode": "regression",
      "related": [
        "supervised",
        "ai",
        "machine-learning",
        "predictive-analytics",
        "classification"
      ]
    },
    {
      "name": "Accuracy",
      "description": "<em>Classification metric</em>.<br><br>The proportion of correctly identified positive and negative cases in a classification model.<br><br>For example, an AI model that correctly identifies all positive and negative cases of patients with e.g. Covid-19 would have an accuracy of 1.0 (100%).<br><br>Note that in <a href='https://ico.org.uk/for-organisations/guide-to-data-protection/key-dp-themes/guidance-on-artificial-intelligence-and-data-protection/' target='_blank'>data protection law</a>, accuracy refers to accurate and up to date record keeping.",
      "termCode": "accuracy",
      "related": [
        "specificity",
        "sensitivity",
        "precision",
        "auc",
        "roc",
        "f1-score",
        "false-positive",
        "false-negative",
        "true-positive",
        "true-negative",
        "operating-point",
        "data-protection"
      ]
    },
    {
      "name": "Specificity",
      "description": "The ability of a classification model to correctly identify individuals <em>without</em> a condition. This is also known as the true negative rate.<br><br>For example, an AI tool that has been developed to detect lung cancer from medical images is said to be specific if it correctly identifies people who do not have lung cancer.",
      "termCode": "specificity",
      "related": [
        "accuracy",
        "sensitivity",
        "precision",
        "auc",
        "roc",
        "f1-score",
        "false-positive",
        "false-negative",
        "true-positive",
        "true-negative",
        "operating-point"
      ]
    },
    {
      "name": "Sensitivity",
      "alternateName": "Recall",
      "description": "The ability of a classification model to correctly identify individuals <em>with</em> a condition. This is also known as the true positive rate and recall. It is defined as the number of true positives over true positives and false negatives.<br><br>For example, an AI tool that has been developed to detect lung cancer from medical images is said to be sensitive if it correctly identifies people who have lung cancer.",
      "termCode": "sensitivity",
      "related": [
        "accuracy",
        "specificity",
        "precision",
        "auc",
        "roc",
        "f1-score",
        "false-positive",
        "false-negative",
        "true-positive",
        "true-negative",
        "operating-point"
      ]
    },
    {
      "name": "False positive",
      "description": "The incorrect prediction of a data point or individual having a specific outcome or class.<br><br>For example, if an AI tool incorrectly predicts you have diabetes.",
      "termCode": "false-positive",
      "related": [
        "accuracy",
        "specificity",
        "sensitivity",
        "precision",
        "auc",
        "roc",
        "f1-score",
        "false-negative",
        "true-positive",
        "true-negative",
        "operating-point"
      ]
    },
    {
      "name": "False negative",
      "description": "The incorrect prediction of a data point or individual not having a specific outcome or class.<br><br>For example, if an AI tool incorrectly predicts you do not have diabetes, when in fact, you do.",
      "termCode": "false-negative",
      "related": [
        "accuracy",
        "specificity",
        "sensitivity",
        "precision",
        "auc",
        "roc",
        "f1-score",
        "false-positive",
        "true-positive",
        "true-negative",
        "operating-point"
      ]
    },
    {
      "name": "True positive",
      "description": "The correct prediction of a data point or individual having a specific outcome or class.<br><br>For example, if an AI tool correctly predicts you have diabetes.",
      "termCode": "true-positive",
      "related": [
        "accuracy",
        "specificity",
        "sensitivity",
        "precision",
        "auc",
        "roc",
        "f1-score",
        "false-positive",
        "false-negative",
        "true-negative",
        "operating-point"
      ]
    },
    {
      "name": "True negative",
      "description": "The correct prediction of a data point or individual not having a specific outcome or class.<br><br>For example, if an AI tool correctly predicts you do not have diabetes.",
      "termCode": "true-negative",
      "related": [
        "accuracy",
        "specificity",
        "sensitivity",
        "precision",
        "auc",
        "roc",
        "f1-score",
        "false-positive",
        "false-negative",
        "true-positive",
        "operating-point"
      ]
    },
    {
      "name": "Precision",
      "description": "A way of measuring how reliable the positive predictions of a classification model are. It is the ratio of true positives over all cases predicted as positive (true and false positives).<br><br>For example, a model with high precision (1.0) produces no false positives: every case it identifies as positive is truly positive. Note this does not account for false negatives, and a high precision could be obtained by only assigning the most certain cases as positive, while missing many other positive cases.",
      "termCode": "precision",
      "related": [
        "accuracy",
        "specificity",
        "sensitivity",
        "auc",
        "roc",
        "f1-score",
        "false-positive",
        "false-negative",
        "true-positive",
        "true-negative",
        "operating-point"
      ]
    },
    {
      "name": "Operating point",
      "description": "In a classification model, this is the point chosen to define when a case is positive or not. The location of this point will determine the model performance measured by true positives, false positives, true negatives and false negatives.<br><br>For an interactive demonstration on the impact on deciding the operating point, visit this <a target='_blank' href='https://nhsx.github.io/covid-chest-imaging-database/experiments'>NCCID operating point experiment</a>.",
      "termCode": "operating-point",
      "related": [
        "accuracy",
        "specificity",
        "sensitivity",
        "precision",
        "auc",
        "roc",
        "f1-score",
        "false-positive",
        "false-negative",
        "true-positive",
        "true-negative"
      ]
    },
    {
      "name": "Receiver Operator Characteristic",
      "acronym": "ROC",
      "description": "Used to create a plotted curve which demonstrates how the trade off between true positive rate and false positive rate changes as you vary the operating point of a classification model.<br><br>For an interactive demonstration of a ROC curve, visit this <a target='_blank' href='https://nhsx.github.io/covid-chest-imaging-database/experiments'>NCCID operating point experiment</a>.",
      "termCode": "roc",
      "related": [
        "accuracy",
        "specificity",
        "sensitivity",
        "precision",
        "auc",
        "f1-score",
        "false-positive",
        "false-negative",
        "true-positive",
        "true-negative",
        "operating-point"
      ]
    },
    {
      "name": "Area Under the (Receiver Operator Characteristic) Curve",
      "acronym": "AUC",
      "description": "A single number calculated from a ROC curve to help summarise the performance of a classification model.",
      "termCode": "auc",
      "related": [
        "roc",
        "accuracy",
        "specificity",
        "sensitivity",
        "precision",
        "f1-score",
        "false-positive",
        "false-negative",
        "true-positive",
        "true-negative",
        "operating-point"
      ]
    },
    {
      "name": "F1 score",
      "description": "A metric which describes the accuracy of a classification model, by combining the precision and sensitivity (recall) values into a single number, which ranges from 0 (poor accuracy) to 1 (high accuracy).",
      "termCode": "f1-score",
      "related": [
        "precision",
        "sensitivity",
        "accuracy",
        "false-positive",
        "false-negative",
        "true-positive",
        "operating-point"
      ]
    },
    {
      "name": "National COVID-19 Chest Imaging Database",
      "acronym": "NCCID",
      "description": "The NCCID is a national database that supports better understanding of COVID-19 and the development of technology enabling the best care for patients hospitalised with a severe infection.",
      "termCode": "nccid",
      "related": [
        "database"
      ]
    },
    {
      "name": "Interoperability",
      "description": "The ability of digital systems to exchange information without requiring significant efforts to convert data from different formats. AI is built on data, and accessing data from different systems requires a level of standardisation and interoperability to ensure a robust model can be built and used.",
      "termCode": "interoperability",
      "related": [
        "standard",
        "rap",
        "api"
      ]
    },
    {
      "name": "Multimodal artificial intelligence",
      "acronym": "Multimodal AI",
      "description": "An approach to AI which incorporates multiple types of data. For example, a speech-to-text model that is typically trained on audio and text data, could include image data of lip movements taken from video recordings.<br><br>Multimodal AI can combine both numerical data such as blood pressure, heart rate, and imaging data such as a CT scan.<br><br>In healthcare, the term is also associated with specific 'modalities' of data, such as the sequences in <a target='_blank' href='https://prostatecanceruk.org/about-us/projects-and-policies/mpmri'>mpMRI</a> scanning. However, the concept of multimodality with respect to AI is more than just bringing different types of data together - it is about how very different AI models effectively interoperate - even 'merge' - to create a whole that is greater than the sum of its parts.",
      "termCode": "multimodal",
      "related": [
        "model",
        "machine-learning",
        "ai"
      ]
    },
    {
      "name": "Neural network",
      "description": "Neural networks are an approach to machine learning, loosely inspired by nature, that can describe complex relationships using a broader range of data than traditional approaches.<br><br>For example, neural networks can be trained on image data to describe features in medical images such as tumours. They can also be trained on free text such as clinical notes, allowing their use in clinical coding applications.<br><br>Neural networks can also be trained on tabulated, or structured data, such as a spreadsheet. Their ability to model complexity often comes at the cost of explainability, whereby the more complex the model, the harder to explain it becomes.",
      "termCode": "neural-network",
      "related": [
        "deep-learning",
        "cnn",
        "rnn",
        "graph-neural-network",
        "model",
        "feature",
        "machine-learning",
        "structured",
        "unstructured",
        "supervised",
        "explainability"
      ]
    },
    {
      "name": "Deep learning",
      "description": "An approach to building models using neural networks with more than one 'hidden' layer of artificial neurons. This is a common approach when working with image and text data.<br><br>Deep learning models are able to capture complex relationships, but it can be difficult to interpret what data leads to a particular outcome.",
      "termCode": "deep-learning",
      "related": [
        "neural-network",
        "cnn",
        "rnn",
        "machine-learning",
        "transformer",
        "supervised",
        "ai",
        "computer-vision",
        "explainability"
      ]
    },
    {
      "name": "Overfitting",
      "description": "The process of building a model which is based too closely on the data. This results in a model which may be very accurate on the training data, but when tested on additional datasets such as the test data, unseen data or data from a new environment, performs badly.<br><br>Approaches to reduce overfitting include cross-validation, data augmentation and ensemble techniques (which combine different models).",
      "termCode": "overfitting",
      "related": [
        "underfitting",
        "machine-learning",
        "bias",
        "model",
        "training-data",
        "test-data",
        "cross-validation",
        "data-augmentation"
      ]
    },
    {
      "name": "Underfitting",
      "description": "The process of building a model which is not based closely enough on the data. This results in a model which performs badly and fails to capture the relationships you are looking for.<br><br>There is a balance to be made between underfitting and overfitting.",
      "termCode": "underfitting",
      "related": [
        "overfitting",
        "machine-learning",
        "model",
        "training-data"
      ]
    },
    {
      "name": "Data augmentation",
      "description": "The process of artificially increasing the amount of data used to train a model, to reduce overfitting and improve model performance.<br><br>Commonly used in imaging applications, this can include rotating, cropping, adding noise or random levels of blur to existing images.",
      "termCode": "data-augmentation",
      "related": [
        "overfitting",
        "training",
        "model",
        "machine-learning"
      ]
    },
    {
      "name": "Natural language processing",
      "acronym": "NLP",
      "description": "A collection of techniques which use speech and text data.<br><br>Speech-to-text systems convert verbal speech to text, such as in a smart speaker.<br><br>Natural language understanding systems convert text into concepts or instructions, such as your requests to play music or offer directions.<br><br>Text-to-speech systems convert text into verbal speech, such as responses from a smart speaker.<br><br>Natural language generation will create human-like text based on concepts, such as writing a report from a summary table of data.<br><br>NLP has developed significantly in recent years in part due to the availability of deep learning algorithms and transformers.",
      "termCode": "nlp",
      "related": [
        "ner",
        "deep-learning",
        "transformer",
        "sequential-data",
        "neural-network",
        "rnn",
        "machine-learning",
        "supervised",
        "ai"
      ]
    },
    {
      "name": "Large Language Model",
      "acronym": "LLM",
      "description": "A large language model is a neural network that is trained on a vast amount of text. The training uses unlabelled text and some form of self-supervised learning. Usually, LLMs will have billions of parameters. They are an instance of a foundation model (FM).",
      "termCode": "llm",
      "related": [
        "nlp",
        "semi-supervised",
        "foundation-model",
        "neural-network",
        "machine-learning",
        "supervised",
        "ai"
      ]
    },
    {
      "name": "Foundation Model",
      "acronym": "FM",
      "description": "A foundation model is an ML model trained on a large amount of data. The data is unlabelled and the model is trained by a self-supervised learning algorithm.",
      "termCode": "foundation-model",
      "related": [
        "nlp",
        "semi-supervised",
        "llm",
        "neural-network",
        "machine-learning",
        "supervised",
        "ai"
      ]
    },
    {
      "name": "Transformer",
      "description": "An approach to deep learning which uses an 'attention' mechanism to understand context in text or image data, without requiring data to be processed in order.<br><br>Transformers have led to recent breakthroughs in NLP and computer vision.",
      "termCode": "transformer",
      "related": [
        "nlp",
        "deep-learning",
        "machine-learning",
        "ai"
      ]
    },
    {
      "name": "Confusion matrix",
      "description": "A table that is used to describe the performance of a classification model on a set of test data for which the true values are known. It can be used to derive a number of measures such as sensitivity and specificity.<br><br>For example, a model may predict whether a patient has a form of diabetes, or not. Its performance can be described by writing out where it correctly or incorrectly predicted diabetes (positive) or not (negative) where we know the actual results:<br><br><table id='matrix-table'><thead><tr><th colspan='2' rowspan='2'></th><th colspan='2'>Predicted</th></tr><tr><th>Positive</th><th>Negative</th></tr></thead><tbody><tr><th rowspan='2'>Actual</th><th>Positive</th><td>True positive</td><td>False negative</td></tr><tr><th>Negative</th><td>False positive</td><td>True negative</td></tr></tbody></table>",
      "termCode": "confusion-matrix",
      "related": [
        "accuracy",
        "specificity",
        "sensitivity",
        "precision",
        "false-positive",
        "false-negative",
        "true-positive",
        "true-negative"
      ]
    },
    {
      "name": "Linked data",
      "description": "Data which has been combined (connected, or \"linked\") with other relevant data to increase the available information on a specific individual or population.<br><br>In health and care, an example would be linking GP records with hospital records, which are typically generated independently.<br><br>There are many different datasets that could be linked to improve the information available to treat patients, such as social care records, dental records and mental health records.",
      "termCode": "linked-data",
      "related": [
        "data",
        "database"
      ]
    },
    {
      "name": "Metadata",
      "description": "Data about data, or data that provides information on one or more aspects of the data.<br><br>For example, an MRI scan contains data of the scan itself (a series of images), but also metadata including patient identifier, date of scan, settings of the scan and more.<br><br>Metadata isn't always immediately visible to humans, but is machine readable and can contain important information.",
      "termCode": "metadata",
      "related": [
        "data",
        "database"
      ]
    },
    {
      "name": "Graph Neural Network",
      "acronym": "GNN",
      "description": "A class of deep learning methods designed to make predictions on data described by graphs.<br><br>Graphs are a way of representing data, relationships and their complexity.<br><br>GNNs are neural networks that can be directly applied to graphs, and provide a way to generate node-level, edge-level, and graph-level predictions.<br><br>In recent years, a number of variants of GNNs have been developed such as graph convolutional networks (GCN), graph attention networks (GAT) and graph recurrent networks (GRN).<br><br>An example of data in healthcare representable as graphs is psychopathology networks, which consist of aspects (e.g. symptoms) of mental disorders (nodes) and the connections between those aspects (edges). A GNN trained on this graph would be able to predict disorders based on the provided symptoms.",
      "termCode": "graph-neural-network",
      "related": [
        "neural-network",
        "deep-learning",
        "database"
      ]
    },
    {
      "name": "Generative Adversarial Network",
      "acronym": "GAN",
      "description": "A class of deep learning where two networks compete with each other to improve the overall performance of the model.<br><br>A generator network will generate artificial examples based on the training data, and the discriminator will try to judge whether they are real or artificial. This is an iterative process which continues until the generator can adequately \"fool\" the discriminator.<br><br>This technique is predominantly used in image generation, and has potential in health and care to generate example scans for training purposes, or to augment missing data.",
      "termCode": "generative-adversarial-network",
      "related": [
        "neural-network",
        "deep-learning",
        "ai"
      ]
    },
    {
      "name": "Federated Learning",
      "description": "An approach to machine learning where a model is trained on data where the data exists (in multiple locations), rather than the traditional approach of moving the data to a central location for model training.<br><br>This decentralised approach reduces the need to transfer data across different entities, and may reduce the associated information governance and security overheads that this can entail.",
      "termCode": "federated-learning",
      "related": [
        "machine-learning",
        "model",
        "data-protection"
      ]
    },
    {
      "name": "Synthetic Data",
      "description": "Data that reflects the distribution and characteristics of real data, but is not real data and cannot be used to identify an individual.<br><br>Good quality synthetic data will enable research on realistic data without the associated information governance and security overheads that using real data can entail.",
      "termCode": "synthetic-data",
      "related": [
        "data",
        "data-protection"
      ]
    },
    {
      "name": "Gradient descent",
      "description": "A common approach used in supervised machine learning, where models are trained to fit the training data, by minimising the error in predictions in an iterative fashion.<br><br>In gradient descent, small 'steps' are taken in different 'directions' in search of the smallest error.",
      "termCode": "gradient-descent",
      "related": [
        "machine-learning",
        "supervised",
        "training",
        "training-data",
        "algorithm"
      ]
    },
    {
      "name": "Binary",
      "description": "A representation of information with only two states: 0 or 1.<br><br>Binary encoding is frequently used to store presence/absence e.g. `is_smoker` which would either be `0` or `1`.<br><br>Binary classification is a model which can predict whether something is one thing or not, e.g. a model to predict whether or not a patient has Covid-19.",
      "termCode": "binary",
      "related": [
        "data",
        "classification",
        "model"
      ]
    },
    {
      "name": "Sequential data",
      "description": "Data where the order matters.<br><br>For example, time series data of blood pressure over time, or natural language (text) such as clinician's notes, or DNA sequences.",
      "termCode": "sequential-data",
      "related": [
        "data",
        "rnn",
        "nlp"
      ]
    },
    {
      "name": "Feature",
      "description": "A single type of measurement that is used as the input for a model, for example age, sex or heart rate are all types of features. Synonymous to variables, covariates or columns.",
      "termCode": "feature",
      "related": [
        "feature-selection",
        "feature-engineering",
        "label",
        "data",
        "machine-learning",
        "supervised",
        "unsupervised",
        "statistics"
      ]
    },
    {
      "name": "Feature selection",
      "description": "The process of deciding which features to include in a model.",
      "termCode": "feature-selection",
      "related": [
        "feature",
        "model",
        "feature-engineering",
        "data-cleaning",
        "machine-learning",
        "data",
        "supervised",
        "unsupervised",
        "statistics"
      ]
    },
    {
      "name": "Feature engineering",
      "description": "The process of creating a new feature to include in a model, for example the time since a patient's appointment, or their average blood pressure measurement.",
      "termCode": "feature-engineering",
      "related": [
        "feature",
        "feature-selection",
        "machine-learning",
        "data",
        "supervised",
        "unsupervised",
        "statistics"
      ]
    },
    {
      "name": "Label",
      "description": "Category used to describe an outcome, or class, that may be used to train a supervised machine learning classification model.<br><br>For example: vital status or whether a chest radiograph showed pneumonia or not. Often binary, but not always.",
      "termCode": "label",
      "related": [
        "feature",
        "machine-learning",
        "supervised",
        "classification"
      ]
    },
    {
      "name": "Clustering",
      "description": "An application of unsupervised machine learning that seeks to 'cluster' data into groups by identifying similar patterns.",
      "termCode": "clustering",
      "related": [
        "machine-learning",
        "unsupervised"
      ]
    },
    {
      "name": "Computer vision",
      "description": "A branch of AI relating to processing of images, videos and other spatially defined inputs.<br><br>For example processing chest radiographs or dental x-rays.",
      "termCode": "computer-vision",
      "related": [
        "neural-network",
        "deep-learning",
        "ai",
        "cnn",
        "segmentation"
      ]
    },
    {
      "name": "Convolutional neural network",
      "acronym": "CNN",
      "description": "A type of neural network architecture normally used for computer vision/image processing tasks, for example classifying chest radiographs as normal/abnormal or segmenting areas of possible lung cancer.<br><br>CNNs use a convolutional filter to extract patterns in the spatial domain such as edges, curves and shapes.",
      "termCode": "cnn",
      "related": [
        "neural-network",
        "deep-learning",
        "ai",
        "computer-vision",
        "segmentation"
      ]
    },
    {
      "name": "Segmentation",
      "description": "A computer vision task that involves separating out regions of interest from an image, for example identifying the lung areas from a chest radiograph.",
      "termCode": "segmentation",
      "related": [
        "neural-network",
        "deep-learning",
        "ai",
        "cnn",
        "computer-vision"
      ]
    },
    {
      "name": "Recurrent neural network",
      "acronym": "RNN",
      "description": "A type of neural network architecture used to model sequential data, for example predicting deterioration from regular vital signs in ICU patients.",
      "termCode": "rnn",
      "related": [
        "neural-network",
        "deep-learning",
        "ai",
        "sequential-data",
        "nlp"
      ]
    },
    {
      "name": "Named entity recognition",
      "acronym": "NER",
      "description": "A type of natural language processing task that seeks to identify named entities from free text into pre-specified categories, for example extracting drug names from a discharge letter.",
      "termCode": "ner",
      "related": [
        "neural-network",
        "deep-learning",
        "ai",
        "sequential-data",
        "nlp",
        "transformer"
      ]
    },
    {
      "name": "Structured query language",
      "acronym": "SQL",
      "description": "A programming language used to query (search) information stored in relational databases.",
      "termCode": "sql",
      "related": [
        "data",
        "database",
        "structured"
      ]
    },
    {
      "name": "Trusted research environment",
      "acronym": "TRE",
      "description": "A secure computing environment that allows remote access to data for approved researchers. Also known as Secure Data Environment (SDE) or Data Safe Haven.<br><br>In June 2022, the Department of Health and Social Care published the <a target='_blank' href='https://www.gov.uk/government/publications/data-saves-lives-reshaping-health-and-social-care-with-data/data-saves-lives-reshaping-health-and-social-care-with-data'>Data saves lives: reshaping health and social care with data</a> policy paper, which includes a mandate on using secure data environments for NHS data.",
      "termCode": "tre",
      "related": [
        "data",
        "database",
        "standard"
      ]
    },
    {
      "name": "Reproducible analytical pipeline",
      "acronym": "RAP",
      "description": "An automated set of code that processes data in line with best practice and ensures that it delivers the same results each time it runs.<br><br>In June 2022, the Department of Health and Social Care published the <a target='_blank' href='https://www.gov.uk/government/publications/data-saves-lives-reshaping-health-and-social-care-with-data/data-saves-lives-reshaping-health-and-social-care-with-data'>Data saves lives: reshaping health and social care with data</a> policy paper, which includes a recommendation on promoting and resourcing RAPs as the minimum standard for academic and NHS data analysis.<br><br>NHS Digital runs an open <a target='_blank' href='https://github.com/NHSDigital/rap-community-of-practice'>RAP community of practice</a>.",
      "termCode": "rap",
      "related": [
        "data",
        "standard",
        "interoperability"
      ]
    },
    {
      "name": "Data cleaning",
      "description": "The process of curating data to identify and fix, or remove, missing, corrupted or incorrect items to ensure quality control.",
      "termCode": "data-cleaning",
      "related": [
        "data",
        "feature-selection"
      ]
    },
    {
      "name": "Genetic Algorithm",
      "description": "A method of searching for the best solution based on a defined scoring method (also called a heuristic). It is used to find optimised solutions for complex problems based on the theory of natural selection.<br><br>The algorithm goes through iterations, using random processes to produce candidates to be assessed by the algorithm. At the end of the process, the strongest candidate(s) are selected, based on your chosen heuristic.",
      "termCode": "genetic-algorithm",
      "related": [
        "ai",
        "algorithm",
        "data",
        "feature-selection"
      ]
    },
    {
      "name": "Deployment Platform",
      "description": "Software tools that automate the process of deploying software applications and services to production environments.",
      "termCode": "deployment-platform",
      "related": [
        "api",
        "interoperability",
        "cloud",
        "rap"
      ]
    },
    {
      "name": "Data Protection Impact Assessment",
      "acronym": "DPIA",
      "description": "A process designed to help organisations systematically analyse, identify and minimise the data protection risks of a project or plan.<br><br>It is a key part of your accountability obligations under the <a target='_blank' href='https://ico.org.uk/for-organisations/uk-gdpr-guidance-and-resources/'>UK General Data Protection Regulation (GDPR)</a>.",
      "termCode": "dpia",
      "related": [
        "aia",
        "data-protection",
        "fairness",
        "bias",
        "linked-data",
        "anonymisation",
        "pseudonymisation"
      ]
    },
    {
      "name": "Ethical AI",
      "description": "AI systems that are developed and deployed with attention to the harms created or exacerbated by the systems. As a minimum this usually includes consideration of fairness, accountability and transparency.",
      "termCode": "ethical-ai",
      "related": [
        "aia",
        "data-protection",
        "fairness",
        "bias",
        "explainability"
      ]
    },
    {
      "name": "Information Governance",
      "acronym": "IG",
      "description": "The legal framework governing the use of personal confidential data in health care is complex. It includes the NHS Act 2006, the Health and Social Care Act 2012, the Data Protection Act, and the Human Rights Act.",
      "termCode": "ig",
      "related": [
        "data-protection",
        "dpia"
      ]
    },
    {
      "name": "Memorandum of Understanding",
      "acronym": "MoU",
      "description": "A type of agreement between two (bilateral) or more (multilateral) parties. It expresses a convergence of will between the parties, indicating an intended common line of action.",
      "termCode": "mou",
      "related": [
        "data-protection",
        "dpia",
        "ig"
      ]
    },
    {
      "name": "Pilot",
      "description": "A small-scale experiment or set of observations undertaken to decide how and whether to launch a full-scale project.",
      "termCode": "pilot",
      "related": [
        "clinical-trials",
        "poc"
      ]
    },
    {
      "name": "Picture Archiving and Communication Systems",
      "acronym": "PACS",
      "description": "A system based on the universal Digital Imaging and Communications in Medicine (DICOM) standard, which uses a server to store and allow easy access to high quality radiologic images.",
      "termCode": "pacs",
      "related": [
        "database",
        "standard",
        "on-prem",
        "cloud"
      ]
    },
    {
      "name": "Shadow Deployment",
      "description": "A method of testing a candidate model for production where production data runs through the model without the model actually returning predictions to the service or customers. Essentially, this simulates how the model would perform in the production environment.",
      "termCode": "shadow-deployment",
      "related": [
        "mlops",
        "deployment-platform"
      ]
    },
    {
      "name": "Retrieval Augmented Generation (RAG)",
      "description": "A type of AI pipeline where the submitted query is first used to retrieve relevant documents from a database, and the retrieved documents are then used to generate a response to the query, for example by stuffing them into the prompt before it is passed to the model.",
      "termCode": "rag",
      "related": [
        "retrieval",
        "stuffing",
        "vector-store",
        "chunking",
        "embedding",
        "prompt",
        "prompt-engineering"
      ]
    },
    {
      "name": "Retrieval",
      "description": "The stage in a RAG pipeline where the submitted query is used to retrieve relevant documents from a database. There are numerous retrieval strategies, including simply returning the top N nearest vectors to the embedded vector of the query.",
      "termCode": "retrieval",
      "related": [
        "rag",
        "stuffing",
        "vector-store",
        "chunking",
        "embedding",
        "prompt",
        "prompt-engineering"
      ]
    },
    {
      "name": "Stuffing",
      "description": "The stage in the RAG pipeline where the retrieved documents are inserted (\"stuffed\") into the prompt prior to passing to the model. This can also include inserting relevant metadata about the documents.",
      "termCode": "stuffing",
      "related": [
        "rag",
        "retrieval",
        "vector-store",
        "chunking",
        "embedding",
        "prompt",
        "prompt-engineering"
      ]
    },
    {
      "name": "Vector-store",
      "description": "A database (such as that used in RAG pipelines) which stores embedding vectors - high dimensional representations of documents, etc. - alongside any metadata about these documents. Retrieving similar vectors is very fast, even when the database is very large.",
      "termCode": "vector-store",
      "related": [
        "rag",
        "retrieval",
        "stuffing",
        "chunking",
        "embedding",
        "prompt",
        "prompt-engineering"
      ]
    },
    {
      "name": "Chunking",
      "description": "The process of breaking larger documents up into chunks prior to embedding and insertion into a vector-store. This is done to ensure that during RAG, only relevant sections of a large document are retrieved and stuffed into the prompt (to avoid using up too much of the model’s context window).",
      "termCode": "chunking",
      "related": [
        "rag",
        "retrieval",
        "stuffing",
        "vector-store",
        "embedding",
        "prompt",
        "prompt-engineering",
        "context-window"
      ]
    },
    {
      "name": "Embedding",
      "description": "The process by which some text (or other complex input data) is rendered down into a vector (potentially of many dimensions). There are a number of ways of doing this; one such is <a target='_blank' href='https://www.sbert.net'>Sentence Transformer</a>.",
      "termCode": "embedding",
      "related": [
        "rag",
        "retrieval",
        "vector-store"
      ]
    },
    {
      "name": "Prompt",
      "description": "The string submitted to an AI model to generate a response. In RAG pipelines, this is the query with the stuffed documents inserted.",
      "termCode": "prompt",
      "related": [
        "rag",
        "llm",
        "prompt-engineering",
        "context-window"
      ]
    },
    {
      "name": "Prompt Engineering",
      "description": "The process of adjusting a prompt to improve the performance of a model. This can involve giving the AI a clear 'role', reorganising the prompt content, providing tags to guide the AI, providing examples within the prompt, etc.",
      "termCode": "prompt-engineering",
      "related": [
        "rag",
        "llm",
        "prompt",
        "context-window"
      ]
    },
    {
      "name": "Context Window",
      "description": "The maximum number of tokens that can be submitted to an AI model (sometimes this also includes those returned). This is important in RAG pipelines as retrieved information needs to fit into the context window, along with the original query (and potentially along with any response).",
      "termCode": "context-window",
      "related": [
        "rag",
        "retrieval",
        "stuffing",
        "vector-store",
        "chunking",
        "prompt",
        "prompt-engineering"
      ]
    },
    {
      "name": "Machine unlearning",
      "description": "Approaches to efficiently remove the influence of a subset of the training data from the weights of a trained model, without retraining the model from scratch, and whilst retaining the model’s performance on downstream tasks. Machine unlearning could be used to remove the influence of personal data from a model if someone exercises their “Right to be Forgotten”.",
      "termCode": "machine-unlearning",
      "related": [
        "machine-editing",
        "memorization",
        "training-data-leakage"
      ]
    },
    {
      "name": "Machine editing",
      "description": "Approaches to efficiently modify the behaviour of a machine learning model on certain inputs, whilst having little impact on unrelated inputs. Machine Editing can be used to inject or update knowledge in the model or modify undesired behaviours.",
      "termCode": "machine-editing",
      "related": [
        "machine-unlearning",
        "memorization",
        "training-data-leakage"
      ]
    },
    {
      "name": "Memorization",
      "description": "Machine Learning Models have been shown to memorize aspects of their training data during the training process. This has been demonstrated to correlate with model size (number of parameters).",
      "termCode": "memorization",
      "related": [
        "machine-editing",
        "machine-unlearning",
        "training-data-leakage"
      ]
    },
    {
      "name": "Training data leakage",
      "description": "Aspects of the training data can be memorized by a machine learning model during training and are consequently vulnerable to being inferred or extracted verbatim from the model alone. This is possible as the behaviour of the model on samples which were members of the training data is distinguishable from samples the model has not seen before. This leakage has been demonstrated on a range of machine learning models including Transformer-based Image and Language Models.",
      "termCode": "training-data-leakage",
      "related": [
        "machine-editing",
        "machine-unlearning",
        "memorization"
      ]
    }
  ]
}