-
Notifications
You must be signed in to change notification settings - Fork 9
/
terms.json
1516 lines (1516 loc) · 70.4 KB
/
terms.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
{
"terms": [
{
"name": "Descriptive analytics",
"description": "The analysis of data from the past that can help answer “What happened?” or “What is happening?”.<br><br>For example, an analysis of A&E attendance may show a decrease in visitor numbers over the last 3 months.",
"termCode": "descriptive-analytics",
"related": [
"predictive-analytics",
"prescriptive-analytics",
"statistics"
]
},
{
"name": "Predictive analytics",
"description": "The analysis of data where the goal is to create predictions of the future based on the past that can help answer “What will happen?”<br><br>For example, using historical admissions data, a Trust may be able to predict or forecast the number of admissions in the next 24 hours.",
"termCode": "predictive-analytics",
"related": [
"descriptive-analytics",
"prescriptive-analytics",
"statistics",
"supervised",
"machine-learning"
]
},
{
"name": "Prescriptive analytics",
"description": "The use of predictive analytics to recommend or automatically action an activity.<br><br>For example, using a prediction of future admissions, a Trust could recommend or automatically assign a bed to an incoming patient.",
"termCode": "prescriptive-analytics",
"related": [
"descriptive-analytics",
"predictive-analytics",
"machine-learning",
"ai"
]
},
{
"name": "Statistics",
"description": "The process of collecting, classifying and analysing data. Many statistical techniques are used in analytics and modern day machine learning.<br><br>For example, calculating the “average” number of patients arriving at a Trust to inform bed managers of capacity requirements or defining how to optimise the performance of an AI model.",
"termCode": "statistics",
"related": [
"descriptive-analytics",
"predictive-analytics",
"prescriptive-analytics",
"machine-learning",
"unsupervised",
"supervised"
]
},
{
"name": "Artificial Intelligence",
"acronym": "AI",
"description": "The use of digital technology to create systems capable of performing tasks commonly thought to require human intelligence.<br><br>For example, an AI system may analyse radiography images and detect tumours in cancer patients.<br><br>In 2021, the UK government announced its <a target='_blank' href='https://www.gov.uk/government/publications/national-ai-strategy'>National AI Strategy</a>.",
"termCode": "ai",
"related": [
"machine-learning",
"supervised",
"unsupervised",
"reinforcement",
"computer-vision",
"fairness",
"bias",
"explainability"
]
},
{
"name": "Algorithmic Impact Assessment",
"acronym": "AIA",
"description": "Algorithmic impact assessments (often abbreviated as ‘AIAs’) are tools that set out frameworks and processes for assessing possible societal impacts, both beneficial and adverse, of AI systems before the systems are in use (with ongoing monitoring often advised). An example AIA can be found <a target='_blank' href='https://www.adalovelaceinstitute.org/report/algorithmic-impact-assessment-case-study-healthcare/'>here</a>.",
"termCode": "aia",
"related": [
"fairness",
"bias"
]
},
{
"name": "Algorithm",
"description": "A set of instructions that can be followed by a human or computer.<br><br>For example, the NHS <a target='_blank' href='https://www.england.nhs.uk/wp-content/uploads/2014/06/psa-aki-alg.pdf'>algorithm for detecting Acute Kidney Injury</a> is a set of instructions that can be repeated for multiple patients.<br><br>In AI, machine learning algorithms use data to make predictions or recommendations which can inform decision making.",
"termCode": "algorithm",
"related": [
"statistics",
"ai",
"machine-learning",
"gradient-descent"
]
},
{
"name": "Data",
"description": "Information stored in a digital way.<br><br>For example, this can be information on your physical state such as heart rate, blood pressure, or notes on your recent visit to your primary care physician.<br><br>Imaging data is a common type of healthcare data, which includes data generated from X-ray machines, CT scanners, MRI scanners, OCT systems etc.",
"termCode": "data",
"related": [
"metadata",
"linked-data",
"synthetic-data",
"structured",
"unstructured",
"feature",
"data-cleaning",
"database",
"tre",
"sql",
"binary",
"sequential-data",
"model"
]
},
{
"name": "Model",
"description": "A model is a simplified representation of something in the real world. In AI, models are the result of an algorithm and data.<br><br>Models are by definition attempts to define real world phenomena, and can be very helpful when trying to assist in decision making.<br><br>For example, an AI model of bed management may use data on past admissions to predict how many patients will arrive at a point in time, and therefore what the best beds are for these patients taking into account future arrivals.",
"termCode": "model",
"related": [
"ai",
"machine-learning",
"feature",
"algorithm",
"data",
"mlops"
]
},
{
"name": "Machine learning",
"description": "An approach to building models using (normally large amounts of) data. This differs from traditional approaches to building models by defining rules by hand.<br><br>For example, a self-driving car will contain many different machine learning algorithms that have been built using data generated by expert drivers.<br><br>AI used in medical imaging leverages machine learning on images with known conditions.<br><br>In 2021, the Medicines & Healthcare products Regulatory Agency (MHRA) published guidance on <a target='_blank' href='https://www.gov.uk/government/publications/good-machine-learning-practice-for-medical-device-development-guiding-principles/good-machine-learning-practice-for-medical-device-development-guiding-principles'>Good Machine Learning Practice for Medical Device Development: Guiding Principles</a>.",
"termCode": "machine-learning",
"related": [
"ai",
"supervised",
"unsupervised",
"reinforcement",
"algorithm",
"feature",
"label",
"gradient-descent",
"statistics",
"federated-learning",
"mlops",
"data",
"model"
]
},
{
"name": "Unsupervised machine learning",
"description": "A type of machine learning where you do not know the outcome or definition of your data, and are looking for patterns. This includes clustering techniques such as k-means and principal component analysis (PCA).<br><br>For example, unsupervised machine learning can help identify different groups of hospital patients who use hospital services in different ways.",
"termCode": "unsupervised",
"related": [
"clustering",
"supervised",
"reinforcement",
"statistics",
"machine-learning",
"ai",
"descriptive-analytics"
]
},
{
"name": "Supervised machine learning",
"description": "A type of machine learning where you know the outcome of the data you are looking to model. This includes regression and classification techniques.<br><br>For example, an AI to detect Covid-19 in CT scans can use supervised machine learning with a data set of CT scans where patients have a known outcome (e.g. Covid-19 or not), to predict the likelihood of a new patient having Covid-19 from their CT scan (which the model has not seen before).",
"termCode": "supervised",
"related": [
"unsupervised",
"reinforcement",
"statistics",
"label",
"feature",
"machine-learning",
"ai",
"predictive-analytics",
"prescriptive-analytics"
]
},
{
"name": "Reinforcement learning",
"description": "A type of machine learning where you define an environment and a goal, and iteratively attempt to maximise the goal by reinforcing actions that increase the goal.<br><br>For example, DeepMind successfully used reinforcement learning to master the board game Go by defining the game parameters, the goal and suggesting the best moves to play against real human players.",
"termCode": "reinforcement",
"related": [
"ai",
"machine-learning",
"supervised",
"unsupervised"
]
},
{
"name": "Semi-supervised machine learning",
"description": "An approach to machine learning that combines known data (supervised machine learning) with unknown data to improve its ability to act on data it has not seen before.<br><br>This approach is being used to improve models where there is not a lot of data available with a known outcome, for example where you may have a rare disease with less data available than in more common diseases.",
"termCode": "semi-supervised",
"related": [
"supervised",
"unsupervised",
"self-supervised",
"machine-learning"
]
},
{
"name": "Self-supervised machine learning",
"description": "An approach to machine learning where you do not have the outcomes of your data, but use the structure of your data to help determine what these outcomes are.<br><br>For example, recent developments in understanding text and audio can use the existing structure to predict what words should come next.",
"termCode": "self-supervised",
"related": [
"semi-supervised",
"unsupervised",
"supervised",
"machine-learning"
]
},
{
"name": "General artificial intelligence",
"acronym": "General AI",
"description": "A theoretical concept of AI that is able to generalise, or adapt, to different applications, much like a human or animal.<br><br>For example, if a human learns to drive a car, they could without too much trouble drive other vehicles.",
"termCode": "general-ai",
"related": [
"ai",
"narrow-ai"
]
},
{
"name": "Narrow artificial intelligence",
"acronym": "Narrow AI",
"description": "AI focussed on solving a specific problem.<br><br>For example, an AI built to identify cancerous tumours in breast scans, would not automatically be able to detect tumours in other parts of the body without significant rework.",
"termCode": "narrow-ai",
"related": [
"ai",
"general-ai",
"supervised",
"reinforcement",
"machine-learning"
]
},
{
"name": "Machine learning operations",
"acronym": "MLOps",
"description": "The process of safely deploying, monitoring and updating machine learning models in production, or real-world, environments.<br><br>Because machine learning models are built on data, and data can change, it is important to build robustness into the system so they can adapt to a changing environment without losing performance.",
"termCode": "mlops",
"related": [
"machine-learning",
"model",
"deployment-platform"
]
},
{
"name": "Cloud",
"description": "An approach to computing where resources are no longer on premise, but hosted in different locations managed by a third-party.<br><br>For example, a hospital may store patient records on computers physically housed within the hospital, which require maintenance. By moving to the cloud (a marketplace of cloud vendors is established), the hospital can outsource the maintenance and expenditure of physically owning the computers, in return for renting space.<br><br>In 2013 the UK government released its “<a target='_blank' href='https://www.gov.uk/guidance/government-cloud-first-policy'>Cloud First policy</a>”.",
"termCode": "cloud",
"related": [
"on-prem",
"database"
]
},
{
"name": "On-premise",
"description": "An approach to computing where you physically locate your equipment on your premise. For example, a hospital which stores your records within the hospital buildings or nearby in hospital-owned buildings.",
"termCode": "on-prem",
"related": [
"database",
"cloud"
]
},
{
"name": "Hybrid cloud",
"description": "An approach to working with both cloud and on-premise computing resources, where some resources can be on-premise (e.g. data), and other resources can be in the Cloud (e.g. applications that use the data). With a secure connection between the premise and the Cloud you can implement a hybrid cloud.",
"termCode": "hybrid",
"related": [
"cloud",
"on-prem",
"database"
]
},
{
"name": "Python",
"description": "A general purpose computer programming language that has become very popular for data science, machine learning and AI. It is free to learn and has a large community of developers who contribute additional features.<br><br>There is a thriving <a target='_blank' href='https://nhs-pycom.net/'>NHS Python Community for Healthcare</a>.",
"termCode": "python",
"related": [
"R"
]
},
{
"name": "R",
"description": "A statistical computer programming language that is commonly used for data analysis and data science. R is free to learn and has a large community of developers who contribute additional features.<br><br>There is a thriving <a target='_blank' href='https://nhsrcommunity.com/'>NHS-R Community</a>.",
"termCode": "R",
"related": [
"python"
]
},
{
"name": "Application programming interface",
"acronym": "API",
"description": "A standardised way to share data. An API defines the mechanisms to receive and send data, which is agnostic to how the underlying data is stored.<br><br>For example, NHS Digital has a <a target='_blank' href='https://digital.nhs.uk/developer/api-catalogue'>number of APIs</a> available to help build modern healthcare technology.",
"termCode": "api",
"related": [
"database",
"cloud",
"interoperability",
"standard"
]
},
{
"name": "Standard",
"description": "An agreed set of definitions, guidelines and sometimes technical approaches for a specific area. Formal Standards may be mandated by the Government, whereas de facto standards are created and used by communities working in that space.<br><br>For example, ISO 13485 is a UK <a target='_blank' href='https://www.gov.uk/guidance/designated-standards#healthcare-engineering'>Designated Standard</a> for quality management systems for medical devices; it can be used to <a target='_blank' href='https://www.gov.uk/guidance/medical-devices-conformity-assessment-and-the-ukca-mark#compliance-with-designated-standards'>demonstrate conformance</a> with parts of the medical device regulations.",
"termCode": "standard",
"related": [
"interoperability",
"rap"
]
},
{
"name": "Pseudonymisation",
"description": "A technique that separates data from direct identifiers (for example name, surname, NHS number) and replaces them with a pseudonym (for example, a reference number), so that identifying an individual from that data is not possible without additional information. The organisation that conducted pseudonymisation will be able to re-identify individuals if required.",
"termCode": "pseudonymisation",
"related": [
"data-protection",
"anonymisation",
"dpia"
]
},
{
"name": "Anonymisation",
"description": "The process of removing all identifiable information from data in a way which makes it theoretically infeasible to identify an individual. Anonymised data is not considered as personal data under the GDPR. This means it is not subject to the same restrictions as personal data.<br><br>For example, by removing direct identifiers such as NHS number and name, and translating e.g. age into an age range (25-40) and grouping postcodes together.<br><br>You can read more about the challenges and approaches to anonymisation in the <a target='_blank' href='https://ico.org.uk/media/1061/anonymisation-code.pdf'>ICO code of practice</a>.",
"termCode": "anonymisation",
"related": [
"pseudonymisation",
"data-protection",
"dpia",
"synthetic-data"
]
},
{
"name": "Data protection",
"description": "The principles, legislation and processes to ensure that individuals can trust an organisation to use their data fairly and safely.<br><br>There are three key pieces of legislation that protect the collection, sharing and processing of data within the health and care system: Common Law Duty of Confidentiality, the General Data Protection Regulation (GDPR) and the Data Protection Act 2018 (DPA).",
"termCode": "data-protection",
"related": [
"anonymisation",
"pseudonymisation",
"federated-learning",
"synthetic-data",
"dpia"
]
},
{
"name": "Electronic health record",
"acronym": "EHR",
"description": "Also known as electronic patient record (EPR) or electronic medical record (EMR), contains the personal health records of an individual in digital format, such as visits to primary and secondary care, prescriptions, diagnoses and clinical notes.",
"termCode": "ehr",
"related": [
"database",
"cloud",
"on-prem",
"hybrid"
]
},
{
"name": "Database",
"description": "A collection of information or data stored and managed electronically. There are many different types of database depending on the data stored e.g. image data, text data or numerical data.<br><br>For example, your local Trust will manage a database containing your electronic health records (EHR). This database and the software that manages it may be referred to as an EHR system.",
"termCode": "database",
"related": [
"data",
"sql",
"linked-data",
"tre",
"structured",
"cloud",
"on-prem",
"hybrid"
]
},
{
"name": "Structured data",
"description": "Information which is well structured, such as a spreadsheet of information. This means there are defined columns and you can expect additional data to follow the same or similar structure (perhaps with some missing values).<br><br>Historically, data analysis was limited to structured data as it is well defined. More recently, advances in AI mean unstructured data is now more accessible.",
"termCode": "structured",
"related": [
"data",
"unstructured",
"database",
"sql"
]
},
{
"name": "Unstructured data",
"description": "Information which may have some structure (e.g. a name or some simple attributes or metadata), but by definition could contain a range of information.<br><br>For example, images contain some structured data (image size, image format, image name) but the contents of those images can vary completely. Audio also contains some structure (length, format) but can vary considerably.<br><br>Recent developments in approaches to AI, computational power and quantity of data have led to an increase in AI in unstructured data, such as medical imaging and speech to text.",
"termCode": "unstructured",
"related": [
"data",
"structured",
"database"
]
},
{
"name": "Explainability",
"acronym": "XAI",
"description": "AI can be built on complex algorithms and data, and explainability is a measure of how understandable, or explainable, the decisions of an AI system are to humans.<br><br>For example, an AI may predict which patients are most in need of surgery, but should be able to explain why it has prioritised patients in a certain way.<br><br>XAI (\"eXplainable Artificial Intelligence\") is where humans can understand how the results of an AI model were obtained.",
"termCode": "explainability",
"related": [
"ai",
"fairness",
"bias",
"local-explainability",
"general-explainability",
"ecological-fallacy"
]
},
{
"name": "Local explainability",
"description": "The ability to explain why an AI prediction has been made for a specific data point.<br><br>For example, an AI predicts that you should attend a follow-up appointment in 6 months, and is able to share what factors led to this specific, individual, decision (e.g. date of your last appointment, pre-existing conditions).",
"termCode": "local-explainability",
"related": [
"explainability",
"ai",
"fairness",
"bias",
"general-explainability"
]
},
{
"name": "General explainability",
"description": "Sometimes referred to as global explainability, general explainability is an approach to sharing what features or data points had the most influence over an AI model's predictions.<br><br>For example, in an AI system for predicting length of stay in hospital, age and location were the two most important factors.",
"termCode": "general-explainability",
"related": [
"explainability",
"feature",
"ai",
"fairness",
"bias",
"local-explainability"
]
},
{
"name": "Inference",
"description": "In AI, inference is the process of making a prediction from a model that has already been trained.<br><br>For example, a hospital may implement a new AI model that can suggest the best bed allocation for an incoming patient. Inference occurs when that new patient arrives, and the system is run to suggest a new allocation.",
"termCode": "inference",
"related": [
"ai",
"machine-learning",
"predictive-analytics",
"supervised"
]
},
{
"name": "Training",
"description": "Most types of AI require a training process, which uses historical data to build a model able to predict future cases.<br><br>For example, researchers have trained models to predict Covid-19 from patient X-rays using the NCCID database.",
"termCode": "training",
"related": [
"machine-learning",
"gradient-descent",
"supervised",
"training-data",
"test-data",
"validation-data",
"underfitting",
"overfitting"
]
},
{
"name": "Causality",
"description": "The influence of an event resulting, or causing, another.<br><br>For example, consuming more calories than you use up will lead to weight gain.<br><br>While many AI systems use patterns found in data to make predictions, very rarely are these patterns sufficient to determine the underlying cause of a behaviour.",
"termCode": "causality",
"related": [
"correlation",
"explainability"
]
},
{
"name": "Correlation",
"description": "A measure that expresses the extent to which data are related in a direct, or linear, way. It describes a relationship between data without making any statement about cause and effect (i.e. correlation does not mean causality). However, it can describe a positive or negative, also known as inverse, relationship between these data. A positive relationship means that if one goes up the other goes up too; a negative relationship means that if one goes up the other goes down.<br><br>For example, an individual's weight is correlated to their height and waiting times are correlated to the number of people waiting.",
"termCode": "correlation",
"related": [
"causality",
"unsupervised"
]
},
{
"name": "Ecological fallacy",
"description": "An error that occurs when characteristics of a group are attributed to an individual member of that group. In other words, you should not make conclusions about individuals based on findings about the group they belong to.",
"termCode": "ecological-fallacy",
"related": [
"explainability"
]
},
{
"name": "Clinical trials",
"description": "A formal experiment used to scientifically evaluate the performance of a medicine, technology or process before being approved for widespread use.<br><br>For example, many medicines will undertake a series of trials to demonstrate their effectiveness and safety.<br><br>You can find out more about clinical trials in the UK at the <a target='_blank' href='https://www.nhs.uk/conditions/clinical-trials/'>NHS website</a>.",
"termCode": "clinical-trials",
"related": [
"test-data"
]
},
{
"name": "Fairness",
"description": "When individuals are not penalised by algorithms because they are part of a (sensitive) group.<br><br>For example, an AI algorithm used in law enforcement in the USA was shown to unfairly increase sentence recommendations based on racial background (an example of unfairness caused by discrimination bias).<br><br>Note that in <a href='https://ico.org.uk/for-organisations/guide-to-data-protection/guide-to-the-general-data-protection-regulation-gdpr/principles/lawfulness-fairness-and-transparency/#fairness' target='_blank'>data protection law</a>, fairness refers to the use of data in ways that people would reasonably expect, and that would not have unjustified adverse effects on them.",
"termCode": "fairness",
"related": [
"bias",
"explainability",
"ai",
"predictive-analytics",
"algorithm",
"data-protection"
]
},
{
"name": "Bias",
"description": "The disproportionate weighting in favour of, or against a specific item or individual. There are <a href='https://en.wikipedia.org/wiki/Bias' target='_blank'>many types of bias</a> that exist.<br><br>AI algorithms are often trained on historical data which can contain bias that exists in society, and are trained by humans who themselves have bias. Unless the algorithms are built with fairness in mind, they may repeat these biases in their predictions.<br><br>For example, an AI trained on CVs from engineers, who are historically predominantly male, may unfairly penalise CVs from women if the algorithms are not tested for fairness and modified to remove bias. This is an example of selection bias.",
"termCode": "bias",
"related": [
"fairness",
"explainability",
"ai",
"predictive-analytics",
"algorithm"
]
},
{
"name": "Proof of concept",
"acronym": "PoC",
"description": "A demonstration of the feasibility, or possibility, of a technology to be able to perform a task or solve a specific problem. A PoC is an early stage exploration, and would be followed by additional testing and engineering to ensure its viability in a real world setting.<br><br>For example, the <a target='_blank' href='https://www.nhsx.nhs.uk/ai-lab/ai-lab-programmes/skunkworks/'>NHS AI Lab Skunkworks</a> team publishes PoCs on a range of problems, such as using AI to allocate beds or predict length of stay in hospital.",
"termCode": "poc",
"related": [
"trl"
]
},
{
"name": "Technology readiness level",
"acronym": "TRL",
"description": "A <a target='_blank' href='https://en.wikipedia.org/wiki/Technology_readiness_level'>framework</a> developed by NASA to describe the different levels of maturity of a technology.<br><br>For example, a proof of concept may come under TRL level 4.<br><br>TRL levels start at 1 for a basic idea through to 9 for a fully deployed solution.",
"termCode": "trl",
"related": [
"poc"
]
},
{
"name": "Training data",
"description": "The data required to train, or “teach” a machine learning algorithm when developing a model.<br><br>Good quality training data that is reflective of the population, unbiased and large enough to ensure a robust model is a key prerequisite for AI.",
"termCode": "training-data",
"related": [
"test-data",
"validation-data",
"data-augmentation",
"training",
"supervised",
"predictive-analytics",
"machine-learning"
]
},
{
"name": "Test data",
"description": "Data that is not included in the training data, and used to test that a model has accurately identified the patterns in the data that result in the desired behaviour.<br><br>For example, the NCCID holds an external validation data set used to test commercial and academic models built to detect Covid-19 from chest X-rays and CT scans.",
"termCode": "test-data",
"related": [
"validation-data",
"training-data",
"supervised",
"training",
"predictive-analytics",
"machine-learning"
]
},
{
"name": "Cross validation",
"description": "An approach to reducing overfitting during model development, by iteratively selecting different portions of the data to train and validate a predictive (supervised) machine learning model.<br><br>Cross validation can increase the overall performance of a model, along with data augmentation techniques.",
"termCode": "cross-validation",
"related": [
"training-data",
"validation-data",
"overfitting",
"data-augmentation",
"training",
"supervised",
"predictive-analytics",
"machine-learning"
]
},
{
"name": "Validation data",
"description": "Data that is not included in the training data, but is used to check the performance of the model as it is being trained. This is separate to the test data used to check the final performance of the model.<br><br>This definition relates to the definition within AI, and not the regulatory aspects of medical devices.",
"termCode": "validation-data",
"related": [
"test-data",
"training-data",
"training",
"supervised",
"predictive-analytics",
"machine-learning"
]
},
{
"name": "Classification",
"description": "A type of machine learning model which can predict whether or not you belong to a specific class, or label. Common approaches include logistic regression, decision trees and random forests.<br><br>For example, a classification model may be built to identify individuals with diabetes. The classes could be Type-I diabetes, Type-II diabetes, gestational diabetes or no diabetes. Normally, a model would return the probability that you belonged to each class, and would assign you to the class with the highest probability.",
"termCode": "classification",
"related": [
"supervised",
"label",
"feature",
"ai",
"machine-learning",
"predictive-analytics",
"binary",
"regression"
]
},
{
"name": "Regression",
"description": "A type of machine learning model that predicts a continuous value, instead of a discrete value as is the case in classification models.<br><br>For example, a regression model may predict how long you will stay in a hospital bed upon admission. This value will be a numerical value e.g. 27 hours, or 28 hours.",
"termCode": "regression",
"related": [
"supervised",
"ai",
"machine-learning",
"predictive-analytics",
"classification"
]
},
{
"name": "Accuracy",
"description": "<em>Classification metric</em>.<br><br>The proportion of correctly identified positive and negative cases in a classification model.<br><br>For example, an AI model that correctly identifies all positive and negative cases of patients with e.g. Covid-19 would have an accuracy of 1.0 (100%).<br><br>Note that in <a href='https://ico.org.uk/for-organisations/guide-to-data-protection/key-dp-themes/guidance-on-artificial-intelligence-and-data-protection/' target='_blank'>data protection law</a>, accuracy refers to accurate and up to date record keeping.",
"termCode": "accuracy",
"related": [
"specificity",
"sensitivity",
"precision",
"auc",
"roc",
"f1-score",
"false-positive",
"false-negative",
"true-positive",
"true-negative",
"operating-point",
"data-protection"
]
},
{
"name": "Specificity",
"description": "The ability of a classification model to correctly identify individuals <em>without</em> a condition. This is also known as the true negative rate.<br><br>For example, an AI tool that has been developed to detect lung cancer from medical images is said to be specific if it correctly identifies people who do not have lung cancer.",
"termCode": "specificity",
"related": [
"accuracy",
"sensitivity",
"precision",
"auc",
"roc",
"f1-score",
"false-positive",
"false-negative",
"true-positive",
"true-negative",
"operating-point"
]
},
{
"name": "Sensitivity",
"alternateName": "Recall",
"description": "The ability of a classification model to correctly identify individuals <em>with</em> a condition. This is also known as the true positive rate and recall. It is defined as the number of true positives over true positives and false negatives.<br><br>For example, an AI tool that has been developed to detect lung cancer from medical images is said to be sensitive if it correctly identifies people who have lung cancer.",
"termCode": "sensitivity",
"related": [
"accuracy",
"specificity",
"precision",
"auc",
"roc",
"f1-score",
"false-positive",
"false-negative",
"true-positive",
"true-negative",
"operating-point"
]
},
{
"name": "False positive",
"description": "The incorrect prediction of a data point or individual having a specific outcome or class.<br><br>For example, if an AI tool incorrectly predicts you have diabetes.",
"termCode": "false-positive",
"related": [
"accuracy",
"specificity",
"sensitivity",
"precision",
"auc",
"roc",
"f1-score",
"false-negative",
"true-positive",
"true-negative",
"operating-point"
]
},
{
"name": "False negative",
"description": "The incorrect prediction of a data point or individual not having a specific outcome or class.<br><br>For example, if an AI tool incorrectly predicts you do not have diabetes, when in fact, you do.",
"termCode": "false-negative",
"related": [
"accuracy",
"specificity",
"sensitivity",
"precision",
"auc",
"roc",
"f1-score",
"false-positive",
"true-positive",
"true-negative",
"operating-point"
]
},
{
"name": "True positive",
"description": "The correct prediction of a data point or individual having a specific outcome or class.<br><br>For example, if an AI tool correctly predicts you have diabetes.",
"termCode": "true-positive",
"related": [
"accuracy",
"specificity",
"sensitivity",
"precision",
"auc",
"roc",
"f1-score",
"false-positive",
"false-negative",
"true-negative",
"operating-point"
]
},
{
"name": "True negative",
"description": "The correct prediction of a data point or individual not having a specific outcome or class.<br><br>For example, if an AI tool correctly predicts you do not have diabetes.",
"termCode": "true-negative",
"related": [
"accuracy",
"specificity",
"sensitivity",
"precision",
"auc",
"roc",
"f1-score",
"false-positive",
"false-negative",
"true-positive",
"operating-point"
]
},
{
"name": "Precision",
"description": "A way of measuring how reliable the positive predictions of a classification model are. It is the ratio of true positives over all cases predicted as positive (true and false positives).<br><br>For example, a model with high precision (1.0) produces no false positives: every case it identifies as positive is truly positive. Note this does not account for false negatives, and a high precision could be obtained by only assigning the most certain cases as positive, while missing many other positive cases.",
"termCode": "precision",
"related": [
"accuracy",
"specificity",
"sensitivity",
"auc",
"roc",
"f1-score",
"false-positive",
"false-negative",
"true-positive",
"true-negative",
"operating-point"
]
},
{
"name": "Operating point",
"description": "In a classification model, this is the point chosen to define when a case is positive or not. The location of this point will determine the model performance measured by true positives, false positives, true negatives and false negatives.<br><br>For an interactive demonstration of the impact of deciding the operating point, visit this <a target='_blank' href='https://nhsx.github.io/covid-chest-imaging-database/experiments'>NCCID operating point experiment</a>.",
"termCode": "operating-point",
"related": [
"accuracy",
"specificity",
"sensitivity",
"precision",
"auc",
"roc",
"f1-score",
"false-positive",
"false-negative",
"true-positive",
"true-negative"
]
},
{
"name": "Receiver Operator Characteristic",
"acronym": "ROC",
"description": "Used to create a plotted curve which demonstrates how the trade off between true positive rate and false positive rate changes as you vary the operating point of a classification model.<br><br>For an interactive demonstration of a ROC curve, visit this <a target='_blank' href='https://nhsx.github.io/covid-chest-imaging-database/experiments'>NCCID operating point experiment</a>.",
"termCode": "roc",
"related": [
"accuracy",
"specificity",
"sensitivity",
"precision",
"auc",
"f1-score",
"false-positive",
"false-negative",
"true-positive",
"true-negative",
"operating-point"
]
},
{
"name": "Area Under the (Receiver Operator Characteristic) Curve",
"acronym": "AUC",
"description": "A single number calculated from a ROC curve to help summarise the performance of a classification model.",
"termCode": "auc",
"related": [
"roc",
"accuracy",
"specificity",
"sensitivity",
"precision",
"f1-score",
"false-positive",
"false-negative",
"true-positive",
"true-negative",
"operating-point"
]
},
{
"name": "F1 score",
"description": "A metric which describes the accuracy of a classification model, by combining the precision and sensitivity (recall) values into a single number, which ranges from 0 (poor accuracy) to 1 (high accuracy).",
"termCode": "f1-score",
"related": [
"precision",
"sensitivity",
"accuracy",
"false-positive",
"false-negative",
"true-positive",
"operating-point"
]
},
{
"name": "National COVID-19 Chest Imaging Database",
"acronym": "NCCID",
"description": "The NCCID is a national database that supports better understanding of COVID-19 and the development of technology enabling the best care for patients hospitalised with a severe infection.",
"termCode": "nccid",
"related": [
"database"
]
},
{
"name": "Interoperability",
"description": "The ability of digital systems to exchange information without requiring significant efforts to convert data from different formats. AI is built on data, and accessing data from different systems requires a level of standardisation and interoperability to ensure a robust model can be built and used.",
"termCode": "interoperability",
"related": [
"standard",
"rap",
"api"
]
},
{
"name": "Multimodal artificial intelligence",
"acronym": "Multimodal AI",
"description": "An approach to AI which incorporates multiple types of data. For example, a speech-to-text model that is typically trained on audio and text data, could include image data of lip movements taken from video recordings.<br><br>Multimodal AI can combine both numerical data such as blood pressure, heart rate, and imaging data such as a CT scan.<br><br>In healthcare, the term is also associated with specific 'modalities' of data, such as the sequences in <a target='_blank' href='https://prostatecanceruk.org/about-us/projects-and-policies/mpmri'>mpMRI</a> scanning. However, the concept of multimodality with respect to AI is more than just bringing different types of data together - it is about how very different AI models effectively interoperate - even 'merge' - to create a whole that is greater than the sum of its parts.",
"termCode": "multimodal",
"related": [
"model",
"machine-learning",
"ai"
]
},
{
"name": "Neural network",
"description": "Neural networks are an approach to machine learning, loosely inspired by nature, that can describe complex relationships using a broader range of data than traditional approaches.<br><br>For example, neural networks can be trained on image data to describe features in medical images such as tumours. They can also be trained on free text such as clinical notes, allowing their use in clinical coding applications.<br><br>Neural networks can also be trained on tabulated, or structured data, such as a spreadsheet. Their ability to model complexity often comes at the cost of explainability, whereby the more complex the model, the harder to explain it becomes.",
"termCode": "neural-network",
"related": [
"deep-learning",
"cnn",
"rnn",
"graph-neural-network",
"model",
"feature",
"machine-learning",
"structured",
"unstructured",
"supervised",
"explainability"
]
},
{
"name": "Deep learning",
"description": "An approach to building models using neural networks with more than one 'hidden' layer of artificial neurons. This is a common approach when working with image and text data.<br><br>Deep learning models are able to capture complex relationships, but it can be difficult to interpret what data leads to a particular outcome.",
"termCode": "deep-learning",
"related": [
"neural-network",
"cnn",
"rnn",
"machine-learning",
"transformer",
"supervised",
"ai",
"computer-vision",
"explainability"
]
},
{
"name": "Overfitting",
"description": "The process of building a model which is based too closely on the data. This results in a model which may be very accurate on the training data, but when tested on additional datasets such as the test data, unseen data or data from a new environment, performs badly.<br><br>Approaches to reduce overfitting include cross-validation, data augmentation and ensemble techniques (which combine different models).",
"termCode": "overfitting",
"related": [
"underfitting",
"machine-learning",
"bias",
"model",
"training-data",
"test-data",
"cross-validation",
"data-augmentation"
]
},
{
"name": "Underfitting",
"description": "The process of building a model which is not based closely enough on the data. This results in a model which performs badly and fails to capture the relationships you are looking for.<br><br>There is a balance to be made between underfitting and overfitting.",
"termCode": "underfitting",
"related": [
"overfitting",
"machine-learning",
"model",
"training-data"
]
},
{
"name": "Data augmentation",
"description": "The process of artificially increasing the amount of data used to train a model, to reduce overfitting and improve model performance.<br><br>Commonly used in imaging applications, this can include rotating, cropping, adding noise or random levels of blur to existing images.",
"termCode": "data-augmentation",
"related": [
"overfitting",
"training",
"model",
"machine-learning"
]
},
{
"name": "Natural language processing",
"acronym": "NLP",
"description": "A collection of techniques which use speech and text data.<br><br>Speech-to-text systems convert verbal speech to text, such as in a smart speaker.<br><br>Natural language understanding systems convert text into concepts or instructions, such as your requests to play music or offer directions.<br><br>Text-to-speech systems convert text into verbal speech, such as responses from a smart speaker.<br><br>Natural language generation will create human-like text based on concepts, such as writing a report from a summary table of data.<br><br>NLP has developed significantly in recent years in part due to the availability of deep learning algorithms and transformers.",
"termCode": "nlp",
"related": [
"ner",
"deep-learning",
"transformer",
"sequential-data",
"neural-network",
"rnn",
"machine-learning",
"supervised",
"ai"
]
},
{
"name": "Large Language Model",
"acronym": "LLM",
"description": "A large language model is a neural network that is trained on a vast amount of text. The training uses unlabelled text and some form of self-supervised learning. Usually, LLMs will have billions of parameters. They are an instance of a foundation model (FM).",
"termCode": "llm",
"related": [
"nlp",
"semi-supervised",
"foundation-model",
"neural-network",
"machine-learning",
"supervised",
"ai"
]
},
{
"name": "Foundation Model",
"acronym": "FM",
"description": "A foundation model is an ML model trained on a large amount of data. The data is unlabelled and the model is trained by a self-supervised learning algorithm.",
"termCode": "foundation-model",
"related": [
"nlp",
"semi-supervised",
"llm",
"neural-network",
"machine-learning",
"supervised",
"ai"
]
},
{
"name": "Transformer",
"description": "An approach to deep learning which uses an 'attention' mechanism to understand context in text or image data, without requiring data to be processed in order.<br><br>Transformers have led to recent breakthroughs in NLP and computer vision.",
"termCode": "transformer",
"related": [
"nlp",
"deep-learning",
"machine-learning",
"ai"
]
},
{
"name": "Confusion matrix",
"description": "A table that is used to describe the performance of a classification model on a set of test data for which the true values are known. It can be used to derive a number of measures such as sensitivity and specificity.<br><br>For example, a model may predict whether a patient has a form of diabetes, or not. Its performance can be described by writing out where it correctly or incorrectly predicted diabetes (positive) or not (negative) where we know the actual results:<br><br><table id='matrix-table'><thead><tr><th colspan='2' rowspan='2'></th><th colspan='2'>Predicted</th></tr><tr><th>Positive</th><th>Negative</th></tr></thead><tbody><tr><th rowspan='2'>Actual</th><th>Positive</th><td>True positive</td><td>False negative</td></tr><tr><th>Negative</th><td>False positive</td><td>True negative</td></tr></tbody></table>",
"termCode": "confusion-matrix",
"related": [
"accuracy",
"specificity",
"sensitivity",
"precision",
"false-positive",
"false-negative",
"true-positive",
"true-negative"
]
},
{
"name": "Linked data",
"description": "Data which has been combined (connected, or \"linked\") with other relevant data to increase the available information on a specific individual or population.<br><br>In health and care, an example would be linking GP records with hospital records, which are typically generated independently.<br><br>There are many different datasets that could be linked to improve the information available to treat patients, such as social care records, dental records and mental health records.",
"termCode": "linked-data",
"related": [