<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head>
<meta charset="utf-8">
<meta name="generator" content="quarto-1.4.549">
<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">
<meta name="dcterms.date" content="2024-03-17">
<title>Learning with (Decision) Trees</title>
<style>
code{white-space: pre-wrap;}
span.smallcaps{font-variant: small-caps;}
div.columns{display: flex; gap: min(4vw, 1.5em);}
div.column{flex: auto; overflow-x: auto;}
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
ul.task-list{list-style: none;}
ul.task-list li input[type="checkbox"] {
width: 0.8em;
margin: 0 0.8em 0.2em -1em; /* quarto-specific, see https://github.com/quarto-dev/quarto-cli/issues/4556 */
vertical-align: middle;
}
/* CSS for syntax highlighting */
pre > code.sourceCode { white-space: pre; position: relative; }
pre > code.sourceCode > span { line-height: 1.25; }
pre > code.sourceCode > span:empty { height: 1.2em; }
.sourceCode { overflow: visible; }
code.sourceCode > span { color: inherit; text-decoration: inherit; }
div.sourceCode { margin: 1em 0; }
pre.sourceCode { margin: 0; }
@media screen {
div.sourceCode { overflow: auto; }
}
@media print {
pre > code.sourceCode { white-space: pre-wrap; }
pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
}
pre.numberSource code
{ counter-reset: source-line 0; }
pre.numberSource code > span
{ position: relative; left: -4em; counter-increment: source-line; }
pre.numberSource code > span > a:first-child::before
{ content: counter(source-line);
position: relative; left: -1em; text-align: right; vertical-align: baseline;
border: none; display: inline-block;
-webkit-touch-callout: none; -webkit-user-select: none;
-khtml-user-select: none; -moz-user-select: none;
-ms-user-select: none; user-select: none;
padding: 0 4px; width: 4em;
}
pre.numberSource { margin-left: 3em; padding-left: 4px; }
div.sourceCode
{ }
@media screen {
pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
}
</style>
<script src="1.1-DecisionTrees_files/libs/clipboard/clipboard.min.js"></script>
<script src="1.1-DecisionTrees_files/libs/quarto-html/quarto.js"></script>
<script src="1.1-DecisionTrees_files/libs/quarto-html/popper.min.js"></script>
<script src="1.1-DecisionTrees_files/libs/quarto-html/tippy.umd.min.js"></script>
<script src="1.1-DecisionTrees_files/libs/quarto-html/anchor.min.js"></script>
<link href="1.1-DecisionTrees_files/libs/quarto-html/tippy.css" rel="stylesheet">
<link href="1.1-DecisionTrees_files/libs/quarto-html/quarto-syntax-highlighting.css" rel="stylesheet" id="quarto-text-highlighting-styles">
<script src="1.1-DecisionTrees_files/libs/bootstrap/bootstrap.min.js"></script>
<link href="1.1-DecisionTrees_files/libs/bootstrap/bootstrap-icons.css" rel="stylesheet">
<link href="1.1-DecisionTrees_files/libs/bootstrap/bootstrap.min.css" rel="stylesheet" id="quarto-bootstrap" data-mode="light">
<script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
<script src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-chtml-full.js" type="text/javascript"></script>
<script type="text/javascript">
const typesetMath = (el) => {
if (window.MathJax) {
// MathJax Typeset
window.MathJax.typeset([el]);
} else if (window.katex) {
// KaTeX Render
var mathElements = el.getElementsByClassName("math");
var macros = [];
for (var i = 0; i < mathElements.length; i++) {
var texText = mathElements[i].firstChild;
if (mathElements[i].tagName == "SPAN") {
window.katex.render(texText.data, mathElements[i], {
displayMode: mathElements[i].classList.contains('display'),
throwOnError: false,
macros: macros,
fleqn: false
});
}
}
}
}
window.Quarto = {
typesetMath
};
</script>
</head>
<body>
<div id="quarto-content" class="page-columns page-rows-contents page-layout-article">
<div id="quarto-margin-sidebar" class="sidebar margin-sidebar">
<nav id="TOC" role="doc-toc" class="toc-active">
<h2 id="toc-title">Table of contents</h2>
<ul>
<li><a href="#introduction" id="toc-introduction" class="nav-link active" data-scroll-target="#introduction">Introduction</a>
<ul class="collapse">
<li><a href="#supervised-learning" id="toc-supervised-learning" class="nav-link" data-scroll-target="#supervised-learning">Supervised Learning</a></li>
</ul></li>
<li><a href="#decision-tree-basics" id="toc-decision-tree-basics" class="nav-link" data-scroll-target="#decision-tree-basics">Decision tree basics</a>
<ul class="collapse">
<li><a href="#decision-tree" id="toc-decision-tree" class="nav-link" data-scroll-target="#decision-tree">Decision tree</a></li>
<li><a href="#training-and-testing-decision-trees" id="toc-training-and-testing-decision-trees" class="nav-link" data-scroll-target="#training-and-testing-decision-trees">Training and testing decision trees</a></li>
<li><a href="#basic-notation" id="toc-basic-notation" class="nav-link" data-scroll-target="#basic-notation">Basic notation</a></li>
<li><a href="#tree-growing-procedure" id="toc-tree-growing-procedure" class="nav-link" data-scroll-target="#tree-growing-procedure">Tree-Growing Procedure</a></li>
<li><a href="#choosing-the-best-split-for-a-variable" id="toc-choosing-the-best-split-for-a-variable" class="nav-link" data-scroll-target="#choosing-the-best-split-for-a-variable">Choosing the best split for a Variable</a>
<ul class="collapse">
<li><a href="#splitting-strategies" id="toc-splitting-strategies" class="nav-link" data-scroll-target="#splitting-strategies">Splitting Strategies</a></li>
<li><a href="#node-information-functions" id="toc-node-information-functions" class="nav-link" data-scroll-target="#node-information-functions">Node information functions</a></li>
<li><a href="#information-gain" id="toc-information-gain" class="nav-link" data-scroll-target="#information-gain">Information gain</a></li>
</ul></li>
<li><a href="#example" id="toc-example" class="nav-link" data-scroll-target="#example">Example</a></li>
<li><a href="#recursive-partitioning" id="toc-recursive-partitioning" class="nav-link" data-scroll-target="#recursive-partitioning">Recursive Partitioning</a></li>
<li><a href="#stop-spliting" id="toc-stop-spliting" class="nav-link" data-scroll-target="#stop-spliting">Stop spliting</a></li>
<li><a href="#plurality-rule" id="toc-plurality-rule" class="nav-link" data-scroll-target="#plurality-rule">Plurality rule</a></li>
<li><a href="#choosing-the-best-split-in-regression-trees" id="toc-choosing-the-best-split-in-regression-trees" class="nav-link" data-scroll-target="#choosing-the-best-split-in-regression-trees">Choosing the best split in Regression Trees</a></li>
<li><a href="#estimating-the-misclassification-rate" id="toc-estimating-the-misclassification-rate" class="nav-link" data-scroll-target="#estimating-the-misclassification-rate">Estimating the misclassification rate</a></li>
<li><a href="#tree-pruning" id="toc-tree-pruning" class="nav-link" data-scroll-target="#tree-pruning">Tree Pruning</a>
<ul class="collapse">
<li><a href="#cost-complexity-pruning" id="toc-cost-complexity-pruning" class="nav-link" data-scroll-target="#cost-complexity-pruning">Cost complexity pruning</a></li>
</ul></li>
<li><a href="#advantages-and-disadvantages-of-trees" id="toc-advantages-and-disadvantages-of-trees" class="nav-link" data-scroll-target="#advantages-and-disadvantages-of-trees">Advantages and disadvantages of trees</a></li>
</ul></li>
<li><a href="#bibliography" id="toc-bibliography" class="nav-link" data-scroll-target="#bibliography">Bibliography</a></li>
</ul>
<div class="quarto-alternate-formats"><h2>Other Formats</h2><ul><li><a href="1.1-DecisionTrees.pdf"><i class="bi bi-file-pdf"></i>PDF</a></li></ul></div></nav>
</div>
<main class="content" id="quarto-document-content">
<header id="title-block-header" class="quarto-title-block default">
<div class="quarto-title">
<h1 class="title">Learning with (Decision) Trees</h1>
</div>
<div class="quarto-title-meta">
<div>
<div class="quarto-title-meta-heading">Authors</div>
<div class="quarto-title-meta-contents">
<p>Esteban Vegas </p>
<p>Ferran Reverter </p>
<p>Alex Sanchez </p>
</div>
</div>
<div>
<div class="quarto-title-meta-heading">Published</div>
<div class="quarto-title-meta-contents">
<p class="date">March 17, 2024</p>
</div>
</div>
</div>
</header>
<div class="cell">
<div class="sourceCode cell-code" id="cb1"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="fu">options</span>(<span class="at">width=</span><span class="dv">100</span>) </span>
<span id="cb1-2"><a href="#cb1-2" aria-hidden="true" tabindex="-1"></a><span class="cf">if</span>(<span class="sc">!</span><span class="fu">require</span>(<span class="st">"knitr"</span>)) <span class="fu">install.packages</span>(<span class="st">"knitr"</span>)</span>
<span id="cb1-3"><a href="#cb1-3" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(<span class="st">"knitr"</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div>
<section id="introduction" class="level1">
<h1>Introduction</h1>
<p>Decision trees have been around for a number of years. Their recent revival is due to the discovery that ensembles of slightly different trees tend to generalize far better, that is, to produce much higher accuracy on previously unseen data. Ensembles of trees will be discussed later; let us focus first on individual trees.</p>
<section id="supervised-learning" class="level2">
<h2 class="anchored" data-anchor-id="supervised-learning">Supervised Learning</h2>
<p>Classification and regression are supervised learning tasks. The learning set is of the form: <span class="math display">\[
\mathcal{L}=\{(\mathbf{x}_i,y_i)\}_{i=1}^n
\]</span> Classification (Two classes, binary) <span class="math display">\[
\mathbf{x}\in\mathbb{R}^d,\qquad y\in\{-1,+1\}
\]</span> Regression <span class="math display">\[
\mathbf{x}\in\mathbb{R}^d,\qquad y\in\mathbb{R}
\]</span></p>
</section>
</section>
<section id="decision-tree-basics" class="level1">
<h1>Decision tree basics</h1>
<p><img src="images/rf0.jpg" class="img-fluid" style="width:75.0%"></p>
<section id="decision-tree" class="level2">
<h2 class="anchored" data-anchor-id="decision-tree">Decision tree</h2>
<p>See Fig. 1.</p>
<ol type="1">
<li>A tree is a set of nodes and edges organized in a hierarchical fashion. In contrast to a graph, in a tree there are no loops. Internal nodes are denoted with circles and terminal nodes with squares.</li>
<li>A decision tree is a tree where each split node stores a Boolean test function to be applied to the incoming data. Each leaf stores the final answer (predictor).</li>
</ol>
</section>
<section id="training-and-testing-decision-trees" class="level2">
<h2 class="anchored" data-anchor-id="training-and-testing-decision-trees">Training and testing decision trees</h2>
<p><img src="images/rf00.jpg" class="img-fluid" style="width:75.0%"></p>
</section>
<section id="basic-notation" class="level2">
<h2 class="anchored" data-anchor-id="basic-notation">Basic notation</h2>
<p>See Fig. 2.</p>
<ol type="1">
<li>Input data is represented as a collection of points in the <span class="math inline">\(d\)</span>-dimensional space which are labeled by their feature responses.</li>
<li>A decision tree is a hierarchical structure of connected nodes.</li>
<li>Training a decision tree involves sending all training data <span class="math inline">\(\{v\}\)</span> into the tree and optimizing the parameters of the split nodes so as to minimize a chosen cost function.</li>
<li>During testing, a split (internal) node applies a test to the input data <span class="math inline">\(v\)</span> and sends it to the appropriate child. The process is repeated until a leaf (terminal) node is reached (beige path).</li>
</ol>
<p><img src="images/partition.jpg" class="img-fluid" style="width:75.0%"></p>
</section>
<section id="tree-growing-procedure" class="level2">
<h2 class="anchored" data-anchor-id="tree-growing-procedure">Tree-Growing Procedure</h2>
<p>See Fig. 3.</p>
<p>In order to grow a classification tree, we need to answer four basic questions:</p>
<ol type="1">
<li>How do we choose the Boolean conditions for splitting at each node?</li>
<li>Which criterion should we use to split a parent node into its two child nodes?</li>
<li>How do we decide when a node becomes a terminal node (i.e., when to stop splitting)?</li>
<li>How do we assign a class to a terminal node?</li>
</ol>
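<p>As a concrete point of reference, the sketch below grows a small classification tree in R with the <code>rpart</code> package (an illustration, assuming the package is available; by default <code>rpart</code> answers these four questions with binary Gini-based splits, minimum-node-size stopping rules, and plurality-vote class labels at the leaves):</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"># A minimal sketch: grow a classification tree on the built-in iris data
if(!require("rpart")) install.packages("rpart")
library(rpart)

fit &lt;- rpart(Species ~ ., data = iris, method = "class")
print(fit)            # text view of the splits and the leaf classes
plot(fit); text(fit)  # quick plot of the fitted tree</code></pre></div>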
</section>
<section id="choosing-the-best-split-for-a-variable" class="level2">
<h2 class="anchored" data-anchor-id="choosing-the-best-split-for-a-variable">Choosing the best split for a Variable</h2>
<section id="splitting-strategies" class="level3">
<h3 class="anchored" data-anchor-id="splitting-strategies">Splitting Strategies</h3>
<p>At each node, the tree-growing algorithm has to decide on which variable it is “best” to split. To do so, it enumerates every possible split over all variables present at that node, evaluates each one, and picks the best in some sense.</p>
<p>For a description of splitting rules, we need to distinguish between ordinal (or continuous) and nominal (or categorical) variables. For a continuous or ordinal variable, the number of possible splits at a given node is one fewer than the number of its distinctly observed values. For a categorical variable with <span class="math inline">\(m\)</span> distinct categories, there are <span class="math inline">\(2^{m-1}-1\)</span> distinct binary splits.</p>
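<p>These counts are easy to check in R (a small sketch with made-up values):</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"># Continuous/ordinal: one split fewer than the number of distinct values
x &lt;- c(2.1, 3.5, 3.5, 7.0, 9.2)
length(unique(x)) - 1      # 3 possible splits

# Categorical with m levels: 2^(m-1) - 1 distinct binary partitions
m &lt;- nlevels(factor(c("a", "b", "c")))
2^(m - 1) - 1              # 3 possible splits</code></pre></div>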
<p>We first need to choose the best split for a given variable. Accordingly, we need a measure of the goodness of a split. Let <span class="math inline">\(C_1\)</span>,…,<span class="math inline">\(C_K\)</span> be the <span class="math inline">\(K\geq 2\)</span> classes. For node <span class="math inline">\(\tau\)</span>, we denote by <span class="math display">\[
P(X\in C_k\vert \tau)
\]</span> the conditional probability that an observation <span class="math inline">\(x\)</span> is in <span class="math inline">\(C_k\)</span> given that it falls into the node <span class="math inline">\(\tau\)</span>.</p>
</section>
<section id="node-information-functions" class="level3">
<h3 class="anchored" data-anchor-id="node-information-functions">Node information functions</h3>
<p>See Fig. 4.</p>
<p><img src="images/impurity.jpg" class="img-fluid" style="width:75.0%"></p>
<p>Fig. 4: Node impurity functions for the two-class case. The entropy function (rescaled) is the red curve, the Gini index is the green curve, and the resubstitution estimate of the misclassification rate is the blue curve.</p>
<p>For node <span class="math inline">\(\tau\)</span>, the node information function <span class="math inline">\(i(\tau)\)</span> is defined by <span class="math display">\[
i(\tau)=\phi\Big(p(1\vert \tau),...,p(K\vert \tau)\Big)
\]</span> where <span class="math inline">\(p(k\vert \tau)\)</span> is an estimate of <span class="math inline">\(P(X\in C_k\vert \tau)\)</span>.</p>
<p>We require <span class="math inline">\(\phi\)</span> to be a symmetric function, defined on the set of all <span class="math inline">\(K\)</span>-tuples of probabilities <span class="math inline">\((p_1,\dots,p_k)\)</span> with unit sum, minimized at the points <span class="math inline">\((1,0,\cdots,0)\)</span>, <span class="math inline">\((0,1,\cdots,0)\)</span>, <span class="math inline">\((0,0,\cdots,1)\)</span>, and maximized at the point <span class="math inline">\((\frac{1}{K},\frac{1}{K},\cdots,\frac{1}{K})\)</span>. One such function <span class="math inline">\(\phi\)</span> is the entropy function, <span class="math display">\[
i(\tau)=-\sum_{i=1}^K p(k\vert \tau)\log p(k\vert \tau)
\]</span></p>
<p>An alternative to the entropy is the Gini index, given by</p>
<p><span class="math display">\[
i(\tau)=1-\sum_{k=1}^K p(k\vert \tau)^2
\]</span></p>
<p>A third choice is the misclassification rate,</p>
<p><span class="math display">\[
i(\tau)=1-\max_k p(k\vert \tau)
\]</span></p>
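<p>All three node information functions take only a vector of class proportions; a minimal sketch in R (not tied to any particular package):</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"># p: vector of class proportions p(k | tau), summing to one
entropy  &lt;- function(p) -sum(ifelse(p &gt; 0, p * log(p), 0))
gini     &lt;- function(p) 1 - sum(p^2)
misclass &lt;- function(p) 1 - max(p)

p &lt;- c(0.7, 0.3)
entropy(p)   # 0.6109
gini(p)      # 0.42
misclass(p)  # 0.3</code></pre></div>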
</section>
<section id="information-gain" class="level3">
<h3 class="anchored" data-anchor-id="information-gain">Information gain</h3>
<p><img src="images/rf11.jpg" class="img-fluid" style="width:80.0%"></p>
<p>See Fig. 5.</p>
<p>Suppose that at node <span class="math inline">\(\tau\)</span> we apply split <span class="math inline">\(s\)</span>, so that a proportion <span class="math inline">\(p_L\)</span> of the observations drops down to the left child-node <span class="math inline">\(\tau_L\)</span> and the remaining proportion <span class="math inline">\(p_R\)</span> drops down to the right child-node <span class="math inline">\(\tau_R\)</span>.</p>
<p>The goodness of split <span class="math inline">\(s\)</span> at node <span class="math inline">\(\tau\)</span> is given by the reduction in impurity gained by splitting the parent node <span class="math inline">\(\tau\)</span> into its child nodes, <span class="math inline">\(\tau_L\)</span> and <span class="math inline">\(\tau_R\)</span>,</p>
<p><span class="math display">\[
I(s,\tau)= i(\tau)-p_L i(\tau_L)-p_R i(\tau_R)
\]</span></p>
<p>The best split for the single variable <span class="math inline">\(X_j\)</span> is the one that has the largest value of <span class="math inline">\(I(s,\tau)\)</span> over all <span class="math inline">\(s\in\mathcal{S}_j\)</span>, the set of possible distinct splits for <span class="math inline">\(X_j\)</span>.</p>
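<p>Given the impurity functions above, the gain for one candidate split can be computed directly from the labels at the node (a sketch; <code>entropy</code> is the function defined earlier):</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"># y: class labels at node tau; left: TRUE where x_j &lt;= s
info_gain &lt;- function(y, left, i = entropy) {
  pL &lt;- mean(left); pR &lt;- 1 - pL
  i(prop.table(table(y))) -
    pL * i(prop.table(table(y[left]))) -
    pR * i(prop.table(table(y[!left])))
}</code></pre></div>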
</section>
</section>
<section id="example" class="level2">
<h2 class="anchored" data-anchor-id="example">Example</h2>
<p>Consider first the parent node <span class="math inline">\(\tau\)</span>. Estimate <span class="math inline">\(P(+1\vert \tau)\)</span> by <span class="math inline">\(n_{+1}/n_{++}\)</span> and <span class="math inline">\(P(-1\vert \tau)\)</span> by <span class="math inline">\(n_{+2}/n_{++}\)</span>; the estimated impurity function is then: <span class="math display">\[
i(\tau)=-\big(\frac{n_{+1}}{n_{++}}\big)\log\big(\frac{n_{+1}}{n_{++}}\big)-\big(\frac{n_{+2}}{n_{++}}\big)\log\big(\frac{n_{+2}}{n_{++}}\big)
\]</span> Note that <span class="math inline">\(i(\tau)\)</span> is completely independent of the type of proposed split.</p>
<p>Now consider the child nodes <span class="math inline">\(\tau_L\)</span> and <span class="math inline">\(\tau_R\)</span>. We estimate <span class="math inline">\(p_L=n_{1+}/n_{++}\)</span> and <span class="math inline">\(p_R=n_{2+}/n_{++}\)</span>.</p>
<p>For <span class="math inline">\(X_j\leq s\)</span>, we estimate <span class="math inline">\(P(\frac{+1}{\tau_L})=n_{11}/n_{1+}\)</span> and <span class="math inline">\(P(\frac{-1}{\tau_L})=n_{12}/n_{1+}\)</span> For the condition <span class="math inline">\(X_j> s\)</span>, we estimate <span class="math inline">\(P(\frac{+1}{\tau_R})=n_{21}/n_{2+}\)</span> and <span class="math inline">\(P(\frac{-1}{\tau_R})=n_{22}/n_{2+}\)</span>. We then compute: <span class="math display">\[
i(\tau_L)=-\big(\frac{n_{11}}{n_{1+}}\big)\log\big(\frac{n_{11}}{n_{1+}}\big)-\big(\frac{n_{12}}{n_{1+}}\big)\log\big(\frac{n_{12}}{n_{1+}}\big)
\]</span> <span class="math display">\[
i(\tau_R)=-\big(\frac{n_{21}}{n_{2+}}\big)\log\big(\frac{n_{21}}{n_{2+}}\big)-\big(\frac{n_{22}}{n_{2+}}\big)\log\big(\frac{n_{22}}{n_{2+}}\big)
\]</span> and then we can compute <span class="math inline">\(I(s,\tau)\)</span>.</p>
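<p>As a worked numerical check, take the hypothetical counts <span class="math inline">\(n_{11}=40\)</span>, <span class="math inline">\(n_{12}=10\)</span>, <span class="math inline">\(n_{21}=20\)</span>, <span class="math inline">\(n_{22}=30\)</span> and evaluate the formulas above in R (reusing the <code>entropy</code> function from before):</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"># Rows: tau_L and tau_R; columns: the two classes
n &lt;- matrix(c(40, 10,
              20, 30), nrow = 2, byrow = TRUE)
pL &lt;- sum(n[1, ]) / sum(n)                 # 0.5
pR &lt;- 1 - pL                               # 0.5
i_parent &lt;- entropy(colSums(n) / sum(n))   # classes split 60/40 overall
i_L &lt;- entropy(n[1, ] / sum(n[1, ]))
i_R &lt;- entropy(n[2, ] / sum(n[2, ]))
i_parent - pL * i_L - pR * i_R             # I(s, tau) = 0.086</code></pre></div>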
</section>
<section id="recursive-partitioning" class="level2">
<h2 class="anchored" data-anchor-id="recursive-partitioning">Recursive Partitioning</h2>
</section>
<section id="stop-spliting" class="level2">
<h2 class="anchored" data-anchor-id="stop-spliting">Stop spliting</h2>
</section>
<section id="plurality-rule" class="level2">
<h2 class="anchored" data-anchor-id="plurality-rule">Plurality rule</h2>
<p>See Fig. 6.</p>
<p>How do we associate a class with a terminal node?</p>
<p>Suppose at terminal node <span class="math inline">\(\tau\)</span> there are <span class="math inline">\(n(\tau)\)</span> observations, of which <span class="math inline">\(n_k(\tau)\)</span> are from class <span class="math inline">\(C_k\)</span>, <span class="math inline">\(k=1,...,K\)</span>. Then, the class which corresponds to the largest of the <span class="math inline">\(\{n_1(\tau),...,n_K(\tau)\}\)</span> is assigned to <span class="math inline">\(\tau\)</span>.</p>
<p>This is called the plurality rule. This rule can be derived from the Bayes rule classifier, where we assign node <span class="math inline">\(\tau\)</span> to class <span class="math inline">\(C_i\)</span> if <span class="math inline">\(p(i|\tau) = \max_k p(k|\tau)\)</span>.</p>
<p><img src="images/rf1.jpg" class="img-fluid" style="width:75.0%"></p>
</section>
<section id="choosing-the-best-split-in-regression-trees" class="level2">
<h2 class="anchored" data-anchor-id="choosing-the-best-split-in-regression-trees">Choosing the best split in Regression Trees</h2>
<p>We now discuss the process of building a regression tree. Roughly speaking, there are two steps:</p>
<ol type="1">
<li>We divide the predictor space, that is, the set of possible values for <span class="math inline">\(X_1,...,X_p\)</span>, into <span class="math inline">\(J\)</span> distinct and non-overlapping regions, <span class="math inline">\(R_1,...,R_J\)</span>.</li>
<li>For every observation that falls into region <span class="math inline">\(R_j\)</span>, we make the same prediction, which is simply the mean of the response values of the training observations in <span class="math inline">\(R_j\)</span>.</li>
</ol>
<p>For instance, suppose that in Step 1 we obtain two regions, <span class="math inline">\(R_1\)</span> and <span class="math inline">\(R_2\)</span>, and that the response mean of the training observations in the first region is 10, while the response mean of the training observations in the second region is 20. Then for a given observation <span class="math inline">\(X=x\)</span>, if <span class="math inline">\(x\in R_1\)</span> we will predict a value of 10, and if <span class="math inline">\(x\in R_2\)</span> we will predict a value of 20.</p>
<p>We now elaborate on Step 1 above. How do we construct the regions <span class="math inline">\(R_1,...,R_J\)</span>? In theory, the regions could have any shape. However, we choose to divide the predictor space into high-dimensional rectangles, or boxes, for simplicity and for ease of interpretation of the resulting predictive model. The goal is to find boxes <span class="math inline">\(R_1,...,R_J\)</span> that minimize the RSS (Residual Sum of Squares), given by <span class="math display">\[
\sum_{j=1}^J\sum_{i\in R_j}(y_i-\hat{y}_{R_j})^2
\]</span></p>
<p>where <span class="math inline">\(\hat{y}_{R_j}\)</span> is the mean response for the training observations within the j-th box. Unfortunately, it is computationally infeasible to consider every possible partition of the feature space into <span class="math inline">\(J\)</span> boxes. For this reason, we take a top-down, greedy approach that is known as recursive binary splitting. The approach is top-down because it begins at the top of the tree (at which point all observations belong to a single region) and then successively splits the predictor space; each split is indicated via two new branches further down on the tree. It is greedy because at each step of the tree-building process, the best split is made at that particular step, rather than looking ahead and picking a split that will lead to a better tree in some future step.</p>
<p>In order to perform recursive binary splitting, we first select the predictor <span class="math inline">\(X_j\)</span> and the cutpoint <span class="math inline">\(s\)</span> such that splitting the predictor space into the regions <span class="math inline">\(\{X|X_j \leq s\}\)</span> and <span class="math inline">\(\{X|X_j > s\}\)</span> leads to the greatest possible reduction in RSS. (The notation <span class="math inline">\(\{X|X_j \leq s\}\)</span> means the region of predictor space in which <span class="math inline">\(X_j\)</span> takes on a value less than <span class="math inline">\(s\)</span>). That is, we consider all predictors <span class="math inline">\(X_1,...,X_p\)</span>, and all possible values of the cutpoint <span class="math inline">\(s\)</span> for each of the predictors, and then choose the predictor and cutpoint such that the resulting tree has the lowest RSS. In greater detail, for any <span class="math inline">\(j\)</span> and <span class="math inline">\(s\)</span>, we define the pair of half-planes <span class="math display">\[
R_1(j,s)=\{X|X_j \leq s\},\qquad R_2(j,s)=\{X|X_j > s\}
\]</span> and we seek the value of <span class="math inline">\(j\)</span> and <span class="math inline">\(s\)</span> that minimize the equation <span class="math display">\[\begin{equation}\label{tree1}
\sum_{i:x_i\in R_1}(y_i-\hat{y}_{R_1})^2+\sum_{i:x_i\in R_2}(y_i-\hat{y}_{R_2})^2
\end{equation}\]</span></p>
<p>where <span class="math inline">\(\hat{y}_{R_1}\)</span> is the mean response for the training observations in <span class="math inline">\(R_1(j,s)\)</span>, and <span class="math inline">\(\hat{y}_{R_2}\)</span> is the mean response for the training observations in <span class="math inline">\(R_2(j,s)\)</span>. Finding the values of <span class="math inline">\(j\)</span> and <span class="math inline">\(s\)</span> that minimize (<span class="math inline">\(\ref{tree1}\)</span>) can be done quite quickly, especially when the number of features <span class="math inline">\(p\)</span> is not too large.</p>
<p>Next, we repeat the process, looking for the best predictor and best cutpoint in order to split the data further so as to minimize the RSS within each of the resulting regions. However, this time, instead of splitting the entire predictor space, we split one of the two previously identified regions. We now have three regions. Again, we look to split one of these three regions further, so as to minimize the RSS. The process continues until a stopping criterion is reached; for instance, we may continue until no region contains more than five observations.</p>
<p>Once the regions <span class="math inline">\(R_1,...,R_J\)</span> have been created, we predict the response for a given test observation using the mean of the training observations in the region to which that test observation belongs.</p>
</section>
<section id="estimating-the-misclassification-rate" class="level2">
<h2 class="anchored" data-anchor-id="estimating-the-misclassification-rate">Estimating the misclassification rate</h2>
<p>Let <span class="math inline">\(T\)</span> be the tree classifier and let <span class="math inline">\(\tilde{T}=\{\tau_1,\tau_2,...,\tau_L\}\)</span> denote the set of all terminal nodes of <span class="math inline">\(T\)</span>. We can now estimate the true misclassification rate, <span class="math display">\[\begin{equation}\label{e1}
R(T)=\sum_{l=1}^LR(\tau_l)P(\tau_l),
\end{equation}\]</span> for <span class="math inline">\(T\)</span>, where <span class="math inline">\(P(\tau)\)</span> is the probability that an observation falls into node <span class="math inline">\(\tau\)</span> and <span class="math inline">\(R(\tau)\)</span> is the within-node misclassification rate of an observation in node <span class="math inline">\(\tau\)</span>.</p>
<p>If we estimate <span class="math inline">\(R(\tau)\)</span> by <span class="math display">\[
r(\tau)=1-\max_kp(k|\tau)
\]</span> and we estimate <span class="math inline">\(P(\tau_l)\)</span> by the proportion <span class="math inline">\(p(\tau_l)\)</span> of all observations that fall into node <span class="math inline">\(\tau_l\)</span>, then, the resubstitution estimate of <span class="math inline">\(R(T)\)</span> is</p>
<p><span class="math display">\[
\hat{R}(T)=\sum_{l=1}^Lr(\tau_l)p(\tau_l).
\]</span></p>
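<p>A direct translation into R (a sketch with hypothetical per-leaf class counts <span class="math inline">\(n_k(\tau_l)\)</span>):</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"># counts[[l]] holds the class counts at terminal node tau_l
counts &lt;- list(c(30, 5), c(4, 20), c(10, 11))
n_total &lt;- sum(unlist(counts))
Rhat &lt;- sum(sapply(counts, function(nk) {
  p_tau &lt;- sum(nk) / n_total       # p(tau_l)
  r_tau &lt;- 1 - max(nk) / sum(nk)   # r(tau_l) = 1 - max_k p(k | tau_l)
  r_tau * p_tau
}))
Rhat   # resubstitution estimate of R(T)</code></pre></div>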
<p>The resubstitution estimate <span class="math inline">\(\hat{R}(T)\)</span>, however, leaves much to be desired as an estimate of <span class="math inline">\(R(T)\)</span>.</p>
<p>First, bigger trees (i.e., more splitting) have smaller values of <span class="math inline">\(\hat{R}(T)\)</span>; that is, <span class="math inline">\(\hat{R}(T')\leq \hat{R}(T)\)</span>, where <span class="math inline">\(T'\)</span> is formed by splitting a terminal node of <span class="math inline">\(T\)</span>. For example, if a tree is allowed to grow until every terminal node contains only a single observation, then each node is classified by the class of its single observation and <span class="math inline">\(\hat{R}(T)=0\)</span>.</p>
<p>Second, using only the resubstitution estimate tends to generate trees that are too big for the given data.</p>
<p>Third, the resubstitution estimate <span class="math inline">\(\hat{R}(T)\)</span> is a much-too-optimistic estimate of <span class="math inline">\(R(T)\)</span>. More realistic estimates of <span class="math inline">\(R(T)\)</span> are given below.</p>
</section>
<section id="tree-pruning" class="level2">
<h2 class="anchored" data-anchor-id="tree-pruning">Tree Pruning</h2>
<p>Since decision trees have a very high tendency to over-fit the data, a smaller tree with fewer splits may generalize better: lower variance (estimation error) at the cost of a little bias (approximation error).</p>
<p>One possible alternative to the process described above is to build the tree only so long as the decrease in the node impurity measure due to each split exceeds some (high) threshold. However, owing to the greedy nature of the splitting algorithm, this strategy is too short-sighted: a seemingly worthless split early on in the tree might be followed by a very good split, i.e., one that leads to a large reduction in impurity later on.</p>
<p>Therefore, a better strategy is to grow a very large tree <span class="math inline">\(T_0\)</span>, and then prune it back in order to obtain a subtree.</p>
<section id="cost-complexity-pruning" class="level3">
<h3 class="anchored" data-anchor-id="cost-complexity-pruning">Cost complexity pruning</h3>
<p>A sequence of trees indexed by a nonnegative tuning parameter <span class="math inline">\(\alpha\)</span> is considered. For each value of <span class="math inline">\(\alpha\)</span> there corresponds a subtree <span class="math inline">\(T\subset T_0\)</span> such that the penalized misclassification rate <span class="math display">\[
R_\alpha(T)=R(T)+\alpha|T|
\]</span> is as small as possible. Here <span class="math inline">\(|T|\)</span> indicates the number of terminal nodes of the subtree <span class="math inline">\(T\)</span>, Think of <span class="math inline">\(\alpha|T|\)</span> as a penalty term for tree size, so that <span class="math inline">\(R_\alpha(T)\)</span> penalizes <span class="math inline">\(R(T)\)</span> (<span class="math inline">\(\ref{e1}\)</span>) for generating too large a tree. For each <span class="math inline">\(\alpha\)</span>, we then choose that subtree <span class="math inline">\(T(\alpha)\)</span> of <span class="math inline">\(T_0\)</span> that minimizes <span class="math inline">\(R_\alpha(T)\)</span>.</p>
<p>The tuning parameter <span class="math inline">\(\alpha\)</span> controls a trade-off between the subtree’s complexity and its fit to the training data. When <span class="math inline">\(\alpha=0\)</span>, then the subtree <span class="math inline">\(T\)</span> will simply equal <span class="math inline">\(T_0\)</span>. As <span class="math inline">\(\alpha\)</span> increases, there is a price to pay for having a tree with many terminal nodes, and so the above equation will tend to be minimized for a smaller subtree.</p>
<p>Breiman et al. (1984) showed that for every <span class="math inline">\(\alpha\)</span>, there exists a unique smallest minimizing subtree.</p>
<p>Depending on the cost of each additional leaf (i.e., the <span class="math inline">\(\alpha\)</span> value), different sub-trees of <span class="math inline">\(T_0\)</span> minimize the error-complexity measure. Breiman and his colleagues proved that, although <span class="math inline">\(\alpha\)</span> can run through a continuum of values, there is a sequence of pruned trees such that each element is optimal for a range of <span class="math inline">\(\alpha\)</span>, and so there is only a finite number of interesting <span class="math inline">\(\alpha\)</span> values:</p>
<p><span class="math display">\[
0 = \alpha_0 < \alpha_1 < \alpha_2 < \alpha_3 < \cdots < \alpha_M,
\]</span></p>
<p>Furthermore, they developed an algorithm that generates the corresponding nested family of pruned trees, each a subtree of the previous one,</p>
<p><span class="math display">\[
T_0 \succ T_1 \succ T_2 \succ T_3 \succ \cdots \succ T_M,
\]</span></p>
<p>such that each <span class="math inline">\(T_i\)</span> in the sequence is characterised by adifferent value <span class="math inline">\(\alpha_i\)</span>. They proved that each tree <span class="math inline">\(T_i\)</span> in this sequence is optimal from the error-complexity perspective within the interval <span class="math inline">\([\alpha_i,\alpha_{i+1})\)</span>.</p>
<p>So far, we have constructed a finite sequence of decreasing-size subtrees <span class="math inline">\(T_1,T_2,T_3,\dots,T_M\)</span> by pruning more and more nodes from <span class="math inline">\(T_0\)</span>. When do we stop pruning? Which subtree of the sequence do we choose as the “best” pruned subtree? The choice of the best subtree depends upon having a good estimate of the misclassification rate <span class="math inline">\(R(T_k)\)</span> corresponding to the subtree <span class="math inline">\(T_k\)</span>. Breiman et al. (1984) offered two estimation methods: use an independent test sample or use cross-validation. When the data set is very large, use of an independent test set is straightforward and computationally efficient, and is, generally, the preferred estimation method. For smaller data sets, cross-validation is preferred.</p>
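<p>In R, this whole pipeline — grow a large <span class="math inline">\(T_0\)</span>, compute the pruning sequence, and pick a subtree by cross-validation — is available through <code>rpart</code>, whose complexity parameter <code>cp</code> plays the role of a rescaled <span class="math inline">\(\alpha\)</span>. A sketch:</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">library(rpart)

# Grow a deliberately large tree T0 (cp = 0 disables the complexity stop)
fit &lt;- rpart(Species ~ ., data = iris, method = "class",
             control = rpart.control(cp = 0, minsplit = 2))

printcp(fit)   # cp table: one row per subtree in the pruning sequence,
               # with cross-validated error (xerror) for each

# Prune back to the subtree with the smallest cross-validated error
best_cp &lt;- fit$cptable[which.min(fit$cptable[, "xerror"]), "CP"]
pruned  &lt;- prune(fit, cp = best_cp)</code></pre></div>
<p>(The common “one standard error” variant instead picks the smallest subtree whose <code>xerror</code> is within one <code>xstd</code> of the minimum.)</p>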
</section>
</section>
<section id="advantages-and-disadvantages-of-trees" class="level2">
<h2 class="anchored" data-anchor-id="advantages-and-disadvantages-of-trees">Advantages and disadvantages of trees</h2>
<ol type="1">
<li>Trees are very easy to explain to people. In fact, they are even easier to explain than linear regression!</li>
<li>Some people believe that decision trees more closely mirror human decision-making than do the regression and classification approaches.</li>
<li>Trees can be displayed graphically, and are easily interpreted even by a non-expert (especially if they are small).</li>
<li>Trees can easily handle qualitative predictors without the need to create dummy variables.</li>
<li>Unfortunately, trees generally do not have the same level of predictive accuracy as some of the other regression and classification approaches.</li>
</ol>
<p>However, by aggregating many decision trees, using methods like bagging, random forests, and boosting, the predictive performance of trees can be substantially improved. We introduce these concepts next.</p>
</section>
</section>
<section id="bibliography" class="level1">
<h1>Bibliography</h1>
<p>L. Breiman, J.H. Friedman, R.A. Olshen and C.J. Stone. Classification and Regression Trees. Wadsworth, 1984.</p>
<p>A. Criminisi, J. Shotton and E. Konukoglu. Decision Forests for Classification, Regression, Density Estimation, Manifold Learning and Semi-Supervised Learning. Microsoft Research Technical Report TR-2011-114, 2011.</p>
<p>T. Hastie, R. Tibshirani and J. Friedman. The Elements of Statistical Learning: Data Mining, Inference, and Prediction. Second Edition. Springer, 2009.</p>
</section>
</main>
<!-- /main column -->
<script id="quarto-html-after-body" type="application/javascript">
window.document.addEventListener("DOMContentLoaded", function (event) {
const toggleBodyColorMode = (bsSheetEl) => {
const mode = bsSheetEl.getAttribute("data-mode");
const bodyEl = window.document.querySelector("body");
if (mode === "dark") {
bodyEl.classList.add("quarto-dark");
bodyEl.classList.remove("quarto-light");
} else {
bodyEl.classList.add("quarto-light");
bodyEl.classList.remove("quarto-dark");
}
}
const toggleBodyColorPrimary = () => {
const bsSheetEl = window.document.querySelector("link#quarto-bootstrap");
if (bsSheetEl) {
toggleBodyColorMode(bsSheetEl);
}
}
toggleBodyColorPrimary();
const icon = "";
const anchorJS = new window.AnchorJS();
anchorJS.options = {
placement: 'right',
icon: icon
};
anchorJS.add('.anchored');
const isCodeAnnotation = (el) => {
for (const clz of el.classList) {
if (clz.startsWith('code-annotation-')) {
return true;
}
}
return false;
}
const clipboard = new window.ClipboardJS('.code-copy-button', {
text: function(trigger) {
const codeEl = trigger.previousElementSibling.cloneNode(true);
for (const childEl of codeEl.children) {
if (isCodeAnnotation(childEl)) {
childEl.remove();
}
}
return codeEl.innerText;
}
});
clipboard.on('success', function(e) {
// button target
const button = e.trigger;
// don't keep focus
button.blur();
// flash "checked"
button.classList.add('code-copy-button-checked');
var currentTitle = button.getAttribute("title");
button.setAttribute("title", "Copied!");
let tooltip;
if (window.bootstrap) {
button.setAttribute("data-bs-toggle", "tooltip");
button.setAttribute("data-bs-placement", "left");
button.setAttribute("data-bs-title", "Copied!");
tooltip = new bootstrap.Tooltip(button,
{ trigger: "manual",
customClass: "code-copy-button-tooltip",
offset: [0, -8]});
tooltip.show();
}
setTimeout(function() {
if (tooltip) {
tooltip.hide();
button.removeAttribute("data-bs-title");
button.removeAttribute("data-bs-toggle");
button.removeAttribute("data-bs-placement");
}
button.setAttribute("title", currentTitle);
button.classList.remove('code-copy-button-checked');
}, 1000);
// clear code selection
e.clearSelection();
});
function tippyHover(el, contentFn, onTriggerFn, onUntriggerFn) {
const config = {
allowHTML: true,
maxWidth: 500,
delay: 100,
arrow: false,
appendTo: function(el) {
return el.parentElement;
},
interactive: true,
interactiveBorder: 10,
theme: 'quarto',
placement: 'bottom-start',
};
if (contentFn) {
config.content = contentFn;
}
if (onTriggerFn) {
config.onTrigger = onTriggerFn;
}
if (onUntriggerFn) {
config.onUntrigger = onUntriggerFn;
}
window.tippy(el, config);
}
const noterefs = window.document.querySelectorAll('a[role="doc-noteref"]');
for (var i=0; i<noterefs.length; i++) {
const ref = noterefs[i];
tippyHover(ref, function() {
// use id or data attribute instead here
let href = ref.getAttribute('data-footnote-href') || ref.getAttribute('href');
try { href = new URL(href).hash; } catch {}
const id = href.replace(/^#\/?/, "");
const note = window.document.getElementById(id);
return note.innerHTML;
});
}
const xrefs = window.document.querySelectorAll('a.quarto-xref');
const processXRef = (id, note) => {
// Strip column container classes
const stripColumnClz = (el) => {
el.classList.remove("page-full", "page-columns");
if (el.children) {
for (const child of el.children) {
stripColumnClz(child);
}
}
}
stripColumnClz(note)
if (id === null || id.startsWith('sec-')) {
// Special case sections, only their first couple elements
const container = document.createElement("div");
if (note.children && note.children.length > 2) {
container.appendChild(note.children[0].cloneNode(true));
for (let i = 1; i < note.children.length; i++) {
const child = note.children[i];
if (child.tagName === "P" && child.innerText === "") {
continue;
} else {
container.appendChild(child.cloneNode(true));
break;
}
}
if (window.Quarto?.typesetMath) {
window.Quarto.typesetMath(container);
}
return container.innerHTML
} else {
if (window.Quarto?.typesetMath) {
window.Quarto.typesetMath(note);
}
return note.innerHTML;
}
} else {
// Remove any anchor links if they are present
const anchorLink = note.querySelector('a.anchorjs-link');
if (anchorLink) {
anchorLink.remove();
}
if (window.Quarto?.typesetMath) {
window.Quarto.typesetMath(note);
}
// TODO in 1.5, we should make sure this works without a callout special case
if (note.classList.contains("callout")) {
return note.outerHTML;
} else {
return note.innerHTML;
}
}
}
for (var i=0; i<xrefs.length; i++) {
const xref = xrefs[i];
tippyHover(xref, undefined, function(instance) {
instance.disable();
let url = xref.getAttribute('href');
let hash = undefined;
if (url.startsWith('#')) {
hash = url;
} else {
try { hash = new URL(url).hash; } catch {}
}
if (hash) {
const id = hash.replace(/^#\/?/, "");
const note = window.document.getElementById(id);
if (note !== null) {
try {
const html = processXRef(id, note.cloneNode(true));
instance.setContent(html);
} finally {
instance.enable();
instance.show();
}
} else {
// See if we can fetch this
fetch(url.split('#')[0])
.then(res => res.text())
.then(html => {
const parser = new DOMParser();
const htmlDoc = parser.parseFromString(html, "text/html");
const note = htmlDoc.getElementById(id);
if (note !== null) {
const html = processXRef(id, note);
instance.setContent(html);
}
}).finally(() => {
instance.enable();
instance.show();
});
}
} else {
// See if we can fetch a full url (with no hash to target)
// This is a special case and we should probably do some content thinning / targeting
fetch(url)
.then(res => res.text())
.then(html => {
const parser = new DOMParser();
const htmlDoc = parser.parseFromString(html, "text/html");
const note = htmlDoc.querySelector('main.content');
if (note !== null) {
// This should only happen for chapter cross references
// (since there is no id in the URL)
// remove the first header
if (note.children.length > 0 && note.children[0].tagName === "HEADER") {
note.children[0].remove();
}
const html = processXRef(null, note);
instance.setContent(html);
}
}).finally(() => {
instance.enable();
instance.show();
});
}
}, function(instance) {
});
}
let selectedAnnoteEl;
const selectorForAnnotation = ( cell, annotation) => {
let cellAttr = 'data-code-cell="' + cell + '"';
let lineAttr = 'data-code-annotation="' + annotation + '"';
const selector = 'span[' + cellAttr + '][' + lineAttr + ']';
return selector;
}
const selectCodeLines = (annoteEl) => {
const doc = window.document;
const targetCell = annoteEl.getAttribute("data-target-cell");
const targetAnnotation = annoteEl.getAttribute("data-target-annotation");
const annoteSpan = window.document.querySelector(selectorForAnnotation(targetCell, targetAnnotation));
const lines = annoteSpan.getAttribute("data-code-lines").split(",");
const lineIds = lines.map((line) => {
return targetCell + "-" + line;
})
let top = null;
let height = null;
let parent = null;
if (lineIds.length > 0) {
//compute the position of the single el (top and bottom and make a div)
const el = window.document.getElementById(lineIds[0]);
top = el.offsetTop;
height = el.offsetHeight;
parent = el.parentElement.parentElement;
if (lineIds.length > 1) {
const lastEl = window.document.getElementById(lineIds[lineIds.length - 1]);
const bottom = lastEl.offsetTop + lastEl.offsetHeight;
height = bottom - top;
}
if (top !== null && height !== null && parent !== null) {
// cook up a div (if necessary) and position it
let div = window.document.getElementById("code-annotation-line-highlight");
if (div === null) {
div = window.document.createElement("div");
div.setAttribute("id", "code-annotation-line-highlight");
div.style.position = 'absolute';
parent.appendChild(div);
}
div.style.top = top - 2 + "px";
div.style.height = height + 4 + "px";
div.style.left = 0;
let gutterDiv = window.document.getElementById("code-annotation-line-highlight-gutter");
if (gutterDiv === null) {
gutterDiv = window.document.createElement("div");
gutterDiv.setAttribute("id", "code-annotation-line-highlight-gutter");
gutterDiv.style.position = 'absolute';
const codeCell = window.document.getElementById(targetCell);
const gutter = codeCell.querySelector('.code-annotation-gutter');
gutter.appendChild(gutterDiv);
}
gutterDiv.style.top = top - 2 + "px";
gutterDiv.style.height = height + 4 + "px";
}
selectedAnnoteEl = annoteEl;
}
};
const unselectCodeLines = () => {
const elementsIds = ["code-annotation-line-highlight", "code-annotation-line-highlight-gutter"];
elementsIds.forEach((elId) => {
const div = window.document.getElementById(elId);
if (div) {
div.remove();
}
});
selectedAnnoteEl = undefined;
};
// Handle positioning of the toggle
window.addEventListener(
"resize",
throttle(() => {
elRect = undefined;
if (selectedAnnoteEl) {
selectCodeLines(selectedAnnoteEl);
}
}, 10)
);
function throttle(fn, ms) {
let throttle = false;
let timer;
return (...args) => {
if(!throttle) { // first call gets through
fn.apply(this, args);
throttle = true;
} else { // all the others get throttled
if(timer) clearTimeout(timer); // cancel #2
timer = setTimeout(() => {
fn.apply(this, args);
timer = throttle = false;
}, ms);
}
};
}
// Attach click handler to the DT
const annoteDls = window.document.querySelectorAll('dt[data-target-cell]');
for (const annoteDlNode of annoteDls) {
annoteDlNode.addEventListener('click', (event) => {
const clickedEl = event.target;
if (clickedEl !== selectedAnnoteEl) {
unselectCodeLines();
const activeEl = window.document.querySelector('dt[data-target-cell].code-annotation-active');
if (activeEl) {
activeEl.classList.remove('code-annotation-active');
}
selectCodeLines(clickedEl);
clickedEl.classList.add('code-annotation-active');
} else {
// Unselect the line
unselectCodeLines();
clickedEl.classList.remove('code-annotation-active');
}
});
}
const findCites = (el) => {
const parentEl = el.parentElement;
if (parentEl) {
const cites = parentEl.dataset.cites;
if (cites) {
return {
el,
cites: cites.split(' ')
};
} else {
return findCites(el.parentElement)
}
} else {
return undefined;
}
};
var bibliorefs = window.document.querySelectorAll('a[role="doc-biblioref"]');
for (var i=0; i<bibliorefs.length; i++) {
const ref = bibliorefs[i];
const citeInfo = findCites(ref);
if (citeInfo) {
tippyHover(citeInfo.el, function() {
var popup = window.document.createElement('div');
citeInfo.cites.forEach(function(cite) {
var citeDiv = window.document.createElement('div');
citeDiv.classList.add('hanging-indent');
citeDiv.classList.add('csl-entry');
var biblioDiv = window.document.getElementById('ref-' + cite);
if (biblioDiv) {
citeDiv.innerHTML = biblioDiv.innerHTML;
}
popup.appendChild(citeDiv);
});
return popup.innerHTML;
});
}
}
});
</script>
</div> <!-- /content -->
</body></html>