-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathch-050-vectors.html
940 lines (858 loc) · 44.2 KB
/
ch-050-vectors.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="generator" content="pandoc" />
<meta http-equiv="X-UA-Compatible" content="IE=EDGE" />
<title>Vectors and Data Types | Data Science I</title>
<script src="site_libs/header-attrs-2.11/header-attrs.js"></script>
<script src="site_libs/jquery-3.6.0/jquery-3.6.0.min.js"></script>
<meta name="viewport" content="width=device-width, initial-scale=1" />
<link href="site_libs/bootstrap-3.3.5/css/bootstrap.min.css" rel="stylesheet" />
<script src="site_libs/bootstrap-3.3.5/js/bootstrap.min.js"></script>
<script src="site_libs/bootstrap-3.3.5/shim/html5shiv.min.js"></script>
<script src="site_libs/bootstrap-3.3.5/shim/respond.min.js"></script>
<!-- Base typography: fixed pixel sizes for h1 to h6 (the h1.title variant is slightly larger), a faint tint behind inline code, and a white background for pre blocks that carry no class attribute. Presumably these override the Bootstrap defaults loaded above; confirm against bootstrap.min.css. -->
<style>h1 {font-size: 34px;}
h1.title {font-size: 38px;}
h2 {font-size: 30px;}
h3 {font-size: 24px;}
h4 {font-size: 18px;}
h5 {font-size: 16px;}
h6 {font-size: 12px;}
code {color: inherit; background-color: rgba(0, 0, 0, 0.04);}
pre:not([class]) { background-color: white }</style>
<script src="site_libs/navigation-1.1/tabsets.js"></script>
<link href="site_libs/highlightjs-9.12.0/default.css" rel="stylesheet" />
<script src="site_libs/highlightjs-9.12.0/highlight.js"></script>
<link href="site_libs/font-awesome-5.1.0/css/all.css" rel="stylesheet" />
<link href="site_libs/font-awesome-5.1.0/css/v4-shims.css" rel="stylesheet" />
<style type="text/css">
/* Helper classes for generated markup: small caps, underline, two-column
   layout (each column 50% wide, top-aligned), hanging indents, and
   bullet-free task lists. */
/* NOTE: this pre-wrap setting for code is superseded by the later
   "code{white-space: pre;}" rule below, which has the same specificity and
   comes later in source order. */
code{white-space: pre-wrap;}
span.smallcaps{font-variant: small-caps;}
span.underline{text-decoration: underline;}
div.column{display: inline-block; vertical-align: top; width: 50%;}
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
ul.task-list{list-style: none;}
</style>
<!-- Overrides the pre-wrap rule above: code keeps its whitespace and does not wrap. -->
<style type="text/css">code{white-space: pre;}</style>
<script type="text/javascript">
// Syntax-highlighting bootstrap. Does nothing unless highlight.js has already
// been loaded and exposed as window.hljs.
(function () {
  if (!window.hljs) {
    return;
  }

  // Configure with an empty language list, then register highlighting to run
  // when the page loads.
  // NOTE(review): the empty list presumably affects automatic language
  // detection; confirm against the highlight.js 9.12 documentation.
  hljs.configure({languages: []});
  hljs.initHighlightingOnLoad();

  // If the document had already finished loading before this script ran,
  // trigger highlighting explicitly on the next tick.
  var alreadyLoaded = document.readyState === "complete";
  if (alreadyLoaded) {
    window.setTimeout(function () {
      hljs.initHighlighting();
    }, 0);
  }
})();
</script>
<link rel="stylesheet" href="textbook.css" type="text/css" />
<style type = "text/css">
/* Page layout helpers. */
/* Cap the main content column at 940px and center it horizontally. */
.main-container {
max-width: 940px;
margin-left: auto;
margin-right: auto;
}
/* Never let an image grow wider than its container. */
img {
max-width:100%;
}
/* Space between a tab strip and the pane content below it. */
.tabbed-pane {
padding-top: 12px;
}
/* Space below embedded HTML widgets. */
.html-widget {
margin-bottom: 20px;
}
/* Removes the focus outline from code-folding buttons.
   NOTE(review): no replacement focus indicator is defined in this block;
   check keyboard accessibility. */
button.code-folding-btn:focus {
outline: none;
}
/* Render <summary> as a list item; presumably restores the disclosure marker
   after another stylesheet changed its display. TODO confirm. */
summary {
display: list-item;
}
/* Code nested inside <pre> gets no extra padding of its own. */
pre code {
padding: 0;
}
</style>
<style type="text/css">
/* Nested (second-level) dropdown menus for the navbar. */
/* Positioning context for the nested menu. */
.dropdown-submenu {
position: relative;
}
/* Place the nested menu at the parent's top edge, starting at its right edge,
   nudged up 6px and left 1px; the top-left corner is left square. */
.dropdown-submenu>.dropdown-menu {
top: 0;
left: 100%;
margin-top: -6px;
margin-left: -1px;
border-radius: 0 6px 6px 6px;
}
/* Reveal the nested menu while the parent item is hovered. */
.dropdown-submenu:hover>.dropdown-menu {
display: block;
}
/* Right-pointing arrow after the parent link: a zero-size box whose only
   colored border is the left one. */
.dropdown-submenu>a:after {
display: block;
content: " ";
float: right;
width: 0;
height: 0;
border-color: transparent;
border-style: solid;
border-width: 5px 0 5px 5px;
border-left-color: #cccccc;
margin-top: 5px;
margin-right: -10px;
}
/* Arrow color while the parent item is hovered. */
.dropdown-submenu:hover>a:after {
border-left-color: #adb5bd;
}
/* pull-left variant: cancel the float on the item itself... */
.dropdown-submenu.pull-left {
float: none;
}
/* ...and open the nested menu to the left of the parent instead, with the
   top-right corner left square. */
.dropdown-submenu.pull-left>.dropdown-menu {
left: -100%;
margin-left: 10px;
border-radius: 6px 0 6px 6px;
}
</style>
<script type="text/javascript">
// Navbar setup, run once the DOM is ready: mark the menu entry for the current
// page as active, then inject CSS that offsets the page for the fixed-top navbar.
$(document).ready(function () {
  // File name of the current page = last segment of the URL path.
  // `href` is declared locally: assigning to an undeclared name would create
  // an accidental global and throws under strict mode. slice() replaces the
  // deprecated substr(); with a single non-negative start index they agree.
  var pagePath = window.location.pathname;
  var href = pagePath.slice(pagePath.lastIndexOf('/') + 1);
  // A path ending in "/" leaves an empty name: treat it as the index page.
  if (href === "") {
    href = "index.html";
  }

  // Find the menu link pointing at this page and mark it active.
  var menuAnchor = $('a[href="' + href + '"]');
  menuAnchor.tab('show');
  // If the link sits inside a navbar dropdown, mark that dropdown active too.
  menuAnchor.closest('li.dropdown').addClass('active');

  // Navbar adjustments: height of the first .navbar plus 15px.
  var navHeight = $(".navbar").first().height() + 15;
  var style = document.createElement('style');
  var pt = "padding-top: " + navHeight + "px; ";
  var mt = "margin-top: -" + navHeight + "px; ";
  var css = "";
  // Give h1..h6 inside .section equal padding-top and negative margin-top so
  // that jumping to an anchor leaves the heading visible below the fixed
  // navbar without changing the visual spacing.
  for (var i = 1; i <= 6; i++) {
    css += ".section h" + i + "{ " + pt + mt + "}\n";
  }
  // Push the body content below the navbar as well, and add bottom padding.
  style.innerHTML = "body {" + pt + "padding-bottom: 40px; }\n" + css;
  document.head.appendChild(style);
});
</script>
<!-- tabsets -->
<style type="text/css">
.tabset-dropdown > .nav-tabs {
display: inline-table;
max-height: 500px;
min-height: 44px;
overflow-y: auto;
border: 1px solid #ddd;
border-radius: 4px;
}
.tabset-dropdown > .nav-tabs > li.active:before {
content: "";
font-family: 'Glyphicons Halflings';
display: inline-block;
padding: 10px;
border-right: 1px solid #ddd;
}
.tabset-dropdown > .nav-tabs.nav-tabs-open > li.active:before {
content: "";
border: none;
}
.tabset-dropdown > .nav-tabs.nav-tabs-open:before {
content: "";
font-family: 'Glyphicons Halflings';
display: inline-block;
padding: 10px;
border-right: 1px solid #ddd;
}
/* Visibility rules for dropdown-style tabsets. The final rule in this group
   hides every tab by default; the active-tab rule and the nav-tabs-open rule
   carry one more class in their selectors, so they win despite coming earlier
   in source order. */
/* The currently active tab is always shown. */
.tabset-dropdown > .nav-tabs > li.active {
display: block;
}
/* Tab links: no border, transparent background, rounded corners, in every
   listed state (normal, focus, hover). */
.tabset-dropdown > .nav-tabs > li > a,
.tabset-dropdown > .nav-tabs > li > a:focus,
.tabset-dropdown > .nav-tabs > li > a:hover {
border: none;
display: inline-block;
border-radius: 4px;
background-color: transparent;
}
/* When the list carries nav-tabs-open, show every tab stacked vertically. */
.tabset-dropdown > .nav-tabs.nav-tabs-open > li {
display: block;
float: none;
}
/* Default: tabs are hidden (see the overrides above). */
.tabset-dropdown > .nav-tabs > li {
display: none;
}
</style>
<!-- code folding -->
</head>
<body>
<div class="container-fluid main-container">
<div class="navbar navbar-inverse navbar-fixed-top" role="navigation">
<div class="container">
<div class="navbar-header">
<button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar" aria-label="Toggle navigation" aria-expanded="false" aria-controls="navbar">
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</button>
<a class="navbar-brand" href="index.html">DATA SCIENCE I</a>
</div>
<div id="navbar" class="navbar-collapse collapse">
<ul class="nav navbar-nav">
</ul>
<ul class="nav navbar-nav navbar-right">
<li>
<a href="https://ds4ps.org/dp4ss/" aria-label="DS4PS website">
<span class="fa fa-university fa-2x" aria-hidden="true"></span>
</a>
</li>
</ul>
</div><!--/.nav-collapse -->
</div><!--/.container -->
</div><!--/.navbar -->
<div id="header">
</div>
<div id="TOC">
<ul>
<li><a href="#vectors-and-data-types" id="toc-vectors-and-data-types"><span class="toc-section-number">1</span> VECTORS AND DATA TYPES</a>
<ul>
<li><a href="#key-concepts" id="toc-key-concepts"><span class="toc-section-number">1.1</span> Key Concepts</a></li>
</ul></li>
<li><a href="#vectors" id="toc-vectors"><span class="toc-section-number">2</span> Vectors</a></li>
<li><a href="#data-types" id="toc-data-types"><span class="toc-section-number">3</span> Data Types</a>
<ul>
<li><a href="#common-vectors-functions" id="toc-common-vectors-functions"><span class="toc-section-number">3.1</span> Common Vectors Functions</a></li>
<li><a href="#casting" id="toc-casting"><span class="toc-section-number">3.2</span> Casting</a></li>
<li><a href="#care-when-casting" id="toc-care-when-casting"><span class="toc-section-number">3.3</span> Care When Casting</a></li>
<li><a href="#numeric-vectors" id="toc-numeric-vectors"><span class="toc-section-number">3.4</span> Numeric Vectors</a></li>
<li><a href="#character-vectors" id="toc-character-vectors"><span class="toc-section-number">3.5</span> Character Vectors</a></li>
<li><a href="#factors" id="toc-factors"><span class="toc-section-number">3.6</span> Factors</a></li>
<li><a href="#logical-vectors" id="toc-logical-vectors"><span class="toc-section-number">3.7</span> Logical Vectors</a></li>
<li><a href="#generating-vectors" id="toc-generating-vectors"><span class="toc-section-number">3.8</span> Generating Vectors</a></li>
<li><a href="#recycling" id="toc-recycling"><span class="toc-section-number">3.9</span> Recycling</a></li>
<li><a href="#missing-values-nas" id="toc-missing-values-nas"><span class="toc-section-number">3.10</span> Missing Values: NA’s</a></li>
</ul></li>
</ul>
</div>
<!-- DataCamp Light loader for the data-datacamp-exercise blocks on this page; include it only once per page. -->
<script src="https://cdn.datacamp.com/datacamp-light-latest.min.js"></script>
<div id="vectors-and-data-types" class="section level1" number="1">
<h1><span class="header-section-number">1</span> VECTORS AND DATA TYPES</h1>
<p>Vectors are the building blocks of data programming in R, so they are extremely important concepts.</p>
<p>This section will cover basic principles of working with vectors in the R language, including the different types of vectors (data types or “classes”), and common functions used on vectors.</p>
<p><br><br></p>
<div class="figure"><span style="display:block;" id="fig:unnamed-chunk-1"></span>
<img src="figures/vectors.png" alt="Components of a Vector" width="40%" />
<p class="caption">
Figure 1.1: Components of a Vector
</p>
</div>
<p><br>
<br>
<br>
<br></p>
<div class="tip">
<p><strong>KEY CONCEPTS:</strong></p>
<p>In this chapter, we’ll learn about the four main types of vectors:</p>
<ul>
<li>numeric</li>
<li>character</li>
<li>factor</li>
<li>logical</li>
</ul>
<p><strong>TAKEAWAYS:</strong></p>
<ul>
<li>All data in R is an <strong>object</strong></li>
<li>Objects have <strong>classes</strong> that specify what type of object it is</li>
<li>Vectors can be numeric, character, factor or logical</li>
<li>Vectors are the building blocks of data frames - the columns of a dataset</li>
<li>They are created using constructors like the combine function <strong>c()</strong></li>
<li>You can change data types using <strong>casting</strong></li>
</ul>
<p><br>
<br></p>
</div>
<p><br>
<br></p>
<div id="key-concepts" class="section level2" number="1.1">
<h2><span class="header-section-number">1.1</span> Key Concepts</h2>
<div class="figure"><span style="display:block;" id="fig:unnamed-chunk-2"></span>
<img src="figures/vectors.png" alt="Components of a Vector" width="60%" />
<p class="caption">
Figure 1.2: Components of a Vector
</p>
</div>
<div class="figure"><span style="display:block;" id="fig:unnamed-chunk-3"></span>
<img src="figures/data_types.png" alt="Basic data types in R" width="836" />
<p class="caption">
Figure 1.3: Basic data types in R
</p>
</div>
</div>
</div>
<div id="vectors" class="section level1" number="2">
<h1><span class="header-section-number">2</span> Vectors</h1>
<p>Generally speaking a vector is a set of numbers, words, or other values stored sequentially:</p>
<ul>
<li>[ 1, 2, 3]</li>
<li>[ apple, orange, pear ]</li>
<li>[ TRUE, FALSE, FALSE ]</li>
</ul>
<p>In social sciences, a vector usually represents a variable in a dataset, often as a column in a spreadsheet.</p>
<p>We might manually build a dataset by entering data as follows:</p>
<pre class="r"><code>strength <- c( 167, 185, 119, 142 )
name <- c( "adam", "jamal", "linda", "sriti" )
sex <- factor( c( "male", "male", "female", "female" ) )
study.group <- c( "treatment", "control", "treatment", "control" )
is.treated <- study.group == "treatment"
dat <- data.frame( name, sex, study.group, is.treated, strength )</code></pre>
<table style="width:79%;">
<colgroup>
<col width="11%" />
<col width="12%" />
<col width="19%" />
<col width="18%" />
<col width="18%" />
</colgroup>
<thead>
<tr class="header">
<th align="center">name</th>
<th align="center">sex</th>
<th align="center">study.group</th>
<th align="center">is.treated</th>
<th align="center">strength</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td align="center">adam</td>
<td align="center">male</td>
<td align="center">treatment</td>
<td align="center">TRUE</td>
<td align="center">167</td>
</tr>
<tr class="even">
<td align="center">jamal</td>
<td align="center">male</td>
<td align="center">control</td>
<td align="center">FALSE</td>
<td align="center">185</td>
</tr>
<tr class="odd">
<td align="center">linda</td>
<td align="center">female</td>
<td align="center">treatment</td>
<td align="center">TRUE</td>
<td align="center">119</td>
</tr>
<tr class="even">
<td align="center">sriti</td>
<td align="center">female</td>
<td align="center">control</td>
<td align="center">FALSE</td>
<td align="center">142</td>
</tr>
</tbody>
</table>
<p>Here are the important things to pay attention to:</p>
<ul>
<li>Each vector was created with the combine <strong>c()</strong> function.</li>
<li>Numbers do not require quotation marks around elements.</li>
<li>Characters require quotation marks.</li>
<li>The <em>is.treated</em> vector represents membership in a group.</li>
</ul>
</div>
<div id="data-types" class="section level1" number="3">
<h1><span class="header-section-number">3</span> Data Types</h1>
<p>There are four primary vector types (“classes”) in R:</p>
<table>
<thead>
<tr class="header">
<th>Class</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td>numeric</td>
<td>Only numbers</td>
</tr>
<tr class="even">
<td>character</td>
<td>A vector of letters or words, always enclosed with quotes</td>
</tr>
<tr class="odd">
<td>factor</td>
<td>Categorical variables</td>
</tr>
<tr class="even">
<td>logical</td>
<td>TRUE or FALSE values</td>
</tr>
</tbody>
</table>
<p>Each type of vector serves different purposes:</p>
<ul>
<li><strong>numeric</strong>: keep track of quantitative measures, counts, or orders of things</li>
<li><strong>character</strong>: store non-numeric data, typically unstructured text</li>
<li><strong>factor</strong>: represent distinct and mutually-exclusive categories</li>
<li><strong>logical</strong>: designate cases that meet some criteria, usually group inclusion</li>
</ul>
<p>Each vector in R is stored as an <strong>object</strong>, a technical term in computer science that we will discuss more later. For now know that each object has a <strong>class</strong> that represents the data type. We can ask R for the data types using the <strong>class()</strong> function:</p>
<pre class="r"><code>class( name )</code></pre>
<pre><code>## [1] "character"</code></pre>
<pre class="r"><code>class( strength )</code></pre>
<pre><code>## [1] "numeric"</code></pre>
<pre class="r"><code>class( study.group )</code></pre>
<pre><code>## [1] "character"</code></pre>
<pre class="r"><code>class( is.treated )</code></pre>
<pre><code>## [1] "logical"</code></pre>
<pre class="r"><code>class( dat )</code></pre>
<pre><code>## [1] "data.frame"</code></pre>
<div id="common-vectors-functions" class="section level2" number="3.1">
<h2><span class="header-section-number">3.1</span> Common Vectors Functions</h2>
<p>You will spend a lot of time creating data vectors, transforming variables, generating subsets, cleaning data, and adding new observations. These are all accomplished through <strong>functions</strong> that act on vectors.</p>
<p>Here are some common vector functions:</p>
<div id="vector-length" class="section level3" number="3.1.1">
<h3><span class="header-section-number">3.1.1</span> vector length</h3>
<p>We often need to know how many elements belong to a vector, which we find with the <strong>length()</strong> function.</p>
<pre class="r"><code>length( strength )</code></pre>
<pre><code>## [1] 4</code></pre>
</div>
<div id="combine" class="section level3" number="3.1.2">
<h3><span class="header-section-number">3.1.2</span> combine</h3>
<p>To combine several elements into a single vector, or combine two vectors to form one, use the <strong>c()</strong> function.</p>
<pre class="r"><code>c( 1, 2, 3 ) # create a numeric vector</code></pre>
<pre><code>## [1] 1 2 3</code></pre>
<pre class="r"><code>c( "a", "b", "c" ) # create a character vector</code></pre>
<pre><code>## [1] "a" "b" "c"</code></pre>
<p>Combining two vectors:</p>
<pre class="r"><code>x <- 1:5
y <- 10:15
c( x, y )</code></pre>
<pre><code>## [1] 1 2 3 4 5 10 11 12 13 14 15</code></pre>
<p>Combining two vectors of different data types:</p>
<pre class="r"><code>x <- c( 1, 2,3 )
y <- c( "a", "b", "c" )
c( x, y )</code></pre>
<pre><code>## [1] "1" "2" "3" "a" "b" "c"</code></pre>
<div class="quiz">
<p>What happened to the numeric elements here?</p>
</div>
</div>
</div>
<div id="casting" class="section level2" number="3.2">
<h2><span class="header-section-number">3.2</span> Casting</h2>
<p>You can easily move from one data type to another by <strong>casting</strong> a specific type as another type:</p>
<pre class="r"><code># character casting
x <- 1:5
x</code></pre>
<pre><code>## [1] 1 2 3 4 5</code></pre>
<pre class="r"><code>as.character(x) # numbers stored as text</code></pre>
<pre><code>## [1] "1" "2" "3" "4" "5"</code></pre>
<p>The rules for casting vary by data type. Take logical vectors, for example. Re-casting them as character vectors produces an expected result. What about as a numeric vector?</p>
<pre class="r"><code>y <- c( TRUE, FALSE, TRUE, TRUE, FALSE )
y</code></pre>
<pre><code>## [1] TRUE FALSE TRUE TRUE FALSE</code></pre>
<pre class="r"><code>as.character( y )</code></pre>
<pre><code>## [1] "TRUE" "FALSE" "TRUE" "TRUE" "FALSE"</code></pre>
<pre class="r"><code>as.numeric( y )</code></pre>
<pre><code>## [1] 1 0 1 1 0</code></pre>
<p>If you are familiar with boolean logic or dummy variables in statistics, it actually makes sense that TRUE would be represented as <strong>1</strong> in numeric form, and FALSE as <strong>0</strong>.</p>
<p>But in some cases it might not make sense to cast one variable type as another and we can get unexpected or unwanted behavior.</p>
<pre class="r"><code>z <- c( "a", "b", "c" )
z</code></pre>
<pre><code>## [1] "a" "b" "c"</code></pre>
<pre class="r"><code>as.numeric( z )</code></pre>
<pre><code>## [1] NA NA NA</code></pre>
<div class="tip">
<p>The element <strong>NA</strong> is read as <strong>NOT AVAILABLE</strong> or <strong>NOT APPLICABLE</strong>, and is the value R uses to represent missing or deleted data.</p>
<p>NA’s are really important (and somewhat annoying). We will discuss missing values more in-depth later.</p>
</div>
</div>
<div id="care-when-casting" class="section level2" number="3.3">
<h2><span class="header-section-number">3.3</span> Care When Casting</h2>
<p>Casting will often be induced automatically when you try to combine different types of data. For example, when you add a character element to a numeric vector, the whole vector will be cast as a character vector.</p>
<pre class="r"><code>x1 <- 1:5
x1</code></pre>
<pre><code>## [1] 1 2 3 4 5</code></pre>
<pre class="r"><code>x1 <- c( x1, "a" ) # a vector can only have one data type
x1 # all numbers silently recast as characters</code></pre>
<pre><code>## [1] "1" "2" "3" "4" "5" "a"</code></pre>
<p>If you consider the example above, when a numeric and character vector are combined all elements are re-cast as strings because numbers can be represented as characters but not vice-versa.</p>
<p>R tries to select a reasonable default type, but sometimes casting will create some strange and unexpected behaviors. Consider some of these examples.</p>
<div data-datacamp-exercise="" data-height="300" data-encoded="true">
eyJsYW5ndWFnZSI6InIiLCJzYW1wbGUiOiJ4MSA8LSBjKDEsMiwzKSAgICAgICAgICAgICAgICAgICAgIyBudW1lcmljXG54MiA8LSBjKFwiYVwiLFwiYlwiLFwiY1wiKSAgICAgICAgICAgICAgIyBjaGFyYWN0ZXJcbngzIDwtIGMoVFJVRSxGQUxTRSxUUlVFKSAgICAgICAgICAjIGxvZ2ljYWxcbng0IDwtIGZhY3RvciggYyhcImFcIixcImJcIixcImNcIikgKSAgICAjIGZhY3RvclxuIyBjb21iaW5lIGEgbnVtZXJpYyBhbmQgbG9naWNhbCB2ZWN0b3JcbmNhc2UxIDwtIGMoIHgxLCB4MyApXG4jIGNvbWJpbmUgYSBjaGFyYWN0ZXIgYW5kIGxvZ2ljYWwgdmVjdG9yXG5jYXNlMiA8LSBjKCB4MiwgeDMgKVxuIyBjb21iaW5lIGEgbnVtZXJpYyBhbmQgZmFjdG9yIHZlY3RvclxuY2FzZTMgPC0gYyggeDEsIHg0IClcbiMgY29tYmluZSBhIGNoYXJhY3RlciBhbmQgZmFjdG9yIHZlY3RvclxuY2FzZTQgPC0gYyggeDIsIHg0ICkifQ==
</div>
<div class="question">
<p>Which data type will each step produce? Type the case# to see the results.</p>
</div>
<p><br></p>
<p>The answers to <em>case1</em> and <em>case2</em> are somewhat intuitive.</p>
<pre class="r"><code>case1 # combine a numeric and logical vector</code></pre>
<pre><code>## [1] 1 2 3 1 0 1</code></pre>
<p>Recall that TRUE and FALSE are often represented as 1 and 0 in datasets, so they can be recast as numeric elements. The numbers 2 and 3 have no meaning in a logical vector, so we can’t cast a numeric vector as a logical vector. This will default to numeric because we do not lose any information - the ones and zeros can always be re-cast back to logical vectors later if necessary.</p>
<pre class="r"><code>case2 # combine a character and logical vector</code></pre>
<pre><code>## [1] "a" "b" "c" "TRUE" "FALSE" "TRUE"</code></pre>
<p>Similarly characters have no meaning in the logical format, so we would have to replace them with NA’s if we converted the character vector to a logical vector.</p>
<pre class="r"><code>as.logical( x2 )</code></pre>
<pre><code>## [1] NA NA NA</code></pre>
<p>So converting the logical vector to characters allows us to retain all of the information in both vectors.</p>
<p><em>case3</em> and <em>case4</em> are a little more nuanced. See the section on factors below to make sense of them.</p>
<pre class="r"><code>case3 # combine a numeric and factor vector</code></pre>
<pre><code>## [1] 1 2 3 1 2 3</code></pre>
<pre class="r"><code>case4 # combine a character and factor vector</code></pre>
<pre><code>## [1] "a" "b" "c" "1" "2" "3"</code></pre>
<div class="tip">
<p>TIP: When you read data in from outside sources, R will sometimes try to guess the data types and store numeric or character vectors as factors. To avoid corrupting your data see the section below on factors for special instructions on re-casting factors as numeric vectors.</p>
</div>
</div>
<div id="numeric-vectors" class="section level2" number="3.4">
<h2><span class="header-section-number">3.4</span> Numeric Vectors</h2>
<p>There are some specific things to note about each vector type.</p>
<p>Math operators will only work on numeric vectors.</p>
<div data-datacamp-exercise="" data-height="300" data-encoded="true">
eyJsYW5ndWFnZSI6InIiLCJzYW1wbGUiOiJ4MSA8LSBjKDEsMiwzKSAgICAgICAgICAgICAgICAgICAgIyBudW1lcmljXG54MiA8LSBjKFwiYVwiLFwiYlwiLFwiY1wiKSAgICAgICAgICAgICAgIyBjaGFyYWN0ZXJcbngzIDwtIGMoVFJVRSxGQUxTRSxUUlVFKSAgICAgICAgICAjIGxvZ2ljYWxcbng0IDwtIGZhY3RvciggYyhcImFcIixcImJcIixcImNcIikgKSAgICAjIGZhY3RvclxuXG5cbiMgYWRkIGFsbCBlbGVtZW50cyBpbiB0aGUgdmVjdG9yOlxuc3VtKCB4MSApXG4jIHRoZSBzdW1tYXJ5KCkgZnVuY3Rpb24gcmV0dXJucyBzdW1tYXJ5IHN0YXRzXG5zdW1tYXJ5KCB4MSApXG5cbnN1bSggeDIgKSJ9
</div>
<div class="question">
<p>Note that if we try to run this mathematical function we get an error:</p>
</div>
<p><br></p>
<p>Many functions in R are sensitive to the data type of vectors. Mathematical functions, for example, do not make sense when applied to text (character vectors). In many cases R will give an error.</p>
<p>In some cases R will silently re-cast the variable, then perform the operation. Be watchful for when silent re-casting occurs because it might have unwanted side effects, such as deleting data or re-coding group levels in the wrong way.</p>
<div id="integers" class="section level3" number="3.4.1">
<h3><span class="header-section-number">3.4.1</span> Integers</h3>
<p>Integers are simple numeric vectors. The integer class is used to save memory since integers require less RAM space than numbers that contain decimal points (you need to allocate space for the numbers to the left and the numbers to the right of the decimal). Google “computer memory allocation” if you are interested in the specifics.</p>
<p>If you are doing advanced programming you will be more sensitive to memory allocation and the speed of your code, but in the intro class we will not differentiate between the two types of number vectors. In <strong>most</strong> cases they produce the same results, unless you are doing advanced numerical analysis where rounding errors matter!</p>
<pre class="r"><code>n <- 1:5
n</code></pre>
<pre><code>## [1] 1 2 3 4 5</code></pre>
<pre class="r"><code>class( n )</code></pre>
<pre><code>## [1] "integer"</code></pre>
<pre class="r"><code>n[ 2 ] <- 2.01 # replace the second element with "2.01"
n # all elements converted to decimals</code></pre>
<pre><code>## [1] 1.00 2.01 3.00 4.00 5.00</code></pre>
<pre class="r"><code>class( n )</code></pre>
<pre><code>## [1] "numeric"</code></pre>
</div>
</div>
<div id="character-vectors" class="section level2" number="3.5">
<h2><span class="header-section-number">3.5</span> Character Vectors</h2>
<p>The most important rule to remember with this data type: when creating character vectors, all text must be enclosed by quotation marks.</p>
<p>This one works:</p>
<pre class="r"><code>c( "a", "b", "c" ) # this works</code></pre>
<pre><code>## [1] "a" "b" "c"</code></pre>
<p>This one will not:</p>
<pre class="r"><code>c( a, b, c )
# Error: object 'a' not found</code></pre>
<p>When you type characters surrounded by quotes then R knows you are creating new text (<strong>“strings”</strong> of letters in programming speak).</p>
<p>When you type characters that are not surrounded by quotes, R thinks that you are looking for an object in the environment, like the variables we have already created. It gets confused when it doesn’t find the object that you typed.</p>
<p>In general, you will use quotes when you are creating character vectors and for arguments in functions. You do not use quotes when you are referencing an active object.</p>
<p>An active object is typically a dataset or vector that you have imported or created. You can print a list of all active objects with the <code>ls()</code> function.</p>
<div id="quotes-in-arguments" class="section level3" number="3.5.1">
<h3><span class="header-section-number">3.5.1</span> Quotes in Arguments</h3>
<p>When you first start using R it can be confusing to know when quotes are needed around arguments. Take the following example of the color argument (<code>col=</code>) in the <code>plot()</code> function.</p>
<pre class="r"><code>group <- factor( sample( c("treatment","control"), 100, replace=TRUE ) )
strength <- rnorm(100,100,30) + 50 * as.numeric( group=="treatment" )
par( mfrow=c(1,2) )
plot( strength, col="blue", pch=19, bty="n", cex=2 )
plot( strength, col=group, pch=19, bty="n", cex=2 )</code></pre>
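<p><img src="ch-050-vectors_files/figure-html/unnamed-chunk-27-1.png" alt="Two side-by-side scatter plots of the strength values: every point blue in the first, points colored by group in the second" width="960" /></p>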
<p><em>These graphs show patterns in the strength measures from our study. The first plots all subjects as blue, and the second plots subjects in the treatment group as red, control group as black.</em></p>
<p>In the first plot we are using a <strong>text argument</strong> to specify a color (<code>col="blue"</code>), so it must be enclosed by quotes.</p>
<p>In the second example R selects the color based upon group membership specified by the <strong>factor called ‘group’</strong>. Since the argument is now referencing an object (<code>col=group</code>), we do not use quotes.</p>
<p>The exception here is when your argument requires a number. <strong>Numbers are not passed with quotes, or they would be cast as text.</strong> For example, (<code>bty="n"</code>) tells the plot to not draw a box around the graph, and the <strong>cex</strong> argument controls the dot size: (<code>cex=2</code>).</p>
<p><img src="https://media2.giphy.com/media/kaq6GnxDlJaBq/giphy.gif" alt="" /></p>
<p>I know. I’m with you.</p>
</div>
</div>
<div id="factors" class="section level2" number="3.6">
<h2><span class="header-section-number">3.6</span> Factors</h2>
<p>When there are categorical variables within our data, or groups, then we use a special vector to keep track of these groups. We could just use numbers (1=female, 0=male) or characters (“male”,“female”), but factors are useful for two reasons.</p>
<p>First, they save memory. Text is very “expensive” in terms of memory allocation and processing speed, so using a simpler data structure makes R faster.</p>
<p>Second, when a variable is set as a factor, R recognizes that it represents a group and it can deploy object-oriented functionality. When you use a factor in analysis, R knows that you want to split the analysis up by groups.</p>
<pre class="r"><code>height <- c( 70, 68, 69, 74, 72, 69, 68, 73 )
strength <- c(167,185,119,142,175,204,124,117)
sex <- factor( c("male","male","female","female","male","male","female","female" ) )
par( mfrow=c(1,2) )
plot( height, strength, # two numeric vectors: scatter plot
pch=19, cex=3, bty="n" )
plot( sex, strength ) # factor + numeric: box and whisker plot </code></pre>
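<p><img src="ch-050-vectors_files/figure-html/unnamed-chunk-28-1.png" width="768" alt="Two side-by-side plots: a scatter plot of strength against height, and a box and whisker plot of strength by sex" /></p>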
<div class="tip">
<p>Note in this example the same <strong>plot()</strong> function produced two different types of graphs, a scatterplot and a box and whisker plot. How does this work?</p>
<p>R uses the object type to determine behavior:</p>
<ul>
<li>If input vectors are both numeric, then produce a scatterplot.</li>
<li>If input vectors are factor + numeric, then produce a box and whisker plot.</li>
</ul>
<p>This is called object-oriented programming - the functions adapt based upon the type of object they are working with.</p>
<p>It makes the process of creating data recipes much faster! We will revisit this concept later.</p>
</div>
<p><br>
<br></p>
<p>Factors are more memory efficient than character vectors because they store the underlying data as a numeric vector instead of a categorical (text) vector. Each group in the data is assigned a number, and when printing items the program only has to remember which group corresponds to which number:</p>
<pre class="r"><code>as.numeric( sex )</code></pre>
<pre><code>## [1] 2 2 1 1 2 2 1 1</code></pre>
<pre class="r"><code># male = 2
# female = 1</code></pre>
<p>If you print a factor, the computer just replaces each category designation with its name (2 would be replaced with “male” in this example). These replacements can be done in real time without clogging the memory of your computer as they don’t need to be saved.</p>
<p>In some instances a categorical variable might be represented by numbers. For example, grades 9-12 for high school kids. These can be tricky to re-cast.</p>
<pre class="r"><code>grades <- sample( x=9:12, size=10, replace=T )
grades</code></pre>
<pre><code>## [1] 9 11 12 12 11 11 11 10 9 11</code></pre>
<pre class="r"><code>grades <- as.factor( grades )
grades</code></pre>
<pre><code>## [1] 9 11 12 12 11 11 11 10 9 11
## Levels: 9 10 11 12</code></pre>
<pre class="r"><code>as.numeric( grades )</code></pre>
<pre><code>## [1] 1 3 4 4 3 3 3 2 1 3</code></pre>
<pre class="r"><code>as.character( grades )</code></pre>
<pre><code>## [1] "9" "11" "12" "12" "11" "11" "11" "10" "9" "11"</code></pre>
<pre class="r"><code># proper way to get back to the original numeric vector
as.numeric( as.character( grades ))</code></pre>
<pre><code>## [1] 9 11 12 12 11 11 11 10 9 11</code></pre>
<p><br></p>
<div class="tip">
<p>The <strong>very important</strong> rule to remember with factors is you can’t move directly from the factor to numeric using the <strong>as.numeric()</strong> casting function. This will give you the underlying integer codes that R uses to keep track of the groups, but will not give you the category names. To get these, you need the <strong>as.character()</strong> casting function first, followed by <strong>as.numeric()</strong> if the category names are themselves numbers.</p>
</div>
<p><br>
<br></p>
<p>TIP: When reading data from Excel spreadsheets (usually saved in the comma separated value or CSV format), remember to include the following argument to prevent the creation of factors, which can produce some annoying behaviors.</p>
<pre class="r"><code>dat <- read.csv( "filename.csv", stringsAsFactors=F )</code></pre>
</div>
<div id="logical-vectors" class="section level2" number="3.7">
<h2><span class="header-section-number">3.7</span> Logical Vectors</h2>
<p>Logical vectors are collections of TRUE and FALSE values.</p>
<p>Logical statements allow us to define groups based upon criteria, then decide whether observations belong to the group. A logical statement is one that contains a logical operator, and returns only TRUE, FALSE, or NA values.</p>
<p>Logical vectors are important because organizing data into these sets is what drives all of the advanced data analytics (set theory is at the basis of mathematics and computer science).</p>
<table style="width:57%;">
<colgroup>
<col width="11%" />
<col width="12%" />
<col width="16%" />
<col width="16%" />
</colgroup>
<thead>
<tr class="header">
<th align="center">name</th>
<th align="center">sex</th>
<th align="center">treat</th>
<th align="center">strength</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td align="center">adam</td>
<td align="center">male</td>
<td align="center">treatment</td>
<td align="center">167</td>
</tr>
<tr class="even">
<td align="center">jamal</td>
<td align="center">male</td>
<td align="center">control</td>
<td align="center">185</td>
</tr>
<tr class="odd">
<td align="center">linda</td>
<td align="center">female</td>
<td align="center">treatment</td>
<td align="center">119</td>
</tr>
<tr class="even">
<td align="center">sriti</td>
<td align="center">female</td>
<td align="center">control</td>
<td align="center">142</td>
</tr>
</tbody>
</table>
<pre class="r"><code>dat$name == "sriti"</code></pre>
<pre><code>## [1] FALSE FALSE FALSE TRUE</code></pre>
<pre class="r"><code>dat$sex == "male"</code></pre>
<pre><code>## [1] TRUE TRUE FALSE FALSE</code></pre>
<pre class="r"><code>dat$strength > 180</code></pre>
<pre><code>## [1] FALSE TRUE FALSE FALSE</code></pre>
<p>When defining logical vectors, you can use the abbreviated versions of T for TRUE and F for FALSE.</p>
<pre class="r"><code>z1 <- c(T,T,F,T,F,F)
z1</code></pre>
<pre><code>## [1] TRUE TRUE FALSE TRUE FALSE FALSE</code></pre>
<p>Typically logical vectors are used in combination with subset operators to identify specific groups in the data.</p>
<pre class="r"><code># isolate data on all of the females in the dataset
dat[ dat$sex == "female" , ]</code></pre>
<pre><code>## name sex treat strength
## 3 linda female treatment 119
## 4 sriti female control 142</code></pre>
<p>See the <a href="http://ds4ps.org/dp4ss-textbook/p-050-business-logic.html">next chapter</a> for more details on subsets.</p>
</div>
<div id="generating-vectors" class="section level2" number="3.8">
<h2><span class="header-section-number">3.8</span> Generating Vectors</h2>
<p>You will often need to generate vectors for data transformations or simulations. Here are the most common functions that will be helpful.</p>
<div id="repeated-values" class="section level3" number="3.8.1">
<h3><span class="header-section-number">3.8.1</span> Repeated Values</h3>
<div data-datacamp-exercise="" data-height="300" data-encoded="true">
eyJsYW5ndWFnZSI6InIiLCJzYW1wbGUiOiIjIHJlcGVhdCBhIG51bWJlciwgb3Igc2VyaWVzIG9mIG51bWJlcnNcbnJlcCggeD05LCB0aW1lcz01IClcbnJlcCggeD1jKDUsNyksIHRpbWVzPTUgKVxucmVwKCB4PWMoNSw3KSwgZWFjaD01IClcblxuIyBhbHNvIHdvcmtzIHRvIGNyZWF0ZSBjYXRlZ29yaWVzXG5yZXAoIHg9YyhcInRyZWF0bWVudFwiLFwiY29udHJvbFwiKSwgZWFjaD01ICkgICJ9
</div>
</div>
<div id="sequence-of-values" class="section level3" number="3.8.2">
<h3><span class="header-section-number">3.8.2</span> Sequence of Values</h3>
<div data-datacamp-exercise="" data-height="300" data-encoded="true">
eyJsYW5ndWFnZSI6InIiLCJzYW1wbGUiOiIjIGNyZWF0ZSBhIHNlcXVlbmNlIG9mIG51bWJlcnNcbnNlcSggZnJvbT0xLCB0bz0xNSwgYnk9MSApXG5zZXEoIGZyb209MSwgdG89MTUsIGJ5PTMgKVxuXG4jIHNob3J0aGFuZCBpZiBieT0xXG4xOjE1ICAgXG4zOjYifQ==
</div>
</div>
<div id="random-sample" class="section level3" number="3.8.3">
<h3><span class="header-section-number">3.8.3</span> Random Sample</h3>
<div data-datacamp-exercise="" data-height="300" data-encoded="true">
eyJsYW5ndWFnZSI6InIiLCJzYW1wbGUiOiIjIGNyZWF0ZSBhIHJhbmRvbSBzYW1wbGVcbmJhZy5vZi5sZXR0ZXJzIDwtIGMoXCJhXCIsXCJiXCIsXCJjXCIsXCJiXCIsXCJmXCIpXG5zYW1wbGUoIHg9YmFnLm9mLmxldHRlcnMsIHNpemU9MywgcmVwbGFjZT1GQUxTRSApXG5zYW1wbGUoIHg9YmFnLm9mLmxldHRlcnMsIHNpemU9MywgcmVwbGFjZT1GQUxTRSApXG5zYW1wbGUoIHg9YmFnLm9mLmxldHRlcnMsIHNpemU9MywgcmVwbGFjZT1GQUxTRSApXG5cbiMgZm9yIG11bHRpcGxlIHNhbXBsZXMgdXNlIHJlcGxhY2VtZW50XG5zYW1wbGUoIHg9YmFnLm9mLmxldHRlcnMsIHNpemU9MTAsIHJlcGxhY2U9VFJVRSApIn0=
</div>
</div>
<div id="draw-from-a-normal-distribution" class="section level3" number="3.8.4">
<h3><span class="header-section-number">3.8.4</span> Draw From a Normal Distribution</h3>
<div data-datacamp-exercise="" data-height="300" data-encoded="true">
eyJsYW5ndWFnZSI6InIiLCJzYW1wbGUiOiIjIGNyZWF0ZSBkYXRhIHRoYXQgZm9sbG93cyBhIG5vcm1hbCBjdXJ2ZVxuIyBJUSBmb2xsb3dzIGEgbm9ybWFsIGRpc3RyaWJ1dGlvblxuIyB3aXRoIGEgbWVhbiBvZiAxMDAgYW5kIHNkIG9mIDE1XG5cbmlxIDwtIHJub3JtKCBuPTEwMDAsIG1lYW49MTAwLCBzZD0xNSApXG5oaXN0KCBpcSwgY29sPVwiZ3JheVwiIClcbmFibGluZSggaD1tZWFuKGlxKSwgY29sPVwiZGFya3JlZFwiICkifQ==
</div>
</div>
</div>
<div id="recycling" class="section level2" number="3.9">
<h2><span class="header-section-number">3.9</span> Recycling</h2>
<p>When we create a new variable from existing variables, it is called a “transformation”. This is very common in data science. Crime is measured by the number of assaults <em>per 100,000 people</em>, for example (crime / pop). A batting average is the number of hits divided by the number of at bats.</p>
<p>In R, mathematical operations are <em>vectorized</em>, which means that operations are performed on the entire vector all at once. This makes transformations fast and easy.</p>
<pre class="r"><code>x <- 1:10
x + 5</code></pre>
<pre><code>## [1] 6 7 8 9 10 11 12 13 14 15</code></pre>
<pre class="r"><code>x * 5</code></pre>
<pre><code>## [1] 5 10 15 20 25 30 35 40 45 50</code></pre>
<p>R uses a convention called “recycling”, which means that it will re-use elements of a vector if necessary. In the example below the x vector has 10 elements, but the y vector only has 5 elements. When we run out of y, we just start over from the beginning. This is powerful in some instances, but can be dangerous in others if you don’t realize that it is happening.</p>
<pre class="r"><code>x <- 1:10
y <- 1:5
x + y</code></pre>
<pre><code>## [1] 2 4 6 8 10 7 9 11 13 15</code></pre>
<pre class="r"><code>x * y</code></pre>
<pre><code>## [1] 1 4 9 16 25 6 14 24 36 50</code></pre>
<pre class="r"><code># the colors are recycled
plot( 1:5, 1:5, col=c("red","blue"), pch=19, cex=3, bty="n" )</code></pre>
<p><img src="ch-050-vectors_files/figure-html/unnamed-chunk-45-1.png" width="768" alt="Scatter plot of five points along a diagonal whose colors alternate between red and blue" /></p>
<p>Here is an example of recycling gone wrong:</p>
<table style="width:57%;">
<colgroup>
<col width="11%" />
<col width="12%" />
<col width="16%" />
<col width="16%" />
</colgroup>
<thead>
<tr class="header">
<th align="center">name</th>
<th align="center">sex</th>
<th align="center">treat</th>
<th align="center">strength</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td align="center">adam</td>
<td align="center">male</td>
<td align="center">treatment</td>
<td align="center">167</td>
</tr>
<tr class="even">
<td align="center">jamal</td>
<td align="center">male</td>
<td align="center">control</td>
<td align="center">185</td>
</tr>
<tr class="odd">
<td align="center">linda</td>
<td align="center">female</td>
<td align="center">treatment</td>
<td align="center">119</td>
</tr>
<tr class="even">
<td align="center">sriti</td>
<td align="center">female</td>
<td align="center">control</td>
<td align="center">142</td>
</tr>
</tbody>
</table>
<pre class="r"><code># create a subset of data of all female study participants
dat$sex == "female"</code></pre>
<pre><code>## [1] FALSE FALSE TRUE TRUE</code></pre>
<pre class="r"><code>these <- dat$sex == "female"
dat[ these, ] # correct subset</code></pre>
<pre><code>## name sex treat strength
## 3 linda female treatment 119
## 4 sriti female control 142</code></pre>
<pre class="r"><code># same thing with a mistake
# whoops! should be double equal for a logical statement
# the female element is recycled
# just wrote over my raw data!
dat$sex = "female"
these <- dat$sex == "female"
dat[ these , ]</code></pre>
<pre><code>## name sex treat strength
## 1 adam female treatment 167
## 2 jamal female control 185
## 3 linda female treatment 119
## 4 sriti female control 142</code></pre>
</div>
<div id="missing-values-nas" class="section level2" number="3.10">
<h2><span class="header-section-number">3.10</span> Missing Values: NA’s</h2>
<p>Missing values are coded differently in each data analysis program. SPSS uses a period, for example. In R, missing values are coded as “NA”.</p>
<p>The important thing to note is that R wants to make sure you know there are missing values if you are conducting analysis. As a result, it will give you the answer of “NA” when you try to do math with a vector that includes a missing value. You have to ask it explicitly to ignore the missing value.</p>
<div data-datacamp-exercise="" data-height="300" data-encoded="true">
eyJsYW5ndWFnZSI6InIiLCJzYW1wbGUiOiJ4NSA8LSBjKCAxLCAyLCAzLCA0IClcbng1XG5zdW0oIHg1IClcbm1lYW4oIHg1IClcblxuXG54NSA8LSBjKCAxLCAyLCBOQSwgNCApXG54NVxuXG4jIHNob3VsZCBtaXNzaW5nIHZhbHVlcyBiZSB0cmVhdGVkIGFzIHplcm9zIG9yIGRyb3BwZWQ/IFxuXG5zdW0oIHg1IClcbm1lYW4oIHg1IClcbnN1bSggeDUsIG5hLnJtPVQgKSAgICMgbmEucm09VCBhcmd1bWVudCBkcm9wcyBtaXNzaW5nIHZhbHVlc1xubWVhbiggeDUsIG5hLnJtPVQgKSAgIyBuYS5ybT1UIGFyZ3VtZW50IGRyb3BzIG1pc3NpbmcgdmFsdWVzIn0=
</div>
<p>You cannot use the <em>==</em> operator to identify missing values in a dataset. There is a special <strong>is.na()</strong> function to locate all of the missing values in a vector.</p>
<div data-datacamp-exercise="" data-height="300" data-encoded="true">
eyJsYW5ndWFnZSI6InIiLCJzYW1wbGUiOiJ4NSA8LSBjKCAxLCAyLCBOQSwgNCApXG5cbiMgd2hpY2ggZWxlbWVudHMgYXJlIG1pc3Npbmc/XG54NSA9PSBOQSAgICAgICAjIHRoaXMgZG9lcyBub3QgZG8gd2hhdCB5b3Ugd2FudFxuaXMubmEoIHg1ICkgICAgIyBtdWNoIGJldHRlclxuXG4hIGlzLm5hKCB4NSApICAjIGlmIHlvdSB3YW50IHRvIGNyZWF0ZSBhIHNlbGVjdG9yIHZlY3RvciB0byBkcm9wIG1pc3NpbmcgdmFsdWVzIFxueDVbICEgaXMubmEoeDUpIF1cblxueDVbIGlzLm5hKHg1KSBdIDwtIDAgICMgcmVwbGFjZSBtaXNzaW5nIHZhbHVlcyB3aXRoIHplcm9cbng1In0=
</div>
</div>
</div>
<div class="footer">
<div class="row" align="center">
Notes for the <a href="http://ds4ps.org/ms-prog-eval-data-analytics/" target="_blank" rel="noopener">MS in Program Evaluation and Data Analytics</a><br>
A program at <a href="https://asuonline.asu.edu/online-degree-programs/graduate/program-evaluation-and-data-analytics-ms/" target="_blank" rel="noopener">Arizona State University</a><br>
Website powered by <a href="https://rmarkdown.rstudio.com/" target="_blank" rel="noopener">R Markdown</a> and <a href="http://jekyllrb.com" target="_blank" rel="noopener">Jekyll</a>
<br>
<br>
</div>
</div>
</div>
<script>
// Give pandoc-generated tables the Bootstrap table look.
// Tables are located through their body rows carrying the "odd" class:
// row -> parent <tbody> -> parent <table>.
function bootstrapStylePandocTables() {
  var oddRows = $('tr.odd');
  var tableBodies = oddRows.parent('tbody');
  var pandocTables = tableBodies.parent('table');
  pandocTables.addClass('table table-condensed');
}
// Apply the styling once the DOM is ready.
$(function () {
  bootstrapStylePandocTables();
});
</script>
<!-- tabsets -->
<script>
// Once the DOM is ready, build the tabsets.
// NOTE(review): window.buildTabsets is defined outside this section;
// the meaning of the "TOC" argument should be confirmed against it.
$(function () {
  window.buildTabsets("TOC");
});
// Once the DOM is ready, make each item of a dropdown-style tabset
// toggle the "nav-tabs-open" class on its parent tab list when clicked.
$(function () {
  var dropdownItems = $('.tabset-dropdown > .nav-tabs > li');
  dropdownItems.on('click', function () {
    var tabList = $(this).parent();
    tabList.toggleClass('nav-tabs-open');
  });
});
</script>
<!-- code folding -->
<!-- dynamically load mathjax for compatibility with self-contained -->
<script>
// Load MathJax at runtime by creating a <script> element and appending it
// to the document's <head>, rather than referencing it with a static tag.
(function () {
  var mathjaxUrl = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";
  var loaderTag = document.createElement("script");
  loaderTag.type = "text/javascript";
  loaderTag.src = mathjaxUrl;
  var headElement = document.getElementsByTagName("head")[0];
  headElement.appendChild(loaderTag);
})();
</script>
</body>
</html>