-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathps1_sg_update.tex
926 lines (819 loc) · 50.5 KB
/
ps1_sg_update.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
%% using aastex version 6.3
\documentclass[twocolumn]{aastex63}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%
%% The following section defines new commands for comments from co-authors
%%
\definecolor{DarkOrange}{RGB}{204, 85, 0}
\definecolor{LincolnGreen}{RGB}{17, 102, 0}
\definecolor{Rust}{HTML}{9B4F0F}
\definecolor{DarkCyan}{HTML}{008B8B}
\definecolor{MediumAquaMarine}{HTML}{66CDAA}
\def\ion#1#2{#1$\;${\footnotesize\rm{#2}}\relax}
\newcommand{\xander}[1]{{\color{red} XH: \textbf{#1}}}
\newcommand{\aam}[1]{{\color{DarkOrange} aam: \textbf{#1}}}
\newcommand{\todo}[1]{{\color{magenta} to-do: {#1}}}
\usepackage{lineno}
% \linenumbers
\usepackage{pifont}% http://ctan.org/pkg/pifont
\newcommand{\cmark}{\ding{51}}%
\newcommand{\xmark}{\ding{55}}%
\usepackage{multirow, amsmath}
%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Reintroduced the \received and \accepted commands from AASTeX v5.2
\received{\today}
\revised{}
\accepted{}
%% Command to document which AAS Journal the manuscript was submitted to.
%% Adds "Submitted to " the argument.
\submitjournal{PASP}
%% For manuscript that include authors in collaborations, AASTeX v6.3
%% builds on the \collaboration command to allow greater freedom to
%% keep the traditional author+affiliation information but only show
%% subsets. The \collaboration command now must appear AFTER the group
%% of authors in the collaboration and it takes TWO arguments. The last
%% is still the collaboration identifier. The text given in this
%% argument is what will be shown in the manuscript. The first argument
%% is the number of author above the \collaboration command to show with
%% the collaboration text. If there are authors that are not part of any
%% collaboration the \nocollaboration command is used. This command takes
%% one argument which is also the number of authors above to show. A
%% dashed line is shown to indicate no collaboration. This example manuscript
%% shows how these commands work to display specific set of authors
%% on the front page.
%%
%% For manuscript without any need to use \collaboration the
%% \AuthorCollaborationLimit command from v6.2 can still be used to
%% show a subset of authors.
%
%\AuthorCollaborationLimit=2
%
%% will only show Schwarz & Muench on the front page of the manuscript
%% (assuming the \collaboration and \nocollaboration commands are
%% commented out).
%%
%% Note that all of the author will be shown in the published article.
%% This feature is meant to be used prior to acceptance to make the
%% front end of a long author article more manageable. Please do not use
%% this functionality for manuscripts with less than 20 authors. Conversely,
%% please do use this when the number of authors exceeds 40.
%%
%% Use \allauthors at the manuscript end to show the full author list.
%% This command should only be used with \AuthorCollaborationLimit is used.
%% The following command can be used to set the latex table counters. It
%% is needed in this document because it uses a mix of latex tabular and
%% AASTeX deluxetables. In general it should not be needed.
%\setcounter{table}{1}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%
%% The following section outlines numerous optional output that
%% can be displayed in the front matter or as running meta-data.
%%
%% If you wish, you may supply running head information, although
%% this information may be modified by the editorial offices.
\shorttitle{PS1 Point Source Catalog II}
\shortauthors{Miller \& Hall}
%%
%% You can add a light gray and diagonal water-mark to the first page
%% with this command:
\watermark{DRAFT}
%% where "text", e.g. DRAFT, is the text to appear. If the text is
%% long you can control the water-mark size with:
%% \setwatermarkfontsize{dimension}
%% where dimension is any recognized LaTeX dimension, e.g. pt, in, etc.
%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\graphicspath{{./}{figures/}}
%% This is the end of the preamble. Indicate the beginning of the
%% manuscript itself with \begin{document}.
\begin{document}
\title{A Morphological Classification Model to Identify Unresolved PanSTARRS1 Sources II: Update to the PS1 Point Source Catalog}
%% LaTeX will automatically break titles if they run longer than
%% one line. However, you may use \\ to force a line break if
%% you desire. In v6.3 you can include a footnote in the title.
%% A significant change from earlier AASTEX versions is in the structure for
%% calling author and affiliations. The change was necessary to implement
%% auto-indexing of affiliations which prior was a manual process that could
%% easily be tedious in large author manuscripts.
%%
%% The \author command is the same as before except it now takes an optional
%% argument which is the 16 digit ORCID. The syntax is:
%% \author[xxxx-xxxx-xxxx-xxxx]{Author Name}
%%
%% This will hyperlink the author name to the author's ORCID page. Note that
%% during compilation, LaTeX will do some limited checking of the format of
%% the ID to make sure it is valid. If the "orcid-ID.png" image file is
%% present or in the LaTeX pathway, the OrcID icon will appear next to
%% the authors name.
%%
%% Use \affiliation for affiliation information. The old \affil is now aliased
%% to \affiliation. AASTeX v6.3 will automatically index these in the header.
%% When a duplicate is found its index will be the same as its previous entry.
%%
%% Note that \altaffilmark and \altaffiltext have been removed and thus
%% can not be used to document secondary affiliations. If they are used latex
%% will issue a specific error message and quit. Please use multiple
%% \affiliation calls for to document more than one affiliation.
%%
%% The new \altaffiliation can be used to indicate some secondary information
%% such as fellowships. This command produces a non-numeric footnote that is
%% set away from the numeric \affiliation footnotes. NOTE that if an
%% \altaffiliation command is used it must come BEFORE the \affiliation call,
%% right after the \author command, in order to place the footnotes in
%% the proper location.
%%
%% Use \email to set provide email addresses. Each \email will appear on its
%% own line so you can put multiple email address in one \email call. A new
%% \correspondingauthor command is available in V6.3 to identify the
%% corresponding author of the manuscript. It is the author's responsibility
%% to make sure this name is also in the author list.
%%
%% While authors can be grouped inside the same \author and \affiliation
%% commands it is better to have a single author for each. This allows for
%% one to exploit all the new benefits and should make book-keeping easier.
%%
%% If done correctly the peer review system will be able to
%% automatically put the author and affiliation information from the manuscript
%% and save the corresponding author the trouble of entering it by hand.
\author[0000-0001-9515-478X]{A.~A.~Miller}
\affiliation{Center for Interdisciplinary Exploration and Research in
Astrophysics (CIERA) and Department of Physics and Astronomy,
Northwestern University,
1800 Sherman Road, Evanston, IL 60201, USA}
\affiliation{The Adler Planetarium, Chicago, IL 60605, USA}
\email{[email protected]}
\author[0000-0002-9364-5419]{X.~Hall}
\affiliation{Cahill Center for Astrophysics,
California Institute of Technology,
1200 E.~California Boulevard, Pasadena, CA 91125, USA}
\affiliation{Center for Interdisciplinary Exploration and Research in
Astrophysics (CIERA) and Department of Physics and Astronomy,
Northwestern University,
1800 Sherman Road, Evanston, IL 60201, USA}
%% Note that the \and command from previous versions of AASTeX is now
%% deprecated in this version as it is no longer necessary. AASTeX
%% automatically takes care of all commas and "and"s between authors names.
%% AASTeX 6.3 has the new \collaboration and \nocollaboration commands to
%% provide the collaboration status of a group of authors. These commands
%% can be used either before or after the list of corresponding authors. The
%% argument for \collaboration is the collaboration identifier. Authors are
%% encouraged to surround collaboration identifiers with ()s. The
%% \nocollaboration command takes no argument and exists to indicate that
%% the nearby authors are not part of surrounding collaborations.
%% Mark off the abstract in the ``abstract'' environment.
\begin{abstract}
We present an update to the PanSTARRS-1 Point Source Catalog (PS1 PSC), which
provides morphological classifications of PS1 sources. The original PS1 PSC
adopted stringent detection criteria that excluded hundreds of millions of PS1
sources from the PSC. Here, we adapt the supervised machine learning methods
used to create the PS1 PSC and apply them to different photometric
measurements that are more widely available, allowing us to add $\sim$144
million new classifications while expanding the total number of sources in
the PS1 PSC by $\sim$10\%. We find that the new methodology, which utilizes PS1
forced photometry, performs $\sim$6--8\% worse than the original method. This
slight degradation in performance is offset by the overall increase in the
size of the catalog. The PS1 PSC is used by time-domain surveys to filter
transient alert streams by removing candidates coincident with
point sources that are likely to be Galactic in origin. The addition of
$\sim$144 million new classifications to the PS1 PSC will improve the
efficiency with which transients are discovered.
\end{abstract}
%% Keywords should appear after the \end{abstract} command.
%% See the online documentation for the full list of available subject
%% keywords and the rules for their use.
\keywords{Catalogs -- Surveys -- Astronomy data analysis -- Astrostatistics}
%% From the front matter, we move on to the body of the paper.
%% Sections are demarcated by \section and \subsection, respectively.
%% Observe the use of the LaTeX \label
%% command after the \subsection to give a symbolic KEY to the
%% subsection for cross-referencing in a \ref command.
%% You can use LaTeX's \ref and \label commands to keep track of
%% cross-references to sections, equations, tables, and figures.
%% That way, if you change the order of any elements, LaTeX will
%% automatically renumber them.
%%
%% We recommend that authors also use the natbib \citep
%% and \citet commands to identify citations. The citations are
%% tied to the reference list via symbolic KEYs. The KEY corresponds
%% to the KEY in the \bibitem in the reference list below.
\section{Introduction} \label{sec:intro}
The proliferation of wide-field time-domain surveys over the past $\sim$decade
has led to the discovery of a bevy of novel extragalactic transients
\citep[e.g.,][]{quimby11,Gezari12,Drout14,Gal-Yam14,Abbott17a,Prentice18,
IceCube-Collaboration18}. While these wide-field surveys have been enabled by
significant advances in detector technology, software has proven equally
important \citep[e.g.,][]{Masci17,Masci19,Smith20,Jones20} as many of these
critical discoveries have been facilitated by the rapid identification and
dissemination of new transient candidates in near real time
\citep[e.g.,][]{Patterson19}.
Reliable catalogs identifying stars and galaxies, or similarly unresolved and
resolved sources, are an essential cog in the machinery necessary to identify
extragalactic transients. On a nightly basis, time-domain surveys are inundated
with transient candidates, the vast majority of which are considered ``bogus''
\citep[e.g.,][]{Bloom12}. Despite sophisticated software capable of whittling
down the number of likely transients by several orders of magnitude
\citep[e.g.,][]{Brink13,Goldstein15,Duev19,Smith20}, the number of candidates
still vastly outpaces the spectroscopic resources necessary to classify
everything that varies \citep[e.g.,][]{Kulkarni20}. The aforementioned
star--galaxy catalogs therefore play an essential role in the search for
transients by removing stellar-like objects that are likely to be Galactic in
origin.
The PanSTARRS-1 Point Source Catalog \citep[PS1 PSC;][]{Tachibana18}, which
provides probabilistic point-source like classifications for $\sim$1.5 billion
sources detected by PanSTARRS-1 \citep[PS1;][]{Chambers16}, was designed
precisely to filter such sources. This catalog has been deployed by the Zwicky
Transient Facility \citep[ZTF;][]{Bellm19} and other surveys
\citep{Smith20,Moller20} to identify likely extragalactic transients. The PS1
PSC has been demonstrated to be an important ingredient in the systematic
search for extragalactic transients \citep[e.g.,][]{Fremling20,De20}.
A downside to the PS1 PSC is that it does not provide classifications for
sources that are not ``detected'' in the PS1 \texttt{StackObjectAttributes}
table \citep[see \S3 in][]{Tachibana18}. Of the $\sim$3 billion unique sources
in the PS1 \texttt{StackObjectAttributes} table, the vast majority of those
missing from the PS1 PSC are either spurious or have an extremely low
signal-to-noise ratio (S/N), such that the methods in \citet{Tachibana18}
would not provide a reliable classification. Additional sources are missing
from the PS1 PSC because there are multiple rows within the PS1
\texttt{StackObjectAttributes} table that have the same \texttt{ObjID} and
$\mathtt{primaryDetection} = 1$. By definition this should not happen, and
therefore these sources were excluded. For PS1 sources that are not in the PS1
PSC, ZTF reports a probability score $=0.5$, i.e., an ambiguous
classification, when cross-matching newly observed variables with the PS1
catalog (see Appendix~\ref{app:cat_counts} for additional details about which PS1
sources are used by ZTF).
Here, we present an update to the PS1 PSC by classifying $\sim$144 million
sources that were previously ``missing'' from the catalog. These
classifications are made using different photometric measurements from the
ones adopted in \citet{Tachibana18}. While our new method performs slightly
worse than the one in \citet{Tachibana18}, we nevertheless achieve a similar
level of accuracy with the new model. We apply our new model to the $\sim$426
million ``missing'' sources (classifying $\sim$34\% of them), providing a new
and useful supplement to the PS1 PSC.\footnote{During the preparation of this
manuscript \citet{Beck20} published a new machine learning catalog (PS1-STRM)
to classify the $\sim$2.9 billion sources in the PS1 \texttt{ForcedMeanObject}
table. We highlight differences and similarities between the
\citeauthor{Beck20} catalog and this work in \S\ref{sec:discussion}.}
Alongside this paper, we have released the open-source software needed to
recreate the analysis in this study. The software is available online at
\url{https://github.com/adamamiller/PS1_star_galaxy}. The update to the
ZTF--PS1 catalog created during this study is available as a High Level
Science Product via the Mikulski Archive for Space Telescopes (MAST) at
doi:\textbf{\textit{link still to be determined}}.
\section{ML Model Data}
\subsection{PS1 Data}
PS1 conducted a five filter ($g_\mathrm{PS1}$, $r_\mathrm{PS1}$,
$i_\mathrm{PS1}$, $z_\mathrm{PS1}$, $y_\mathrm{PS1}$) time-domain survey
covering $\sim$3/4 of the sky \citep{Chambers16}. PS1 provides three different
types of photometric measurements: there are mean flux measurements from the
individual PS1 exposures of each field, there are stack flux measurements from
the deeper stack images that co-add individual exposures, and there are
forced-flux measurements that measure the flux in individual exposures at the
location of all sources detected in the stack images. The mean photometry is
limited by the depth of the individual exposures, while the stack photometry
has a difficult to model point spread function (PSF) because images must be
warped before they can be co-added. The forced-flux measurements provide an
intermediate compromise as they are deeper than the mean flux measurements,
while in principle having a more stable PSF than the stack images.
\citet{Tachibana18} show that the stack photometry works best when
morphologically classifying resolved, extended sources and unresolved point
sources. The methodology that we adopt here is extremely similar to
\citet{Tachibana18}, but we instead use PS1 forced photometry to classify
sources that do not have suitable stack photometry. The
forced-photometry-based model leads to slightly lower quality classifications
(see \S\ref{sec:results}).
\subsection{ML Training Set}\label{sec:training_set}
As a training set for the model, we use deep observations of the COSMOS field
from the \textit{Hubble Space Telescope} (HST). The superior
resolution of HST enables reliable morphological classifications for
sources as faint as $\sim$25\,mag \citep{Leauthaud07}. There are 80,867 bright
HST sources from \citet{Leauthaud07} that have PS1 counterparts
\citep[within a 1\arcsec\ match radius; see][]{Tachibana18} in the PS1
\texttt{ForcedMeanObject} table (see \S\ref{sec:features}). Of those, the
47,825 PS1 sources with $\mathtt{nDetections} \ge 1$ are adopted as the
training set for our model. This training set is $\sim$1.6\%
larger than the one used in \citet{Tachibana18} because more HST/COSMOS sources
are ``detected'' in PS1 forced photometry.\footnote{For this work a source is considered ``detected'' only if the \texttt{FPSFFlux}, \texttt{FPSFFluxErr}, \texttt{FKronFlux}, \texttt{FKronFluxErr}, \texttt{FApFlux}, \texttt{FApFluxErr} are all $> 0$ in at least one filter.}
\section{ML Model Features}\label{sec:ML_features}
\subsection{PS1 Forced Photometry Features}\label{sec:features}
Regardless of the choice of algorithm, the basic goal of a machine learning
model is to build a map between source features, numerical and/or categorical
properties that can be measured for an individual source, and labels, the
target output, often a classification, of the model. This mapping is learned
via a training set, a subset of the data with known labels, after which the
model can classify any source based on its features.
\citet{Tachibana18} introduced the concept of ``white flux'' features, whereby
measurements in the five individual PS1 filters were summed, via a weighted
mean, to produce a ``total'' flux or shape measurement across all
filters.\footnote{Only filters in which the source is detected are included in
the sum; see Equations~1 and 2 in \citet{Tachibana18}.} Machine learning
models are limited by their training sets: there is no guarantee that their
empirical mapping will correctly extend beyond the boundaries enclosed by the
training set. Given the significant systematic uncertainties associated with
Galactic reddening, and the tendency for spectroscopic samples, which are
typically used to define training sets, to be biased in their target selection
\citep[see e.g.,][]{Miller17}, the motivation for ``white flux'' features
becomes clear: they reduce potential biases in the final classifications due
to selection effects in how the training set sources were targeted. Therefore,
as in \citet{Tachibana18}, we use ``white flux'' features in this study.
The PS1 \texttt{StackObjectAttributes} table provides both flux and shape
(e.g., second moment of the radiation intensity) measurements in each of the
five PS1 filters, whereas the PS1 \texttt{ForcedMeanObject} table only
provides flux measurements.\footnote{The PS1 \texttt{ForcedMeanObject} table
provides average measurements across all epochs on which a PS1 source is
observed, and the average second moment of the radiation intensity is somewhat
meaningless as the orientation of the detector and observing conditions vary
image to image.}
To create the feature set for our machine learning model, we create ``white
flux'' features for the six different flux measurements available in the
\texttt{ForcedMeanObject} table\footnote{The original PS1 PSC and the PS1-STRM
catalogs are both constructed using the first PS1 data release. This study
uses measurements from the second PS1 data release, which corrects a
percent-level flat-field correction that was applied with the wrong sign in
DR1 \citep{Beck20}.} (\texttt{FPSFFlux}, \texttt{FKronFlux}, \texttt{FApFlux},
\texttt{FmeanflxR5}, \texttt{FmeanflxR6}, \texttt{FmeanflxR7}), as well as the
\texttt{E1} and \texttt{E2} measurements, which represent the mean
polarization parameters from \citet{Kaiser95}. Rather than the raw flux
measurements, we use flux ratios, which provide morphological classifications
that are independent of S/N \citep{Lupton01}.
\begin{figure*}
\centering
\includegraphics[width=6.5in]{./figures/whiteFeatures.pdf}
%
\caption{The primary square panels show Gaussian KDEs of the PDF for each
of the ``white flux'' features as a function of \texttt{whiteFKronMag}
($=-2.5\log_{10}[\mathtt{whiteFKronFlux}/3631]$) for all sources in the
training set. Unresolved point sources are shown via the red-purple
contours, while resolved, extended objects are shown via blue-green
contours. The shown contour levels extend from 0.9 to 0.1 in 0.1
intervals. To the right of each primary panel is a marginalized 1D KDE of
the PDF for the individual features, where the amplitudes of the KDEs have
been normalized by the relative number of point sources and extended
objects.}
%
\label{fig:features}
\end{figure*}
Our final model includes nine features, five flux ratios:
%
\begin{align*}
\mathtt{whiteFPSFApRatio} &= \frac{\mathtt{whiteFPSFFlux}}{\mathtt{whiteFApFlux}},\\
\mathtt{whiteFPSFKronRatio} &= \frac{\mathtt{whiteFPSFFlux}}{\mathtt{whiteFKronFlux}},\\
\mathtt{whiteFPSFFmeanflxR5Ratio} &= \frac{\mathtt{whiteFPSFFlux}}{\mathtt{whiteFmeanflxR5Flux}},\\
\mathtt{whiteFPSFFmeanflxR6Ratio} &= \frac{\mathtt{whiteFPSFFlux}}{\mathtt{whiteFmeanflxR6Flux}},\\
\mathtt{whiteFPSFFmeanflxR7Ratio} &= \frac{\mathtt{whiteFPSFFlux}}{\mathtt{whiteFmeanflxR7Flux}},
\end{align*}
%
the white polarization parameters: \texttt{whiteE1} and \texttt{whiteE2}, and
two ``simple'' distance measures: \texttt{whiteFPSFApDist} and
\texttt{whiteFPSFKronDist} (see \S\ref{sec:simple_model}). The distribution of
these features for stars and galaxies in the training set is shown in
Figures~\ref{fig:features}, \ref{fig:psf_ap}, and \ref{fig:psf_kron}.
Figure~\ref{fig:features} shows that \texttt{whiteFPSFApRatio} is the most
useful feature, aside from the ``simple'' features, to separate resolved and
unresolved sources. This intuitively makes sense as PS1 \texttt{FApFlux}
measurements are matched to the seeing, whereas the \texttt{FmeanflxR5},
\texttt{FmeanflxR6}, and \texttt{FmeanflxR7} measurements use fixed aperture
sizes. With multiple images taken under different observing conditions
contributing to the final forced flux measurements, fixed aperture
measurements should be noisier.
\subsection{The ``Simple'' Distance Features}\label{sec:simple_model}
\citet{Tachibana18} introduced a ``simple'' model to classify sources based
solely on their measured \texttt{whitePSFFlux} and \texttt{whiteKronFlux}. The
model was inspired by the use of flux ratios, which have been shown to provide
a good discriminant between resolved and unresolved sources \citep[e.g., the
SDSS morphological \texttt{CLASS} parameter;][]{Lupton01}. At moderate to low
S/N, however, flux ratios no longer provide accurate classifications (see
e.g., Figure~\ref{fig:features}). The simple model from \citet{Tachibana18}
leverages this fact by measuring the distance of each source from a line drawn
in the \texttt{whitePSFFlux}--\texttt{whiteKronFlux} plane. Unlike a flux
ratio, the simple model preserves information about the S/N, meaning sources
with large absolute distances from the dividing line can be classified with
greater confidence.
Following from Equation~3 in \citet{Tachibana18}, ``simple'' features can be
calculated as:
%
\begin{equation}
\mathtt{whiteF1F2Dist}(a) =
\frac{\mathtt{whiteF1} - a\times\mathtt{whiteF2}}{ \sqrt{1 + a^2}},
\end{equation}
%
where \texttt{whiteF1} and \texttt{whiteF2} are the ``white flux''
measurements introduced in \S\ref{sec:features} (e.g.,
\texttt{whiteFKronFlux}), $a$ is the slope of the line in the
\texttt{whiteF1}--\texttt{whiteF2} plane, and \texttt{whiteF1F2Dist} is the
orthogonal distance of a source from the line (sources above the line have
positive values). For this study we construct two simple features for
inclusion in our machine learning model: \texttt{whiteFPSFFKronDist} and
\texttt{whiteFPSFFApDist}.
We determine the optimal value of $a$ for the simple features via cross
validation. We find that $a = 0.7512$ for the \texttt{whiteFPSFFKronDist}
feature and $a = 0.7784$ for the \texttt{whiteFPSFFApDist} feature maximize
the FoM (see \S\ref{sec:ML_model}). Empirically, \texttt{whiteFPSFFApDist} is
better at separating resolved and unresolved sources than
\texttt{whiteFPSFFKronDist}, and therefore the ``simple'' model, discussed
below, is based on \texttt{whiteFPSFFApDist}. The \texttt{whiteFPSFFApDist} and
\texttt{whiteFPSFFKronDist} distributions of resolved and unresolved sources
are shown in Figures~\ref{fig:psf_ap} and \ref{fig:psf_kron}, respectively.
\begin{figure}
\centering
\includegraphics[width=\columnwidth]{./figures/whiteFPSFApDist.pdf}
%
    \caption{The distribution of $\mathtt{whiteFPSFApDist}$ values for
    resolved, extended sources and unresolved point sources from the training
    set as a function of \texttt{whiteFKronMag}. The colors and contours are
    the same as Figure~\ref{fig:features}. The horizontal dashed line shows
    the optimal threshold ($\mathtt{whiteFPSFApDist} \ge 1.48 \times
    10^{-6}$) for resolved--unresolved classification. The upper-right inset
    shows a zoom-out highlighting the stark difference between stars and
    galaxies at the bright end.}
%
\label{fig:psf_ap}
\end{figure}
\begin{figure}
\centering
\includegraphics[width=\columnwidth]{./figures/whiteFPSFKronDist.pdf}
%
\caption{Same as Figure~\ref{fig:psf_ap}, but showing the distribution for
\texttt{whiteFPSFKronDist}. A horizontal line is not shown as we do not
recommend the use of only \texttt{whiteFPSFKronDist} for
    resolved--unresolved classification.}
%
\label{fig:psf_kron}
\end{figure}
\section{Training the ML Model}\label{sec:ML_model}
We construct a model to maximize the figure of merit (FoM) for our
morphological classification model. Our aim is to retain nearly all the
resolved, extended sources while excluding as many unresolved point sources as
possible. Thus, our FoM is defined as the true positive rate
(TPR)\footnote{$\mathrm{TPR} = \mathrm{TP}/(\mathrm{TP} + \mathrm{FN})$, where
TP is the total number of true positive classifications and FN is the number
of false negatives.} at a fixed false positive rate
(FPR)\footnote{$\mathrm{FPR} = \mathrm{FP}/(\mathrm{FP}+\mathrm{TN})$, where
FP is the number of false positives and TN is the number of true negatives.} = 0.005.
Using the nine features from \S\ref{sec:ML_features}, we use the random forest
(RF) algorithm \citep{Breiman01}, as implemented in \texttt{scikit-learn}
\citep{Pedregosa11}, to classify PS1 sources as resolved or unresolved.
Briefly, the RF algorithm constructs an ensemble of decision trees
\citep{Breiman84}, where each tree is constructed using a bootstrapped sample
of the training set \citep[a method known as ``bagging'';][]{Breiman96} and
the split for each branch within the tree is selected from a random subset of
the full feature set. The result is a lower variance estimator than is
possible from a single decision tree.
To train the RF model, we replicate the procedure in \citet{Tachibana18}. We
use $k$-fold cross validation (CV) to optimize the model tuning parameters,
namely the number of trees in the forest $N_\mathrm{tree}$, the random number
of features for splitting at each node $m_\mathrm{try}$, and the minimum
number of sources in a terminal leaf of the tree $\mathtt{nodesize}$. Our CV
procedure utilizes both an inner and outer loop, each with $k = 10$ folds. In
the inner loop, a 10-fold CV grid search is performed over the three
tuning parameters, while predictions from the optimal grid location are
applied to the 1/10 of the training set that was withheld in the outer loop.
This process is then repeated for the remaining 9 folds in the outer loop. We
adopt the average results from the 10 different grid searches to arrive at
optimal model parameters of: $N_\mathrm{tree} = 900$, $m_\mathrm{try}
= 3$, and $\mathtt{nodesize} = 2$. The RF model results are not strongly
dependent on the final choice of tuning parameters.
\section{Results}\label{sec:results}
\subsection{Model Performance}
Our aim is to maximize the FoM of the RF model. We show receiver operating
characteristic (ROC) curves of the RF, simple, and PS1\footnote{The PS1 model
is defined by a single hard cut on the PSF--Kron flux ratio measured in the
$i_\mathrm{PS1}$ band \citep[for further details see][]{Tachibana18}.} models
in Figure~\ref{fig:hst_roc}. From Figure~\ref{fig:hst_roc}, it is clear that
the RF and simple models greatly outperform the PS1 model. Furthermore, while
the gains are modest, the inclusion of all the ``white flux'' features and use
of machine learning is justified as the RF model produces a higher FoM than
the simple model.
\begin{figure}[t]
\centering
\includegraphics[width=\columnwidth]{./figures/CV_ROC_FHST.pdf}
%
\caption{ ROC curves comparing the relative performance of the PS1, simple,
and RF models for HST sources with $i_\mathrm{PS1}$ detections. The thick
slate gray, green, and purple lines show the ROC curves for the PS1, simple,
and RF models, respectively. The light, thin lines show the ROC curves for
the individual CV folds. The inset on the right shows a zoom in around FPR =
0.005, shown as a dotted vertical line, corresponding to the FoM (the PS1
model is not shown in the inset, because it has very low FoM).}
%
\label{fig:hst_roc}
\end{figure}
The FoM of each of the three models is summarized in Table~\ref{tbl:hst_cv}.
In addition to providing the largest FoM, the RF model is also the most
accurate and it has the largest area under the ROC curve (ROC AUC). We
robustly conclude that, of the models considered here, the RF model is best.
Comparing with Table~1 in \citet{Tachibana18}, we find that the
forced-photometry features derived in this study do not provide the same
discriminating power as the PS1 stack-photometry features used in
\citet{Tachibana18}. Our new model performs $\sim$7\% worse than the one in
\citet{Tachibana18}. In \S\ref{sec:ztf_pipeline}, we argue that this slight
reduction in performance is more than offset by the $\sim$144 million
additional sources that are now classified using the forced-photometry
features.
\input{tables/hst_cv.tex}
We show the CV accuracy of the RF, simple, and PS1 models as a function of
\texttt{whiteFKronMag} in Figure~\ref{fig:hst_acc}. As in \citet{Tachibana18},
we find that the RF model provides more accurate classifications than the
alternatives.
\begin{figure}[t]
\centering
\includegraphics[width=\columnwidth]{./figures/CV_Accuracy_FHST.pdf}
%
\caption{\textit{Top}: Model accuracy as a function of
\texttt{whiteFKronMag} for HST sources with $i_\mathrm{PS1}$ detections.
Accuracy curves for the PS1, simple and RF models are shown as slate gray
pentagons, green triangles, and purple circles, respectively. The bin widths
are 0.5\,mag, and the error bars represent the 68\% interval from bootstrap
resampling. Additionally, Gaussian KDEs of the PDFs for the training set, as
well as the unresolved point sources and resolved, extended objects in the
same subset, are shown in the shaded gray, red, and green regions,
respectively. The amplitudes of the star and galaxy PDFs have been normalized
by their relative ratio compared to the full $i_\mathrm{PS1}$-band subset.
\textit{Bottom}: accuracy of resolved and unresolved classifications as a
function of \texttt{whiteFKronMag} from the RF model (i.e., the TPR when
treating each class as the positive class). Nearly all the resolved sources
are correctly classified, because they dominate by number at low S/N (see
text), while only bright unresolved sources are correctly classified.}
%
\label{fig:hst_acc}
\end{figure}
The accuracy of each model shown in Figure~\ref{fig:hst_acc} decreases for
lower S/N sources. The accuracy curves for the RF and simple models feature a
slight departure from expectation in that they do not decrease much from 22 to
24\,mag. This quasi-plateau in the model accuracy can be understood as the
result of two components of the training set: (i) resolved sources
completely dominate the source counts at these magnitudes, and (ii) the
well-defined locus of unresolved sources in the training set (see
Figure~\ref{fig:features}) becomes heavily blended with the resolved source
population at these brightness levels. Taken together, the model will be biased
towards classifying all faint sources as resolved, despite the fact that we do
not explicitly include flux measurements in the feature set. With 88.5\% of
the $\mathtt{whiteFKronMag} > 22.5$\,mag training set sources being
resolved, a quasi-plateau in accuracy of $\sim$88\% makes sense. This is
confirmed in the bottom panel of Figure~\ref{fig:hst_acc}, which shows the RF
model true positive rate (TPR) for both resolved and unresolved sources as a
function of \texttt{whiteFKronMag}. A near 100\% TPR for faint resolved
sources combined with a few correctly classified unresolved sources leads to
the observed quasi-plateau in Figure~\ref{fig:hst_acc}.
\subsection{The Updated PS1 PSC Catalog}\label{sec:ps1psc_update}
With a new RF model in hand, we can now provide morphological classifications
for the PS1 sources that are currently missing from the PS1 PSC. Of the
$\sim$426 million ``missing'' sources, $\sim$144 million have PS1 DR2
\texttt{ForcedMeanObject} photometry that pass our detection criteria (see
Appendix~\ref{app:cat_counts} for more details). A histogram showing the distribution
of the RF classification score for these newly classified sources is shown in
Figure~\ref{fig:psc_update}.
\begin{figure}
\centering
\includegraphics[width=\columnwidth]{./figures/PS1_PSC_update_hist.pdf}
%
\caption{Histogram showing the RF classification scores for the $\sim$144
million newly classified sources from PS1. All of the newly classified
sources are shown in blue, while Galactic plane sources ($|b| <
5^{\circ}$) are shown in orange, and high galactic latitude sources ($|b|
> 30^{\circ}$) are shown in grey. The vertical dotted line shows the
conservative classification threshold adopted in \citet{Tachibana18}
(sources to the right of the line are considered point sources). The vast
majority of the newly classified sources are in the Galactic plane.}
%
\label{fig:psc_update}
\end{figure}
Figure~\ref{fig:psc_update} shows that there are relatively few
high-confidence classifications (i.e., very likely extended sources with RF
score $\approx 0$ or very likely point sources with RF score $\approx 1$)
among the ``missing'' sources. Figure~\ref{fig:psc_update} also reveals the
likely explanation for this outcome: the vast majority of the newly classified
sources are in the Galactic plane. Of the $\sim$144 million newly classified
sources, $\sim$57\% have galactic latitude $\lvert b \rvert < 5$\,deg, while
$> 95$\% are in the Galactic plane ($\lvert b \rvert < 15$\,deg). The HST
COSMOS field, from which we derive our training set, has $b \approx 42$\,deg
and as a result includes very few stellar blends, which are common at low
galactic latitudes. The PS1 PSC also has significantly lower confidence
classifications in the Galactic plane \citep[see Figure~8 in][]{Tachibana18}.
That these sources were not ``detected'' in the PS1 stack images also suggests
that it is difficult to make reliable photometric measurements using the PS1
data, which could also contribute to the lower confidence classifications. We
use the third data release from the space-based \textit{Gaia} telescope
\citep{Perryman01} to improve this situation by classifying many of these
ambiguous sources as stars via parallax and proper motion measurements
(\S\ref{sec:gaia}).
Ultimately, this update to the PS1 PSC has identified 17,945,494 likely point
sources using the optimized threshold from \citet[][RF score $\ge
0.83$]{Tachibana18}. While this number is small compared to the $\sim$734
million point sources in the original PS1 PSC, these $\sim$18 million newly
identified point sources would otherwise pass filters looking for
extragalactic transients in the ZTF alert stream. Their removal will reduce
the number of false positive transient candidates.
\section{Deployment in the ZTF Real-Time Pipeline}\label{sec:ztf_pipeline}
The ZTF real-time pipeline \citep{Masci19} provides AVRO alert packets
\citep[see][]{Patterson19} containing information (e.g., flux, position,
nearest neighbors) about any newly discovered sources of variability. The
packets include morphological classifications, based on the PS1 PSC
\citep{Tachibana18}, for the three closest sources in the ZTF
\texttt{Stars} table that are within 30\arcsec\ of the newly observed variable
source (see Appendix~\ref{app:cat_counts} for a summary of the PS1 sources included in
the ZTF \texttt{Stars} table). There are $\sim$426 million PS1 sources in the
ZTF \texttt{Stars} table that are not classified in the original PS1 PSC (see
\S\ref{sec:ps1psc_update}).
\subsection{Updating RF Classifications with Gaia Stars}\label{sec:gaia}
The \textit{Gaia} Early Data Release 3 includes high-precision astrometric
measurements collected over a 34 month timespan for $\sim$1.8 billion sources
\citep{Gaia-Collaboration20}. Within ZTF, the PS1 PSC is primarily used to
identify likely stars (i.e., point sources) and remove them from filters
searching for extragalactic transients. To that end, we can supplement the RF
classifications described in \S\ref{sec:ps1psc_update} with \textit{Gaia}
stars, which are identified via high-significance parallax and proper motion
detections.
A common threshold for determining ``high-significance'' is $\mathrm{S/N} \ge
5$, which in the case of Gaussian uncertainties corresponds to a
$\sim$3$\times 10^{-7}$ probability that the observed signal is the result of
noise. We can therefore select stars from \textit{Gaia} sources with high S/N
parallax or proper motion measurements.\footnote{The total proper motion is
estimated by adding the proper motion in Right Ascension and Declination in
quadrature, see \citet{Tachibana18} for the corresponding uncertainty on this
quantity.} We adopt conservative significance thresholds because the formal
uncertainties from \textit{Gaia} are slightly underestimated
\citep{Fabricius20} and because most of the ``missing'' sources in the ZTF
\texttt{Stars} table are in the Galactic Plane (e.g.,
Figure~\ref{fig:psc_update}). \citet{Fabricius20} estimate that \textit{Gaia}
parallax measurements underestimate the uncertainties by as much as $\sim$60\%
in crowded regions. Similarly, proper motion uncertainties are found to be
underestimated by as much as $\sim$80\% in crowded regions \citep{Fabricius20}. We therefore
only consider \textit{Gaia} sources with a parallax $\mathrm{S/N} \ge 8$ or a
total proper motion $\mathrm{S/N} \ge 9$ to be stars.
Using the ESA \textit{Gaia}
archive\footnote{\url{https://gea.esac.esa.int/archive/}} we find there are
18,658,572 sources with either a high-significance parallax or proper motion
detection in the ZTF \texttt{Stars} table that lack a classification in the
original PS1 PSC. For these sources (11,427,503 of which have RF scores from
\S\ref{sec:ps1psc_update}) we update their scores to 1 in the ZTF
\texttt{Stars} table. This effectively excludes each of these sources from
filters designed to find extragalactic transients in the ZTF alert stream.
\subsection{Practical Implementation of the Updated Catalog}
Moving forward, ZTF alert packets now include $\sim$152 million additional
classifications ($\sim$133.6 million RF classifications from
\S\ref{sec:ps1psc_update} and $\sim$18.6 million from
\S\ref{sec:gaia}). The addition of these new classifications to the ZTF AVRO
packets should not affect existing alert-stream filters, as we describe below.
\input{./tables/thresholds.tex}
While a one-to-one mapping of point-source classification scores cannot be
made between \citet{Tachibana18} and this study, the similarity between the
two methodologies leads to classifications that are highly similar.
Table~\ref{tbl:thresh} summarizes the TPR and FPR for different classification
thresholds using the model from \citet{Tachibana18} and the RF model created
in this study. The PS1 stack photometry used in \citet{Tachibana18}
consistently produces a higher TPR, by $\sim$6--8\%, than the PS1 forced
photometry. The PS1 forced photometry used in this study does have a lower FPR
than \citet{Tachibana18} for all but the most liberal point-source
classification cuts. Thus, applying classification cuts developed for the
original PS1 PSC will ultimately lead to a higher TPR, as previously
unclassified point sources can now be removed from the stream, without
experiencing an overall increase in the FPR. As a result, we conclude that the
vast majority of users will not experience any significant change in the
results to their filters, aside from a slight reduction in false negatives
(stars that are classified as galaxies), following the update to the ZTF
\texttt{Stars} table.
\section{Discussion}\label{sec:discussion}
During the preparation of this manuscript, \citet{Beck20} published the
Pan-STARRS1 Source Types and Redshifts with Machine learning (PS1-STRM)
catalog, which includes the machine learning classification of PS1 sources as
either stars, galaxies, or quasars. Like this study, \citet{Beck20} use PS1
forced photometry to provide classifications. There are a couple of
differences between the catalogs: the PS1-STRM classifies all $\sim$2.9
billion sources in the PS1 \texttt{ForcedMeanObject} table, while the updated
PS1 PSC only classifies $\sim$half that many sources.\footnote{We note that
the majority of the additional classifications in the PS1-STRM are
$\mathtt{nDetections} \le 2$ sources with low S/N photometry. These
classifications therefore have a lot more uncertainty than the sources that
are in common between the PS1 PSC and the PS1-STRM.} Another difference
between the two catalogs is that the PS1-STRM uses a neural-network
classifier, whereas the PS1 PSC uses the RF algorithm. Finally, the PS1-STRM
uses full color information in their classifier whereas the PS1 PSC uses
``white flux'' features (see \S\ref{sec:features}).
The most important distinction between the two catalogs, in our estimation, is
their training sets. The PS1-STRM is trained using spectroscopic labels that
predominantly come from the Sloan Digital Sky Survey
\citep[SDSS;][]{Abolfathi18}, whereas the PS1 PSC is trained via morphological
classifications from HST. An SDSS-based training set has two distinct
advantages: it is nearly two orders of magnitude larger than the HST training
set and it includes redshift information (which can be used to estimate
photometric redshifts, as is done in the PS1-STRM).
When considering only morphological classification, or similarly star--galaxy
separation, an SDSS-based training set produces biased classifications
\citep{Miller17,Tachibana18}. The SDSS spectroscopic targeting algorithm was
biased towards specific source classes, such as luminous red galaxies, and as
a result SDSS spectra are not representative of the average source in PS1
\citep[see Figure~1 in][]{Tachibana18}. Furthermore, the SDSS training set is
distinctly biased towards point sources at the faint end ($r \gtrsim
21$\,mag), which leads to models that overestimate the prevalence of point
sources at these brightness levels \citep[see e.g., Figure~7
in][]{Tachibana18}. It is for these reasons that we adopt the HST training set
for the PS1 PSC, despite its relatively modest size.
Ultimately, we recommend the use of both catalogs. Despite the different
methodologies and training sets, we expect the classifications to largely be in
agreement for bright sources ($r \lesssim 20$\,mag). In cases where the
catalogs agree, the classifications can be treated as extremely confident.
Most of the disagreements will occur at the faint end, where both catalogs
will provide noisier estimates. For faint sources where the catalogs disagree,
users should consider applying an additional prior based on the observed
source counts in the Universe \citep[e.g.,][]{Henrion11}. At high galactic
latitudes, nearly all the very faint sources are galaxies, while within the
Galactic plane nearly everything will be a star.
\section{Conclusions}
We have presented an update to the PS1 PSC \citep{Tachibana18}, by classifying
$\sim$144 million sources that were previously ``missing.'' The new
classifications are made using a new RF model that utilizes photometric and
shape features from the PS1 DR2 \texttt{ForcedMeanObject} table.
The training set and methodology are nearly identical to those used in
\citet{Tachibana18}, with the major difference being that the earlier study used
features from the PS1 DR1 \texttt{StackObjectAttributes} table. The similarity
in methodology is intentional, as it allows new classifications for the
previously ``missing'' sources to be incorporated into the PS1 PSC without a
need for significant revisions to existing filters that are applied to the ZTF
alert stream. We find that the new model performs $\sim$6--8\% worse than the
one presented in \citet[][see Table~\ref{tbl:thresh}]{Tachibana18}.
Nevertheless, the slight degradation in performance is more than offset by the
addition of $>$144 million newly classified sources. The update to the PS1 PSC
presented here will improve the extragalactic transient search efficiency for
ZTF.
Spectroscopic observations from SDSS have now fueled the training sets for
machine learning models to separate stars and galaxies for more than a decade
\citep[e.g.,][]{Ball06,Beck20}. These labels have proven extremely valuable as
they have been applied to several surveys beyond SDSS
\citep[e.g.,][]{Miller17,Beck20}. Our ability to use methods built on
empirical training sets is going to be severely limited by the Vera C.\ Rubin
Observatory, whose images will be predominantly populated by extremely faint
sources \citep[$r \approx 24$\,mag;][]{Ivezic19}. With few spectroscopic
classifications of any kind at these depths, the separation of stars and
galaxies in Rubin Observatory data is going to largely rely on data from the
Rubin Observatory itself. In this regime machine learning is unlikely to play
a leading role, and purely photometric methods will be required to separate
stars and galaxies \citep[e.g.,][]{Slater20} and triage the Rubin Observatory
alert stream to remove stellar variables prior to the search for extragalactic
transients.
\acknowledgments
This work would not have been possible without the public release of the PS1
data. We thank F.~Masci and R.~Laher for helping us identify sources that were
not classified in the ZTF \texttt{Stars} table. We thank the anonymous referee for comments that improved this manuscript.
A.A.M.~is funded by the Large Synoptic Survey Telescope Corporation (LSSTC),
the Brinson Foundation, and the Moore Foundation in support of the LSSTC Data
Science Fellowship Program; he also receives support as a CIERA Fellow by the
CIERA Postdoctoral Fellowship Program (Center for Interdisciplinary
Exploration and Research in Astrophysics, Northwestern University). X.H.~is
supported by LSSTC, through Enabling Science Grant \#2020-01.
% \vspace{5mm}
\facilities{PS1 \citep{Chambers16}}
%% Similar to \facility{}, there is the optional \software command to allow
%% authors a place to specify which programs were used during the creation of
%% the manuscript. Authors should list each code and include either a
%% citation or url to the code inside ()s when available.
\software{\texttt{astropy} \citep{Astropy-Collaboration13,
Astropy-Collaboration18},
\texttt{scipy} \citep{2020SciPy-NMeth},
\texttt{matplotlib} \citep{Hunter07},
\texttt{pandas} \citep{McKinney10},
\texttt{scikit-learn} \citep{Pedregosa11}}
\appendix
\section{The ZTF--PS1 Morphological Catalog}\label{app:cat_counts}
The ZTF database contains a table (\texttt{Stars}) with sources selected from
the PS1 DR1 that are used to provide morphological classifications in the ZTF
alert packets. The ZTF \texttt{Stars} table was seeded from the PS1
\texttt{MeanObject} table and includes all PS1 \texttt{MeanObject} sources
with $\mathtt{nDetections} \ge 3$.\footnote{Immediately after the release of
PS1 DR1 it was recommended that sources detected on at least three individual
PS1 images were unlikely to be spurious. Hence, the use of this selection cut
for the ZTF \texttt{Stars} table.} There are 1,919,106,844 sources in the ZTF
\texttt{Stars} table. Of these, 1,484,281,394 are classified in the PS1 PSC
and another 8,520,167 are classified as point sources based on \textit{Gaia}
parallax and/or proper motion measurements \citep{Tachibana18}. Therefore,
there are 426,305,283 sources in the ZTF \texttt{Stars} table that did not
meet the quality cuts necessary to be included in the PS1 PSC.\footnote{Only
sources with a single row designated as the \texttt{primaryDetection} in the
PS1 \texttt{StackObjectAttributes} table and a stack ``detection''
\citep[i.e., the PSF, Kron, and aperture flux are all $>0$ in at least one
filter, see][]{Tachibana18} are included in the PS1 PSC. }
For the $\sim$426 million ZTF \texttt{Stars} table sources not in the PS1 PSC,
5,885,633 had multiple rows in the PS1 \texttt{StackObjectAttributes} table
with $\mathtt{primaryDetection} = 1$, while the rest were not ``detected'' in
the PS1 stacks. As described in \S\ref{sec:ps1psc_update}, 144,870,754 of the
previously ``missing'' sources pass our \texttt{ForcedMeanObject}
``detection'' criteria (see \S\ref{sec:training_set}) and are now included in
the PS1 PSC.
The remaining $\sim$281 million sources do not have reliable PS1 stack or
forced photometry, and as a result remain in the ZTF \texttt{Stars} table with
an ambiguous score of 0.5. About 8\% of the still unclassified ZTF
\texttt{Stars} table sources are not present in PS1 DR2 (mostly because they
have declination $\delta < -30$\,deg).\footnote{See
\url{https://outerspace.stsci.edu/display/PANSTARRS/PS1+DR2+caveats\#PS1DR2caveats-Missingdata} for more information.} Furthermore, $\sim$34\% of these
$\sim$281 million sources have $\mathtt{nDetections} = 3$, and $\sim$55\% have
$\mathtt{nDetections} \le 5$. That these sources have so few detections in PS1
increases the probability that they may be spurious, and even if they are not
spurious, they are otherwise very low S/N detections, which do not produce
highly confident classifications.
\bibliographystyle{aas_arxiv.bst}
\bibliography{/Users/adamamiller/Documents/tex_stuff/papers}
%% Include this line if you are using the \added, \replaced, \deleted
%% commands to see a summary list of all changes at the end of the article.
%\listofchanges
\end{document}
% End of file `sample63.tex'.