Updating things for DR3 (#29)

* some major updates for dr3
* updating the semiamp inference
* updates to get things working on the cluster
* [pre-commit.ci] auto fixes from pre-commit.com hooks
  for more information, see https://pre-commit.ci
* running on cluster
* [pre-commit.ci] auto fixes from pre-commit.com hooks
  for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
one-datum · Feb 11, 2023 · 87c4521 · 87c4521
1 parent 12b2c19
commit 87c4521
Show file tree

Hide file tree

Showing 32 changed files with 943 additions and 622 deletions.
diff --git a/.gitignore b/.gitignore
@@ -286,3 +286,4 @@ build
 localconfig.yaml
 src/one-datum/
 remote
+/*.ipynb
diff --git a/config/config.yaml b/config/config.yaml
@@ -1,4 +1,4 @@
-dataset_name: "edr3"
+dataset_name: "dr3"
 base_table_filename: "base.fits.gz"
 noise_config_file: "config/noise.yaml"
 results_basedir: "results"

diff --git a/config/noise.yaml b/config/noise.yaml
@@ -7,11 +7,11 @@ min_nb_transits: 3
 # The color and magnitude grid specification
 min_color: 0.0
 max_color: 5.5
-num_color: 20
+num_color: 45
 
 min_mag: 4.5
 max_mag: 16.0
-num_mag: 18
+num_mag: 30
 
 # The number of targets to fit at once
 targets_per_fit: 1000
@@ -20,4 +20,4 @@ targets_per_fit: 1000
 num_optim: 20000
 
 # The number of random subsets to execute per inference
-num_iter: 5
+num_iter: 50
diff --git a/workflow/envs/baseline.yml b/workflow/envs/baseline.yml
@@ -0,0 +1,19 @@
+channels:
+  - conda-forge
+dependencies:
+  - python =3.10
+  - git
+  - curl
+  - tar
+  - zip
+  - open-fonts
+  - numpy
+  - scipy
+  - matplotlib
+  - pyyaml
+  - fitsio
+  - astropy
+  - astroquery
+  - pip
+  - pip:
+      - kepler.py
diff --git a/workflow/envs/environment.yml b/workflow/envs/environment.yml
diff --git a/workflow/envs/figures.yml b/workflow/envs/figures.yml
@@ -0,0 +1,8 @@
+channels:
+  - conda-forge
+dependencies:
+  - python =3.10
+  - open-fonts
+  - numpy
+  - matplotlib
+  - astropy
diff --git a/workflow/envs/inference.yml b/workflow/envs/inference.yml
@@ -0,0 +1,14 @@
+channels:
+  - conda-forge
+dependencies:
+  - python =3.10
+  - numpy
+  - scipy
+  - astropy
+  - tqdm
+  - pip
+  - pip:
+    - -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
+    - jax[cpu]; sys_platform != 'linux'
+    - jax[cuda]; sys_platform == 'linux'
+    - tensorflow-probability
diff --git a/workflow/envs/munge.yml b/workflow/envs/munge.yml
@@ -0,0 +1,7 @@
+channels:
+  - conda-forge
+dependencies:
+  - python =3.10
+  - astropy
+  - h5py
+  - tqdm
diff --git a/workflow/envs/noise.yml b/workflow/envs/noise.yml
@@ -0,0 +1,17 @@
+channels:
+  - conda-forge
+dependencies:
+  - python =3.10
+  - open-fonts
+  - numpy
+  - scipy
+  - matplotlib
+  - astropy
+  - fitsio
+  - pyyaml
+  - tqdm
+  - pip
+  - pip:
+      - tinygp
+      - jaxopt
+      - jax[cpu]
diff --git a/workflow/envs/remote.yml b/workflow/envs/remote.yml
@@ -0,0 +1,4 @@
+channels:
+  - conda-forge
+dependencies:
+  - curl
diff --git a/workflow/rules/archive.smk b/workflow/rules/archive.smk
@@ -1,21 +1,22 @@
 import json
 
-rule base:
-    output:
-        get_results_filename("base.fits.gz")
-    params:
-        gaia=json.dumps(config["gaia"])
-    conda:
-        "../envs/environment.yml"
-    log:
-        get_log_filename("base.log")
-    shell:
-        """
-        python workflow/scripts/query.py \\
-            --output {output} \\
-            --gaia-creds '{params.gaia}' \\
-            &> {log}
-        """
+# # This rule is no longer used since we download the query from Zenodo
+# rule base:
+#     output:
+#         get_results_filename("base.fits.gz")
+#     params:
+#         gaia=json.dumps(config["gaia"])
+#     conda:
+#         "../envs/baseline.yml"
+#     log:
+#         get_log_filename("base.log")
+#     shell:
+#         """
+#         python workflow/scripts/query.py \\
+#             --output {output} \\
+#             --gaia-creds '{params.gaia}' \\
+#             &> {log}
+#         """
 
 rule sb9:
     output:
@@ -27,7 +28,7 @@ rule sb9:
     params:
         gaia=json.dumps(config["gaia"])
     conda:
-        "../envs/environment.yml"
+        "../envs/baseline.yml"
     log:
         get_log_filename("sb9.log")
     shell:

diff --git a/workflow/rules/deposit.smk b/workflow/rules/deposit.smk
@@ -14,7 +14,7 @@ rule archive_directory:
     wildcard_constraints:
         archive="[a-zA-Z]+"
     conda:
-        "../envs/environment.yml"
+        "../envs/baseline.yml"
     log:
         get_log_filename("{archive}.log")
     shell:
@@ -27,7 +27,7 @@ rule archive_copy:
     output:
         get_results_filename("archive/{filename}")
     conda:
-        "../envs/environment.yml"
+        "../envs/baseline.yml"
     log:
         get_log_filename("archive/{filename}.log")
     shell:
@@ -42,7 +42,7 @@ rule archive_zip:
     wildcard_constraints:
         archive="[a-zA-Z]+"
     conda:
-        "../envs/environment.yml"
+        "../envs/baseline.yml"
     log:
         get_log_filename("{archive}.zip.log")
     shell:
@@ -59,7 +59,7 @@ rule deposit_results:
         creds=config["zenodo"],
         metadata="workflow/metadata/{target}.yaml"
     conda:
-        "../envs/environment.yml"
+        "../envs/baseline.yml"
     log:
         get_log_filename("{target}.zenodo.log")
     shell:

diff --git a/workflow/rules/figures.smk b/workflow/rules/figures.smk
@@ -12,7 +12,7 @@ FIGURES = [
 
 rule figures_completeness:
     input:
-        get_results_filename("simulations/inferred.fits.gz")
+        get_results_filename("simulations/catalog.fits.gz")
     output:
         report(
             get_results_filename("figures/completeness.pdf"),
@@ -21,7 +21,7 @@ rule figures_completeness:
     params:
         threshold=config["det_pval_thresh"]
     conda:
-        "../envs/environment.yml"
+        "../envs/figures.yml"
     log:
         get_log_filename("figures/completeness.log")
     shell:
@@ -44,7 +44,7 @@ rule figures_recovered:
     params:
         threshold=config["det_pval_thresh"]
     conda:
-        "../envs/environment.yml"
+        "../envs/figures.yml"
     log:
         get_log_filename("figures/recovered.log")
     shell:
@@ -65,7 +65,7 @@ rule figures_noise_model:
             category="Noise model",
         )
     conda:
-        "../envs/environment.yml"
+        "../envs/figures.yml"
     log:
         get_log_filename("figures/noise_model.log")
     shell:
@@ -85,7 +85,7 @@ rule figures_sigma_cmd:
             category="Catalog",
         )
     conda:
-        "../envs/environment.yml"
+        "../envs/figures.yml"
     log:
         get_log_filename("figures/sigma_cmd.log")
     shell:
@@ -107,7 +107,7 @@ rule figures_binary_fraction_cmd:
     params:
         threshold=config["det_pval_thresh"]
     conda:
-        "../envs/environment.yml"
+        "../envs/figures.yml"
     log:
         get_log_filename("figures/binary_fraction_cmd.log")
     shell:
@@ -130,7 +130,7 @@ rule figures_p_value_dist:
     params:
         threshold=config["det_pval_thresh"]
     conda:
-        "../envs/environment.yml"
+        "../envs/figures.yml"
     log:
         get_log_filename("figures/p_value_dist.log")
     shell:

diff --git a/workflow/rules/inference.smk b/workflow/rules/inference.smk
@@ -4,7 +4,7 @@ rule inference:
     output:
         get_results_filename("inference/inferred.fits.gz")
     conda:
-        "../envs/environment.yml"
+        "../envs/inference.yml"
     log:
         get_log_filename("inference.log")
     shell:

diff --git a/workflow/rules/noise.smk b/workflow/rules/noise.smk
@@ -4,13 +4,13 @@ with open(config["noise_config_file"], "r") as f:
 
 rule noise_infer:
     input:
-        get_results_filename(config["base_table_filename"])
+        get_remote_filename(config["base_table_filename"])
     output:
         get_results_filename("noise/infer-{n}.fits")
     params:
         config=config["noise_config_file"],
     conda:
-        "../envs/environment.yml"
+        "../envs/noise.yml"
     log:
         get_log_filename("noise/infer-{n}.log")
     shell:
@@ -35,7 +35,7 @@ rule noise_combine:
     params:
         config=config["noise_config_file"],
     conda:
-        "../envs/environment.yml"
+        "../envs/noise.yml"
     log:
         get_log_filename("noise/combine.log")
     shell:
@@ -51,37 +51,40 @@ rule noise_postprocess:
     input:
         get_results_filename("noise/combine.fits")
     output:
-        get_results_filename("noise/process.fits")
+        grid=get_results_filename("noise/process.fits"),
+        gp=get_results_filename("noise/gp.pkl")
     params:
         color_smooth=config["noise"]["color_smoothing_scale"],
         mag_smooth=config["noise"]["mag_smoothing_scale"]
     conda:
-        "../envs/environment.yml"
+        "../envs/noise.yml"
     log:
         get_log_filename("noise/process.log")
     shell:
         """
         python workflow/scripts/noise/process.py \\
-            --input {input} --output {output} \\
+            --input {input} \\
+            --output-grid {output.grid} \\
+            --output-gp {output.gp} \\
             --color-smooth {params.color_smooth} \\
             --mag-smooth {params.mag_smooth} \\
             &> {log}
         """
 
 rule noise_apply:
     input:
-        noise_model=get_results_filename("noise/process.fits"),
-        base_table=get_results_filename(config["base_table_filename"])
+        gp=get_results_filename("noise/gp.pkl"),
+        base_table=get_remote_filename(config["base_table_filename"])
     output:
         get_results_filename("noise/apply.fits.gz")
     conda:
-        "../envs/environment.yml"
+        "../envs/noise.yml"
     log:
         get_log_filename("noise/apply.log")
     shell:
         """
         python workflow/scripts/noise/apply.py \\
-            --noise-model {input.noise_model} \\
+            --gp {input.gp} \\
             --input {input.base_table} \\
             --output {output} \\
             &> {log}