catalyst-cooperative · zaneselvans · Aug 29, 2023 · Aug 19, 2023 · Aug 21, 2023 · Aug 21, 2023
diff --git a/README.rst b/README.rst
@@ -124,7 +124,7 @@ Want access to all the published data in bulk? If you're familiar with Python
 and `Jupyter Notebooks <https://jupyter.org/>`__ and are willing to install Docker you
 can:
 
-* `Download a PUDL data release <https://sandbox.zenodo.org/record/764696>`__ from
+* `Download a PUDL data release <https://zenodo.org/record/3653158>`__ from
   CERN's `Zenodo <https://zenodo.org>`__ archiving service.
 * `Install Docker <https://docs.docker.com/get-docker/>`__
 * Run the archived image using ``docker-compose up``

diff --git a/docs/dev/datastore.rst b/docs/dev/datastore.rst
@@ -38,15 +38,17 @@ For more detailed usage information, see:
     $ pudl_datastore --help
 
 The downloaded data will be used by the script to populate a datastore under
-the ``data`` directory in your workspace, organized by data source, form, and
-date::
+your ``$PUDL_INPUT`` directory, organized by data source, form, and DOI::
 
     data/censusdp1tract/
     data/eia860/
+    data/eia860m/
     data/eia861/
     data/eia923/
     data/epacems/
     data/ferc1/
+    data/ferc2/
+    data/ferc60/
     data/ferc714/
 
 If the download fails to complete successfully, the script can be run repeatedly until
@@ -64,28 +66,13 @@ archival and versioning of datasets. See the `documentation
 for information on adding datasets to the datastore.
 
 
-Prepare the Datastore
-^^^^^^^^^^^^^^^^^^^^^
+Tell PUDL about the archive
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-If you have used pudl-archiver to prepare a Zenodo archive as above, you
-can add support for your archive to the datastore by adding the DOI to
-pudl.workspace.datastore.DOI, under "sandbox" or "production" as appropriate.
-
-If you want to prepare an archive for the datastore separately, the following
-are required.
-
-#. The root path must contain a ``datapackage.json`` file that conforms to the
-`frictionless datapackage spec <https://specs.frictionlessdata.io/data-package/>`__
-#. Each listed resource among the ``datapackage.json`` resources must include:
-
-   * ``path`` containing the zenodo download url for the specific file.
-   * ``remote_url`` with the same url as the ``path``
-   * ``name`` of the file
-   * ``hash`` with the md5 hash of the file
-   * ``parts`` a set of key / value pairs defining additional attributes that
-     can be used to select a subset of the whole datapackage. For example, the
-     ``epacems`` dataset is partitioned by year and state, and
-     ``"parts": {"year": 2010, "state": "ca"}`` would indicate that the
-     resource contains data for the state of California in the year 2010.
-     Unpartitioned datasets like the ``ferc714`` which includes all years in
-     a single file, would have an empty ``"parts": {}``
+Once you have used pudl-archiver to prepare a Zenodo archive as above, you
+can make the PUDL Datastore aware of it by updating the appropriate DOI in
+:class:`pudl.workspace.datastore.ZenodoFetcher`. DOIs can refer to resources from the
+`Zenodo sandbox server <https://sandbox.zenodo.org>`__ for archives that are still in
+testing or development (sandbox DOIs have a prefix of ``10.5072``), or the
+`Zenodo production server <https://zenodo.org>`__ if the archive is ready for
+public use (production DOIs have a prefix of ``10.5281``).
diff --git a/docs/dev/testing.rst b/docs/dev/testing.rst
@@ -304,7 +304,6 @@ You can always check to see what custom flags exist by running
                         Path to a non-standard ETL settings file to use.
   --gcs-cache-path=GCS_CACHE_PATH
                         If set, use this GCS path as a datastore cache layer.
-  --sandbox             Use raw inputs from the Zenodo sandbox server.
 
 The main flexibility that these custom options provide is in selecting where
 the raw input data comes from and what data the tests should be run

diff --git a/src/pudl/cli/etl.py b/src/pudl/cli/etl.py
@@ -45,12 +45,6 @@ def parse_command_line(argv):
     parser.add_argument(
         dest="settings_file", type=str, default="", help="path to ETL settings file."
     )
-    parser.add_argument(
-        "--sandbox",
-        action="store_true",
-        default=False,
-        help="Use the Zenodo sandbox rather than production",
-    )
     parser.add_argument(
         "--logfile",
         default=None,
@@ -156,7 +150,6 @@ def main():
                 "dataset_settings": {"config": dataset_settings_config},
                 "datastore": {
                     "config": {
-                        "sandbox": args.sandbox,
                         "gcs_cache_path": args.gcs_cache_path
                         if args.gcs_cache_path
                         else "",

diff --git a/src/pudl/ferc_to_sqlite/cli.py b/src/pudl/ferc_to_sqlite/cli.py
@@ -52,12 +52,6 @@ def parse_command_line(argv):
         fail.""",
         default=False,
     )
-    parser.add_argument(
-        "--sandbox",
-        action="store_true",
-        default=False,
-        help="Use the Zenodo sandbox rather than production",
-    )
     parser.add_argument(
         "-b",
         "--batch-size",
@@ -155,7 +149,6 @@ def main():  # noqa: C901
                 },
                 "datastore": {
                     "config": {
-                        "sandbox": args.sandbox,
                         "gcs_cache_path": args.gcs_cache_path
                         if args.gcs_cache_path
                         else "",

diff --git a/src/pudl/metadata/classes.py b/src/pudl/metadata/classes.py
@@ -949,7 +949,6 @@ def get_temporal_coverage(self, partitions: dict = None) -> str:
     def add_datastore_metadata(self) -> None:
         """Get source file metadata from the datastore."""
         dp_desc = Datastore(
-            sandbox=False,
             local_cache_path=PudlPaths().data_dir,
             gcs_cache_path="gs://zenodo-cache.catalyst.coop",
         ).get_datapackage_descriptor(self.name)

diff --git a/src/pudl/resources.py b/src/pudl/resources.py
@@ -39,18 +39,12 @@ def ferc_to_sqlite_settings(init_context) -> FercToSqliteSettings:
             description="If enabled, the local file cache for datastore will be used.",
             default_value=True,
         ),
-        "sandbox": Field(
-            bool,
-            description="Use the Zenodo sandbox rather than production",
-            default_value=False,
-        ),
     },
 )
 def datastore(init_context) -> Datastore:
     """Dagster resource to interact with Zenodo archives."""
     ds_kwargs = {}
     ds_kwargs["gcs_cache_path"] = init_context.resource_config["gcs_cache_path"]
-    ds_kwargs["sandbox"] = init_context.resource_config["sandbox"]
 
     if init_context.resource_config["use_local_cache"]:
         # TODO(rousik): we could also just use PudlPaths().input_dir here, because