Skip to content

Commit

Permalink
New NASA syntax (#37)
Browse files Browse the repository at this point in the history
* CI, bump python-setup to v4

* Trying to isolate the problem

* Rolling 3.8 out, adding 3.11

* Skip all failing tests, and start tracking down

* Optional dependencies for development & testing

* Cleaning pytest in setup.cfg

* Adding dev optional-dependencies in GA

* Re-activating one test at a time

* Using python-CMR instead of builtin

I created this module before python-CMR. Let's move to that one instead
of my builtin. The API keeps changing, so hopefully they will keep that
library up to date.

* style: Improving style and updating syntax

* fix: Updating test for new filename syntax

* fix: New Aqua filename syntax

* fix: Renaming TERRA products with new name standard

* fix: More on TERRA filename standard

* fix: AQUA-L2 new filename pattern

* Temporary skip S3Storage test

* Temporary off this test

* Re-activating some tests with updated filenames

* fix: Updating VIIRS filenames

* fix: Updating another TERRA outdated filename

* test: Reactivating tests and updating those

* Updating test files to be cached

* style: Updating syntax

* style: Updating syntax

* Adding dask as requirement for parallel processing

* style: Fixing syntax

* doc: Updating expected filename syntax

* Adding pre-commit config

* Forget about flake8

* fix: Typo on formatted string

* info: Adding more log info to bloom_filter()

* Removing unnecessary check
  • Loading branch information
castelao authored Oct 8, 2023
1 parent 343de1c commit 7255f44
Show file tree
Hide file tree
Showing 19 changed files with 390 additions and 211 deletions.
18 changes: 9 additions & 9 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ jobs:
strategy:
max-parallel: 1
matrix:
python-version: ["3.8", "3.9", "3.10"]
python-version: ["3.9", "3.10", "3.11"]

steps:
- name: Install System requirements
Expand All @@ -29,7 +29,7 @@ jobs:
fetch-depth: 0

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}

Expand All @@ -46,19 +46,19 @@ jobs:
python -m pip install --upgrade pip
pip install flake8 pytest
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
pip install -e .[parallel,s3]
pip install -e .[dev,parallel,s3]
- name: Cache sample data
id: cache-samples
uses: actions/cache@v3
with:
path: |
MODIS-Aqua/L3m/2017/012/A2017012.L3m_DAY_CHL_chlor_a_4km.nc
MODIS-Aqua/L2/2016/244/A2016244221000.L2_LAC_OC.nc
MODIS-Aqua/L2/2016/245/A2016245211500.L2_LAC_OC.nc
MODIS-Aqua/L2/2017/012/A2017012213500.L2_LAC_OC.nc
MODIS-Terra/L3m/2004/006/T2004006.L3m_DAY_CHL_chlor_a_4km.nc
VIIRS-SNPP/L2/2017/013/V2017013002400.L2_SNPP_OC.nc
MODIS-Aqua/L3m/2017/01/12/AQUA_MODIS.20170112.L3m.DAY.CHL.chlor_a.4km.nc
MODIS-Aqua/L2/2016/08/31/AQUA_MODIS.20160831T221001.L2.OC.nc
MODIS-Aqua/L2/2016/09/01/AQUA_MODIS.20160901T211500.L2.OC.nc
MODIS-Aqua/L2/2017/01/12/AQUA_MODIS.20170112T213500.L2.OC.nc
MODIS-Terra/L3m/2004/01/06/TERRA_MODIS.20040106.L3m.DAY.CHL.chlor_a.4km.nc
VIIRS-SNPP/L2/2017/01/13/SNPP_VIIRS.20170113T002400.L2.OC.nc
key: ${{ runner.os }}-CHL

- name: Test with pytest
Expand Down
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -103,4 +103,4 @@ ENV/
.mypy_cache/

# IDE settings
.vscode/
.vscode/
33 changes: 33 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# See https://pre-commit.com for more information
# See https://pre-commit.com/hooks.html for more hooks
default_language_version:
python: python3.10
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.3.0
hooks:
- id: check-ast
- id: check-docstring-first
- id: check-merge-conflict
- id: check-yaml
- id: check-toml
- id: debug-statements
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-added-large-files
- repo: https://github.com/asottile/pyupgrade
rev: v2.38.2
hooks:
- id: pyupgrade
args: [--py39-plus]
- repo: https://github.com/psf/black
rev: 22.8.0
hooks:
- id: black
args: [ --safe ]
exclude: docs/conf.py
- repo: https://github.com/charliermarsh/ruff-pre-commit
# Ruff version.
rev: 'v0.0.237'
hooks:
- id: ruff
53 changes: 30 additions & 23 deletions OceanColor/backend/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def __setitem__(self, index, value):
raise NotImplementedError("Missing __setitem__ for this Backend")


class FileSystem(object):
class FileSystem:
"""Backend for OceanColorDB based on files and directories
A file system backend for OceanColorDB to save the data files in
Expand Down Expand Up @@ -197,8 +197,7 @@ def __getitem__(self, index):
return ds

def __setitem__(self, index, ds):
"""Saves Dataset ds identified by index
"""
"""Saves Dataset ds identified by index"""
if not isinstance(ds, xr.Dataset):
self.logger.warn("Trying to save a non xr.Dataset object")
raise ValueError
Expand All @@ -216,7 +215,7 @@ def path(self, product_name: str):
return p.replace(".nc", ".zarr")


class Filename(object):
class Filename:
"""Parse implicit information on NASA's filename
NASA's data filename, and granules, follows a logical standard that can be
Expand Down Expand Up @@ -249,23 +248,23 @@ def mission(self):

if attrs["platform"] == "S":
return "SeaWIFS"
elif attrs["platform"] == "A":
elif attrs["platform"] == "AQUA_MODIS":
return "MODIS-Aqua"
elif attrs["platform"] == "T":
elif attrs["platform"] == "TERRA_MODIS":
return "MODIS-Terra"
elif attrs["platform"] == "V":
if attrs["instrument"] == "JPSS1":
return "VIIRS-JPSS1"
elif attrs["instrument"] == "SNPP":
return "VIIRS-SNPP"
elif attrs["platform"] == "JPSS1_VIIRS":
return "VIIRS-JPSS1"
elif attrs["platform"] == "SNPP_VIIRS":
return "VIIRS-SNPP"

@property
def dirname(self):
path = os.path.join(
self.mission,
self.attrs["mode"],
self.attrs["year"],
self.attrs["doy"],
self.attrs["month"],
self.attrs["day"],
)
return path

Expand Down Expand Up @@ -296,23 +295,31 @@ def parse_filename(filename: str):
Notes
-----
Examples of possible files:
- S2002006003729.L2_[GAC_IOP|GAC_OC|MLAC_OC].nc
- S2001006.L3m_DAY_[CHL_chlor_a|CHL_chl_ocx|ZLEE_Zeu_lee]_9km.nc
- A2011010000000.L2[_LAC_OC|_LAC_IOP|SST|SST4].nc
- T2004006.L3m[_DAY_CHL_chlor_a|_DAY_CHL_chl_ocx]_[4|9]km.nc
- SNPP_VIIRS.20190501T101200.L2.OC.nc
- SNPP_VIIRS.20190514.L3m.DAY.[CHL.chlor_a].[4|9]km.nc
- AQUA_MODIS.20190501T100501.L2.OC.nc
- TERRA_MODIS.20040106.L3m[.DAY.CHL.chlor_a|.DAY.CHL.chl_ocx].[4|9]km.nc
- V2018007000000.L2_SNPP_OC.nc
- V2015009.L3m_DAY_SNPP_CHL_chlor_a_4km.nc
- V2018006230000.L2_JPSS1_OC.nc
"""
rule = r"""
(?P<platform>[S|A|T|V])
(?P<platform>S|(?:SNPP_VIIRS)|(?:JPSS1_VIIRS)|(?:AQUA_MODIS)|(?:TERRA_MODIS))
.
(?P<year>\d{4})
(?P<doy>\d{3})
(?P<time>\d+)?
\.
(?P<month>\d{2})
(?P<day>\d{2})
(?:
T
(?P<hour>\d{2})
(?P<minute>\d{2})
(?P<second>\d{2})
)?
.
(?P<mode>(L2)|(L3m))
(?:_DAY)?
_ (?P<instrument>(?:SNPP)|(?:JPSS1))?
(?:.DAY)?
.
(?P<instrument>(?:SNPP)|(?:JPSS1))?
.*?
\.nc
"""
Expand All @@ -330,7 +337,7 @@ class InMemory(BaseStorage):

__data = OrderedDict()

def __init__(self, quota: int = 5 * 1024 ** 3):
def __init__(self, quota: int = 5 * 1024**3):
"""Initialize an InMemory object
Parameters
Expand Down
32 changes: 20 additions & 12 deletions OceanColor/catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@

import logging
import os
from typing import Any, Dict, Optional, Sequence
from typing import Any, Dict, Optional
from collections.abc import Sequence

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -35,7 +36,7 @@ def ds_attrs(ds):
return output


class Catalog(object):
class Catalog:
"""
ToDo
Expand All @@ -49,10 +50,12 @@ class Catalog(object):
"""

def __init__(self, dbfilename):
self.store = pd.HDFStore(dbfilename, mode="a", complevel=9, fletcher32=True)
self.store = pd.HDFStore(
dbfilename, mode="a", complevel=9, fletcher32=True
)

def __getitem__(self, product_name):
record = self.store.select("catalog", "index == '{}'".format(product_name))
record = self.store.select("catalog", f"index == '{product_name}'")
if record.size == 0:
raise KeyError

Expand All @@ -76,7 +79,9 @@ def __setitem__(self, key, value):
self.store.append("catalog", value, format="t", data_columns=True)

def __del__(self):
module_logger.debug("Closing Catalog's storage: {}".format(self.store.filename))
module_logger.debug(
f"Closing Catalog's storage: {self.store.filename}"
)
# self.store.flush()
self.store.close()

Expand All @@ -88,18 +93,22 @@ def record(self, ds):
assert attrs["product_name"] not in self, (
"There is a record in the database for %s" % attrs["filename"]
)
module_logger.debug("New record: {}".format(attrs))
module_logger.debug(f"New record: {attrs}")
attrs = pd.DataFrame([attrs])
attrs = attrs.set_index("product_name")
# if ('catalog' in self.store):
# tmp = tmp.set_index(tmp.index + self.store.catalog.index.max() + 1)
self.store.append(
"catalog", attrs, format="t", data_columns=True, min_itemsize={"values": 42}
"catalog",
attrs,
format="t",
data_columns=True,
min_itemsize={"values": 42},
)

def bloom_filter(
self,
track: Sequence[Dict],
track: Sequence[dict],
sensor: Optional[Any] = None,
dtype: Optional[Any] = None,
dt_tol: Optional[Any] = None,
Expand All @@ -113,12 +122,11 @@ def bloom_filter(
cond = []
cond.append("time_coverage_end >= %r" % (track.time.min() - dt_tol))
cond.append("time_coverage_start <= %r" % (track.time.max() + dt_tol))
cond.append("geospatial_lat_max > {}".format(track.lat.min()))
cond.append("geospatial_lat_min > {}".format(track.lat.max()))
cond.append(f"geospatial_lat_max > {track.lat.min()}")
cond.append(f"geospatial_lat_min > {track.lat.max()}")
cond.append(
"(geospatial_lon_min <= {} & geospatial_lon_max >= {}) or (geospatial_lon_max < 0 & geospatial_lon_min > 0)".format(
track.lon.max(), track.lon.min()
)
)
for f in self.store.select("catalog", where=cond).index:
yield f
yield from self.store.select("catalog", where=cond).index
53 changes: 35 additions & 18 deletions OceanColor/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,33 +10,50 @@
# At this point it's just a Proof of concept
# OceanColor InRange --username=myUser --password=myPassword 2019-05-21,15,-38


@click.group()
def main():
"""Console script for OceanColor."""
pass


@main.command(name="InRange")
@click.option('--username', required=True,
help="Username on NASA's EarthData System")
@click.option('--password', required=True,
help="Password on NASA's EarthData System")
@click.option('--sensor', type=click.Choice(['aqua', 'terra']),
default='aqua')
@click.option('--data-type', 'dtype', type=click.Choice(['L2', 'L3m']),
default='L3m')
@click.option('--time-tolerance', 'dt_tol', type=click.INT , default=12,
help='Time difference [hours] tolerance to matchup')
@click.option('--distance-tolerance', 'dL_tol', type=float, default=10e3,
help='Distance difference [m] tolerance to matchup')
@click.argument('track', required=True)
@click.option(
"--username", required=True, help="Username on NASA's EarthData System"
)
@click.option(
"--password", required=True, help="Password on NASA's EarthData System"
)
@click.option("--sensor", type=click.Choice(["aqua", "terra"]), default="aqua")
@click.option(
"--data-type", "dtype", type=click.Choice(["L2", "L3m"]), default="L3m"
)
@click.option(
"--time-tolerance",
"dt_tol",
type=click.INT,
default=12,
help="Time difference [hours] tolerance to matchup",
)
@click.option(
"--distance-tolerance",
"dL_tol",
type=float,
default=10e3,
help="Distance difference [m] tolerance to matchup",
)
@click.argument("track", required=True)
def cli_inrange(username, password, sensor, dtype, dt_tol, dL_tol, track):
time, lat, lon = track.split(',')
track = pd.DataFrame({"time": [np.datetime64(time)],
"lat": [float(lat)],
"lon": [float(lon)]})
time, lat, lon = track.split(",")
track = pd.DataFrame(
{
"time": [np.datetime64(time)],
"lat": [float(lat)],
"lon": [float(lon)],
}
)

dt_tol = np.timedelta64(dt_tol, 'h')
dt_tol = np.timedelta64(dt_tol, "h")
matchup = InRange(username, password, npes=3)
matchup.search(track, sensor, dtype, dt_tol, dL_tol)
for m in matchup:
Expand Down
Loading

0 comments on commit 7255f44

Please sign in to comment.