From 7255f44bcc8d262aa5e66aab9bf5b2f8d0bdec07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Guilherme=20Castel=C3=A3o?= Date: Sat, 7 Oct 2023 19:56:36 -0600 Subject: [PATCH] New NASA syntax (#37) * CI, bump python-setup to v4 * Trying to isolate the problem * Rolling 3.8 out, adding 3.11 * Skip all failing tests, and start tracking down * Optional dependencies for development & testing * Cleaning pytest in setup.cfg * Adding dev optional-dependencies in GA * Re-activating one test at a time * Using python-CMR instead of builtin I created this module before python-CMR. Let's move to that one instead of my builtin. The API keeps changing, so hopefully they will keep that library up to date. * style: Improving style and updating syntax * fix: Updating test for new filename syntax * fix: New Aqua filename syntax * fix: Renaming TERRA products with new name standard * fix: More on TERRA filename standard * fix: AQUA-L2 new filename pattern * Temporary skip S3Storage test * Temporary off this test * Re-activating some tests with updated filenames * fix: Updating VIIRS filenames * fix: Updating another TERRA outdated filename * test: Reactivating tests and updating those * Updating test files to be cached * style: Updating syntax * style: Updating syntax * Adding dask as requirement for parallel processing * style: Fixing syntax * doc: Updating expected filename syntax * Adding pre-commit config * Forget about flake8 * fix: Typo on formatted string * info: Adding more log info to bloom_filter() * Removing unnecessary check --- .github/workflows/ci.yml | 18 +++--- .gitignore | 2 +- .pre-commit-config.yaml | 33 ++++++++++ OceanColor/backend/common.py | 53 +++++++++------- OceanColor/catalog.py | 32 ++++++---- OceanColor/cli.py | 53 ++++++++++------ OceanColor/cmr.py | 31 +++++----- OceanColor/gsfc.py | 16 +++-- OceanColor/inrange.py | 102 ++++++++++++++++++++++--------- OceanColor/storage.py | 9 +-- OceanColor/utils.py | 8 ++- README.rst | 2 +- docs/conf.py | 2 +- pyproject.toml | 
16 ++++- setup.cfg | 10 --- tests/test_cmr.py | 39 ++++++++++-- tests/test_gsfc.py | 16 ++--- tests/test_inrange.py | 115 +++++++++++++++++++++-------------- tests/test_storage.py | 44 ++++++++------ 19 files changed, 390 insertions(+), 211 deletions(-) create mode 100644 .pre-commit-config.yaml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0f57a49..6c9c85b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -16,7 +16,7 @@ jobs: strategy: max-parallel: 1 matrix: - python-version: ["3.8", "3.9", "3.10"] + python-version: ["3.9", "3.10", "3.11"] steps: - name: Install System requirements @@ -29,7 +29,7 @@ jobs: fetch-depth: 0 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} @@ -46,19 +46,19 @@ jobs: python -m pip install --upgrade pip pip install flake8 pytest if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - pip install -e .[parallel,s3] + pip install -e .[dev,parallel,s3] - name: Cache sample data id: cache-samples uses: actions/cache@v3 with: path: | - MODIS-Aqua/L3m/2017/012/A2017012.L3m_DAY_CHL_chlor_a_4km.nc - MODIS-Aqua/L2/2016/244/A2016244221000.L2_LAC_OC.nc - MODIS-Aqua/L2/2016/245/A2016245211500.L2_LAC_OC.nc - MODIS-Aqua/L2/2017/012/A2017012213500.L2_LAC_OC.nc - MODIS-Terra/L3m/2004/006/T2004006.L3m_DAY_CHL_chlor_a_4km.nc - VIIRS-SNPP/L2/2017/013/V2017013002400.L2_SNPP_OC.nc + MODIS-Aqua/L3m/2017/01/12/AQUA_MODIS.20170112.L3m.DAY.CHL.chlor_a.4km.nc + MODIS-Aqua/L2/2016/08/31/AQUA_MODIS.20160831T221001.L2.OC.nc + MODIS-Aqua/L2/2016/09/01/AQUA_MODIS.20160901T211500.L2.OC.nc + MODIS-Aqua/L2/2017/01/12/AQUA_MODIS.20170112T213500.L2.OC.nc + MODIS-Terra/L3m/2004/01/06/TERRA_MODIS.20040106.L3m.DAY.CHL.chlor_a.4km.nc + VIIRS-SNPP/L2/2017/01/13/SNPP_VIIRS.20170113T002400.L2.OC.nc key: ${{ runner.os }}-CHL - name: Test with pytest diff --git a/.gitignore b/.gitignore index 
c545b16..c9fc5ae 100644 --- a/.gitignore +++ b/.gitignore @@ -103,4 +103,4 @@ ENV/ .mypy_cache/ # IDE settings -.vscode/ \ No newline at end of file +.vscode/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..d9795ed --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,33 @@ +# See https://pre-commit.com for more information +# See https://pre-commit.com/hooks.html for more hooks +default_language_version: + python: python3.10 +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.3.0 + hooks: + - id: check-ast + - id: check-docstring-first + - id: check-merge-conflict + - id: check-yaml + - id: check-toml + - id: debug-statements + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-added-large-files +- repo: https://github.com/asottile/pyupgrade + rev: v2.38.2 + hooks: + - id: pyupgrade + args: [--py39-plus] +- repo: https://github.com/psf/black + rev: 22.8.0 + hooks: + - id: black + args: [ --safe ] + exclude: docs/conf.py +- repo: https://github.com/charliermarsh/ruff-pre-commit + # Ruff version. 
+ rev: 'v0.0.237' + hooks: + - id: ruff diff --git a/OceanColor/backend/common.py b/OceanColor/backend/common.py index acc2e62..fdbdd2c 100644 --- a/OceanColor/backend/common.py +++ b/OceanColor/backend/common.py @@ -57,7 +57,7 @@ def __setitem__(self, index, value): raise NotImplementedError("Missing __setitem__ for this Backend") -class FileSystem(object): +class FileSystem: """Backend for OceanColorDB based on files and directories A file system backend for OceanColorDB to save the data files in @@ -197,8 +197,7 @@ def __getitem__(self, index): return ds def __setitem__(self, index, ds): - """Saves Dataset ds identified by index - """ + """Saves Dataset ds identified by index""" if not isinstance(ds, xr.Dataset): self.logger.warn("Trying to save a non xr.Dataset object") raise ValueError @@ -216,7 +215,7 @@ def path(self, product_name: str): return p.replace(".nc", ".zarr") -class Filename(object): +class Filename: """Parse implicit information on NASA's filename NASA's data filename, and granules, follows a logical standard that can be @@ -249,15 +248,14 @@ def mission(self): if attrs["platform"] == "S": return "SeaWIFS" - elif attrs["platform"] == "A": + elif attrs["platform"] == "AQUA_MODIS": return "MODIS-Aqua" - elif attrs["platform"] == "T": + elif attrs["platform"] == "TERRA_MODIS": return "MODIS-Terra" - elif attrs["platform"] == "V": - if attrs["instrument"] == "JPSS1": - return "VIIRS-JPSS1" - elif attrs["instrument"] == "SNPP": - return "VIIRS-SNPP" + elif attrs["platform"] == "JPSS1_VIIRS": + return "VIIRS-JPSS1" + elif attrs["platform"] == "SNPP_VIIRS": + return "VIIRS-SNPP" @property def dirname(self): @@ -265,7 +263,8 @@ def dirname(self): self.mission, self.attrs["mode"], self.attrs["year"], - self.attrs["doy"], + self.attrs["month"], + self.attrs["day"], ) return path @@ -296,23 +295,31 @@ def parse_filename(filename: str): Notes ----- Examples of possible files: - - S2002006003729.L2_[GAC_IOP|GAC_OC|MLAC_OC].nc - - 
S2001006.L3m_DAY_[CHL_chlor_a|CHL_chl_ocx|ZLEE_Zeu_lee]_9km.nc - - A2011010000000.L2[_LAC_OC|_LAC_IOP|SST|SST4].nc - - T2004006.L3m[_DAY_CHL_chlor_a|_DAY_CHL_chl_ocx]_[4|9]km.nc + - SNPP_VIIRS.20190501T101200.L2.OC.nc + - SNPP_VIIRS.20190514.L3m.DAY.[CHL.chlor_a].[4|9]km.nc + - AQUA_MODIS.20190501T100501.L2.OC.nc + - TERRA_MODIS.20040106.L3m[.DAY.CHL.chlor_a|.DAY.CHL.chl_ocx].[4|9]km.nc - V2018007000000.L2_SNPP_OC.nc - V2015009.L3m_DAY_SNPP_CHL_chlor_a_4km.nc - V2018006230000.L2_JPSS1_OC.nc """ rule = r""" - (?P[S|A|T|V]) + (?PS|(?:SNPP_VIIRS)|(?:JPSS1_VIIRS)|(?:AQUA_MODIS)|(?:TERRA_MODIS)) + . (?P\d{4}) - (?P\d{3}) - (?P