diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 82c2b0f..b8cb51a 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -1,4 +1,4 @@ -name: Build image +name: Build py-rocket-base image on: workflow_dispatch: null @@ -6,52 +6,78 @@ on: branches: - main paths: - - 'apt2.txt' + - 'apt.txt' + - 'user-dirs.dirs' + - 'vscode-extensions.txt' - 'environment.yml' - 'start' - 'postBuild' - - 'appendix' - - 'rocker.sh' - - 'desktop.sh' + - 'Dockerfile' - 'custom_jupyter_server_config.json' - 'scripts/**' - '.github/workflows/build.yaml' - '!book/**' # Ignore all changes in the /book directory - '!docs/**' # Ignore all changes in the /docs directory + - '!base-image/**' - '!README.md' - '!conda-lock.yml' - '!LICENSE' jobs: - build-and-push: + build: runs-on: ubuntu-latest + permissions: write-all steps: - - # For biggish images, github actions runs out of disk space. - # So we cleanup some unwanted things in the disk image, and reclaim that space for our docker use - # https://github.com/actions/virtual-environments/issues/2606#issuecomment-772683150 - # and https://github.com/easimon/maximize-build-space/blob/b4d02c14493a9653fe7af06cc89ca5298071c66e/action.yml#L104 - # This gives us a total of about 52G of free space, which should be enough for now - - name: cleanup disk space - run: | - sudo rm -rf /usr/local/lib/android /usr/share/dotnet /opt/ghc - df -h - - - name: Checkout files in repo - uses: actions/checkout@main - - - name: Build and push the image to quay.io - uses: jupyterhub/repo2docker-action@master - with: - # Make sure username & password/token pair matches your registry credentials - DOCKER_USERNAME: ${{github.actor}} - DOCKER_PASSWORD: ${{secrets.GITHUB_TOKEN}} - DOCKER_REGISTRY: ghcr.io - IMAGE_NAME: nmfs-opensci/py-rocket-base - # REQUIRED. 
Otherwise start when /home is set-up w persistent storage - REPO_DIR: /srv/repo - APPENDIX_FILE: "appendix" - - # Lets us monitor disks getting full as images get bigger over time - - name: Show how much disk space is left - run: df -h + - uses: actions/checkout@v3 + with: + ref: main + + - name: Login to GitHub Container Registry + if: github.repository == 'nmfs-opensci/py-rocket-base' + uses: docker/login-action@v1 + with: + registry: ghcr.io + username: ${{github.actor}} + password: ${{secrets.GITHUB_TOKEN}} + + - name: Create short_sha tag + shell: bash + run: | + short_sha=$(echo "${{ github.sha }}" | cut -c1-7) + echo "tag=${short_sha}" >> $GITHUB_ENV + + - name: Extract VERSION from Dockerfile + shell: bash + run: | + # Check for both OCI-compliant version label and generic version label + if grep -q "LABEL org.opencontainers.image.version=" Dockerfile; then + version=$(grep "LABEL org.opencontainers.image.version=" Dockerfile | cut -d '=' -f 2 | tr -d ' ') + elif grep -q "LABEL VERSION=" Dockerfile; then + version=$(grep "LABEL VERSION=" Dockerfile | cut -d '=' -f 2 | tr -d ' ') + else + version="" + fi + echo "version=${version}" >> $GITHUB_ENV + + - name: Build the Docker image + if: github.repository == 'nmfs-opensci/py-rocket-base' + run: | + docker build . 
-f Dockerfile \ + --tag ghcr.io/nmfs-opensci/py-rocket-base:latest \ + --tag ghcr.io/nmfs-opensci/py-rocket-base:${{ env.tag }} + + # If VERSION exists, tag the image with that as well + if [ -n "${{ env.version }}" ]; then + docker tag ghcr.io/nmfs-opensci/py-rocket-base:${{ env.tag }} ghcr.io/nmfs-opensci/py-rocket-base:${{ env.version }} + fi + + - name: Publish + if: github.repository == 'nmfs-opensci/py-rocket-base' + run: | + docker push ghcr.io/nmfs-opensci/py-rocket-base:latest + docker push ghcr.io/nmfs-opensci/py-rocket-base:${{ env.tag }} + + # Push the version tag if it exists + if [ -n "${{ env.version }}" ]; then + docker push ghcr.io/nmfs-opensci/py-rocket-base:${{ env.version }} + fi diff --git a/Dockerfile b/Dockerfile index 05eeaf7..f7e3338 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,10 @@ FROM ghcr.io/nmfs-opensci/py-rocket-base/base-image:latest +LABEL org.opencontainers.image.maintainers="eli.holmes@noaa.gov" +LABEL org.opencontainers.image.author="eli.holmes@noaa.gov" +LABEL org.opencontainers.image.source=https://github.com/nmfs-opensci/container-images/images/py-rocket-geospatial-2 +LABEL org.opencontainers.image.description="Geospatial Python (3.12), R (4.4) and QGIS image" +LABEL org.opencontainers.image.licenses=Apache2.0 +LABEL org.opencontainers.image.version=2024.11.19 USER root diff --git a/book/_quarto.yml b/book/_quarto.yml index 89ebd88..e4a9e79 100644 --- a/book/_quarto.yml +++ b/book/_quarto.yml @@ -23,6 +23,7 @@ book: - desktop.qmd - publishing.qmd - jupyter-config.qmd + - example_children.qmd - developers.qmd - related.qmd diff --git a/book/example_children.qmd b/book/example_children.qmd new file mode 100644 index 0000000..c5f7454 --- /dev/null +++ b/book/example_children.qmd @@ -0,0 +1,210 @@ +# Example child images + +## py-rocket-geospatial + +This has many geospatial R and Python packages along with QGIS, Panoply and CoastWatch utils. 
+ +Dockerfile +``` +FROM ghcr.io/nmfs-opensci/py-rocket-base/test:latest + +LABEL org.opencontainers.image.maintainers="eli.holmes@noaa.gov" +LABEL org.opencontainers.image.author="eli.holmes@noaa.gov" +LABEL org.opencontainers.image.source=https://github.com/nmfs-opensci/container-images/py-rocket-2 +LABEL org.opencontainers.image.description="Geospatial Python (3.11) and R (4.4) image with Desktop (QGIS, Panoply, CWUtils)" +LABEL org.opencontainers.image.licenses=Apache2.0 +LABEL org.opencontainers.image.version=2024.11.06 + +USER root +COPY . /tmp2/ +RUN /pyrocket_scripts/install-conda-packages.sh /tmp2/environment.yml || echo "install-conda-packages.sh failed" || true +RUN /pyrocket_scripts/install-r-packages.sh /tmp2/install.R || echo "install-r-packages.sh failed" || true +RUN /pyrocket_scripts/install-apt-packages.sh /tmp2/apt.txt || echo "install-apt-packages.sh failed" || true +RUN /pyrocket_scripts/install-desktop.sh /tmp2/Desktop || echo "install-desktop.sh failed" || true +RUN rm -rf /tmp2 +USER ${NB_USER} + +USER root +# install the geospatial libraries and R spatial; the rocker scripts are part of py-rocket-base +RUN PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin && \ + /rocker_scripts/install_geospatial.sh + +# Install cwutils +RUN cd /tmp && \ + wget https://www.star.nesdis.noaa.gov/socd/coastwatch/cwf/cwutils-4_0_0_198-linux-x86_64.tar.gz && \ + tar -zxf cwutils-4_0_0_198-linux-x86_64.tar.gz && \ + rm -rf cwutils-4_0_0_198-linux-x86_64.tar.gz +ENV PATH=${PATH}:/tmp/cwutils_4.0.0.198/bin +ENV MANPATH=${MANPATH}:/tmp/cwutils_4.0.0.198/doc/man +ENV INSTALL4J_ADD_VM_PARAMS=-Dsun.java2d.uiScale=2.0 + +# Install panoply +RUN cd /tmp && \ + wget --user-agent="Mozilla/5.0" https://www.giss.nasa.gov/tools/panoply/download/PanoplyJ-5.5.5.tgz && \ + tar -zxf PanoplyJ-5.5.5.tgz && \ + rm -rf PanoplyJ-5.5.5.tgz +ENV PATH=${PATH}:/tmp/PanoplyJ + +USER ${NB_USER} +WORKDIR ${HOME} +``` + +environment.yml +``` +name: py-rocket-geospatial +# 2024-11-01 
+channels: + - conda-forge + - nodefaults + +dependencies: + # Core scientific python stack + - cython # optimization, C API access + - flox # optimization, xarray operations + - hypothesis # needed by numpy testing tools + - networkx + - numba # high-performance numerics + - numpy + - pandas + - scikit-image + - scikit-learn + - scipy + - statsmodels + - pymannkendall # non-parametric Mann-Kendall trend analysis + - sympy + - xarray>=2024.05.0 + + # Visualization packages + - bokeh + - cartopy # geospatial plotting with matplotlib + - geemap + - geoviews + - hvplot + - ipyleaflet + - ipympl # This enables matplotlib interaction with jupyter widgets + - leafmap + - lonboard + - matplotlib + - plotly + - seaborn # statistical plotting with matplotlib + - cmocean # colormaps for ocean + - imageio # helps writing image files + - apng # create animation from multiple png + - holoviews + - graphviz + - bqplot # 2-D visualization system + - regionmask # create masks of (geo)spatial regions + + # Machine Learning packages + - py-xgboost~=2.1.1=cpu* + + # Geospatial data packages + - geopandas>=0.14.4 + - pygmt + - rasterio + - rioxarray + - rasterstats # summarize geospatial raster datasets based on vector geometries + - pyresample # resampling geospatial image data + - shapely # manipulation and analysis of planar geometric objects + - pyproj + - datashader + + # File formats and file management, download, dataset caching + - h5py + - h5netcdf + - nco + - pooch + - zarr + - kerchunk + - rechunker + - cftime # for decoding time units and variable values in a netCDF file + - h5coro # reading HDF5 data stored in S3 + - hdf5plugin # provides HDF5 compression filters + - lxml # processing XML and HTML + - pynco # python style access to the NetCDF Operators (NCO) + + # Cloud access tools and libraries + - awscli + - awscliv2 + - boto3 + - s3fs>=2023.6.0 + # handle login and similar details for accessing earthdata protected data + # Access datasets exposed via STAC + - 
pystac-client + - stackstac + # Access datasets exposed via intake catalogs + - intake + - intake-esm>=2023.7.7 + - intake-stac==0.4.0 + - intake-xarray==0.6.1 + - gcsfs>=2023.5.0 + - certifi # Root Certificates for validating the trustworthiness of SSL certificates. + + # Specific cloud access libraries + - copernicusmarine # get data from copernicus + - earthaccess>=0.11.0 # get data from nasa earth access + - pydap # OPeNDAP implementation + - erddapy # connect to erddap servers + - ecmwflibs # wraps some of European Centre for Medium-Range Weather Forecasts libraries + - harmony-py + + # Distributed computing + - dask>=2023.12.1 + - dask-labextension + - dask-geopandas + - coiled + + # Other useful generic python packages + - pillow # Python imaging library, useful for many image-related tasks + - pytest + - pytest-cov + - pep8 + - flake8 + - tqdm # progress bars, with notebook support + - joblib # lightweight pipelining in Python + + # Packages specific to climate and ocean data work + - esmpy + - xmip + - spectral # pure Python module for processing hyperspectral image data + + # Desktop tools + - qgis + - pyopencl # Maybe needed for qgis? https://github.com/conda-forge/qgis-feedstock/issues/263 + # Resolves warning "No ICDs were found": https://github.com/CryoInTheCloud/hub-image/issues/50 + - ocl-icd-system + + # Quarto + - quarto + - pip: + - xq # Apply XPath expressions to XML + - jupyterlab-quarto +``` + +install.R +``` +#! 
/usr/local/bin/Rscript +# install R dependencies + +# to match rocker/verse:4.4 used in py-rocker-base +# look up the date that the Rocker image was created and put that +repo <- "https://p3m.dev/cran/__linux__/jammy/2024-05-13" + +install.packages(c("rstac", "quarto", "aws.s3", "reticulate", "gdalcubes", "rnaturalearth"), repos=repo) +install.packages("rnaturalearthdata", repos=repo) + +remotes::install_github('r-tmap/tmap', upgrade=FALSE) +# CRAN version is out of date +devtools::install_github("boettiger-lab/earthdatalogin") + +# CoastWatch required +list.of.packages <- c("parsedate", "reshape2", "gridGraphics", "PBSmapping", + "date", "openair", "cmocean", "plotdap", "rerddapXtracto") +install.packages(list.of.packages, repos=repo) +``` + +apt.txt +``` +# for qgis +libgl1-mesa-glx +``` \ No newline at end of file diff --git a/docs/configuration_files.html b/docs/configuration_files.html index 3b9537f..50fd29a 100644 --- a/docs/configuration_files.html +++ b/docs/configuration_files.html @@ -157,17 +157,23 @@ 6  Customizing Jupyter + + diff --git a/docs/customizing.html b/docs/customizing.html index 1a371ee..c2eb8e7 100644 --- a/docs/customizing.html +++ b/docs/customizing.html @@ -157,17 +157,23 @@ 6  Customizing Jupyter + + diff --git a/docs/desktop.html b/docs/desktop.html index 1aff4e1..ff18cc0 100644 --- a/docs/desktop.html +++ b/docs/desktop.html @@ -157,17 +157,23 @@ 6  Customizing Jupyter + + diff --git a/docs/developers.html b/docs/developers.html index dd861ef..cd25948 100644 --- a/docs/developers.html +++ b/docs/developers.html @@ -7,7 +7,7 @@ -7  Developer notes – py-rocket-base documentation +8  Developer notes – py-rocket-base documentation + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ +
+ +
+ + +
+ + + +
+ +
+
+

7  Example child images

+
+ + + +
+ + + + +
+ + + +
+ + +
+

7.1 py-rocket-geospatial

+

This has many geospatial R and Python packages along with QGIS, Panoply and CoastWatch utils.

+

Dockerfile

+
FROM ghcr.io/nmfs-opensci/py-rocket-base/test:latest
+
+LABEL org.opencontainers.image.maintainers="eli.holmes@noaa.gov"
+LABEL org.opencontainers.image.author="eli.holmes@noaa.gov"
+LABEL org.opencontainers.image.source=https://github.com/nmfs-opensci/container-images/py-rocket-2
+LABEL org.opencontainers.image.description="Geospatial Python (3.11) and R (4.4) image with Desktop (QGIS, Panoply, CWUtils)"
+LABEL org.opencontainers.image.licenses=Apache2.0
+LABEL org.opencontainers.image.version=2024.11.06
+
+USER root
+COPY . /tmp2/
+RUN /pyrocket_scripts/install-conda-packages.sh /tmp2/environment.yml || echo "install-conda-packages.sh failed" || true
+RUN /pyrocket_scripts/install-r-packages.sh /tmp2/install.R || echo "install-r-packages.sh failed" || true
+RUN /pyrocket_scripts/install-apt-packages.sh /tmp2/apt.txt || echo "install-apt-packages.sh failed" || true
+RUN /pyrocket_scripts/install-desktop.sh /tmp2/Desktop || echo "install-desktop.sh failed" || true
+RUN rm -rf /tmp2
+USER ${NB_USER}
+
+USER root
+# install the geospatial libraries and R spatial; the rocker scripts are part of py-rocket-base
+RUN PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin && \
+    /rocker_scripts/install_geospatial.sh
+
+# Install cwutils
+RUN cd /tmp && \
+    wget https://www.star.nesdis.noaa.gov/socd/coastwatch/cwf/cwutils-4_0_0_198-linux-x86_64.tar.gz && \
+    tar -zxf cwutils-4_0_0_198-linux-x86_64.tar.gz && \
+    rm -rf cwutils-4_0_0_198-linux-x86_64.tar.gz
+ENV PATH=${PATH}:/tmp/cwutils_4.0.0.198/bin
+ENV MANPATH=${MANPATH}:/tmp/cwutils_4.0.0.198/doc/man
+ENV INSTALL4J_ADD_VM_PARAMS=-Dsun.java2d.uiScale=2.0
+
+# Install panoply
+RUN cd /tmp && \
+  wget --user-agent="Mozilla/5.0" https://www.giss.nasa.gov/tools/panoply/download/PanoplyJ-5.5.5.tgz && \
+  tar -zxf PanoplyJ-5.5.5.tgz && \
+  rm -rf PanoplyJ-5.5.5.tgz
+ENV PATH=${PATH}:/tmp/PanoplyJ
+
+USER ${NB_USER}
+WORKDIR ${HOME}
+

environment.yml

+
name: py-rocket-geospatial 
+# 2024-11-01
+channels:
+  - conda-forge
+  - nodefaults
+
+dependencies:
+  # Core scientific python stack
+  - cython # optimization, C API access
+  - flox # optimization, xarray operations
+  - hypothesis # needed by numpy testing tools
+  - networkx
+  - numba # high-performance numerics
+  - numpy
+  - pandas
+  - scikit-image
+  - scikit-learn
+  - scipy
+  - statsmodels
+  - pymannkendall # non-parametric Mann-Kendall trend analysis
+  - sympy
+  - xarray>=2024.05.0
+
+  # Visualization packages
+  - bokeh
+  - cartopy # geospatial plotting with matplotlib
+  - geemap
+  - geoviews
+  - hvplot
+  - ipyleaflet
+  - ipympl # This enables matplotlib interaction with jupyter widgets
+  - leafmap
+  - lonboard
+  - matplotlib
+  - plotly
+  - seaborn # statistical plotting with matplotlib
+  - cmocean # colormaps for ocean
+  - imageio # helps writing image files
+  - apng # create animation from multiple png
+  - holoviews
+  - graphviz
+  - bqplot # 2-D visualization system
+  - regionmask # create masks of (geo)spatial regions
+
+  # Machine Learning packages
+  - py-xgboost~=2.1.1=cpu*
+
+  # Geospatial data packages
+  - geopandas>=0.14.4
+  - pygmt
+  - rasterio
+  - rioxarray
+  - rasterstats # summarize geospatial raster datasets based on vector geometries
+  - pyresample # resampling geospatial image data
+  - shapely # manipulation and analysis of planar geometric objects
+  - pyproj
+  - datashader
+
+  # File formats and file management, download, dataset caching
+  - h5py
+  - h5netcdf
+  - nco
+  - pooch
+  - zarr
+  - kerchunk
+  - rechunker
+  - cftime # for decoding time units and variable values in a netCDF file
+  - h5coro # reading HDF5 data stored in S3
+  - hdf5plugin # provides HDF5 compression filters
+  - lxml # processing XML and HTML
+  - pynco # python style access to the NetCDF Operators (NCO)
+
+  # Cloud access tools and libraries
+  - awscli
+  - awscliv2
+  - boto3
+  - s3fs>=2023.6.0
+  # handle login and similar details for accessing earthdata protected data
+  # Access datasets exposed via STAC
+  - pystac-client
+  - stackstac
+  # Access datasets exposed via intake catalogs
+  - intake
+  - intake-esm>=2023.7.7
+  - intake-stac==0.4.0
+  - intake-xarray==0.6.1
+  - gcsfs>=2023.5.0
+  - certifi # Root Certificates for validating the trustworthiness of SSL certificates.
+
+  # Specific cloud access libraries
+  - copernicusmarine # get data from copernicus
+  - earthaccess>=0.11.0 # get data from nasa earth access
+  - pydap # OPeNDAP implementation
+  - erddapy # connect to erddap servers
+  - ecmwflibs # wraps some of European Centre for Medium-Range Weather Forecasts libraries
+  - harmony-py
+
+  # Distributed computing
+  - dask>=2023.12.1
+  - dask-labextension
+  - dask-geopandas
+  - coiled
+
+  # Other useful generic python packages
+  - pillow # Python imaging library, useful for many image-related tasks
+  - pytest
+  - pytest-cov
+  - pep8
+  - flake8
+  - tqdm # progress bars, with notebook support
+  - joblib # lightweight pipelining in Python
+
+  # Packages specific to climate and ocean data work
+  - esmpy
+  - xmip
+  - spectral # pure Python module for processing hyperspectral image data
+
+  # Desktop tools
+  - qgis 
+  - pyopencl  # Maybe needed for qgis? https://github.com/conda-forge/qgis-feedstock/issues/263
+  # Resolves warning "No ICDs were found": https://github.com/CryoInTheCloud/hub-image/issues/50
+  - ocl-icd-system
+  
+  # Quarto
+  - quarto
+  - pip:
+    - xq # Apply XPath expressions to XML
+    - jupyterlab-quarto
+

install.R

+
#! /usr/local/bin/Rscript
+# install R dependencies
+
+# to match rocker/verse:4.4 used in py-rocket-base
+# look up the date that the Rocker image was created and put that date in the repo URL below
+repo <- "https://p3m.dev/cran/__linux__/jammy/2024-05-13"
+
+install.packages(c("rstac", "quarto", "aws.s3", "reticulate", "gdalcubes", "rnaturalearth"), repos=repo)
+install.packages("rnaturalearthdata", repos=repo)
+
+remotes::install_github('r-tmap/tmap', upgrade=FALSE)
+# CRAN version is out of date
+devtools::install_github("boettiger-lab/earthdatalogin")
+
+# CoastWatch required
+list.of.packages <- c("parsedate", "reshape2", "gridGraphics", "PBSmapping",   
+                      "date", "openair", "cmocean", "plotdap", "rerddapXtracto")
+install.packages(list.of.packages, repos=repo)
+

apt.txt

+
# for qgis
+libgl1-mesa-glx
+ + +
+ +
+ + +
+ + + + + \ No newline at end of file diff --git a/docs/index.html b/docs/index.html index c1bee8c..e835996 100644 --- a/docs/index.html +++ b/docs/index.html @@ -157,17 +157,23 @@ 6  Customizing Jupyter + + diff --git a/docs/jupyter-config.html b/docs/jupyter-config.html index 3fea477..5e4eb8d 100644 --- a/docs/jupyter-config.html +++ b/docs/jupyter-config.html @@ -29,7 +29,7 @@ - + @@ -157,17 +157,23 @@ 6  Customizing Jupyter + + @@ -806,8 +812,8 @@

6  - - 7  Developer notes + + 7  Example child images diff --git a/docs/publishing.html b/docs/publishing.html index c7c5392..d8ee990 100644 --- a/docs/publishing.html +++ b/docs/publishing.html @@ -157,17 +157,23 @@ 6  Customizing Jupyter + + diff --git a/docs/r-packages.html b/docs/r-packages.html index e1710cd..809b4d3 100644 --- a/docs/r-packages.html +++ b/docs/r-packages.html @@ -157,17 +157,23 @@ 6  Customizing Jupyter + + diff --git a/docs/related.html b/docs/related.html index bde44b6..e94b834 100644 --- a/docs/related.html +++ b/docs/related.html @@ -7,7 +7,7 @@ -8  Related Docker Stacks – py-rocket-base documentation +9  Related Docker Stacks – py-rocket-base documentation