From f4d2a3075e2dd93b9fa96fede8d58abb9e5abf84 Mon Sep 17 00:00:00 2001 From: Valentin LE BESCOND Date: Tue, 12 Nov 2024 16:22:28 +0100 Subject: [PATCH 1/6] add a full config with all possible entries --- full_config.yml | 200 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 200 insertions(+) create mode 100644 full_config.yml diff --git a/full_config.yml b/full_config.yml new file mode 100644 index 0000000..efdddf6 --- /dev/null +++ b/full_config.yml @@ -0,0 +1,200 @@ +## Synthetic population pipeline for Île-de-France +## based on the synpp package + +# This is the path to a directory where the pipeline can store temporary data +working_directory: cache + +# This section defines which parts of the pipeline should be run +run: + - synthesis.output # To create the output population in the output_path (see below) + #- matsim.output # Uncomment, if you want to run the full simulation (you'll need Java for that) + +# Here the configuration of the pipeline starts +config: + + ### Some general configuration + + ## Number of CPUs to use + processes: 4 + + ### Random seeds + + ## global random seed for the output population + random_seed: 1234 + + ## bpe specific random seed when imputing missing coordinates for known IRIS + # bpe_random_seed: 0 + + ### Define sampling rate for the output population + sampling_rate: 0.001 + + ### household travel survey (HTS) + + ## Define whether to use ENTD or EGT as the HTS + hts: entd # entd, egt, edgt_lyon, edgt_44 + + ## Whether to filter people going outside of the area and other filters + # filter_hts: true + + ## if selected, choose the source for edgt_lyon + # edgt_lyon_source: unchosen # unchosen, adisp, cerema + + ### Zone selection + + ## select regions by region_id + # regions: [11] + + ## select departments by department_id + # departments: [] + + ### Output paths + + ## output folder + output_path: output + + ## output prefix, prepended to file names + # output_prefix: ile_de_france_ + + ## file formats that 
should be exported + # output_formats: ["csv", "gpkg"] # ["csv", "gpkg", "parquet", "geoparquet"] + + ### Algorithms configurations + + ## Use the bhepop2 package for attributing income + # income_assignation_method: bhepop2 # uniform, bhepop2 + + ## Activate if you want to run mode choice, will assign a mode to output trips + mode_choice: true + + ## Statistical matching configuration + + ## Minimum number of observations to sample from + # matching_minimum_observations: 20 + + ## list of attributes to use for matching + # matching_attributes: ["sex", "any_cars", "age_class", "socioprofessional_class", "departement_id"] + + ## Use INSEE's urban type in statistical matching + # use_urban_type: true + # urban_type_path: urban_type/UU2020_au_01-01-2023.zip + # matching_attributes: ["urban_type", "*default*"] + + ## Exclude enterprises without any employee (trancheEffectifsEtablissement is NA, "NN" or "00") + # exclude_no_employee: true + + ## source for the education locations + # education_location_source: bpe # bpe, addresses + + ## max iterations for the secondary location selection algorithm + # secloc_maximum_iterations: np.inf + + ## Buffer around buildings to capture addresses in their vicinity + # home_address_buffer: 5.0 + + ## How to sample homes, using weights or not + # home_location_weight: housing # "uniform", "housing" + + # home_location_source: addresses # "addresses", "buildings", "tiles" + + ## When running matsim + + ## performing one run of the matsim simulation or not + # run_matsim: true + + ## creating the jar or not + # write_jar: true + + ### Analysis configuration + + ## Whether to use previously generated files or not + # analysis_from_file: false + + ## prefix of the files to compare to + # comparison_file_prefix: other_ + + ### Tools configuration + + ## Mostly interesting if you run the simulation, or you activate the `mode_choice` option. + + ## Binaries paths + # git_binary: git + # osmosis_binary: osmosis + # java_binary: java + # 
maven_binary: mvn + + ## Binaries parameters + # java_memory: 14G + # maven_skip_tests: false + + ## eqasim-java parameters + # eqasim_version: 1.5.0 + # eqasim_branch: develop + # eqasim_commit: ece4932 + # eqasim_repository: https://github.com/eqasim-org/eqasim-java.git + # eqasim_path: "" + + ## pt2matsim parameters + # pt2matsim_version: 22.3 + # pt2matsim_branch: v22.3 + + ## Strategy to use in pt2matsim gtfs processing + # gtfs_date: dayWithMostServices + ## Export the detailed geometry of the network before simplification in pt2matsim + # export_detailed_network: true + + ### Input paths + + ## Absolute root path of all input data + data_path: /path/to/my/data + + # census_path: rp_2019/RP2019_INDCVI_csv.zip + # census_csv: FD_INDCVI_2019.csv + + # ban_path: ban_idf + + # bdtopo_path: bdtopo_idf + + # bpe_path: bpe_2021/bpe21_ensemble_xy_csv.zip + # bpe_csv: bpe21_ensemble_xy.csv + + # gtfs_path: gtfs_idf + + # income_com_path: filosofi_2019/indic-struct-distrib-revenu-2019-COMMUNES.zip + # income_com_xlsx: FILO2019_DISP_COM.xlsx + # income_reg_path: filosofi_2019/indic-struct-distrib-revenu-2019-SUPRA.zip + # income_reg_xlsx: FILO2019_DISP_REG.xlsx + # income_year: 19 + + # tiles_path: tiles_2019/Filosofi2019_carreaux_200m_gpkg.zip + # tiles_file: carreaux_200m_met.gpkg + + # od_pro_path: rp_2019/RP2019_MOBPRO_csv.zip + # od_sco_path: rp_2019/RP2019_MOBSCO_csv.zip + # od_pro_csv: FD_MOBPRO_2019.csv + # od_sco_csv: FD_MOBSCO_2019.csv + + # ## external education locations file + # education_file: education/education_addresses.geojson + + # osm_path: osm_idf + # osm_highways: "*" + # osm_railways: "*" + + # siren_path: sirene/StockUniteLegale_utf8.zip + # siret_path: sirene/StockEtablissement_utf8.zip + # siret_geo_path: sirene/GeolocalisationEtablissement_Sirene_pour_etudes_statistiques_utf8.zip + + # iris_path: iris_2021 + + # population_path: rp_2019/base-ic-evol-struct-pop-2019.zip + # population_xlsx: base-ic-evol-struct-pop-2019.xlsx + # population_year: 
19 + + # # population projections + # projection_path: projection_2021 + # projection_scenario: 00_central + # projection_year: 2030 + + # vehicles_method: default # fleet_sample, default + # vehicles_path: vehicles + # vehicles_year: 2021 \ No newline at end of file From 3799766704b9b6089117192dd4afad7cab3ad900 Mon Sep 17 00:00:00 2001 From: Valentin LE BESCOND Date: Wed, 13 Nov 2024 10:49:14 +0100 Subject: [PATCH 2/6] feat: add Docker and Apptainer containers --- .github/workflows/docker-publish.yml | 99 ++++++++++++++++++++++++++++ containers/Dockerfile | 55 ++++++++++++++++ containers/apptainer.def | 8 +++ docs/containers.md | 36 ++++++++++ 4 files changed, 198 insertions(+) create mode 100644 .github/workflows/docker-publish.yml create mode 100644 containers/Dockerfile create mode 100644 containers/apptainer.def create mode 100644 docs/containers.md diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml new file mode 100644 index 0000000..748aff2 --- /dev/null +++ b/.github/workflows/docker-publish.yml @@ -0,0 +1,99 @@ +name: Docker + +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. + +on: + # schedule: + # - cron: '35 8 * * *' + push: + branches: [ "main" ] + # Publish semver tags as releases. + tags: [ 'v*.*.*' ] + pull_request: + branches: [ "main" ] + workflow_dispatch: + +env: + # Use docker.io for Docker Hub if empty + REGISTRY: ghcr.io + # github.repository as / + IMAGE_NAME: ${{ github.repository }} + + +jobs: + build: + + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + # This is used to complete the identity challenge + # with sigstore/fulcio when running outside of PRs. 
+ id-token: write + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + # Install the cosign tool except on PR + # https://github.com/sigstore/cosign-installer + - name: Install cosign + if: github.event_name != 'pull_request' + uses: sigstore/cosign-installer@59acb6260d9c0ba8f4a2f9d9b48431a222b68e20 #v3.5.0 + with: + cosign-release: 'v2.2.4' + + # Set up BuildKit Docker container builder to be able to build + # multi-platform images and export cache + # https://github.com/docker/setup-buildx-action + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@f95db51fddba0c2d1ec667646a06c2ce06100226 # v3.0.0 + + # Login against a Docker registry except on PR + # https://github.com/docker/login-action + - name: Log into registry ${{ env.REGISTRY }} + if: github.event_name != 'pull_request' + uses: docker/login-action@343f7c4344506bcbf9b4de18042ae17996df046d # v3.0.0 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + # Extract metadata (tags, labels) for Docker + # https://github.com/docker/metadata-action + - name: Extract Docker metadata + id: meta + uses: docker/metadata-action@96383f45573cb7f253c731d3b3ab81c87ef81934 # v5.0.0 + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + + # Build and push Docker image with Buildx (don't push on PR) + # https://github.com/docker/build-push-action + - name: Build and push Docker image + id: build-and-push + uses: docker/build-push-action@0565240e2d4ab88bba5387d719585280857ece09 # v5.0.0 + with: + context: ./containers + push: ${{ github.event_name != 'pull_request' }} + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + cache-from: type=gha + cache-to: type=gha,mode=max + + # Sign the resulting Docker image digest except on PRs. + # This will only write to the public Rekor transparency log when the Docker + # repository is public to avoid leaking data. 
If you would like to publish + # transparency data even for private images, pass --force to cosign below. + # https://github.com/sigstore/cosign + - name: Sign the published Docker image + if: ${{ github.event_name != 'pull_request' }} + env: + # https://docs.github.com/en/actions/security-guides/security-hardening-for-github-actions#using-an-intermediate-environment-variable + TAGS: ${{ steps.meta.outputs.tags }} + DIGEST: ${{ steps.build-and-push.outputs.digest }} + # This step uses the identity token to provision an ephemeral certificate + # against the sigstore community Fulcio instance. + run: echo "${TAGS}" | xargs -I {} cosign sign --yes {}@${DIGEST} \ No newline at end of file diff --git a/containers/Dockerfile b/containers/Dockerfile new file mode 100644 index 0000000..cd1af34 --- /dev/null +++ b/containers/Dockerfile @@ -0,0 +1,55 @@ +# Use an official Ubuntu as a parent image +FROM ubuntu:24.04 + +# Set environment variables +ENV DEBIAN_FRONTEND=noninteractive + +ARG env_path + +# Install dependencies +RUN apt-get update && apt-get install -y \ + openjdk-17-jdk \ + maven \ + python3 \ + python3-pip \ + wget \ + unzip \ + git \ + && apt-get clean + +# Install Miniconda +RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh && \ + /bin/bash ~/miniconda.sh -b -p /opt/conda && \ + rm ~/miniconda.sh + +# Set path to conda +ENV PATH /opt/conda/bin:$PATH + +# Install Osmosis +RUN wget --quiet https://github.com/openstreetmap/osmosis/releases/download/0.48.3/osmosis-0.48.3.zip -O /tmp/osmosis.zip && \ + unzip /tmp/osmosis.zip -d /opt/osmosis && \ + rm /tmp/osmosis.zip && \ + ln -s /opt/osmosis/bin/osmosis /usr/local/bin/osmosis + +# Verify installations +RUN java -version && \ + mvn -version && \ + python3 --version && \ + conda --version && \ + which osmosis + +# Copy the environment.yml file into the container if env_path is set (else will create an empty directory) +COPY ${env_path} /tmp/environment.yml.tmp + 
+# Check if env_path is set, if not use from the repo +RUN if [ -z "$env_path" ]; then \ + cp ./environment.yml /tmp/environment.yml; \ + else \ + mv /tmp/environment.yml.tmp /tmp/environment.yml; \ + fi + +# Create the conda environment +RUN conda env create -f /tmp/environment.yml -n eqasim + +# Activate the environment +RUN echo "source activate eqasim" > ~/.bashrc diff --git a/containers/apptainer.def b/containers/apptainer.def new file mode 100644 index 0000000..9fa7d1e --- /dev/null +++ b/containers/apptainer.def @@ -0,0 +1,8 @@ +Bootstrap: docker +From: ghcr.io/eqasim-org/ile-de-france:latest + +%environment + source /opt/conda/bin/activate eqasim + +%runscript + exec python -m synpp "$@" \ No newline at end of file diff --git a/docs/containers.md b/docs/containers.md new file mode 100644 index 0000000..5a2ef6e --- /dev/null +++ b/docs/containers.md @@ -0,0 +1,36 @@ +# Containers + +Containers for running the Eqasim pipeline are located in the `containers` folder. + +## Docker container + +To build the container : +`docker build -t eqasim .` + +This will pull the conda environment from the current repo. + +To build using your own environment.yml file : +`docker build --build-arg env_path=/path/to/my/environment.yml -t eqasim .` + +To run the pipeline : +```bash +docker run --rm -it \ + --mount type=bind,src=/path/to/eqasim-ile-de-france,target=/usr/local/eqasim \ + --mount type=bind,src=/path/to/eqasim-data,target=/usr/local/eqasim-data \ + ghcr.io/eqasim-org/ile-de-france:latest /bin/bash -l -c "cd /usr/local/eqasim && python -m synpp" +``` + +where : + +- `/path/to/eqasim-ile-de-france` is the path of the [eqasim pipeline](https://github.com/eqasim-org/ile-de-france) on your *host* machine. +- `/usr/local/eqasim` is going to be the path of the eqasim pipeline inside the container. +- `/path/to/eqasim-data` is the path of the data (bdtopo, hts, sirene, etc.) folder on your *host* machine. 
+- `/usr/local/eqasim-data` is the path of the data folder in the container. **This is the path you need to put in your `config.yml` file.** + +## Apptainer + +To build the container : +`apptainer -v -d build eqasim.sif apptainer.def` + +To run the pipeline : +`apptainer run eqasim.sif` From 16508c23edbae562f8ac1434230152ffd19dcdca Mon Sep 17 00:00:00 2001 From: Valentin LE BESCOND Date: Wed, 13 Nov 2024 10:54:35 +0100 Subject: [PATCH 3/6] chore: fix action trigger --- .github/workflows/docker-publish.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index 748aff2..b377835 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -9,11 +9,11 @@ on: # schedule: # - cron: '35 8 * * *' push: - branches: [ "main" ] + branches: [ "develop" ] # Publish semver tags as releases. tags: [ 'v*.*.*' ] pull_request: - branches: [ "main" ] + branches: [ "develop" ] workflow_dispatch: env: From d7415077af00ac13a02e5f0a1e0ee3037bc91725 Mon Sep 17 00:00:00 2001 From: Valentin LE BESCOND Date: Wed, 13 Nov 2024 11:03:26 +0100 Subject: [PATCH 4/6] update docker --- .gitignore | 2 ++ containers/Dockerfile | 8 +++++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 49a1898..2385ec5 100644 --- a/.gitignore +++ b/.gitignore @@ -8,4 +8,6 @@ data/lyon_2015 .vscode .idea +.sif + config_local_*.yml \ No newline at end of file diff --git a/containers/Dockerfile b/containers/Dockerfile index cd1af34..7636a02 100644 --- a/containers/Dockerfile +++ b/containers/Dockerfile @@ -38,14 +38,16 @@ RUN java -version && \ conda --version && \ which osmosis +COPY ./environment.yml /tmp/environment_eqasim.yml + # Copy the environment.yml file into the container if env_path is set (else will create an empty directory) -COPY ${env_path} /tmp/environment.yml.tmp +COPY ${env_path} /tmp/environment_custom.yml # Check if env_path is 
set, if not use from the repo RUN if [ -z "$env_path" ]; then \ - cp ./environment.yml /tmp/environment.yml; \ + mv /tmp/environment_eqasim.yml /tmp/environment.yml; \ else \ - mv /tmp/environment.yml.tmp /tmp/environment.yml; \ + mv /tmp/environment_custom.yml /tmp/environment.yml; \ fi # Create the conda environment From 5d07a81106ada14532e6d2aba34131e19e2bcc1f Mon Sep 17 00:00:00 2001 From: Valentin LE BESCOND Date: Wed, 13 Nov 2024 11:06:06 +0100 Subject: [PATCH 5/6] fix: update dockerfile --- containers/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/containers/Dockerfile b/containers/Dockerfile index 7636a02..f0d5e01 100644 --- a/containers/Dockerfile +++ b/containers/Dockerfile @@ -38,7 +38,7 @@ RUN java -version && \ conda --version && \ which osmosis -COPY ./environment.yml /tmp/environment_eqasim.yml +COPY ../environment.yml /tmp/environment_eqasim.yml # Copy the environment.yml file into the container if env_path is set (else will create an empty directory) COPY ${env_path} /tmp/environment_custom.yml From e1e30b6bca7bfa3b85a261c5c5c8f65ee1c6d448 Mon Sep 17 00:00:00 2001 From: Valentin LE BESCOND Date: Wed, 13 Nov 2024 11:11:22 +0100 Subject: [PATCH 6/6] fix: update dockerfile --- .github/workflows/docker-publish.yml | 3 ++- containers/Dockerfile | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index b377835..b64cfbb 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -76,7 +76,8 @@ jobs: id: build-and-push uses: docker/build-push-action@0565240e2d4ab88bba5387d719585280857ece09 # v5.0.0 with: - context: ./containers + context: ./ + file: ./containers/Dockerfile push: ${{ github.event_name != 'pull_request' }} tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} diff --git a/containers/Dockerfile b/containers/Dockerfile index f0d5e01..7636a02 100644 --- 
a/containers/Dockerfile +++ b/containers/Dockerfile @@ -38,7 +38,7 @@ RUN java -version && \ conda --version && \ which osmosis -COPY ../environment.yml /tmp/environment_eqasim.yml +COPY ./environment.yml /tmp/environment_eqasim.yml # Copy the environment.yml file into the container if env_path is set (else will create an empty directory) COPY ${env_path} /tmp/environment_custom.yml