# Download data
#
# Download data #78

# Runs every scraper listed in the matrix, gzips the CSV expected at
# data/<publish_dir>.csv and publishes both the archive and the
# data/<publish_dir> directory to the gh-pages branch.
name: Download data

on:
  schedule:
    # Every Monday at 06:00 (GitHub evaluates cron schedules in UTC).
    - cron: '0 6 * * 1'
  # Allows the workflow to be started manually.
  workflow_dispatch:

jobs:
  download-and-upload:
    strategy:
      # Let the remaining scrapers finish even when one of them fails.
      fail-fast: false
      # NOTE(review): every matrix job pushes to the same gh-pages branch.
      # If parallel pushes ever collide, consider adding `max-parallel: 1`.
      matrix:
        # scraper:     name of the published <scraper>.csv.gz archive and of
        #              the podatki/<scraper> directory on gh-pages
        # script:      file under scripts/ followed by its arguments
        # publish_dir: basename under data/ of both the CSV file that gets
        #              gzipped and the directory that gets published
        include:
          - scraper: stop-birokraciji
            script: stop_birokraciji_scraper.py
            publish_dir: stop-birokraciji
          - scraper: predlogi-vladi
            script: proposals_scraper.py
            publish_dir: predlogi-vladi
          # The two entries below run the same script with different
          # arguments and share the `regulations` publish_dir.
          - scraper: zakoni
            script: opsi_laws_scrapper.py ZAKO
            publish_dir: regulations
          - scraper: podzakonski-akti
            # Folded scalar: the lines are joined with single spaces, so the
            # value is one command line.
            script: >-
              opsi_laws_scrapper.py PRAV SKLE URED ODLO ODRE AKT_ KOLP STAT
              TARI POSL RESO DEKL MERI NACP USTZ STRA USTA
            publish_dir: regulations
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repo
        uses: actions/checkout@v3

      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.11'

      - run: pip install -r requirements-scrap.txt

      - name: Download data
        run: python scripts/${{ matrix.script }}

      - name: Zip CSV
        # -p: do not fail when data/csv already exists.
        run: |
          mkdir -p data/csv
          gzip -c data/${{ matrix.publish_dir }}.csv > data/csv/${{ matrix.scraper }}.csv.gz

      # Publishes data/csv (the gzipped CSV) into podatki/ on gh-pages.
      - name: Deploy CSV file
        uses: peaceiris/actions-gh-pages@v3
        if: ${{ github.ref == 'refs/heads/main' }}
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
          publish_dir: ./data/csv
          publish_branch: gh-pages
          destination_dir: podatki
          keep_files: true  # prevent emptying podatki directory

      # Publishes data/<publish_dir> into podatki/<scraper> on gh-pages.
      - name: Deploy to gh-pages branch
        uses: peaceiris/actions-gh-pages@v3
        if: ${{ github.ref == 'refs/heads/main' }}
        with:
          # A push made with GITHUB_TOKEN prevents the generate-index action
          # from running on gh-pages, which is why BIOLAB_HELPER_PAT must be
          # used here.
          github_token: ${{ secrets.BIOLAB_HELPER_PAT }}
          publish_dir: ./data/${{ matrix.publish_dir }}
          publish_branch: gh-pages
          destination_dir: podatki/${{ matrix.scraper }}