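# Parallel Courts Scraper (workflow run #199)
# NOTE(review): this header replaces page-navigation text ("Skip to content",
# repeated run titles) that appears to have been captured from the GitHub
# Actions web view along with the workflow definition below.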

name: Parallel Courts Scraper

# Triggers. `schedule` and `push` must be nested under `on`; at the same
# indentation they would be parsed as unrelated top-level keys.
on:
  # Weekly run: minute 15, hour 2, every Sunday (GitHub evaluates cron in UTC).
  schedule:
    - cron: "15 2 * * 0"
  # Also run whenever this workflow file itself is changed.
  push:
    paths:
      - .github/workflows/parallel-scrape.yml
jobs:
  # Fan-out stage: one matrix job per chunk. Each job scrapes its chunk,
  # commits the resulting raw data files and pushes them to `master`.
  #
  # NOTE(review): all matrix jobs push to the same branch. A job whose push
  # lands after another job's push (between its `git pull` and its own push)
  # can be rejected as non-fast-forward. Consider uploading artifacts and
  # committing once in `combine-files` if this shows up in practice.
  parallel-scraper:
    name: Parallel Courts Scraper
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Presumably `--pid` selects one of `--nprocs` chunks in the Scrape
        # step, so this list should contain 0..nprocs-1 -- confirm against
        # the `gv-dashboard-data` CLI and keep the two in sync.
        chunk-number: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    steps:
      - uses: actions/checkout@v2
        with:
          ref: master
          # Credentials are supplied explicitly to the push action instead.
          persist-credentials: false
          fetch-depth: 0
      - uses: actions/setup-python@v2
        with:
          python-version: "3.9"
      # NOTE(review): the original version pin of this action was lost to
      # e-mail obfuscation in the captured text; `v2` is assumed -- confirm.
      - name: Run image
        uses: abatilo/actions-poetry@v2
        with:
          poetry-version: "1.4.2"
      # Refresh the package index first so the install does not fail on a
      # stale runner image.
      - name: Install dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y libspatialindex-dev
      - name: Scrape
        run: |
          poetry install
          poetry run gv-dashboard-data scrape-courts-portal --nprocs 10 --pid ${{ matrix.chunk-number }} --sleep 2 --debug
      # `git commit` exits non-zero when nothing is staged, which would fail
      # the job; only commit when the index actually differs from HEAD.
      - name: Commit files
        run: |
          git config --local user.email "action@github.com"
          git config --local user.name "GitHub Action"
          git pull origin master
          git add -f gun_violence_dashboard_data/data/raw/scraped_courts_data*
          git diff --cached --quiet || git commit -m "Add courts scraper data (chunk #${{ matrix.chunk-number }})"
      - name: Push changes
        uses: ad-m/github-push-action@v0.6.0
        with:
          branch: master
          github_token: ${{ secrets.GITHUB_TOKEN }}

  # Fan-in stage: runs only after every matrix job above has succeeded, then
  # runs the finalize command and commits the combined JSON file.
  combine-files:
    needs: parallel-scraper
    name: Combine Scraping Results
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
        with:
          ref: master
          persist-credentials: false
          fetch-depth: 0
      - uses: actions/setup-python@v2
        with:
          python-version: "3.9"
      # NOTE(review): the original version pin of this action was lost to
      # e-mail obfuscation in the captured text; `v2` is assumed -- confirm.
      - name: Run image
        uses: abatilo/actions-poetry@v2
        with:
          poetry-version: "1.4.2"
      - name: Install dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y libspatialindex-dev
      - name: Combine
        shell: bash -l {0}
        run: |
          poetry install
          poetry run gv-dashboard-data finalize-courts-scraping --debug
      # Skip the commit when the combined file is unchanged so an identical
      # re-run does not fail the job.
      - name: Commit files
        run: |
          git config --local user.email "action@github.com"
          git config --local user.name "GitHub Action"
          git add -f gun_violence_dashboard_data/data/raw/scraped_courts_data.json
          git diff --cached --quiet || git commit -m "Add combined courts scraper data"
      # Pinned and targeted explicitly, matching the scraper job: the checkout
      # above is always `master`, so the push must go to `master` as well
      # regardless of which ref triggered the workflow.
      - name: Push changes
        uses: ad-m/github-push-action@v0.6.0
        with:
          branch: master
          github_token: ${{ secrets.GITHUB_TOKEN }}