# chore: improve scraper workflow #65
#
# Workflow file for this run

# Runs the Python scraper, formats the result with Prettier and opens a pull
# request against `master` containing the refreshed shifts data.
name: Calendarium Scraper

on:
  # NOTE(review): on pull_request runs the job also reaches the
  # "Create Pull Request" step below; confirm that is intended, or guard that
  # step with `if: github.event_name != 'pull_request'`.
  pull_request:
    types: [opened, synchronize, reopened]
  schedule:
    # Run every 7 days (Sundays at 00:00 UTC).
    - cron: "0 0 * * 0"
  workflow_dispatch: # enables manual triggering

# Scopes needed by the "Create Pull Request" step when it uses GITHUB_TOKEN:
# push the `action/shifts` branch and open/label the pull request.
permissions:
  contents: write
  pull-requests: write

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout Code
        uses: actions/checkout@v4

      # A Firefox binary is installed before the scraper runs.
      - name: Setup Firefox
        uses: browser-actions/setup-firefox@v1

      # Print the installed browser version to the job log.
      - name: Firefox Version
        run: firefox --version

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          # Quoted so the version is always passed as a string.
          python-version: "3.11.5"

      - name: Install Dependencies
        run: pip install -r scraper/requirements.txt

      - name: Run the Scraper
        run: python scraper/main.py

      - name: Set up Node
        uses: actions/setup-node@v3
        with:
          node-version: "18"

      - name: Format Code with Prettier
        run: |
          npm ci
          npm run format

      - name: Configure Git
        run: |
          git config --global user.name "github-actions[bot]"
          git config --global user.email "github-actions[bot]@users.noreply.github.com"

      - name: Get Most Active Contributors
        id: get_contributors
        env:
          # Passed through the environment instead of being interpolated into
          # the script text.
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          # Fetch the top five contributors using the GitHub API.
          contributors=$(curl -s \
            -H "Authorization: token ${GH_TOKEN}" \
            -H "Accept: application/vnd.github+json" \
            "https://api.github.com/repos/${GITHUB_REPOSITORY}/contributors?per_page=5")
          # Build a comma-separated list of logins with no trailing comma.
          # Bot accounts are skipped because they cannot be requested as
          # reviewers. If the API returned an error payload, jq fails and the
          # list is left empty rather than failing the job.
          reviewers=$(jq -r '[.[] | select(.type == "User") | .login] | join(",")' \
            <<<"${contributors}" || true)
          # `::set-output` is deprecated; step outputs go to $GITHUB_OUTPUT.
          echo "reviewers=${reviewers}" >> "${GITHUB_OUTPUT}"

      - name: Create Pull Request
        uses: peter-evans/create-pull-request@v5
        with:
          title: "chore: update shifts data"
          body: |
            *This is an automated PR*
            Run Calendarium Scraper in order to update shifts data.
            > [!IMPORTANT]
            > Before merging, please be aware of edge cases where manual intervention is needed.
          commit-message: "chore: update shifts data"
          branch: action/shifts
          base: master
          delete-branch: true
          labels: add activities, automated
          reviewers: ${{ steps.get_contributors.outputs.reviewers }}
          token: ${{ secrets.GITHUB_TOKEN }}