Skip to content

Update Data

Update Data #1039

Workflow file for this run

name: Update Data
on:
  # Manual runs from the Actions tab.
  workflow_dispatch:
  # Every push to the repository.
  push:
  schedule:
    # https://crontab.guru/#17_0,6,12,18_*_*_*
    # Cron is evaluated in UTC: 00:17, 06:17, 12:17, 18:17
    # Times in IST(+5:30): 5:47, 11:47, 17:47, 23:47
    - cron: '17 0,6,12,18 * * *'
concurrency:
  # Runs are grouped by workflow name plus the pull request number, falling
  # back to the ref when the event carries no pull request. A newer run in
  # the same group cancels the one still in progress.
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true
jobs:
  # Installs tooling, builds the data with make, uploads it as an artifact and
  # commits changed files under out/ back to the repository.
  update:
    permissions:
      contents: write
    name: Update data
    # We run this on our own infra, to avoid cloud IP egress
    runs-on: self-hosted
    steps:
      # This needs to come before the checkout, since checkout depends on git
      - name: Install Dependencies
        run: |
          sudo apt-get update && sudo apt-get install --yes libnss3 nss-plugin-pem ca-certificates wget jq git build-essential unzip
          # Register the GitHub CLI apt repository, then install gh from it.
          sudo mkdir -p -m 755 /etc/apt/keyrings \
            && wget -qO- https://cli.github.com/packages/githubcli-archive-keyring.gpg | sudo tee /etc/apt/keyrings/githubcli-archive-keyring.gpg > /dev/null \
            && sudo chmod go+r /etc/apt/keyrings/githubcli-archive-keyring.gpg \
            && echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" | sudo tee /etc/apt/sources.list.d/github-cli.list > /dev/null \
            && sudo apt-get update \
            && sudo apt-get install --yes gh
          # Download a pinned curl-impersonate release and verify its SHA-256
          # before unpacking it into /usr/bin.
          wget "https://github.com/lwthiker/curl-impersonate/releases/download/v0.6.1/curl-impersonate-v0.6.1.x86_64-linux-gnu.tar.gz" -O /tmp/curl-impersonate.tar.gz
          echo "fa1e1614f7ba69ccc66721a0f38be457a3647eb64c75d66974b56186e3316b12  /tmp/curl-impersonate.tar.gz" | sha256sum --check --status
          sudo tar -xzf /tmp/curl-impersonate.tar.gz -C /usr/bin
      # The run id makes the primary key unique per run, so restores go
      # through the restore-keys prefix.
      # NOTE(review): the path assumes the self-hosted runner's home is
      # /home/runner - confirm.
      - name: Cache Requests SQLite
        uses: actions/cache@v4
        with:
          path: /home/runner/.cache/event-fetcher-cache.sqlite
          key: "cache-requests-${{ github.run_id }}"
          restore-keys: "cache-requests-"
      - name: Clone self repository
        uses: actions/checkout@v4
        with:
          ref: ${{ github.head_ref }}
      - uses: actions/setup-python@v5
        with:
          python-version: '3.12'
      - name: Setup
        run: |
          git config --global init.defaultBranch main
          git config --global extensions.partialClone true
          pip install .
      - name: Fetch updates
        run: make clean && make -s all
        env:
          ZOMATO_PUBLIC_API_KEY: ${{ secrets.ZOMATO_PUBLIC_API_KEY }}
      # The artifact is consumed by the release job below; its URL is also
      # embedded in the commit message of the Commit step.
      - name: Upload Artifacts
        id: upload
        uses: actions/upload-artifact@v4
        with:
          name: events-db
          path: |
            events.db
            out/pvr-*
          if-no-files-found: error
          retention-days: 1
      # Commit back some data so we can keep track of diffs
      - name: Commit
        uses: stefanzweifel/git-auto-commit-action@v5
        with:
          commit_message: |
            Automatic Updates 🤖
            Database URL: ${{ steps.upload.outputs.artifact-url }}
            Run: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
            Workflow: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}/workflow
          commit_author: 'github-actions[bot] <github-actions[bot]@users.noreply.github.com>'
          # Options passed to git add / git status for files matching out/*.
          add_options: "--ignore-removal --update"
          file_pattern: "out/*"
          status_options: '--untracked-files=no'
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  # Publishes the uploaded artifact as a GitHub release on blr-today/dataset.
  # Only runs for scheduled executions.
  release:
    needs: update
    if: ${{ github.event_name == 'schedule' }}
    runs-on: ubuntu-latest
    name: Release
    steps:
      - name: fetch from artifact
        uses: actions/download-artifact@v4
        with:
          name: events-db
          path: .
      # Mint a GitHub App token scoped to the dataset repository; it is used
      # as GITHUB_TOKEN by the publish step.
      - uses: actions/create-github-app-token@v1
        id: app-token
        with:
          app-id: ${{ vars.PUBLISH_APP_ID }}
          private-key: ${{ secrets.PUBLISH_PRIVATE_KEY }}
          repositories: dataset
      - name: publish dataset
        env:
          GITHUB_TOKEN: ${{ steps.app-token.outputs.token }}
        run: |
          # Tag is the run date plus hour (e.g. v2024.6.1-06), so runs that
          # start in different hours of the same day get distinct tags.
          export VERSION=$(date '+%Y.%-m.%-d-%H')
          gh release create "v$VERSION" events.db out/*.csv --repo=blr-today/dataset --notes "blr.today dataset release. See LICENSE.txt for license information before using this dataset."
  # Triggers a Netlify build once the update job has finished.
  publish:
    needs: update
    runs-on: ubuntu-latest
    name: Publish Website
    steps:
      - name: trigger build on netlify
        # --fail makes curl exit non-zero on an HTTP error response, so a
        # rejected build hook fails this step instead of passing silently.
        run: curl --fail --silent --show-error -X POST -d '{}' "https://api.netlify.com/build_hooks/${BUILD_HOOK_SECRET}"
        env:
          BUILD_HOOK_SECRET: ${{ secrets.NETLIFY_BUILD_HOOK_SECRET }}