# Skip to content

# Generate Stats

# Generate Stats #470

# Workflow file for this run

# Triggers: manual runs (workflow_dispatch) and a daily schedule.
# There is no `push` trigger.
on:
  workflow_dispatch:
  schedule:
    # Daily at 05:00; GitHub Actions evaluates cron schedules in UTC.
    - cron: "0 5 * * *"
# Concurrency group is left disabled, as in the original file.
# concurrency: data
name: Generate Stats
jobs:
  # Downloads the files listed in links/dataset.txt, runs sql/stats.prql over
  # the downloaded parquet files, and commits the resulting stats/totals.json.
  generate_stats:
    timeout-minutes: 120
    runs-on: ubuntu-latest
    # The commit step below pushes to the repository, so the job token needs
    # write access to repository contents. No other scope is granted.
    permissions:
      contents: write
    env:
      # Same directory that the cache step below saves and restores.
      UV_CACHE_DIR: /tmp/.uv-cache
    steps:
      - name: Maximize build space
        # NOTE(review): `@master` is a mutable ref; consider pinning this
        # third-party action to a release tag or a commit SHA.
        uses: easimon/maximize-build-space@master
        with:
          remove-dotnet: 'true'
          remove-android: 'true'
          remove-haskell: 'true'
          remove-codeql: 'true'
          remove-docker-images: 'true'
      - name: checkout
        uses: actions/checkout@v4
      - name: Set up uv
        # Install latest uv version using the installer
        run: curl -LsSf https://astral.sh/uv/install.sh | sh
      - name: Restore uv cache
        uses: actions/cache@v4
        with:
          path: /tmp/.uv-cache
          key: uv-${{ runner.os }}-${{ hashFiles('uv.lock') }}
          # Fallback when no cache matches the exact key above. The entry is a
          # prefix, so it also matches caches built from other uv.lock hashes.
          restore-keys: |
            uv-${{ runner.os }}
      - name: Install the project
        run: uv sync --all-extras --dev
      - name: Download links
        # Runs up to 5 wget processes in parallel, each given at most 4
        # entries from links/dataset.txt (presumably one URL per entry;
        # verify against the file).
        run: |
          mkdir -p dataset/
          xargs -P 5 -n 4 wget --no-verbose -P dataset/ < links/dataset.txt
      - name: Generate stats
        run: |
          uv run pypi-data run-sql ${{ github.workspace }}/sql/stats.prql stats/totals.json --output=json --threads=8 dataset/*.parquet
      - name: Sort stats
        # `head` prints the start of the file to the job log for a quick check.
        run: |
          uv run pypi-data sort-json-stats ${{ github.workspace }}/stats/totals.json
          head stats/totals.json
      - name: Commit stats
        uses: EndBug/add-and-commit@v9
        with:
          add: 'stats/totals.json'
          author_email: "41898282+github-actions[bot]@users.noreply.github.com"
          author_name: "commit-bot"
          message: "Add stats"
          push: true
          fetch: true
          pull: '--rebase --autostash'
      - name: Minimize uv cache
        # Prune the cache directory before actions/cache saves it in its
        # post-job step.
        run: uv cache prune --ci