Skip to content

Generate Stats

Generate Stats #355

Workflow file for this run

# Runs once a day at 05:00 UTC (cron schedule) and can also be started
# manually from the Actions tab (workflow_dispatch). It is NOT triggered by
# pushes.
on:
  workflow_dispatch:
  schedule:
    - cron: "0 5 * * *"
# Concurrency group is currently disabled:
# concurrency: data
name: Generate Stats
jobs:
  # Downloads the parquet dataset, runs the stats query over it and commits
  # the resulting stats/totals.json back to the repository.
  generate_stats:
    timeout-minutes: 120
    runs-on: ubuntu-latest
    # The final step pushes a commit, so the job's GITHUB_TOKEN needs write
    # access to repository contents even when the repo default is read-only.
    permissions:
      contents: write
    steps:
      # Remove preinstalled toolchains from the runner to free disk space
      # before anything is checked out or downloaded.
      # Pinned to a release tag rather than the moving `master` branch.
      - name: Maximize build space
        uses: easimon/maximize-build-space@v10
        with:
          remove-dotnet: 'true'
          remove-android: 'true'
          remove-haskell: 'true'
          remove-codeql: 'true'
          remove-docker-images: 'true'
      - name: checkout
        uses: actions/checkout@v4
      - name: Set up python
        id: setup-python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'
          cache: 'pip'
      - name: Install Poetry
        uses: snok/install-poetry@v1
        with:
          virtualenvs-create: true
          virtualenvs-in-project: true
          installer-parallel: true
      # The in-project virtualenv (.venv) is cached, keyed on OS, Python
      # version and the hash of poetry.lock.
      - name: Load cached venv
        id: cached-poetry-dependencies
        uses: actions/cache@v4
        with:
          path: .venv
          key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}
      # Dependencies are only installed when the venv cache missed.
      - name: Install dependencies
        if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
        run: poetry install --no-interaction --no-root
      # The project itself is installed on every run (it is excluded from
      # the dependency install above via --no-root).
      - name: Install root
        run: poetry install --only-root
      # Fetch every URL listed in links/dataset.txt into dataset/:
      # 4 URLs per wget invocation, up to 5 invocations in parallel.
      - name: Download links
        run: |
          mkdir -p dataset/
          xargs -P 5 -n 4 wget --no-verbose -P dataset/ < links/dataset.txt
      # Run sql/stats.prql over all downloaded parquet files and write the
      # result as JSON to stats/totals.json.
      - name: Generate stats
        run: |
          poetry run pypi-data run-sql ${{ github.workspace }}/sql/stats.prql stats/totals.json --output=json --threads=8 dataset/*.parquet
      # Sort the generated JSON, then print its first lines to the job log.
      - name: Sort stats
        run: |
          poetry run pypi-data sort-json-stats ${{ github.workspace }}/stats/totals.json
          head stats/totals.json
      # Commit and push the regenerated stats file; the action is configured
      # to fetch and `pull --rebase --autostash` as part of the run.
      - name: Commit stats
        uses: EndBug/add-and-commit@v9
        with:
          add: 'stats/totals.json'
          author_email: "41898282+github-actions[bot]@users.noreply.github.com"
          author_name: "commit-bot"
          message: "Add stats"
          push: true
          fetch: true
          pull: '--rebase --autostash'