Skip to content

Generate Documentation Samples #408

Generate Documentation Samples

Generate Documentation Samples #408

Workflow file for this run

# Triggers: manual dispatch from the Actions UI, plus a daily scheduled run
# at 05:00 (GitHub evaluates cron expressions in UTC). There is no push
# trigger.
on:
  workflow_dispatch:
  schedule:
    - cron: "0 5 * * *"
# Disabled concurrency group, kept for reference.
# concurrency: data
name: Generate Documentation Samples
jobs:
  # Downloads one randomly chosen file from each link list, runs the sampling
  # queries over them, pretty-prints the JSON results and commits them back
  # to the repository.
  generate_samples:
    timeout-minutes: 120
    runs-on: ubuntu-latest
    # The final step pushes a commit, so the job token needs write access to
    # repository contents regardless of the repository's default setting.
    permissions:
      contents: write
    env:
      # Must match the `path` of the "Restore uv cache" step below.
      UV_CACHE_DIR: /tmp/.uv-cache
    steps:
      - name: checkout
        uses: actions/checkout@v4
      - name: Set up uv
        # Install latest uv version using the installer
        run: curl -LsSf https://astral.sh/uv/install.sh | sh
      - name: Restore uv cache
        uses: actions/cache@v4
        with:
          path: /tmp/.uv-cache
          key: uv-${{ runner.os }}-${{ hashFiles('uv.lock') }}
          # Prefix fallback used when uv.lock changed and the exact key misses.
          restore-keys: |
            uv-${{ runner.os }}
      - name: Install the project
        run: uv sync --all-extras --dev
      - name: Download links
        # Picks a single random line from each link list and downloads it.
        run: |
          mkdir -p dataset/ dataset_python_only/
          shuf -n 1 links/dataset.txt | xargs -P 5 -n 4 wget --no-verbose -P dataset/
          shuf -n 1 links/only_python_files.txt | xargs -P 5 -n 4 wget --no-verbose -P dataset_python_only/
      - name: Generate sample
        run: |
          uv run pypi-data run-sql ${{ github.workspace }}/sql/random.prql \
            stats/random_sample.json --output=json dataset/*.parquet
          uv run pypi-data run-sql ${{ github.workspace }}/sql/random_unique.prql \
            stats/random_sample_python_only.json --output=json dataset_python_only/*.parquet
      - name: Pretty-print random sample
        # Copy to a scratch file first: jq cannot rewrite its own input in place.
        run: |
          cp stats/random_sample.json t
          jq '.' t > stats/random_sample.json
          head stats/random_sample.json
      - name: Pretty-print Python-only random sample
        run: |
          cp stats/random_sample_python_only.json t
          jq '.' t > stats/random_sample_python_only.json
          head stats/random_sample_python_only.json
      - name: Commit samples
        uses: EndBug/add-and-commit@v9
        with:
          # Only the two sample files are staged; the scratch file `t` and the
          # downloaded datasets are not listed here.
          add: |
            stats/random_sample.json
            stats/random_sample_python_only.json
          author_email: "41898282+github-actions[bot]@users.noreply.github.com"
          author_name: "commit-bot"
          message: "Add random samples"
          push: true
          fetch: true
          pull: '--rebase --autostash'
      - name: Minimize uv cache
        # Trim the cache directory at the end of the job, before the cache
        # action's post-job step uploads it.
        run: uv cache prune --ci