# Workflow file for this run

# Triggers: manual dispatch, every push to the main branch, and a daily
# schedule.
on:
  workflow_dispatch:
  push:
    branches:
      - main
  schedule:
    # Daily at 03:00 (GitHub evaluates workflow cron expressions in UTC).
    - cron: "0 3 * * *"

# Every run joins the same concurrency group ("data"), so at most one run of
# this workflow executes at a time.
concurrency: data

name: Build data
jobs:
  # Single job: creates a draft release, generates repository metadata and
  # link files, commits the links back to the repository, then builds the
  # merged dataset and attaches it to the release.
  build-data:
    runs-on: ubuntu-latest
    permissions:
      # Later steps create a release and push a commit to the repository.
      contents: write
      packages: write
    steps:
      - name: checkout
        uses: actions/checkout@v4

      - name: Install uv
        uses: astral-sh/setup-uv@v3
        with:
          enable-cache: true

      - uses: actions/setup-python@v5
        with:
          # Take the interpreter version from the project metadata file.
          python-version-file: "pyproject.toml"

      - name: Install deps
        run: uv sync --frozen

      # Exposes a minute-resolution timestamp as the step output `tag_name`;
      # it is used below as both the release name and the tag.
      - name: "Set current date as step output"
        id: version
        run: |
          echo "tag_name=$(date +'%Y-%m-%d-%H-%M')" >> "$GITHUB_OUTPUT"

      # The release is created as a draft, pointing at the triggering commit,
      # before any assets exist; assets are attached by later steps.
      - name: Create Release
        id: create-release
        uses: shogo82148/actions-create-release@v1
        with:
          draft: true
          release_name: ${{ steps.version.outputs.tag_name }}
          tag_name: ${{ steps.version.outputs.tag_name }}
          commitish: ${{ github.sha }}

      # Produces a token from the app credentials stored in repository
      # secrets; the next step passes it to the CLI as GITHUB_TOKEN.
      - name: Generate token
        id: generate_token
        uses: pypi-data/github-app-token@v2
        with:
          app_id: ${{ secrets.APP_ID }}
          private_key: ${{ secrets.APP_PRIVATE_KEY }}

      # NOTE(review): presumably writes repos.jsonl.zst into the workspace,
      # since the following step uploads that path - confirm against the CLI.
      - name: Generate Repo Metadata
        env:
          GITHUB_TOKEN: ${{ steps.generate_token.outputs.token }}
        run: uv run pypi-data load-repos repos.jsonl.zst

      - name: Upload Assets
        uses: shogo82148/actions-upload-release-asset@v1
        with:
          upload_url: ${{ steps.create-release.outputs.upload_url }}
          asset_path: ${{ github.workspace }}/repos.jsonl.zst

      # NOTE(review): presumably generates the files under links/ that are
      # committed two steps below - confirm against the CLI.
      - name: Create links
        run: uv run pypi-data create-links repos.jsonl.zst

      # The metadata file has already been uploaded as a release asset, so it
      # is deleted before the commit step runs.
      - name: Remove repos.jsonl.zst
        run: rm repos.jsonl.zst

      # Commits only the links/ files and pushes them, rebasing onto any
      # commits that landed on the remote while this job was running.
      - uses: EndBug/add-and-commit@v9
        with:
          add: "links/*"
          message: "Add repository URLs"
          push: true
          fetch: true
          pull: '--rebase --autostash'

      # Frees disk space ahead of the dataset build. The removals run in
      # parallel; `wait` blocks until all of them have finished so the space
      # is actually available when the next step starts. A bare `wait`
      # returns success regardless of the children, so this stays best-effort.
      # sudo is used for every command (the docker prune already required it)
      # because these system paths may not be removable by the runner user.
      - name: Max build space
        run: |
          sudo rm -rf /usr/share/dotnet/ &
          sudo rm -rf /usr/local/lib/android/ &
          sudo rm -rf /opt/ghc/ &
          sudo rm -rf /opt/hostedtoolcache/CodeQL/ &
          sudo docker image prune --all --force &
          wait

      - name: Create dataset
        run: uv run pypi-data merge-datasets links/repositories.json dataset/

      # Lists the output directory so the generated files show up in the log.
      - name: Debug
        run: ls -la dataset/

      # The glob attaches every merged-*.parquet file found in dataset/.
      - name: Upload Dataset
        uses: shogo82148/actions-upload-release-asset@v1
        with:
          upload_url: ${{ steps.create-release.outputs.upload_url }}
          asset_path: ${{ github.workspace }}/dataset/merged-*.parquet