NOTE(review): line breaks and leading indentation below were reconstructed
from a whitespace-collapsed copy of this patch. Hunk line counts match the
headers, but confirm indentation against the upstream files before applying.

diff --git a/.github/workflows/run.yml b/.github/workflows/run.yml
index d8c3ce60..68873391 100644
--- a/.github/workflows/run.yml
+++ b/.github/workflows/run.yml
@@ -48,6 +48,21 @@ jobs:
           app_id: ${{ secrets.APP_ID }}
           private_key: ${{ secrets.APP_PRIVATE_KEY }}
 
+      - env:
+          GITHUB_TOKEN: ${{ steps.generate_token.outputs.token }}
+        run: |
+          poetry run pypi-data print-git-urls > links/repositories.txt
+
+      - uses: EndBug/add-and-commit@v9
+        with:
+          add: 'links/repositories.txt'
+          author_email: "41898282+github-actions[bot]@users.noreply.github.com"
+          author_name: "commit-bot"
+          message: "Add repository URLs"
+          push: true
+          fetch: true
+          pull: '--rebase --autostash'
+
       - env:
           GITHUB_TOKEN: ${{ steps.generate_token.outputs.token }}
         id: groups
diff --git a/src/pypi_data/cli.py b/src/pypi_data/cli.py
index 89ab6a46..d5e70137 100644
--- a/src/pypi_data/cli.py
+++ b/src/pypi_data/cli.py
@@ -31,10 +31,10 @@ GithubToken = Annotated[str, typer.Option(envvar="GITHUB_TOKEN")]
 
 
 
-def _get_index_urls(github: Github) -> Iterable[tuple[str]]:
+def _get_urls(github: Github) -> Iterable[tuple[str, str]]:
     for repo in github.get_organization("pypi-data").get_repos():
         if repo.name.startswith("pypi-mirror-"):
-            yield f"{repo.html_url}/releases/download/latest/dataset.parquet"
+            yield f'{repo.html_url}.git', f"{repo.html_url}/releases/download/latest/dataset.parquet"
 
 
 def github_client(github_token) -> Github:
@@ -45,15 +45,15 @@ def github_client(github_token) -> Github:
 
 
 @app.command()
-def print_index_urls(github_token: GithubToken):
+def print_git_urls(github_token: GithubToken):
     g = github_client(github_token)
-    for url in _get_index_urls(g):
+    for url, _ in _get_urls(g):
         print(url)
 
 
 def group_by_size(github: Github, target_size: int) -> Iterable[list[str]]:
     fs = HTTPFileSystem()
-    urls = _get_index_urls(github)
+    urls = (u[1] for u in _get_urls(github))
 
     with ThreadPoolExecutor() as pool:
         stat_results = pool.map(lambda url: fs.stat(url), urls)