diff --git a/.github/workflows/run.yml b/.github/workflows/run.yml index 2af697d8..538a5af6 100644 --- a/.github/workflows/run.yml +++ b/.github/workflows/run.yml @@ -52,10 +52,13 @@ jobs: GITHUB_TOKEN: ${{ steps.generate_token.outputs.token }} run: | poetry run pypi-data print-git-urls > links/repositories.txt + poetry run pypi-data print-git-urls --ssh-urls > links/repositories_ssh.txt - uses: EndBug/add-and-commit@v9 with: - add: 'links/repositories.txt' + add: | + links/repositories.txt + links/repositories_ssh.txt author_email: "41898282+github-actions[bot]@users.noreply.github.com" author_name: "commit-bot" message: "Add repository URLs" diff --git a/src/pypi_data/cli.py b/src/pypi_data/cli.py index 89898d9d..6d6e1515 100644 --- a/src/pypi_data/cli.py +++ b/src/pypi_data/cli.py @@ -31,10 +31,10 @@ GithubToken = Annotated[str, typer.Option(envvar="GITHUB_TOKEN")] -def _get_urls(github: Github) -> Iterable[tuple[str, str]]: +def _get_urls(github: Github) -> Iterable[tuple[str, str, str]]: for repo in github.get_organization("pypi-data").get_repos(): if repo.name.startswith("pypi-mirror-"): - yield f'{repo.html_url}.git', f"{repo.html_url}/releases/download/latest/dataset.parquet" + yield f'{repo.html_url}.git', repo.ssh_url, f"{repo.html_url}/releases/download/latest/dataset.parquet" def github_client(github_token) -> Github: @@ -45,15 +45,18 @@ def github_client(github_token) -> Github: @app.command() -def print_git_urls(github_token: GithubToken): +def print_git_urls(github_token: GithubToken, ssh_urls: bool = False): g = github_client(github_token) - for url, _ in _get_urls(g): - print(url) + for https_url, ssh_url, _ in _get_urls(g): + if ssh_urls: + print(ssh_url) + else: + print(https_url) def group_by_size(github: Github, target_size: int) -> Iterable[list[tuple[int, str]]]: fs = HTTPFileSystem() - urls = (u[1] for u in _get_urls(github)) + urls = (u[2] for u in _get_urls(github)) with ThreadPoolExecutor() as pool: stat_results = pool.map(lambda url: fs.stat(url), urls)