-
Notifications
You must be signed in to change notification settings - Fork 0
102 lines (88 loc) · 3.21 KB
/
unique_python_files.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# Manually triggered workflow (workflow_dispatch is the only trigger; it does
# NOT run on push). It downloads the parquet files listed in links/dataset.txt,
# runs the unique_python_files PRQL query over them in batches, and then
# combines the per-batch outputs into unique-python-files.parquet.
name: Unique Python files

on:
  workflow_dispatch:

# concurrency: data

jobs:
  unique_python_files:
    timeout-minutes: 120
    runs-on: ubuntu-latest
    steps:
      # Frees runner disk space by removing preinstalled toolchains. Kept as
      # the first step, ahead of checkout.
      # NOTE(review): '@master' tracks a mutable branch; consider pinning to a
      # release tag or commit SHA once the desired version is confirmed.
      - name: Maximize build space
        uses: easimon/maximize-build-space@master
        with:
          remove-dotnet: 'true'
          remove-android: 'true'
          remove-haskell: 'true'
          remove-codeql: 'true'
          remove-docker-images: 'true'

      - name: checkout
        uses: actions/checkout@v4

      # The step id is referenced by the venv cache key below.
      - name: Set up python
        id: setup-python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'
          cache: 'pip'

      # virtualenvs-in-project puts the environment in ./.venv, which is the
      # path cached by the next step.
      - name: Install Poetry
        uses: snok/install-poetry@v1
        with:
          virtualenvs-create: true
          virtualenvs-in-project: true
          installer-parallel: true

      # Cache key changes with the OS, the resolved Python version and any
      # poetry.lock in the repository.
      - name: Load cached venv
        id: cached-poetry-dependencies
        uses: actions/cache@v4
        with:
          path: .venv
          key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}

      # Third-party dependencies are only installed when the venv cache missed.
      - name: Install dependencies
        if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
        run: poetry install --no-interaction --no-root

      # The project itself is always (re)installed.
      - name: Install root
        run: poetry install --only-root

      # Downloads every URL in links/dataset.txt into dataset/, running up to
      # 5 wget processes in parallel with at most 4 URLs per invocation.
      - name: Download links
        run: |
          mkdir -p dataset/
          xargs -P 5 -n 4 wget --no-verbose -P dataset/ < links/dataset.txt

      # '-y' avoids an interactive confirmation prompt (which aborts on a
      # non-interactive runner); 'update' avoids a stale package index.
      - name: Install parallel
        run: |
          sudo apt-get update
          sudo apt-get install -y parallel

      # Runs the PRQL query over the downloaded parquet files in batches of at
      # most four files, one batch at a time (-j 1). '{#}' is GNU parallel's
      # job sequence number, so each batch writes its own combined/<n>.parquet.
      - name: Ingest
        run: |
          mkdir -p combined/
          find dataset/ -name '*.parquet' \
            | parallel -j 1 --xargs -n4 \
                poetry run pypi-data run-sql \
                ${{ github.workspace }}/sql/unique_python_files.prql \
                --per-thread-output --output=parquet --threads=2 \
                combined/{#}.parquet {}

      # Diagnostic listing of the per-batch outputs.
      - name: List
        run: ls combined/

      # Merges all per-batch outputs into a single parquet file.
      - name: Combine
        run: |
          poetry run pypi-data run-sql \
            ${{ github.workspace }}/sql/unique_python_files_combine.prql \
            --per-thread-output --output=parquet --memory=3 --threads=2 \
            unique-python-files.parquet combined/*.parquet
#
# - name: Gets latest created release info
# id: latest_release_info
# uses: jossef/action-latest-release-info@v1.2.1
# env:
# GITHUB_TOKEN: ${{ github.token }}
#
# - name: Upload Assets
# id: upload
# uses: shogo82148/actions-upload-release-asset@v1
# with:
# upload_url: ${{ steps.latest_release_info.outputs.upload_url }}
# asset_name: unique-python-files.parquet
# asset_path: unique-python-files.parquet
# overwrite: true
#
# - name: Create download links
# run: |
# echo "${{ steps.upload.outputs.browser_download_url }}" > links/unique_python_files.txt
#
# - uses: EndBug/add-and-commit@v9
# with:
# add: 'links/unique_python_files.txt'
# author_email: "41898282+github-actions[bot]@users.noreply.github.com"
# author_name: "commit-bot"
# message: "Add only python links for asset ${{ needs.generate-matrix.outputs.release_id }}"
# push: true
# fetch: true
# pull: '--rebase --autostash'