diff --git a/.github/workflows/unique_python_files.yml b/.github/workflows/unique_python_files.yml
index 6524406c..7134e95a 100644
--- a/.github/workflows/unique_python_files.yml
+++ b/.github/workflows/unique_python_files.yml
@@ -59,14 +59,14 @@ jobs:
         run: |
           sudo apt-get install parallel
 
-      - name: Combine
+      - name: Ingest  # batch pass: dataset/ parquet files -> combined/{#}.parquet
         run: |
           mkdir combined/
           find dataset/ -name '*.parquet' | parallel -j 1 --xargs -n2 poetry run pypi-data run-sql ${{ github.workspace }}/sql/unique_python_files.prql --output=parquet --threads=2 combined/{#}.parquet {}
 
-      - name: Complete
+      - name: Combine  # merge pass: combined/*.parquet -> unique-python-files.parquet
         run: |
-          poetry run pypi-data run-sql ${{ github.workspace }}/sql/unique_python_files.prql --output=parquet --threads=2 unique-python-files.parquet combined/*.parquet
+          poetry run pypi-data run-sql ${{ github.workspace }}/sql/unique_python_files_combine.prql --output=parquet --threads=2 unique-python-files.parquet combined/*.parquet
 
       - name: List
         run: ls combined/
diff --git a/sql/unique_python_files_combine.prql b/sql/unique_python_files_combine.prql
new file mode 100644
index 00000000..ac9baa47
--- /dev/null
+++ b/sql/unique_python_files_combine.prql
@@ -0,0 +1,11 @@
+prql target:sql.duckdb
+
+# Reads the parquet input bound to $1 and emits one row per hash, keeping a
+# single path per hash as chosen by the SQL target's any_value() aggregate.
+let any_value = expr -> s"any_value({expr})"
+
+from (read_parquet $1)
+select {hash, path}
+group {hash} (
+  aggregate {path = any_value(path)}
+)