Try this?

pypi-data · Aug 2, 2023 · b751932
1 parent c276971
commit b751932
Show file tree

Hide file tree

Showing 2 changed files with 3 additions and 3 deletions.
diff --git a/.github/workflows/unique_python_files.yml b/.github/workflows/unique_python_files.yml
@@ -69,7 +69,7 @@ jobs:
 
       - name: Combine
         run: |
-          poetry run pypi-data run-sql ${{ github.workspace }}/sql/unique_python_files_combine.prql --output=parquet --memory=3 --threads=2 unique-python-files.parquet combined/*.parquet
+          poetry run pypi-data run-sql ${{ github.workspace }}/sql/unique_python_files_combine.prql --output=parquet --memory=3 --threads=2 --per-thread-output unique-python-files.parquet combined/*.parquet
 
       - name: Gets latest created release info
         id: latest_release_info

diff --git a/src/pypi_data/cli.py b/src/pypi_data/cli.py
@@ -222,11 +222,11 @@ def print_thread():
             sql_obj.insert_into("temp_table")
     elif output == OutputFormat.PARQUET:
         if per_thread_output:
-            output_sql = f'COPY ({sql}) TO \'{output_file}\' (FORMAT PARQUET, COMPRESSION zstd)'
+            output_sql = f'COPY ({sql}) TO \'{output_file}\' (FORMAT PARQUET, per_thread_output true, COMPRESSION snappy)'
             print(f'\n\nper_thread_output {output_sql}\n\n\n')
             conn.execute(output_sql)
         else:
-            sql_obj.to_parquet(str(output_file), compression="snappy")
+            sql_obj.to_parquet(str(output_file), compression="zstd")
     else:
         sql_obj.to_table("temp_table")
         conn.execute(f'COPY temp_table TO \'{output_file}\' (FORMAT JSON, array TRUE)')