From d78f3f685b2d0066bdbcde447c378f5d657f0d25 Mon Sep 17 00:00:00 2001 From: Tom Forbes Date: Sat, 29 Jul 2023 03:07:16 +0100 Subject: [PATCH] Revert "try this" This reverts commit 17c7fb9d --- sql/combine.sql | 5 ++++- src/pypi_data/cli.py | 9 +++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/sql/combine.sql b/sql/combine.sql index 97c7242e..35c4b4b1 100644 --- a/sql/combine.sql +++ b/sql/combine.sql @@ -1 +1,4 @@ -SELECT * FROM read_parquet('input/*.parquet', union_by_name=True); \ No newline at end of file +PRAGMA memory_limit='2GB'; +PRAGMA threads=4; +CREATE TABLE temp_table AS SELECT * FROM read_parquet('input/*.parquet', union_by_name=True); +COPY temp_table TO 'output.parquet' (FORMAT PARQUET, COMPRESSION zstd); \ No newline at end of file diff --git a/src/pypi_data/cli.py b/src/pypi_data/cli.py index d2404499..1d30b784 100644 --- a/src/pypi_data/cli.py +++ b/src/pypi_data/cli.py @@ -93,11 +93,12 @@ def run_sql( if prql_file.name.endswith(".sql"): sql = prql_file.read_text() # Can't get it to work without doing this. So dumb. - compiled_sql = sql.replace('$1', json.dumps(parameter)) + sql = sql.replace('$1', json.dumps(parameter)) + sql = f"{sql}; COPY temp_table TO '{output_file}' (FORMAT PARQUET, COMPRESSION zstd);" + parameter = [] else: - # to-do: make this not shit.... - compiled_sql = prql.compile(prql_file.read_text(), options=options).replace('$1', json.dumps(parameter)) - sql = f"CREATE TABLE temp_table AS {compiled_sql}; COPY temp_table TO '{output_file}' (FORMAT PARQUET, COMPRESSION zstd)" + compiled_sql = prql.compile(prql_file.read_text(), options=options) + sql = f"CREATE TABLE temp_table AS {compiled_sql}; COPY temp_table TO '{output_file}' (FORMAT PARQUET, COMPRESSION zstd)" print(sql) print("\n\n\n")