From 69c1eaa6f1a2dc4cb3b6510cb7a301b48e08a947 Mon Sep 17 00:00:00 2001
From: Tom Forbes <tom@tomforb.es>
Date: Sat, 29 Jul 2023 19:47:05 +0100
Subject: [PATCH] Try fixing stats: use --threads=4, swap JSON export for to_table

---
 .github/workflows/stats.yml | 2 +-
 src/pypi_data/cli.py        | 9 +++++----
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/stats.yml b/.github/workflows/stats.yml
index 6894c9c0..b14a58f0 100644
--- a/.github/workflows/stats.yml
+++ b/.github/workflows/stats.yml
@@ -53,7 +53,7 @@ jobs:
       - name: Generate stats
         run: |
           cat links/dataset.txt
-          poetry run pypi-data run-sql ${{ github.workspace }}/sql/stats.prql stats/totals.json --output=json --no-limits $(cat links/dataset.txt)
+          poetry run pypi-data run-sql ${{ github.workspace }}/sql/stats.prql stats/totals.json --output=json --threads=4 $(cat links/dataset.txt)
 
       - run: |
           cp stats/totals.json t
diff --git a/src/pypi_data/cli.py b/src/pypi_data/cli.py
index 5ec7d181..fd6a933c 100644
--- a/src/pypi_data/cli.py
+++ b/src/pypi_data/cli.py
@@ -189,10 +189,11 @@ def print_thread():
     elif output == OutputFormat.PARQUET:
         sql.to_parquet(str(output_file), compression="zstd")
     else:
-        df: pd.DataFrame = sql.to_df()
-        df.set_index("name", inplace=True)
-        df["stat"] = df["stat"].apply(lambda x: json.loads(x))
-        df.to_json(output_file, orient="index", lines=False, indent=2)
+        sql.to_table("temp_table")
+        # df: pd.DataFrame = sql.to_df()
+        # df.set_index("name", inplace=True)
+        # df["stat"] = df["stat"].apply(lambda x: json.loads(x))
+        # df.to_json(output_file, orient="index", lines=False, indent=2)
 
 
 if __name__ == "__main__":