From 69c1eaa6f1a2dc4cb3b6510cb7a301b48e08a947 Mon Sep 17 00:00:00 2001 From: Tom Forbes Date: Sat, 29 Jul 2023 19:47:05 +0100 Subject: [PATCH] try fixing stats --- .github/workflows/stats.yml | 2 +- src/pypi_data/cli.py | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/workflows/stats.yml b/.github/workflows/stats.yml index 6894c9c0..b14a58f0 100644 --- a/.github/workflows/stats.yml +++ b/.github/workflows/stats.yml @@ -53,7 +53,7 @@ jobs: - name: Generate stats run: | cat links/dataset.txt - poetry run pypi-data run-sql ${{ github.workspace }}/sql/stats.prql stats/totals.json --output=json --no-limits $(cat links/dataset.txt) + poetry run pypi-data run-sql ${{ github.workspace }}/sql/stats.prql stats/totals.json --output=json --threads=4 $(cat links/dataset.txt) - run: | cp stats/totals.json t diff --git a/src/pypi_data/cli.py b/src/pypi_data/cli.py index 5ec7d181..fd6a933c 100644 --- a/src/pypi_data/cli.py +++ b/src/pypi_data/cli.py @@ -189,10 +189,11 @@ def print_thread(): elif output == OutputFormat.PARQUET: sql.to_parquet(str(output_file), compression="zstd") else: - df: pd.DataFrame = sql.to_df() - df.set_index("name", inplace=True) - df["stat"] = df["stat"].apply(lambda x: json.loads(x)) - df.to_json(output_file, orient="index", lines=False, indent=2) + sql.to_table("temp_table") + # df: pd.DataFrame = sql.to_df() + # df.set_index("name", inplace=True) + # df["stat"] = df["stat"].apply(lambda x: json.loads(x)) + # df.to_json(output_file, orient="index", lines=False, indent=2) if __name__ == "__main__":