diff --git a/.github/workflows/stats.yml b/.github/workflows/stats.yml index 6894c9c0..b14a58f0 100644 --- a/.github/workflows/stats.yml +++ b/.github/workflows/stats.yml @@ -53,7 +53,7 @@ jobs: - name: Generate stats run: | cat links/dataset.txt - poetry run pypi-data run-sql ${{ github.workspace }}/sql/stats.prql stats/totals.json --output=json --no-limits $(cat links/dataset.txt) + poetry run pypi-data run-sql ${{ github.workspace }}/sql/stats.prql stats/totals.json --output=json --threads=4 $(cat links/dataset.txt) - run: | cp stats/totals.json t diff --git a/src/pypi_data/cli.py b/src/pypi_data/cli.py index 5ec7d181..fd6a933c 100644 --- a/src/pypi_data/cli.py +++ b/src/pypi_data/cli.py @@ -189,10 +189,11 @@ def print_thread(): elif output == OutputFormat.PARQUET: sql.to_parquet(str(output_file), compression="zstd") else: - df: pd.DataFrame = sql.to_df() - df.set_index("name", inplace=True) - df["stat"] = df["stat"].apply(lambda x: json.loads(x)) - df.to_json(output_file, orient="index", lines=False, indent=2) + sql.to_table("temp_table") + # df: pd.DataFrame = sql.to_df() + # df.set_index("name", inplace=True) + # df["stat"] = df["stat"].apply(lambda x: json.loads(x)) + # df.to_json(output_file, orient="index", lines=False, indent=2) if __name__ == "__main__":