Skip to content

Commit

Permalink
feat: replicate crates data on bigquery (#2589)
Browse files (browse the repository at this point in the history)
* add: `archive2bq` factory asset

* feat: replicate `crates` data on bigquery
  • Loading branch information
Jabolol authored Dec 5, 2024
1 parent 287561c commit a8f3b0e
Show file tree
Hide file tree
Showing 2 changed files with 427 additions and 0 deletions.
21 changes: 21 additions & 0 deletions warehouse/oso_dagster/assets/crates.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from google.cloud.bigquery import SourceFormat
from ..constants import staging_bucket
from ..factories.archive2bq import Archive2BqAssetConfig, create_archive2bq_asset

# Asset built by the archive2bq factory from the crates.io database dump
# archive; the configuration below is purely declarative.
crates_data = create_archive2bq_asset(
    Archive2BqAssetConfig(
        # Asset identity.
        asset_name="crates",
        key_prefix="rust",
        # Source archive and the format its contents are declared to be in.
        source_url="https://static.crates.io/db-dump.tar.gz",
        source_format=SourceFormat.CSV,
        # Predicate handed to the factory: true for names ending in ".csv".
        # NOTE(review): presumably applied to each archive member's path to
        # decide which files get processed -- confirm against archive2bq.
        filter_fn=lambda file: file.endswith(".csv"),
        # Dataset and staging bucket passed through to the factory.
        dataset_id="crates",
        staging_bucket=staging_bucket,
        # Explicit type override for the `id` field of `crates`.
        schema_overrides={"crates": {"id": "INTEGER"}},
        # No upstream dependencies are declared for this asset.
        deps=[],
    )
)
Loading

0 comments on commit a8f3b0e

Please sign in to comment.