diff --git a/movielens/ML100K/.gitignore b/movielens/ML100K/.gitignore index 096baa3..ad3fc18 100644 --- a/movielens/ML100K/.gitignore +++ b/movielens/ML100K/.gitignore @@ -3,3 +3,4 @@ /ratings.duckdb /ratings.parquet /stats.duckdb +/run-summary.csv diff --git a/movielens/ML100K/dvc.lock b/movielens/ML100K/dvc.lock index c178fdb..f329567 100644 --- a/movielens/ML100K/dvc.lock +++ b/movielens/ML100K/dvc.lock @@ -427,13 +427,73 @@ stages: size: 25932 nfiles: 4 collect-metrics: - cmd: lenskit-codex collect metrics run-metrics.duckdb --view-script=../ml-run-metrics.sql - runs + cmd: lenskit-codex collect metrics -S run-summary.csv -U run-user-metrics.parquet + -L runs/manifest.csv + deps: + - path: runs/random-default/Bias + hash: md5 + md5: 9f6295c785d22b2c36d339ab9daa2891.dir + size: 43468 + nfiles: 5 + - path: runs/random-default/BiasedMF-ALS + hash: md5 + md5: 0af5c691df7f1bc57bf25d5e433186d7.dir + size: 36568 + nfiles: 5 + - path: runs/random-default/IKNN-Explicit + hash: md5 + md5: e5cd24870df4aeebf387843c0d0b7cf1.dir + size: 35537 + nfiles: 5 + - path: runs/random-default/IKNN-Implicit + hash: md5 + md5: b9f96e7d3faa3c845118148fb8437adc.dir + size: 23551 + nfiles: 4 + - path: runs/random-default/ImplicitMF-ALS + hash: md5 + md5: 2da1f95cee4cc680f3fdd49030cf1bcf.dir + size: 25932 + nfiles: 4 + - path: runs/random-default/Popular + hash: md5 + md5: 4ef404da31e15763114100030c890bcc.dir + size: 27073 + nfiles: 4 + - path: runs/random-sweep-best/Bias + hash: md5 + md5: dbf333689ed99ec5a0de83c66a078593.dir + size: 34115 + nfiles: 5 + - path: runs/random-sweep-best/BiasedMF-ALS + hash: md5 + md5: 3428267f151673f5ac3dff8ff52b17f7.dir + size: 34027 + nfiles: 5 + - path: runs/random-sweep-best/IKNN-Explicit + hash: md5 + md5: d4e21d6cd56ecb23c8ea0b2a40902391.dir + size: 33479 + nfiles: 5 + - path: runs/random-sweep-best/IKNN-Implicit + hash: md5 + md5: e7c0bbe881053c4a1f9768fc2a7d79d2.dir + size: 23550 + nfiles: 4 + - path: runs/random-sweep-best/ImplicitMF-ALS + hash: md5 + md5: 1bd6f88533d82bc54c68f53333bd5d90.dir + size: 23478 + nfiles: 4 outs: - - path: run-metrics.duckdb + - path: run-summary.csv + hash: md5 + md5: f40f42c11efa89f222c4e7e6ccf02093 + size: 8599 + - path: run-user-metrics.parquet hash: md5 - md5: 073567bfe9ab23edc69ba21e48956b7f - size: 1060864 + md5: f813ef76eec647fbad5b38055b8ce5f1 + size: 166085 run-random-sweep-best-IKNN-Implicit: cmd: lenskit-codex generate --param-file=sweeps/random/IKNN-Implicit.json --test-part=-0 --split=splits/random.toml -o runs/random-sweep-best/IKNN-Implicit IKNN-Implicit @@ -619,8 +679,8 @@ stages: outs: - path: runs/random-default/Popular hash: md5 - md5: e5ae7480ef8451e18d6f251ec577125c.dir - size: 22145 + md5: 4ef404da31e15763114100030c890bcc.dir + size: 27073 nfiles: 4 run-random-default-Implicit-BPR: cmd: python ../../action.py generate --default --test-part=-0 --assignments=splits/random.duckdb diff --git a/movielens/ML100K/dvc.yaml b/movielens/ML100K/dvc.yaml index a58ca70..ff48d93 100644 --- a/movielens/ML100K/dvc.yaml +++ b/movielens/ML100K/dvc.yaml @@ -223,7 +223,19 @@ stages: - sweeps/random/ImplicitMF-ALS.json collect-metrics: cmd: >- - lenskit-codex collect metrics run-metrics.duckdb --view-script=../ml-run-metrics.sql runs - deps: [] + lenskit-codex collect metrics -S run-summary.csv -U run-user-metrics.parquet -L runs/manifest.csv + deps: + - runs/random-default/Bias + - runs/random-sweep-best/Bias + - runs/random-default/Popular + - runs/random-default/BiasedMF-ALS + - runs/random-sweep-best/BiasedMF-ALS + - runs/random-default/IKNN-Explicit + - runs/random-sweep-best/IKNN-Explicit + - runs/random-default/IKNN-Implicit + - runs/random-sweep-best/IKNN-Implicit + - runs/random-default/ImplicitMF-ALS + - runs/random-sweep-best/ImplicitMF-ALS outs: - - run-metrics.duckdb + - run-summary.csv + - run-user-metrics.parquet diff --git a/movielens/ML10M/dvc.yaml b/movielens/ML10M/dvc.yaml index 63c80df..ee42a5c 100644 --- a/movielens/ML10M/dvc.yaml +++ b/movielens/ML10M/dvc.yaml @@ -212,7 +212,19 @@ stages: - sweeps/temporal/ImplicitMF-ALS.json collect-metrics: cmd: >- - lenskit-codex collect metrics run-metrics.duckdb --view-script=../ml-run-metrics.sql runs - deps: [] + lenskit-codex collect metrics -S run-summary.csv -U run-user-metrics.parquet -L runs/manifest.csv + deps: + - runs/temporal-default/Bias + - runs/temporal-sweep-best/Bias + - runs/temporal-default/Popular + - runs/temporal-default/BiasedMF-ALS + - runs/temporal-sweep-best/BiasedMF-ALS + - runs/temporal-default/IKNN-Explicit + - runs/temporal-sweep-best/IKNN-Explicit + - runs/temporal-default/IKNN-Implicit + - runs/temporal-sweep-best/IKNN-Implicit + - runs/temporal-default/ImplicitMF-ALS + - runs/temporal-sweep-best/ImplicitMF-ALS outs: - - run-metrics.duckdb + - run-summary.csv + - run-user-metrics.parquet diff --git a/movielens/ML1M/dvc.yaml b/movielens/ML1M/dvc.yaml index 4c30e45..73bb096 100644 --- a/movielens/ML1M/dvc.yaml +++ b/movielens/ML1M/dvc.yaml @@ -223,7 +223,19 @@ stages: - sweeps/random/ImplicitMF-ALS.json collect-metrics: cmd: >- - lenskit-codex collect metrics run-metrics.duckdb --view-script=../ml-run-metrics.sql runs - deps: [] + lenskit-codex collect metrics -S run-summary.csv -U run-user-metrics.parquet -L runs/manifest.csv + deps: + - runs/random-default/Bias + - runs/random-sweep-best/Bias + - runs/random-default/Popular + - runs/random-default/BiasedMF-ALS + - runs/random-sweep-best/BiasedMF-ALS + - runs/random-default/IKNN-Explicit + - runs/random-sweep-best/IKNN-Explicit + - runs/random-default/IKNN-Implicit + - runs/random-sweep-best/IKNN-Implicit + - runs/random-default/ImplicitMF-ALS + - runs/random-sweep-best/ImplicitMF-ALS outs: - - run-metrics.duckdb + - run-summary.csv + - run-user-metrics.parquet diff --git a/movielens/ML20M/dvc.yaml b/movielens/ML20M/dvc.yaml index cfb814e..afce232 100644 --- a/movielens/ML20M/dvc.yaml +++ b/movielens/ML20M/dvc.yaml @@ -212,7 +212,19 @@ stages: - sweeps/temporal/ImplicitMF-ALS.json collect-metrics: cmd: >- - lenskit-codex collect metrics run-metrics.duckdb --view-script=../ml-run-metrics.sql runs - deps: [] + lenskit-codex collect metrics -S run-summary.csv -U run-user-metrics.parquet -L runs/manifest.csv + deps: + - runs/temporal-default/Bias + - runs/temporal-sweep-best/Bias + - runs/temporal-default/Popular + - runs/temporal-default/BiasedMF-ALS + - runs/temporal-sweep-best/BiasedMF-ALS + - runs/temporal-default/IKNN-Explicit + - runs/temporal-sweep-best/IKNN-Explicit + - runs/temporal-default/IKNN-Implicit + - runs/temporal-sweep-best/IKNN-Implicit + - runs/temporal-default/ImplicitMF-ALS + - runs/temporal-sweep-best/ImplicitMF-ALS outs: - - run-metrics.duckdb + - run-summary.csv + - run-user-metrics.parquet diff --git a/movielens/ML25M/dvc.yaml b/movielens/ML25M/dvc.yaml index 6a6754b..f1ebda6 100644 --- a/movielens/ML25M/dvc.yaml +++ b/movielens/ML25M/dvc.yaml @@ -212,7 +212,19 @@ stages: - sweeps/temporal/ImplicitMF-ALS.json collect-metrics: cmd: >- - lenskit-codex collect metrics run-metrics.duckdb --view-script=../ml-run-metrics.sql runs - deps: [] + lenskit-codex collect metrics -S run-summary.csv -U run-user-metrics.parquet -L runs/manifest.csv + deps: + - runs/temporal-default/Bias + - runs/temporal-sweep-best/Bias + - runs/temporal-default/Popular + - runs/temporal-default/BiasedMF-ALS + - runs/temporal-sweep-best/BiasedMF-ALS + - runs/temporal-default/IKNN-Explicit + - runs/temporal-sweep-best/IKNN-Explicit + - runs/temporal-default/IKNN-Implicit + - runs/temporal-sweep-best/IKNN-Implicit + - runs/temporal-default/ImplicitMF-ALS + - runs/temporal-sweep-best/ImplicitMF-ALS outs: - - run-metrics.duckdb + - run-summary.csv + - run-user-metrics.parquet diff --git a/movielens/ML32M/dvc.yaml b/movielens/ML32M/dvc.yaml index fe775e1..8264740 100644 --- a/movielens/ML32M/dvc.yaml +++ b/movielens/ML32M/dvc.yaml @@ -212,7 +212,19 @@ stages: - sweeps/temporal/ImplicitMF-ALS.json collect-metrics: cmd: >- - lenskit-codex collect metrics run-metrics.duckdb --view-script=../ml-run-metrics.sql runs - deps: [] + lenskit-codex collect metrics -S run-summary.csv -U run-user-metrics.parquet -L runs/manifest.csv + deps: + - runs/temporal-default/Bias + - runs/temporal-sweep-best/Bias + - runs/temporal-default/Popular + - runs/temporal-default/BiasedMF-ALS + - runs/temporal-sweep-best/BiasedMF-ALS + - runs/temporal-default/IKNN-Explicit + - runs/temporal-sweep-best/IKNN-Explicit + - runs/temporal-default/IKNN-Implicit + - runs/temporal-sweep-best/IKNN-Implicit + - runs/temporal-default/ImplicitMF-ALS + - runs/temporal-sweep-best/ImplicitMF-ALS outs: - - run-metrics.duckdb + - run-summary.csv + - run-user-metrics.parquet diff --git a/movielens/pipeline.ts b/movielens/pipeline.ts index 8139c12..0299cf7 100644 --- a/movielens/pipeline.ts +++ b/movielens/pipeline.ts @@ -8,7 +8,7 @@ import { action_cmd, Pipeline, Stage } from "../src/dvc.ts"; import { mlRuns } from "./pipe-run.ts"; import { mlSweep } from "./pipe-sweep.ts"; -import { encodeRunList, runPath, runStages } from "../src/pipeline/run.ts"; +import { encodeRunList, Run, runPath, runStages } from "../src/pipeline/run.ts"; type SplitSpec = { source: string; @@ -72,12 +72,13 @@ async function ml_pipeline(name: string): Promise { let split_stages: Record = {}; let sweep_stages: Record = {}; - let run_stages: Record = {}; + let runs: Run[] = []; for (let [split, spec] of Object.entries(splits)) { Object.assign(split_stages, mlSplit(split, spec)); Object.assign(sweep_stages, mlSweep(name, split)); - Object.assign(run_stages, runStages(`movielens/${name}`, mlRuns(split, spec))); + runs.push(...mlRuns(split, spec)); } + let run_stages = runStages(`movielens/${name}`, runs); return { stages: { @@ -90,15 +91,13 @@ async function ml_pipeline(name: string): Promise { "collect-metrics": { cmd: action_cmd( "collect metrics", - "run-metrics.duckdb", - "--view-script=../ml-run-metrics.sql", - "runs", + "-S run-summary.csv", + "-U run-user-metrics.parquet", + "-L runs/manifest.csv", ), // @ts-ignore i'm lazy - deps: Object.values(run_stages).map((s) => s.outs).flat().filter((d) => - typeof d == "string" && d.endsWith(".duckdb") - ), - outs: ["run-metrics.duckdb"], + deps: Object.values(runs).map((r) => "runs/" + runPath(r)), + outs: ["run-summary.csv", "run-user-metrics.parquet"], }, }, };