# dvc.lock
# DVC lockfile: for each pipeline stage it records the command, the md5/size
# of every dependency and output, and the parameter values read from
# params.yaml.
# NOTE(review): indentation was reconstructed from a flattened copy of this
# file. The nesting below each `params.yaml:` key is inferred from key names
# and ordering - confirm against params.yaml or a freshly generated lockfile.
schema: '2.0'
stages:
  # Produces the processed train/test parquet files.
  data_ingestion:
    cmd: python -m ml.comment_sentiment.ingestion
    deps:
    - path: ml/comment_sentiment/ingestion.py
      hash: md5
      md5: c46bda70cd7013476c8022cf85d694fd
      size: 2509
    params:
      params.yaml:
        dataset:
          url:
            https://raw.githubusercontent.com/Himanshu-1703/reddit-sentiment-analysis/refs/heads/main/data/reddit.csv
          rename_columns:
            clean_comment: text
            category: target
          train_size: 0.75
          target_labels:
            0: neg
            1: neu
            2: pos
        ingestion:
          processed_train_path: data/processed/train.parquet
          processed_test_path: data/processed/test.parquet
    outs:
    - path: data/processed/test.parquet
      hash: md5
      md5: b59e6dbce031d40ae776b7540775d3d2
      size: 651415
    - path: data/processed/train.parquet
      hash: md5
      md5: d0f05f0713cd9c610574593ac8f67d58
      size: 1966155
  # Depends on the train.parquet emitted by data_ingestion (same md5) and
  # writes models/classifier.pkl.
  model_building:
    cmd: python -m ml.comment_sentiment.building
    deps:
    - path: data/processed/train.parquet
      hash: md5
      md5: d0f05f0713cd9c610574593ac8f67d58
      size: 1966155
    - path: ml/comment_sentiment/building.py
      hash: md5
      md5: f16b1f3812b97a3cab80948cd048a5dd
      size: 2659
    params:
      params.yaml:
        model:
          module: sklearn.ensemble
          name: HistGradientBoostingClassifier
          params: {}
        vectorizer:
          module: sklearn.feature_extraction.text
          name: TfidfVectorizer
          params:
            max_features: 5000
    outs:
    - path: models/classifier.pkl
      hash: md5
      md5: bc1e6448aa94bb576f3017c5796ed2fe
      size: 3690247
  # Depends on test.parquet and classifier.pkl from the earlier stages (same
  # md5 values) and writes mlflow_run_info.json.
  model_evaluation:
    cmd: python -m ml.comment_sentiment.evaluation
    deps:
    - path: data/processed/test.parquet
      hash: md5
      md5: b59e6dbce031d40ae776b7540775d3d2
      size: 651415
    - path: ml/comment_sentiment/evaluation.py
      hash: md5
      md5: a365f24a1738ac4b01a644642071595e
      size: 5113
    - path: models/classifier.pkl
      hash: md5
      md5: bc1e6448aa94bb576f3017c5796ed2fe
      size: 3690247
    params:
      params.yaml:
        evaluation:
          train_vec_path: models/train_vec_data.pkl
        # NOTE(review): `mlflow` is assumed to be a sibling of `evaluation`,
        # not nested inside it - verify against params.yaml.
        mlflow:
          tracking_uri: ./mlruns
          experiment_name: exp1-arv-testing
    outs:
    - path: mlflow_run_info.json
      hash: md5
      md5: 4e8a3b62c18edc0906f89907a2703b1b
      size: 75