Skip to content

Commit

Permalink
Feat: huggingface raw dataset with readme and instruct readme (#26)
Browse files Browse the repository at this point in the history
* feat: add push raw dataset script

* feat: add push instruct dataset script

* refactor: dvc commit raw_dataset_readme

* feat: extend scripts readme
  • Loading branch information
asawczyn authored Jun 10, 2024
1 parent d03c181 commit 2bd68c2
Show file tree
Hide file tree
Showing 9 changed files with 1,036 additions and 14 deletions.
2 changes: 2 additions & 0 deletions data/datasets/pl/readme/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
/raw
/instruct
33 changes: 33 additions & 0 deletions dvc.lock
Original file line number Diff line number Diff line change
Expand Up @@ -425,3 +425,36 @@ stages:
hash: md5
md5: 68b09dd0ce741e6ee1fff4e37c954fa6
size: 564
raw_dataset_readme:
cmd: jupyter nbconvert --no-input --to markdown --execute nbs/Data/02_Dataset_Description_Raw.ipynb
--output-dir data/datasets/pl/readme/raw --output README
deps:
- path: data/datasets/pl/raw
hash: md5
md5: 5dd44be2eea852bcce3d0918ff8b97da.dir
size: 10234880729
nfiles: 17
- path: nbs/Data/02_Dataset_Description_Raw.ipynb
hash: md5
md5: d3d7509d084b85676857e13a2f20b82a
size: 73872
outs:
- path: data/datasets/pl/readme/raw/
hash: md5
md5: f1d267b2829519729d5615b4a128e03b.dir
size: 473589
nfiles: 8
instruct_dataset_readme:
cmd: jupyter nbconvert --no-input --to markdown --execute nbs/Data/03_Dataset_Description_Instruct.ipynb
--output-dir data/datasets/pl/readme/instruct --output README
deps:
- path: nbs/Data/03_Dataset_Description_Instruct.ipynb
hash: md5
md5: 27e6d517445028d45e5c40b22febece4
size: 16215
outs:
- path: data/datasets/pl/readme/instruct/
hash: md5
md5: de02794df3d74d86f8610f040a17dcbe.dir
size: 144326
nfiles: 5
26 changes: 26 additions & 0 deletions dvc.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,30 @@
stages:
# Stage: execute the raw-dataset description notebook and export it as
# Markdown (output base name "README") into data/datasets/pl/readme/raw.
raw_dataset_readme:
  # Folded scalar (>-): the lines below are joined with single spaces into
  # one command line. --no-input omits the notebook's code cells from the
  # exported Markdown; --execute runs the notebook before converting it.
  cmd: >-
    jupyter nbconvert --no-input --to markdown
    --execute nbs/Data/02_Dataset_Description_Raw.ipynb
    --output-dir data/datasets/pl/readme/raw
    --output README
  deps:
    # The notebook that is executed and converted.
    - nbs/Data/02_Dataset_Description_Raw.ipynb
    # Raw dataset directory; listed so a change in the data re-runs the stage.
    - data/datasets/pl/raw
  outs:
    # Whole output directory is tracked, not only the README file.
    - data/datasets/pl/readme/raw/

# Stage: execute the instruct-dataset description notebook and export it as
# Markdown (output base name "README") into data/datasets/pl/readme/instruct.
instruct_dataset_readme:
  # Folded scalar (>-): the lines below are joined with single spaces into
  # one command line. --no-input omits the notebook's code cells from the
  # exported Markdown; --execute runs the notebook before converting it.
  cmd: >-
    jupyter nbconvert --no-input --to markdown
    --execute nbs/Data/03_Dataset_Description_Instruct.ipynb
    --output-dir data/datasets/pl/readme/instruct
    --output README
  deps:
    # The notebook that is executed and converted.
    # NOTE(review): unlike raw_dataset_readme, no dataset directory is listed
    # here. If the notebook reads a locally tracked dataset, add it as a dep
    # so data changes re-run this stage - confirm against the notebook.
    - nbs/Data/03_Dataset_Description_Instruct.ipynb
  outs:
    # Whole output directory is tracked, not only the README file.
    - data/datasets/pl/readme/instruct/
build_instruct_dataset:
cmd: >-
PYTHONPATH=. python scripts/dataset/build_instruct_dataset.py
Expand Down
419 changes: 419 additions & 0 deletions nbs/Data/02_Dataset_Description_Raw.ipynb

Large diffs are not rendered by default.

Loading

0 comments on commit 2bd68c2

Please sign in to comment.