Skip to content

Commit

Permalink
Merge pull request #101 from ArneBinder/use_fixed_cdcp_dataset
Browse files Browse the repository at this point in the history
`cdcp` dataset: use fixed HF dataset
  • Loading branch information
ArneBinder authored Jan 29, 2024
2 parents 8072a91 + 6468338 commit a84dbda
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 3 deletions.
2 changes: 1 addition & 1 deletion dataset_builders/pie/cdcp/cdcp.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ class CDCP(GeneratorBasedBuilder):
}

BASE_DATASET_PATH = "DFKI-SLT/cdcp"
-BASE_DATASET_REVISION = "45cf7a6d89866caa8a21c40edf335b88a725ecdb"
+BASE_DATASET_REVISION = "3cf79257900b3f97e4b8f9faae2484b1a534f484"

BUILDER_CONFIGS = [datasets.BuilderConfig(name="default")]

Expand Down
5 changes: 3 additions & 2 deletions tests/dataset_builders/pie/test_cdcp.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,9 @@

DATASET_NAME = "cdcp"
BUILDER_CLASS = CDCP
-SPLIT_SIZES = {"train": 581, "test": 150}
+SPLIT_SIZES = {"train": 580, "test": 150}
 HF_DATASET_PATH = CDCP.BASE_DATASET_PATH
+HF_DATASET_REVISION = CDCP.BASE_DATASET_REVISION
PIE_DATASET_PATH = PIE_BASE_PATH / DATASET_NAME
DATA_PATH = FIXTURES_ROOT / "dataset_builders" / "cdcp_acl17.zip"

Expand Down Expand Up @@ -77,7 +78,7 @@ def split(request):

@pytest.fixture(scope="module")
def hf_dataset():
-    return load_dataset(str(HF_DATASET_PATH), data_dir=DATA_PATH)
+    return load_dataset(str(HF_DATASET_PATH), data_dir=DATA_PATH, revision=HF_DATASET_REVISION)


def test_hf_dataset(hf_dataset):
Expand Down

0 comments on commit a84dbda

Please sign in to comment.