From d63c664ca0021fbac31cee57ff1eaa8bce3d1903 Mon Sep 17 00:00:00 2001 From: rui-ren Date: Thu, 15 Feb 2024 00:02:08 -0800 Subject: [PATCH] fix rocm ci pipeline (#19525) ### Description ROCm CI pipeline issue. ``` Downloading and preparing dataset wikitext/wikitext-2-raw-v1 (download: 4.50 MiB, generated: 12.91 MiB, post-processed: Unknown size, total: 17.41 MiB) to /home/onnxruntimedev/.cache/huggingface/datasets/wikitext/wikitext-2-raw-v1/1.0.0/aa5e094000ec7afeb74c3be92c88313cd6f132d564c7effd961c10fd47c76f20... main() File "/stage/huggingface-transformers/examples/pytorch/language-modeling/run_mlm.py", line 242, in main datasets = load_dataset(data_args.dataset_name, data_args.dataset_config_name, cache_dir=model_args.cache_dir) File "/opt/miniconda/envs/rocm-ci/lib/python3.9/site-packages/datasets/load.py", line 856, in load_dataset builder_instance.download_and_prepare( File "/opt/miniconda/envs/rocm-ci/lib/python3.9/site-packages/datasets/builder.py", line 583, in download_and_prepare self._download_and_prepare( File "/opt/miniconda/envs/rocm-ci/lib/python3.9/site-packages/datasets/builder.py", line 639, in _download_and_prepare split_generators = self._split_generators(dl_manager, **split_generators_kwargs) File "/home/onnxruntimedev/.cache/huggingface/modules/datasets_modules/datasets/wikitext/aa5e094000ec7afeb74c3be92c88313cd6f132d564c7effd961c10fd47c76f20/wikitext.py", line 138, in _split_generators data_file = dl_manager.download_and_extract(self.config.data_url) File "/opt/miniconda/envs/rocm-ci/lib/python3.9/site-packages/datasets/utils/download_manager.py", line 289, in download_and_extract return self.extract(self.download(url_or_urls)) File "/opt/miniconda/envs/rocm-ci/lib/python3.9/site-packages/datasets/utils/download_manager.py", line 197, in download downloaded_path_or_paths = map_nested( File "/opt/miniconda/envs/rocm-ci/lib/python3.9/site-packages/datasets/utils/py_utils.py", line 195, in map_nested return function(data_struct) File "/opt/miniconda/envs/rocm-ci/lib/python3.9/site-packages/datasets/utils/download_manager.py", line 220, in _download return cached_path(url_or_filename, download_config=download_config) File "/opt/miniconda/envs/rocm-ci/lib/python3.9/site-packages/datasets/utils/file_utils.py", line 281, in cached_path output_path = get_from_cache( File "/opt/miniconda/envs/rocm-ci/lib/python3.9/site-packages/datasets/utils/file_utils.py", line 634, in get_from_cache raise ConnectionError("Couldn't reach {}".format(url)) ConnectionError: Couldn't reach https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-raw-v1.zip ``` ### Motivation and Context Update the `datasets` pipeline to latest version `2.17.0`. --- tools/ci_build/github/pai/rocm-ci-pipeline-env.Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/ci_build/github/pai/rocm-ci-pipeline-env.Dockerfile b/tools/ci_build/github/pai/rocm-ci-pipeline-env.Dockerfile index 64710a982a29d..496b57b417fbd 100644 --- a/tools/ci_build/github/pai/rocm-ci-pipeline-env.Dockerfile +++ b/tools/ci_build/github/pai/rocm-ci-pipeline-env.Dockerfile @@ -112,7 +112,7 @@ RUN pip install \ cerberus \ sympy \ h5py \ - datasets==1.9.0 \ + datasets==2.17.0 \ requests \ sacrebleu==1.5.1 \ sacremoses \