Skip to content

Commit

Permalink
Migrate training storage from SAS to managed identity (#20618)
Browse files Browse the repository at this point in the history
### Description
The orttrainingtestdatascus storage account only stores the MNIST dataset
(only 64 MB) in Azure Files.
To meet security requirements and reduce maintenance cost, move the test
data to the lotusscus storage account and store it in Azure Blob Storage.
  • Loading branch information
mszhanyi authored May 9, 2024
1 parent 768c793 commit 5a18818
Show file tree
Hide file tree
Showing 4 changed files with 16 additions and 37 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ stages:
clean: true
submodules: recursive

- template: templates/jobs/download_training_test_data.yml

- template: templates/run-docker-build-steps.yml
parameters:
RunDockerBuildArgs: |
Expand All @@ -58,10 +60,6 @@ stages:
-e
DisplayName: 'Build'

- bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdatascus-storage-key) -s "//orttrainingtestdatascus.file.core.windows.net/mnist" -d "/mnist"
displayName: 'Mount MNIST'
condition: succeededOrFailed()

# Entry point for all ORTModule distributed tests
# Refer to orttraining/orttraining/test/python/how_to_add_ortmodule_distributed_ci_pipeline_tests.md for guidelines on how to add new tests to this pipeline.
- script: |
Expand All @@ -71,7 +69,7 @@ stages:
--rm \
--volume $(Build.SourcesDirectory):/onnxruntime_src \
--volume $(Build.BinariesDirectory):/build \
--volume /mnist:/mnist \
--volume $(Agent.TempDirectory)/mnist:/mnist \
onnxruntime_ortmodule_distributed_tests_image \
bash -c "rm -rf /build/RelWithDebInfo/onnxruntime/ && python3 -m pip install /build/RelWithDebInfo/dist/onnxruntime*.whl && python3 -m onnxruntime.training.ortmodule.torch_cpp_extensions.install && /build/RelWithDebInfo/launch_test.py --cmd_line_with_args 'python orttraining_ortmodule_distributed_tests.py --mnist /mnist' --cwd /build/RelWithDebInfo" \
displayName: 'Run orttraining_ortmodule_distributed_tests.py'
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Step template: fetch the MNIST training test data into the agent's temp
# directory so that later steps can bind-mount $(Agent.TempDirectory)/mnist.
steps:
# Recursively copy the mnist/ folder from the lotusscus blob container.
# The destination is quoted because $(Agent.TempDirectory) is substituted
# textually before the shell runs; an unquoted path containing spaces would
# be split into several arguments.
- script: |
    azcopy cp --recursive https://lotusscus.blob.core.windows.net/orttrainingtestdatascus/mnist/ "$(Agent.TempDirectory)"
  displayName: 'Download Training Test Data MNIST'
# List the downloaded files so the build log shows what was fetched.
- script: |
    ls -al "$(Agent.TempDirectory)/mnist"
  displayName: 'Print contents of Training Test Data MNIST'
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,7 @@ parameters:

steps:

- bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdatascus-storage-key) -s "//orttrainingtestdatascus.file.core.windows.net/mnist" -d "/mnist"
displayName: 'Mount MNIST'
condition: succeededOrFailed()

- bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdatascus-storage-key) -s "//orttrainingtestdatascus.file.core.windows.net/bert-data" -d "/bert_data"
displayName: 'Mount bert-data'
condition: succeededOrFailed()

- bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdatascus-storage-key) -s "//orttrainingtestdatascus.file.core.windows.net/hf-models-cache" -d "/hf_models_cache"
displayName: 'Mount hf-models-cache'
condition: succeededOrFailed()
- template: jobs/download_training_test_data.yml

# Entry point for all ORTModule tests
# The onnxruntime folder is deleted in the build directory
Expand All @@ -29,9 +19,7 @@ steps:
--rm \
--volume $(Build.SourcesDirectory):/onnxruntime_src \
--volume $(Build.BinariesDirectory)/${{ parameters.BuildConfig }}:/build \
--volume /mnist:/mnist \
--volume /bert_data:/bert_data \
--volume /hf_models_cache:/hf_models_cache \
--volume $(Agent.TempDirectory)/mnist:/mnist \
${{ parameters.DockerImageTag }} \
bash -c "rm -rf /build/onnxruntime/ && python3 -m pip install /build/dist/onnxruntime*.whl && python3 -m onnxruntime.training.ortmodule.torch_cpp_extensions.install && /build/launch_test.py --cmd_line_with_args 'python orttraining_ortmodule_tests.py --mnist /mnist --bert_data /bert_data/hf_data/glue_data/CoLA/original/raw' --cwd /build" \
displayName: 'Run orttraining_ortmodule_tests.py'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,8 @@ stages:
clean: true
submodules: none

- template: jobs/download_training_test_data.yml

- template: set-python-manylinux-variables-step.yml

- template: flex-downloadPipelineArtifact.yml
Expand Down Expand Up @@ -188,21 +190,6 @@ stages:
--build-arg LD_LIBRARY_PATH_ARG=/usr/local/lib64
Repository: $(Repository)

- bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdatascus-storage-key) -s "//orttrainingtestdatascus.file.core.windows.net/mnist" -d "/mnist"
displayName: 'Mount MNIST'
condition: succeededOrFailed()
workingDirectory: $(Build.SourcesDirectory)

- bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdatascus-storage-key) -s "//orttrainingtestdatascus.file.core.windows.net/bert-data" -d "/bert_data"
displayName: 'Mount bert-data'
condition: succeededOrFailed()
workingDirectory: $(Build.SourcesDirectory)

- bash: tools/ci_build/github/linux/docker/scripts/training/azure_scale_set_vm_mount_test_data.sh -p $(orttrainingtestdatascus-storage-key) -s "//orttrainingtestdatascus.file.core.windows.net/hf-models-cache" -d "/hf_models_cache"
displayName: 'Mount hf-models-cache'
condition: succeededOrFailed()
workingDirectory: $(Build.SourcesDirectory)

- task: CmdLine@2
displayName: 'test ortmodule'
inputs:
Expand All @@ -215,9 +202,7 @@ stages:
--gpus all \
-e NVIDIA_VISIBLE_DEVICES=all \
--volume $(Build.ArtifactStagingDirectory):/build \
--volume /mnist:/mnist \
--volume /bert_data:/bert_data \
--volume /hf_models_cache:/hf_models_cache \
--volume $(Agent.TempDirectory)/mnist:/mnist \
$(Repository) \
bash -c " $(PythonManylinuxDir)/bin/python3 -m pip install /build/Release/dist/$basefilename && $(PythonManylinuxDir)/bin/python3 -m onnxruntime.training.ortmodule.torch_cpp_extensions.install " ;
workingDirectory: $(Build.SourcesDirectory)
Expand Down

0 comments on commit 5a18818

Please sign in to comment.