Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[pull] master from ray-project:master #2374

Merged
merged 15 commits into from
Oct 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion .buildkite/ml.rayci.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
group: ml tests
steps:
- label: ":train: ml: train tests and examples"
- label: ":train: ml: train tests"
tags: train
instance_type: large
parallelism: 2
Expand All @@ -10,3 +10,17 @@ steps:
--except-tags gpu_only,gpu,minimal,tune,doctest,needs_credentials
depends_on: mlbuild
job_env: forge

- label: ":train: ml: train gpu tests"
tags:
- train
- gpu
instance_type: gpu-large
commands:
- bazel run //ci/ray_ci:test_in_docker -- //python/ray/train/... //python/ray/air/... //doc/... ml
--parallelism-per-worker 2
--build-name mlgpubuild
--only-tags gpu,gpu_only
--except-tags doctest
depends_on: mlgpubuild
job_env: forge
34 changes: 1 addition & 33 deletions .buildkite/pipeline.gpu_large.yml
Original file line number Diff line number Diff line change
@@ -1,26 +1,5 @@
#ci:group=Large GPU tests

- label: ":tv: :steam_locomotive: Train GPU tests "
conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_TRAIN_AFFECTED"]
commands:
- cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
- TRAIN_TESTING=1 TUNE_TESTING=1 ./ci/env/install-dependencies.sh
- pip install -Ur ./python/requirements/ml/dl-gpu-requirements.txt
- ./ci/env/install-horovod.sh
- ./ci/env/env_info.sh
- bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=gpu,gpu_only,-ray_air
python/ray/train/...

- label: ":tv: :database: :steam_locomotive: Datasets Train Integration GPU Tests and Examples (Python 3.7)"
conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_TRAIN_AFFECTED"]
commands:
- cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
- TRAIN_TESTING=1 DATA_PROCESSING_TESTING=1 ./ci/env/install-dependencies.sh
- pip install -Ur ./python/requirements/ml/dl-gpu-requirements.txt
- ./ci/env/env_info.sh
- bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=datasets_train,-doctest
doc/...

- label: ":tv: :brain: RLlib: Multi-GPU Tests"
conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_RLLIB_AFFECTED"]
parallelism: 2
Expand All @@ -39,17 +18,6 @@
--test_env=RAY_USE_MULTIPROCESSING_CPU_COUNT=1
rllib/...

- label: ":tv: :airplane: AIR GPU tests (ray/air)"
conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_ML_AFFECTED"]
commands:
- cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
- DATA_PROCESSING_TESTING=1 TRAIN_TESTING=1 TUNE_TESTING=1 ./ci/env/install-dependencies.sh
- pip install -Ur ./python/requirements/ml/dl-gpu-requirements.txt
- ./ci/env/install-horovod.sh
- ./ci/env/env_info.sh
- bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=gpu
python/ray/air/... python/ray/train/...

- label: ":tv: :book: Doc GPU tests and examples"
conditions:
["NO_WHEELS_REQUIRED", "RAY_CI_PYTHON_AFFECTED", "RAY_CI_TUNE_AFFECTED", "RAY_CI_DOC_AFFECTED"]
Expand All @@ -62,7 +30,7 @@
# TODO(amogkam): Remove when https://github.com/ray-project/ray/issues/36011
# is resolved.
- pip install -U transformers
- bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=gpu,-timeseries_libs,-post_wheel_build,-doctest
- bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=gpu,-timeseries_libs,-post_wheel_build,-doctest,-team:ml
doc/...

- label: ":book: Doctest (GPU)"
Expand Down
2 changes: 1 addition & 1 deletion .github/CODEOWNERS
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
/src/ray/protobuf/common.proto @wuisawesome @ericl @ameerhajali @robertnishihara @pcmoritz @raulchen @ray-project/ray-core
/src/ray/protobuf/gcs.proto @wuisawesome @ericl @ameerhajali @robertnishihara @pcmoritz @raulchen @ray-project/ray-core
/src/ray/protobuf/gcs_service.proto @wuisawesome @ericl @ameerhajali @robertnishihara @pcmoritz @raulchen @ray-project/ray-core
/dashboard/modules/snapshot @wuisawesome @ijrsvt @edoakes @alanwguo @architkulkarni
/dashboard/modules/snapshot @alanwguo @nikitavemuri
/python/ray/autoscaler/_private/monitor.py @wuisawesome @DmitriGekhtman

# Autoscaler
Expand Down
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
*test-output.xml
/bazel-*
/python/ray/core/**/*
/python/ray/pickle5_files/
/python/ray/thirdparty_files/
/python/ray/_private/runtime_env/agent/thirdparty_files/
/python/ray/pyarrow_files/
Expand Down
9 changes: 5 additions & 4 deletions BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ load("@rules_cc//cc:defs.bzl", "cc_proto_library")
load("@com_github_grpc_grpc//bazel:cc_grpc_library.bzl", "cc_grpc_library")
load("@com_github_grpc_grpc//bazel:cython_library.bzl", "pyx_library")
load("@com_github_google_flatbuffers//:build_defs.bzl", "flatbuffer_cc_library")
load("//bazel:ray.bzl", "COPTS", "PYX_COPTS", "PYX_SRCS", "copy_to_workspace", "ray_cc_binary", "ray_cc_test", "ray_cc_library")
load("//bazel:ray.bzl", "COPTS", "PYX_COPTS", "PYX_SRCS", "copy_to_workspace", "ray_cc_binary", "ray_cc_library", "ray_cc_test")
load("@python3_9//:defs.bzl", python39 = "interpreter")

package(
Expand Down Expand Up @@ -86,7 +86,7 @@ config_setting(
],
flag_values = {
":jemalloc_flag": "true",
}
},
)

# === Begin of rpc definitions ===
Expand Down Expand Up @@ -1497,7 +1497,8 @@ ray_cc_test(
name = "metric_exporter_grpc_test",
size = "small",
srcs = [
"src/ray/stats/metric_exporter_grpc_test.cc"],
"src/ray/stats/metric_exporter_grpc_test.cc",
],
tags = [
"stats",
"team:core",
Expand Down Expand Up @@ -2367,8 +2368,8 @@ pyx_library(
),
deps = [
"//:core_worker_lib",
"//:global_state_accessor_lib",
"//:gcs_server_lib",
"//:global_state_accessor_lib",
"//:raylet_lib",
"//:redis_client",
"//:src/ray/ray_exported_symbols.lds",
Expand Down
6 changes: 2 additions & 4 deletions bazel/ray.bzl
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
load("@com_github_google_flatbuffers//:build_defs.bzl", "flatbuffer_library_public")
load("@bazel_skylib//rules:copy_file.bzl", "copy_file")
load("@bazel_common//tools/maven:pom_file.bzl", "pom_file")
load("@rules_cc//cc:defs.bzl", "cc_library", "cc_test", "cc_binary")
load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library", "cc_test")

COPTS_WITHOUT_LOG = select({
"//:opt": ["-DBAZEL_OPT"],
Expand Down Expand Up @@ -180,7 +180,6 @@ def ray_cc_library(name, strip_include_prefix = "/src", copts = [], **kwargs):
**kwargs
)


def ray_cc_test(name, linkopts = [], copts = [], **kwargs):
cc_test(
name = name,
Expand All @@ -197,7 +196,6 @@ def ray_cc_binary(name, linkopts = [], copts = [], **kwargs):
**kwargs
)


def _filter_files_with_suffix_impl(ctx):
suffix = ctx.attr.suffix
filtered_files = [f for f in ctx.files.srcs if f.basename.endswith(suffix)]
Expand All @@ -211,7 +209,7 @@ def _filter_files_with_suffix_impl(ctx):
filter_files_with_suffix = rule(
implementation = _filter_files_with_suffix_impl,
attrs = {
"srcs": attr.label_list(allow_files=True),
"srcs": attr.label_list(allow_files = True),
"suffix": attr.string(),
},
)
14 changes: 0 additions & 14 deletions ci/ci.sh
Original file line number Diff line number Diff line change
Expand Up @@ -208,9 +208,7 @@ prepare_docker() {

# For running Serve tests on Windows.
test_serve() {
local pathsep=":" args=()
if [ "${OSTYPE}" = msys ]; then
pathsep=";"
args+=(
python/ray/serve/...
-python/ray/serve/tests:test_cross_language # Ray java not built on Windows yet.
Expand All @@ -229,14 +227,9 @@ test_serve() {
BUILDKITE_PARALLEL_JOB_COUNT=${BUILDKITE_PARALLEL_JOB_COUNT:-'1'}
test_shard_selection=$(python ./ci/ray_ci/bazel_sharding.py --exclude_manual --index "${BUILDKITE_PARALLEL_JOB}" --count "${BUILDKITE_PARALLEL_JOB_COUNT}" "${args[@]}")

# TODO(mehrdadn): We set PYTHONPATH here to let Python find our pickle5 under pip install -e.
# It's unclear to me if this should be necessary, but this is to make tests run for now.
# Check why this issue doesn't arise on Linux/Mac.
# Ideally importing ray.cloudpickle should import pickle5 automatically.
# shellcheck disable=SC2046,SC2086
bazel test --config=ci \
--build_tests_only $(./ci/run/bazel_export_options) \
--test_env=PYTHONPATH="${PYTHONPATH-}${pathsep}${WORKSPACE_DIR}/python/ray/pickle5_files" \
--test_env=CI="1" \
--test_env=RAY_CI_POST_WHEEL_TESTS="1" \
--test_env=USERPROFILE="${USERPROFILE}" \
Expand All @@ -248,9 +241,7 @@ test_serve() {

# For running Python tests on Windows (excluding Serve).
test_python() {
local pathsep=":" args=()
if [ "${OSTYPE}" = msys ]; then
pathsep=";"
args+=(
python/ray/tests/...
python/ray/train:test_windows
Expand Down Expand Up @@ -289,14 +280,9 @@ test_python() {
BUILDKITE_PARALLEL_JOB_COUNT=${BUILDKITE_PARALLEL_JOB_COUNT:-'1'}
test_shard_selection=$(python ./ci/ray_ci/bazel_sharding.py --exclude_manual --index "${BUILDKITE_PARALLEL_JOB}" --count "${BUILDKITE_PARALLEL_JOB_COUNT}" "${args[@]}")

# TODO(mehrdadn): We set PYTHONPATH here to let Python find our pickle5 under pip install -e.
# It's unclear to me if this should be necessary, but this is to make tests run for now.
# Check why this issue doesn't arise on Linux/Mac.
# Ideally importing ray.cloudpickle should import pickle5 automatically.
# shellcheck disable=SC2046,SC2086
bazel test --config=ci \
--build_tests_only $(./ci/run/bazel_export_options) \
--test_env=PYTHONPATH="${PYTHONPATH-}${pathsep}${WORKSPACE_DIR}/python/ray/pickle5_files" \
--test_env=CI="1" \
--test_env=RAY_CI_POST_WHEEL_TESTS="1" \
--test_env=USERPROFILE="${USERPROFILE}" \
Expand Down
1 change: 0 additions & 1 deletion ci/env/install-minimal.sh
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ MINIMAL_INSTALL=1 PYTHON=${PYTHON_VERSION} "${WORKSPACE_DIR}/ci/env/install-depe

# Re-install Ray wheels
rm -rf "${WORKSPACE_DIR}/python/ray/thirdparty_files"
rm -rf "${WORKSPACE_DIR}/python/ray/pickle5_files"
eval "${WORKSPACE_DIR}/ci/ci.sh build"

# Install test requirements
Expand Down
1 change: 1 addition & 0 deletions ci/ray_ci/core.tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,4 @@ flaky_tests:
- //python/ray/tests:test_threaded_actor
- //python/ray/tests:test_unhandled_error
- //python/ray/tests:test_state_api_log
- //python/ray/tests:test_ray_init_2
18 changes: 18 additions & 0 deletions ci/ray_ci/tester.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,22 @@
from ci.ray_ci.tester_container import TesterContainer
from ci.ray_ci.utils import docker_login

CUDA_COPYRIGHT = """
==========
== CUDA ==
==========

CUDA Version 11.8.0

Container image Copyright (c) 2016-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.

This container image and its contents are governed by the NVIDIA Deep Learning Container License.
By pulling and using the container, you accept the terms and conditions of this license:
https://developer.nvidia.com/ngc/nvidia-deep-learning-container-license

A copy of this license is made available in this container at /NGC-DL-CONTAINER-LICENSE for your convenience.
""" # noqa: E501

# Gets the path of product/tools/docker (i.e. the parent of 'common')
bazel_workspace_dir = os.environ.get("BUILD_WORKSPACE_DIRECTORY", "")

Expand Down Expand Up @@ -183,6 +199,8 @@ def _get_test_targets(
]
)
.decode("utf-8")
# CUDA image comes with a license header that we need to remove
.replace(CUDA_COPYRIGHT, "")
.strip()
.split("\n")
)
Expand Down
2 changes: 0 additions & 2 deletions dashboard/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ py_test_run_all_subdirectory(
"modules/job/tests/backwards_compatibility_scripts/test_backwards_compatibility.sh",
"modules/job/tests/backwards_compatibility_scripts/script.py",
"modules/job/tests/pip_install_test-0.5-py3-none-any.whl",
"modules/snapshot/snapshot_schema.json",
"modules/tests/test_config_files/basic_runtime_env.yaml",
] + glob([
"modules/job/tests/subprocess_driver_scripts/*.py",
Expand All @@ -59,7 +58,6 @@ py_test(
"modules/job/tests/backwards_compatibility_scripts/test_backwards_compatibility.sh",
"modules/job/tests/backwards_compatibility_scripts/script.py",
"modules/job/tests/pip_install_test-0.5-py3-none-any.whl",
"modules/snapshot/snapshot_schema.json",
"modules/tests/test_config_files/basic_runtime_env.yaml",
] + glob([
"modules/job/tests/subprocess_driver_scripts/*.py",
Expand Down
22 changes: 22 additions & 0 deletions dashboard/modules/serve/serve_rest_api_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,11 @@ async def get_version(self, req: Request) -> Response:
async def get_all_deployments(self, req: Request) -> Response:
from ray.serve.schema import ServeApplicationSchema

logger.warning(
"The Serve REST API endpoint `GET /api/serve/deployments/` is "
"deprecated. Please switch to using `GET /api/serve/applications/`. "
)

controller = await self.get_serve_controller()

if controller is None:
Expand Down Expand Up @@ -177,6 +182,11 @@ async def get_serve_instance_details(self, req: Request) -> Response:
async def get_all_deployment_statuses(self, req: Request) -> Response:
from ray.serve.schema import _serve_status_to_schema, ServeStatusSchema

logger.warning(
"The Serve REST API endpoint `GET /api/serve/deployments/status` is "
"deprecated. Please switch to using `GET /api/serve/applications/`. "
)

controller = await self.get_serve_controller()

if controller is None:
Expand Down Expand Up @@ -204,6 +214,11 @@ async def get_all_deployment_statuses(self, req: Request) -> Response:
async def delete_serve_application(self, req: Request) -> Response:
from ray import serve

logger.warning(
"The Serve REST API endpoint `DELETE /api/serve/deployments/` is "
"deprecated. Please switch to using `DELETE /api/serve/applications/`. "
)

if await self.get_serve_controller() is not None:
serve.shutdown()

Expand All @@ -228,6 +243,13 @@ async def put_all_deployments(self, req: Request) -> Response:
from ray.serve._private.constants import MULTI_APP_MIGRATION_MESSAGE
from ray._private.usage.usage_lib import TagKey, record_extra_usage_tag

logger.warning(
"The Serve REST API endpoint `PUT /api/serve/deployments/` and the "
"single-application config format is deprecated. Please switch to "
"using `PUT /api/serve/applications/` and the multi-application config "
"format."
)

try:
config = ServeApplicationSchema.parse_obj(await req.json())
except ValidationError as e:
Expand Down
Loading