Skip to content

Commit

Permalink
data tests (ray-project#40658)
Browse files Browse the repository at this point in the history
- Add a Java build option (one of the tests requires a Java build)
- Move starting MongoDB into the test_mongo fixture setup

Signed-off-by: can <[email protected]>
  • Loading branch information
can-anyscale authored Oct 27, 2023
1 parent d3567e0 commit f2f1e63
Show file tree
Hide file tree
Showing 9 changed files with 84 additions and 35 deletions.
12 changes: 0 additions & 12 deletions .buildkite/_forge.rayci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -102,18 +102,6 @@ steps:
wanda: ci/docker/core.build.wanda.yaml
depends_on: oss-ci-base_build

- name: data6build
wanda: ci/docker/data6.build.wanda.yaml
depends_on: oss-ci-base_ml

- name: data12build
wanda: ci/docker/data12.build.wanda.yaml
depends_on: oss-ci-base_ml

- name: datanbuild
wanda: ci/docker/datan.build.wanda.yaml
depends_on: oss-ci-base_ml

- name: servebuild
wanda: ci/docker/serve.build.wanda.yaml
depends_on: oss-ci-base_build
Expand Down
32 changes: 32 additions & 0 deletions .buildkite/data.rayci.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,23 @@
group: data tests
steps:
# builds
- name: data6build
wanda: ci/docker/data6.build.wanda.yaml
depends_on: oss-ci-base_ml

- name: data12build
wanda: ci/docker/data12.build.wanda.yaml
depends_on: oss-ci-base_ml

- name: datanbuild
wanda: ci/docker/datan.build.wanda.yaml
depends_on: oss-ci-base_ml

- name: datamongobuild
wanda: ci/docker/datamongo.build.wanda.yaml
depends_on: oss-ci-base_ml

# tests
- label: ":database: data: arrow 6 tests"
tags:
- python
Expand Down Expand Up @@ -61,6 +79,20 @@ steps:
depends_on: docgpubuild
job_env: forge

- label: ":database: data: integration tests"
tags:
- python
- data
instance_type: medium
commands:
- bazel run //ci/ray_ci:test_in_docker -- //python/ray/data/... data
--build-name datamongobuild
--build-type java
--only-tags data_integration
--except-tags doctest
depends_on: datamongobuild
job_env: forge

- label: ":database: data: flaky tests"
tags:
- python
Expand Down
16 changes: 0 additions & 16 deletions .buildkite/pipeline.ml.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,22 +54,6 @@
- ./ci/env/env_info.sh
- bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only python/ray/tests/modin/...

- label: ":potable_water: Dataset datasource integration tests"
conditions: ["NO_WHEELS_REQUIRED", "RAY_CI_PYTHON_AFFECTED", "RAY_CI_DATA_AFFECTED"]
commands:
- cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
- ./ci/env/install-java.sh
# TODO(scottjlee): upgrade ARROW_VERSION to 12.* and ARROW_MONGO_VERSION to 1.0.*
- DATA_PROCESSING_TESTING=1 ARROW_VERSION=9.* ARROW_MONGO_VERSION=0.5.* ./ci/env/install-dependencies.sh
- ./ci/env/env_info.sh
- sudo apt-get purge -y mongodb*
- sudo apt-get install -y mongodb
- sudo rm -rf /var/lib/mongodb/mongod.lock
- sudo service mongodb start
- bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=data_integration,-doctest python/ray/data/...
- sudo service mongodb stop
- sudo apt-get purge -y mongodb*

- label: ":book: Doc tests and examples (excluding Ray AIR examples)"
# Todo: check if we can modify the examples to use Ray with fewer CPUs.
conditions:
Expand Down
27 changes: 24 additions & 3 deletions ci/docker/data.build.Dockerfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
# syntax=docker/dockerfile:1.3-labs

ARG DOCKER_IMAGE_BASE_BUILD=cr.ray.io/rayproject/oss-ci-base_ml
FROM $DOCKER_IMAGE_BASE_BUILD

ARG ARROW_VERSION
ARG ARROW_VERSION=
ARG ARROW_MONGO_VERSION=
ARG RAY_CI_JAVA_BUILD=

# Unset dind settings; we are using the host's docker daemon.
ENV DOCKER_TLS_CERTDIR=
Expand All @@ -13,5 +17,22 @@ SHELL ["/bin/bash", "-ice"]

COPY . .

RUN DATA_PROCESSING_TESTING=1 ARROW_VERSION=$ARROW_VERSION ./ci/env/install-dependencies.sh
RUN pip install "datasets==2.14.0"
# Install the data-test Python dependencies, MongoDB, and (optionally) the Java
# toolchain in a single layer.
RUN <<EOF
#!/bin/bash

# The shebang above means this heredoc is executed by /bin/bash directly, not
# through the SHELL instruction's "-e" flag. Enable fail-fast explicitly so a
# failing step aborts the image build instead of being silently ignored.
set -eo pipefail

DATA_PROCESSING_TESTING=1 ARROW_VERSION="$ARROW_VERSION" \
  ARROW_MONGO_VERSION="$ARROW_MONGO_VERSION" ./ci/env/install-dependencies.sh
pip install "datasets==2.14.0"

# Install MongoDB
# Quote the pattern so apt-get receives it verbatim; unquoted, the shell could
# expand it against files in the working directory copied in by "COPY . .".
sudo apt-get purge -y 'mongodb*'
sudo apt-get install -y mongodb
# Remove any lock file so the service can be started cleanly later.
sudo rm -rf /var/lib/mongodb/mongod.lock

if [[ "$RAY_CI_JAVA_BUILD" == 1 ]]; then
  # These packages increase the image size quite a bit, so we only install them
  # as needed.
  sudo apt-get install -y -qq maven openjdk-8-jre openjdk-8-jdk
fi

EOF
17 changes: 17 additions & 0 deletions ci/docker/datamongo.build.wanda.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Wanda build spec for the image used by the "data: integration tests" step
# (tests tagged data_integration, including the MongoDB datasource tests).
name: "datamongobuild"
# Base image; matches the default DOCKER_IMAGE_BASE_BUILD in the Dockerfile.
froms: ["cr.ray.io/rayproject/oss-ci-base_ml"]
# Shared with the other data build specs; behavior is selected via build_args.
dockerfile: ci/docker/data.build.Dockerfile
# NOTE(review): presumably the files made available to (and keying) the build;
# confirm against the wanda documentation.
srcs:
- ci/env/install-dependencies.sh
- python/requirements.txt
- python/requirements_compiled.txt
- python/requirements/test-requirements.txt
- python/requirements/ml/dl-cpu-requirements.txt
- python/requirements/ml/data-requirements.txt
- python/requirements/ml/data-test-requirements.txt
build_args:
# Same Arrow / Arrow-Mongo versions the old pipeline.ml.yml integration step
# passed to install-dependencies.sh.
- ARROW_VERSION=9.*
- ARROW_MONGO_VERSION=0.5.*
# Makes the Dockerfile install maven and OpenJDK 8 (skipped when unset).
- RAY_CI_JAVA_BUILD=1
tags:
- cr.ray.io/rayproject/datamongobuild
2 changes: 1 addition & 1 deletion ci/pipeline/determine_tests_to_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,7 @@ def get_commit_range():
RAY_CI_MACOS_WHEELS_AFFECTED = 1
elif (
changed_file.startswith("python/ray/data")
or changed_file == ".buildkite/pipeline.ml.yml"
or changed_file == ".buildkite/data.rayci.yml"
or changed_file == "ci/docker/data.build.Dockerfile"
or changed_file == "ci/docker/data.build.wanda.yaml"
Expand Down Expand Up @@ -185,7 +186,6 @@ def get_commit_range():
elif (
changed_file == ".buildkite/ml.rayci.yml"
or changed_file == ".buildkite/pipeline.test.yml"
or changed_file == ".buildkite/pipeline.ml.yml"
or changed_file == "ci/docker/ml.build.Dockerfile"
or changed_file == ".buildkite/pipeline.gpu.yml"
or changed_file == ".buildkite/pipeline.gpu_large.yml"
Expand Down
2 changes: 1 addition & 1 deletion ci/ray_ci/tester.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@
)
@click.option(
"--build-type",
type=click.Choice(["optimized", "debug", "asan"]),
type=click.Choice(["optimized", "debug", "asan", "java"]),
default="optimized",
)
def main(
Expand Down
3 changes: 3 additions & 0 deletions ci/ray_ci/tests.env.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ if [[ "$BUILD_TYPE" == "debug" ]]; then
elif [[ "$BUILD_TYPE" == "asan" ]]; then
pip install -v -e python/
bazel build $(./ci/run/bazel_export_options) --no//:jemalloc_flag //:ray_pkg
elif [[ "$BUILD_TYPE" == "java" ]]; then
./java/build-jar-multiplatform.sh linux
RAY_INSTALL_JAVA=1 pip install -v -e python/
else
pip install -v -e python/
fi
Expand Down
8 changes: 6 additions & 2 deletions python/ray/data/tests/test_mongo.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import subprocess

import pandas as pd
import pyarrow as pa
import pytest
Expand All @@ -11,13 +13,13 @@
# To run tests locally, make sure you install mongodb
# and start a local service:
# sudo apt-get install -y mongodb
# sudo service mongodb start


@pytest.fixture
def start_mongo():
import pymongo

subprocess.check_call(["service", "mongodb", "start"])
mongo_url = "mongodb://localhost:27017"
client = pymongo.MongoClient(mongo_url)
# Make sure a clean slate for each test by dropping
Expand All @@ -26,7 +28,9 @@ def start_mongo():
# Keep the MongoDB default databases.
if db not in ("admin", "local", "config"):
client.drop_database(db)
return client, mongo_url
yield client, mongo_url

subprocess.check_call(["service", "mongodb", "stop"])


def test_read_write_mongo(ray_start_regular_shared, start_mongo):
Expand Down

0 comments on commit f2f1e63

Please sign in to comment.