-
-
Notifications
You must be signed in to change notification settings - Fork 8.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[pyspark] support gpu transform #9542
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -59,7 +59,7 @@ | |||||
|
||||||
import xgboost | ||||||
from xgboost import XGBClassifier | ||||||
from xgboost.compat import is_cudf_available | ||||||
from xgboost.compat import is_cudf_available, is_cupy_available | ||||||
from xgboost.core import Booster, _check_distributed_params | ||||||
from xgboost.sklearn import DEFAULT_N_ESTIMATORS, XGBModel, _can_use_qdm | ||||||
from xgboost.training import train as worker_train | ||||||
|
@@ -242,6 +242,12 @@ class _SparkXGBParams( | |||||
TypeConverters.toList, | ||||||
) | ||||||
|
||||||
def set_device(self, value: str) -> "_SparkXGBParams": | ||||||
"""Set device (cpu, cuda, gpu)""" | ||||||
assert value in ("cpu", "cuda", "gpu") | ||||||
self.set(self.device, value) | ||||||
return self | ||||||
|
||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We have a check here: xgboost/python-package/xgboost/core.py Line 284 in 3b9e590
Line 95 in 3b9e590
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It seems `_check_distributed_params` only checks the type of the value; we also need to restrict it to be one of (cpu, gpu, cuda). |
||||||
@classmethod | ||||||
def _xgb_cls(cls) -> Type[XGBModel]: | ||||||
""" | ||||||
|
@@ -1193,6 +1199,31 @@ def _post_transform(self, dataset: DataFrame, pred_col: Column) -> DataFrame: | |||||
dataset = dataset.drop(pred_struct_col) | ||||||
return dataset | ||||||
|
||||||
def _gpu_transform(self) -> bool: | ||||||
"""If gpu is used to do the prediction, true to gpu prediction""" | ||||||
wbo4958 marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
|
||||||
if _is_local(_get_spark_session().sparkContext): | ||||||
# if it's local model, we just use the internal "device" | ||||||
return use_cuda(self.getOrDefault(self.device)) | ||||||
|
||||||
gpu_per_task = ( | ||||||
_get_spark_session() | ||||||
.sparkContext.getConf() | ||||||
.get("spark.task.resource.gpu.amount") | ||||||
) | ||||||
|
||||||
# User don't set gpu configurations, just use cpu | ||||||
if gpu_per_task is None: | ||||||
if use_cuda(self.getOrDefault(self.device)): | ||||||
get_logger("XGBoost-PySpark").warning( | ||||||
"Do the prediction on the CPUs since " | ||||||
"no gpu configurations are set" | ||||||
) | ||||||
return False | ||||||
|
||||||
# User already sets the gpu configurations, we just use the internal "device". | ||||||
return use_cuda(self.getOrDefault(self.device)) | ||||||
|
||||||
def _transform(self, dataset: DataFrame) -> DataFrame: | ||||||
# pylint: disable=too-many-statements, too-many-locals | ||||||
# Save xgb_sklearn_model and predict_params to be local variable | ||||||
|
@@ -1216,21 +1247,67 @@ def _transform(self, dataset: DataFrame) -> DataFrame: | |||||
|
||||||
_, schema = self._out_schema() | ||||||
|
||||||
is_local = _is_local(_get_spark_session().sparkContext) | ||||||
run_on_gpu = self._gpu_transform() | ||||||
|
||||||
@pandas_udf(schema) # type: ignore | ||||||
def predict_udf(iterator: Iterator[pd.DataFrame]) -> Iterator[pd.Series]: | ||||||
assert xgb_sklearn_model is not None | ||||||
model = xgb_sklearn_model | ||||||
|
||||||
from pyspark import TaskContext | ||||||
trivialfis marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
|
||||||
context = TaskContext.get() | ||||||
assert context is not None | ||||||
|
||||||
dev_ordinal = -1 | ||||||
if is_local: | ||||||
if run_on_gpu and is_cupy_available(): | ||||||
import cupy as cp # pylint: disable=import-error | ||||||
|
||||||
total_gpus = cp.cuda.runtime.getDeviceCount() | ||||||
if total_gpus > 0: | ||||||
partition_id = context.partitionId() | ||||||
# For transform local mode, default the gpu_id to (partition id) % gpus. | ||||||
trivialfis marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
dev_ordinal = partition_id % total_gpus | ||||||
elif run_on_gpu: | ||||||
dev_ordinal = _get_gpu_id(context) | ||||||
|
||||||
if dev_ordinal >= 0: | ||||||
device = "cuda:" + str(dev_ordinal) | ||||||
get_logger("XGBoost-PySpark").info( | ||||||
"Do the inference with device: %s", device | ||||||
) | ||||||
model.set_params(device=device) | ||||||
else: | ||||||
get_logger("XGBoost-PySpark").info("Do the inference on the CPUs") | ||||||
|
||||||
def to_gpu_if_possible(data: ArrayLike) -> ArrayLike: | ||||||
"""Move the data to gpu if possible""" | ||||||
if dev_ordinal >= 0: | ||||||
import cudf # pylint: disable=import-error | ||||||
trivialfis marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
import cupy as cp # pylint: disable=import-error | ||||||
|
||||||
# We must set the device after import cudf, which will change the device id to 0 | ||||||
# See https://github.com/rapidsai/cudf/issues/11386 | ||||||
cp.cuda.runtime.setDevice(dev_ordinal) # pylint: disable=I1101 | ||||||
df = cudf.DataFrame(data) | ||||||
del data | ||||||
return df | ||||||
return data | ||||||
|
||||||
for data in iterator: | ||||||
if enable_sparse_data_optim: | ||||||
X = _read_csr_matrix_from_unwrapped_spark_vec(data) | ||||||
else: | ||||||
if feature_col_names is not None: | ||||||
X = data[feature_col_names] | ||||||
tmp = data[feature_col_names] | ||||||
else: | ||||||
X = stack_series(data[alias.data]) | ||||||
tmp = stack_series(data[alias.data]) | ||||||
X = to_gpu_if_possible(tmp) | ||||||
|
||||||
if has_base_margin: | ||||||
base_margin = data[alias.margin].to_numpy() | ||||||
base_margin = to_gpu_if_possible(data[alias.margin].to_numpy()) | ||||||
trivialfis marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
else: | ||||||
base_margin = None | ||||||
|
||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The test for cuDF availability originally had a comment
which to me seemed odd: that describes a mismanaged environment, and I'm not sure it's necessary for xgboost to work around it (or even a good idea to work around anything, since users with cuDF installed might expect the GPU to be used).