From cf337d8a210be03c893c0f66f5f9ac989b978563 Mon Sep 17 00:00:00 2001
From: Rong Ou
Date: Tue, 5 Sep 2023 16:54:47 -0700
Subject: [PATCH] Add GPU support to NVFlare demo

---
 demo/nvflare/horizontal/README.md         | 6 +++---
 demo/nvflare/horizontal/custom/trainer.py | 2 +-
 demo/nvflare/vertical/README.md           | 7 ++++++-
 demo/nvflare/vertical/custom/trainer.py   | 5 +++--
 4 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/demo/nvflare/horizontal/README.md b/demo/nvflare/horizontal/README.md
index 19ac4cf4e692..7337f1720ed9 100644
--- a/demo/nvflare/horizontal/README.md
+++ b/demo/nvflare/horizontal/README.md
@@ -85,8 +85,8 @@ shutdown server
 ## Training with GPUs
 
 To demo with Federated Learning using GPUs, make sure your machine has at least 2 GPUs.
-Build XGBoost with the federated learning plugin enabled along with CUDA, but with NCCL
-turned off (see the [README](../../plugin/federated/README.md)).
+Build XGBoost with the federated learning plugin enabled along with CUDA
+(see the [README](../../plugin/federated/README.md)).
 
-Modify `config/config_fed_client.json` and set `use_gpus` to `true`, then repeat the steps
+Modify `../config/config_fed_client.json` and set `use_gpus` to `true`, then repeat the steps
 above.
diff --git a/demo/nvflare/horizontal/custom/trainer.py b/demo/nvflare/horizontal/custom/trainer.py
index b1ec942117d8..4f20b2f39f3f 100644
--- a/demo/nvflare/horizontal/custom/trainer.py
+++ b/demo/nvflare/horizontal/custom/trainer.py
@@ -67,7 +67,7 @@ def _do_training(self, fl_ctx: FLContext):
         dtest = xgb.DMatrix('agaricus.txt.test?format=libsvm')
 
         # Specify parameters via map, definition are same as c++ version
-        param = {'max_depth': 2, 'eta': 1, 'objective': 'binary:logistic'}
+        param = {'tree_method': 'hist', 'max_depth': 2, 'eta': 1, 'objective': 'binary:logistic'}
         if self._use_gpus:
             self.log_info(fl_ctx, f'Training with GPU {rank}')
             param['device'] = f"cuda:{rank}"
diff --git a/demo/nvflare/vertical/README.md b/demo/nvflare/vertical/README.md
index f9cca57d95a6..d63b2bca432c 100644
--- a/demo/nvflare/vertical/README.md
+++ b/demo/nvflare/vertical/README.md
@@ -56,4 +56,9 @@ shutdown server
 
 ## Training with GPUs
 
-Currently GPUs are not yet supported by vertical federated XGBoost.
+To demo with Vertical Federated Learning using GPUs, make sure your machine has at least 2 GPUs.
+Build XGBoost with the federated learning plugin enabled along with CUDA
+(see the [README](../../plugin/federated/README.md)).
+
+Modify `../config/config_fed_client.json` and set `use_gpus` to `true`, then repeat the steps
+above.
diff --git a/demo/nvflare/vertical/custom/trainer.py b/demo/nvflare/vertical/custom/trainer.py
index 1c235a439bc5..efe3207341c6 100644
--- a/demo/nvflare/vertical/custom/trainer.py
+++ b/demo/nvflare/vertical/custom/trainer.py
@@ -77,13 +77,14 @@ def _do_training(self, fl_ctx: FLContext):
             'gamma': 1.0,
             'max_depth': 8,
             'min_child_weight': 100,
-            'tree_method': 'approx',
+            'tree_method': 'hist',
             'grow_policy': 'depthwise',
             'objective': 'binary:logistic',
             'eval_metric': 'auc',
         }
         if self._use_gpus:
-            self.log_info(fl_ctx, 'GPUs are not currently supported by vertical federated XGBoost')
+            self.log_info(fl_ctx, f'Training with GPU {rank}')
+            param['device'] = f"cuda:{rank}"
 
         # specify validations set to watch performance
         watchlist = [(dtest, "eval"), (dtrain, "train")]
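
For anyone trying the demo, here is a minimal sketch of the per-rank GPU selection both trainers now perform. It is not the full NVFlare trainer: `rank`, `use_gpus`, and the helper name `train_one_client` are placeholders standing in for the values the real trainers read from the FL context and from `config_fed_client.json`, and the federated communicator setup is omitted, so this runs as plain single-node training.

```python
# Minimal sketch of the per-rank GPU selection this patch introduces.
# Assumptions: `rank` and `use_gpus` stand in for the values the NVFlare
# trainers obtain from the FL context and config_fed_client.json; the
# federated communicator setup is omitted.
import xgboost as xgb


def train_one_client(rank: int, use_gpus: bool) -> xgb.Booster:
    # Same demo data the horizontal trainer loads.
    dtrain = xgb.DMatrix('agaricus.txt.train?format=libsvm')
    dtest = xgb.DMatrix('agaricus.txt.test?format=libsvm')

    # Both trainers now use 'hist'; with XGBoost 2.0+ the same tree
    # method runs on GPU when 'device' points at a CUDA device.
    param = {'tree_method': 'hist', 'max_depth': 2, 'eta': 1,
             'objective': 'binary:logistic'}
    if use_gpus:
        # Pin each client to its own GPU, hence the 2-GPU requirement
        # for a 2-client demo on one machine.
        param['device'] = f'cuda:{rank}'

    watchlist = [(dtest, 'eval'), (dtrain, 'train')]
    return xgb.train(param, dtrain, num_boost_round=10, evals=watchlist)
```

Pinning `device` to `cuda:{rank}` is what lets both demo clients run on one machine without contending for the same GPU; in a real multi-node deployment each client would typically just use `cuda:0`.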