
Implement secure horizontal scheme for federated learning #10231

Merged

84 commits
8570ba5
Add additional data split mode to cover the secure vertical pipeline
ZiyueXu77 Jan 31, 2024
2d00db6
Add IsSecure info and update corresponding functions
ZiyueXu77 Jan 31, 2024
ab17f5a
Modify evaluate_splits to block non-label owners from performing hist comp…
ZiyueXu77 Jan 31, 2024
fb1787c
Continue using Allgather for best split sync for secure vertical, equ…
ZiyueXu77 Feb 2, 2024
7a2a2b8
Modify histogram sync scheme for secure vertical case, can identify g…
ZiyueXu77 Feb 6, 2024
3ca3142
Sync cut information across clients, full pipeline works for testing …
ZiyueXu77 Feb 7, 2024
22dd522
Code cleanup, phase 1 of alternative vertical pipeline finished
ZiyueXu77 Feb 8, 2024
52e8951
Code clean
ZiyueXu77 Feb 8, 2024
e9eef15
change kColS to kColSecure to avoid confusion with kCols
ZiyueXu77 Feb 12, 2024
91c8a2f
Replace allreduce with allgather, functional but inefficient version
ZiyueXu77 Feb 13, 2024
8340c26
Update AllGather behavior from individual pair to bulk by adopting ne…
ZiyueXu77 Feb 13, 2024
42a9df1
comment out the record printing
ZiyueXu77 Feb 13, 2024
41e5abb
fix pointer bug for histsync with allgather
ZiyueXu77 Feb 20, 2024
ea5dc98
Merge branch 'dmlc:master' into SecureBoostP2
ZiyueXu77 Feb 20, 2024
5d542f8
Merge branch 'dmlc:master' into SecureBoostP2
ZiyueXu77 Feb 23, 2024
d91be10
identify the HE adding locations
ZiyueXu77 Feb 23, 2024
dd60317
revise and simplify template code
ZiyueXu77 Mar 6, 2024
8da824c
revise and simplify template code
ZiyueXu77 Mar 6, 2024
fb9f4fa
prepare aggregator for gh broadcast
ZiyueXu77 Mar 13, 2024
e77f8c6
prepare histogram for histindex and row index for allgather
ZiyueXu77 Mar 14, 2024
7ef48c8
Merge branch 'vertical-federated-learning' into SecureBoostP2
ZiyueXu77 Mar 15, 2024
8405791
fix conflicts
ZiyueXu77 Mar 15, 2024
db7d518
fix conflicts
ZiyueXu77 Mar 15, 2024
dd6adde
fix format
ZiyueXu77 Mar 15, 2024
9567e67
fix allgather logic and update unit test
ZiyueXu77 Mar 19, 2024
53800f2
fix linting
ZiyueXu77 Mar 19, 2024
b7e70f1
fix linting and other unit test issues
ZiyueXu77 Mar 20, 2024
49e8fd6
fix linting and other unit test issues
ZiyueXu77 Mar 20, 2024
da0f7a6
integration with interface initial attempt
ZiyueXu77 Mar 22, 2024
406cda3
integration with interface initial attempt
ZiyueXu77 Mar 22, 2024
f6c63aa
integration with interface initial attempt
ZiyueXu77 Mar 22, 2024
f223df7
functional integration with interface
ZiyueXu77 Apr 1, 2024
d881d84
remove debugging prints
ZiyueXu77 Apr 1, 2024
2997cf7
remove processor from another PR
ZiyueXu77 Apr 1, 2024
3a1f9ac
Update the processor functions according to new processor implementation
ZiyueXu77 Apr 12, 2024
1107604
Move processor interface init from learner to communicator
ZiyueXu77 Apr 12, 2024
30b7ed5
Move processor interface init from learner to communicator functional
ZiyueXu77 Apr 12, 2024
a3ddf7d
switch to allgatherV for encrypted messages with varying lengths
ZiyueXu77 Apr 15, 2024
3123b51
consolidate with processor interface PR
ZiyueXu77 Apr 19, 2024
73225a0
remove prints and fix format
ZiyueXu77 Apr 23, 2024
e85b1fb
fix linting over reference pass
ZiyueXu77 Apr 24, 2024
57750b4
fix undefined symbol issue
ZiyueXu77 Apr 24, 2024
fa2665a
fix processor test
ZiyueXu77 Apr 24, 2024
87d2fdb
secure vertical relies on processor, move the unit test
ZiyueXu77 Apr 24, 2024
9941293
type correction
ZiyueXu77 Apr 24, 2024
dd4f440
type correction
ZiyueXu77 Apr 24, 2024
5b2dfe6
extra linting from last change
ZiyueXu77 Apr 24, 2024
80d3b89
Added Windows support
nvidianz Apr 24, 2024
184b67f
Merge pull request #4 from nvidianz/processor-windows-support
ZiyueXu77 Apr 25, 2024
3382707
fix for cstdint types
ZiyueXu77 Apr 25, 2024
2a8f19a
fix for cstdint types
ZiyueXu77 Apr 25, 2024
9ff2935
Added support for horizontal secure XGBoost
nvidianz Apr 25, 2024
38e9d3d
Merge pull request #5 from nvidianz/processor-horizontal-support
ZiyueXu77 Apr 25, 2024
38c176c
update with mock plugin
ZiyueXu77 Apr 26, 2024
a5ce92e
secure horizontal fully functional with mock plugin
ZiyueXu77 Apr 26, 2024
5e824ac
linting fix
ZiyueXu77 Apr 26, 2024
81db216
linting fix
ZiyueXu77 Apr 26, 2024
15c211a
linting fix
ZiyueXu77 Apr 26, 2024
f7341cd
fix type
ZiyueXu77 Apr 26, 2024
a579205
Merge branch 'vertical-federated-learning' into SecureHorizontal
ZiyueXu77 Apr 29, 2024
35d8c15
change loader and proc params input pattern to align with std map
ZiyueXu77 Apr 29, 2024
3d31905
update with secure vertical incorporation
ZiyueXu77 May 13, 2024
7f86787
Merge branch 'vertical-federated-learning' into SecureHorizontal
ZiyueXu77 May 16, 2024
bdcb6e2
Update mock_processor to enable nvflare usage
ZiyueXu77 May 28, 2024
a8205d3
[backport] Fix compiling with the latest CTK. (#10263)
trivialfis May 29, 2024
ae77f2d
Merge remote-tracking branch 'ZiyueXu77/SecureHorizontal' into Secure…
trivialfis May 29, 2024
cc13605
fix secure horizontal inference
ZiyueXu77 May 29, 2024
d7a6da6
initialized aggr context only once
ZiyueXu77 May 29, 2024
032b14d
Added support for multiple plugins in a single lib
nvidianz May 30, 2024
ea9b298
Merge pull request #7 from nvidianz/support-multi-processors
ZiyueXu77 May 30, 2024
7f3472e
remove redundant condition
ZiyueXu77 Jun 3, 2024
5569e78
Added support for boolean in proc_params
nvidianz Jun 4, 2024
71e578c
Merge pull request #10 from nvidianz/support-bool-params-2nd-try
ZiyueXu77 Jun 4, 2024
454f69d
free buffer
ZiyueXu77 Jun 11, 2024
e2f77e2
Merge branch 'vertical-federated-learning' into SecureHorizontal
trivialfis Jun 18, 2024
6e4a3fb
Merge branch 'vertical-federated-learning' into SecureHorizontal
trivialfis Jun 18, 2024
61c8f47
CUDA.
trivialfis Jun 18, 2024
05be1e8
Fix clean build.
trivialfis Jun 18, 2024
e0795cf
Fix include.
trivialfis Jun 18, 2024
fd4f331
tidy.
trivialfis Jun 18, 2024
074c63b
lint.
trivialfis Jun 18, 2024
2a8fd72
nolint.
trivialfis Jun 18, 2024
ac02279
disable.
trivialfis Jun 18, 2024
1ef69ea
disable sanitizer.
trivialfis Jun 18, 2024
10 changes: 5 additions & 5 deletions include/xgboost/data.h
@@ -40,7 +40,7 @@ enum class DataType : uint8_t {

enum class FeatureType : uint8_t { kNumerical = 0, kCategorical = 1 };

-enum class DataSplitMode : int { kRow = 0, kCol = 1, kColSecure = 2 };
+enum class DataSplitMode : int { kRow = 0, kCol = 1, kColSecure = 2, kRowSecure = 3 };

/*!
* \brief Meta information about dataset, always sit in memory.
@@ -181,16 +181,16 @@ class MetaInfo {
void SynchronizeNumberOfColumns(Context const* ctx);

/*! \brief Whether the data is split row-wise. */
-  bool IsRowSplit() const {
-    return data_split_mode == DataSplitMode::kRow;
-  }
+  bool IsRowSplit() const { return (data_split_mode == DataSplitMode::kRow)
+                                   || (data_split_mode == DataSplitMode::kRowSecure); }

/** @brief Whether the data is split column-wise. */
bool IsColumnSplit() const { return (data_split_mode == DataSplitMode::kCol)
|| (data_split_mode == DataSplitMode::kColSecure); }

/** @brief Whether the data is split column-wise with secure computation. */
-  bool IsSecure() const { return data_split_mode == DataSplitMode::kColSecure; }
+  bool IsSecure() const { return (data_split_mode == DataSplitMode::kColSecure)
+                                 || (data_split_mode == DataSplitMode::kRowSecure); }

/** @brief Whether this is a learning to rank data. */
bool IsRanking() const { return !group_ptr_.empty(); }
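Taken together, the enum and predicate changes mean IsSecure() no longer implies a column split. A minimal sketch of how calling code can distinguish the four modes; Route() is a hypothetical helper with placeholder branch bodies, not part of the PR:

#include "xgboost/data.h"

// Sketch only. Note that IsSecure() is now true for both kColSecure and
// kRowSecure, so secure-horizontal code should combine it with IsRowSplit().
void Route(xgboost::MetaInfo const& info) {
  if (info.IsSecure() && info.IsRowSplit()) {
    // secure horizontal: histograms are encrypted before aggregation
  } else if (info.IsSecure() && info.IsColumnSplit()) {
    // secure vertical: only the label owner evaluates splits
  } else if (info.IsColumnSplit()) {
    // plain vertical split
  } else {
    // plain horizontal (row-wise) split
  }
}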
45 changes: 42 additions & 3 deletions src/collective/aggregator.h
@@ -14,6 +14,7 @@
#include "communicator-inl.h"
#include "xgboost/collective/result.h" // for Result
#include "xgboost/data.h" // for MetaINfo
#include "../processing/processor.h" // for Processor

namespace xgboost::collective {

@@ -69,7 +70,7 @@ void ApplyWithLabels(Context const*, MetaInfo const& info, void* buffer, std::si
* @param result The HostDeviceVector storing the results.
* @param function The function used to calculate the results.
*/
-template <typename T, typename Function>
+template <bool is_gpair, typename T, typename Function>
void ApplyWithLabels(Context const*, MetaInfo const& info, HostDeviceVector<T>* result,
Function&& function) {
if (info.IsVerticalFederated()) {
@@ -96,8 +97,46 @@ void ApplyWithLabels(Context const*, MetaInfo const& info, HostDeviceVector<T>*
}
collective::Broadcast(&size, sizeof(std::size_t), 0);

-    result->Resize(size);
-    collective::Broadcast(result->HostPointer(), size * sizeof(T), 0);
+    if (info.IsSecure() && is_gpair) {
Review comment (Member): May I ask why the vertical federated learning section is being modified for horizontal learning?

Reply (Author): This PR adds the horizontal functions on top of the vertical P2 PR (#10124). Since that PR has not been merged yet, this diff also shows all of the vertical modifications; only the parts beyond #10124 are new here.
+      // Under secure mode, gpairs will be processed to vector and encrypt
+      // information only available on rank 0
+      std::size_t buffer_size{};
+      std::int8_t *buffer;
+      if (collective::GetRank() == 0) {
+        std::vector<double> vector_gh;
+        for (std::size_t i = 0; i < size; i++) {
+          auto gpair = result->HostVector()[i];
+          // cast from GradientPair to float pointer
+          auto gpair_ptr = reinterpret_cast<float*>(&gpair);
+          // save to vector
+          vector_gh.push_back(gpair_ptr[0]);
+          vector_gh.push_back(gpair_ptr[1]);
+        }
+        // provide the vectors to the processor interface
+        size_t size;
+        auto buf = processor_instance->ProcessGHPairs(&size, vector_gh);
+        buffer_size = size;
+        buffer = reinterpret_cast<std::int8_t *>(buf);
+      }
+
+      // broadcast the buffer size for other ranks to prepare
+      collective::Broadcast(&buffer_size, sizeof(std::size_t), 0);
+      // prepare buffer on passive parties for satisfying broadcast mpi call
+      if (collective::GetRank() != 0) {
+        buffer = reinterpret_cast<std::int8_t *>(malloc(buffer_size));
+      }
+
+      // broadcast the data buffer holding processed gpairs
+      collective::Broadcast(buffer, buffer_size, 0);
+
+      // call HandleGHPairs
+      size_t size;
+      processor_instance->HandleGHPairs(&size, buffer, buffer_size);
+    } else {
+      // clear text mode, broadcast the data directly
+      result->Resize(size);
+      collective::Broadcast(result->HostPointer(), size * sizeof(T), 0);
+    }
} else {
std::forward<Function>(function)();
}
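The secure branch above flattens each GradientPair into two doubles, hands them to the processor plugin on rank 0, and broadcasts the resulting buffer to all parties. As a rough sketch, a pass-through mock of the two plugin calls might look like the following; the signatures are inferred from the call sites (the real interface is declared in src/processing/processor.h), and the "encryption" here is a no-op:

#include <cstdint>
#include <cstring>
#include <vector>

// Pass-through mock of the plugin calls used above.
class PassThroughProcessor {
 public:
  // Rank 0: serialize the flattened (g, h) vector; a real plugin encrypts here.
  void* ProcessGHPairs(std::size_t* out_size, std::vector<double> const& gh) {
    *out_size = gh.size() * sizeof(double);
    buffer_.resize(*out_size);
    std::memcpy(buffer_.data(), gh.data(), *out_size);
    return buffer_.data();
  }
  // All ranks: accept the broadcast buffer and keep it for later histogram
  // aggregation; a real plugin would stash and later decrypt the ciphertext.
  void HandleGHPairs(std::size_t* out_size, void* buffer, std::size_t buf_size) {
    auto* bytes = static_cast<std::int8_t*>(buffer);
    buffer_.assign(bytes, bytes + buf_size);
    *out_size = buf_size;
  }

 private:
  std::vector<std::int8_t> buffer_;
};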
70 changes: 58 additions & 12 deletions src/collective/communicator.cc
@@ -1,6 +1,7 @@
/*!
* Copyright 2022 XGBoost contributors
*/
+#include <map>
#include "communicator.h"

#include "comm.h"
@@ -9,14 +10,39 @@
#include "rabit_communicator.h"

#if defined(XGBOOST_USE_FEDERATED)
#include "../../plugin/federated/federated_communicator.h"
#include "../../plugin/federated/federated_communicator.h"
#endif

#include "../processing/processor.h"
processing::Processor *processor_instance;

namespace xgboost::collective {
thread_local std::unique_ptr<Communicator> Communicator::communicator_{new NoOpCommunicator()};
thread_local CommunicatorType Communicator::type_{};
thread_local std::string Communicator::nccl_path_{};

+std::map<std::string, std::string> json_to_map(xgboost::Json const& config, std::string key) {
+  auto json_map = xgboost::OptionalArg<xgboost::Object>(config, key, xgboost::JsonObject::Map{});
+  std::map<std::string, std::string> params{};
+  for (auto entry : json_map) {
+    std::string text;
+    xgboost::Value* value = &(entry.second.GetValue());
+    if (value->Type() == xgboost::Value::ValueKind::kString) {
Review comment (Member): IsA<String> (suggesting the IsA<String>() helper rather than comparing ValueKind directly).
+      text = reinterpret_cast<xgboost::String *>(value)->GetString();
+    } else if (value->Type() == xgboost::Value::ValueKind::kInteger) {
+      auto num = reinterpret_cast<xgboost::Integer *>(value)->GetInteger();
+      text = std::to_string(num);
+    } else if (value->Type() == xgboost::Value::ValueKind::kNumber) {
+      auto num = reinterpret_cast<xgboost::Number *>(value)->GetNumber();
+      text = std::to_string(num);
+    } else {
+      text = "Unsupported type ";
+    }
+    params[entry.first] = text;
+  }
+  return params;
+}

void Communicator::Init(Json const& config) {
auto nccl = OptionalArg<String>(config, "dmlc_nccl_path", std::string{DefaultNcclName()});
nccl_path_ = nccl;
@@ -38,26 +64,46 @@ void Communicator::Init(Json const& config) {
}
case CommunicatorType::kFederated: {
#if defined(XGBOOST_USE_FEDERATED)
      communicator_.reset(FederatedCommunicator::Create(config));
+      // Get processor configs
+      std::string plugin_name{};
+      std::string loader_params_key{};
+      std::string loader_params_map{};
+      std::string proc_params_key{};
+      std::string proc_params_map{};
+      plugin_name = OptionalArg<String>(config, "plugin_name", plugin_name);
+      // Initialize processor if plugin_name is provided
+      if (!plugin_name.empty()) {
+        std::map<std::string, std::string> loader_params = json_to_map(config, "loader_params");
+        std::map<std::string, std::string> proc_params = json_to_map(config, "proc_params");
+        processing::ProcessorLoader loader(loader_params);
+        processor_instance = loader.load(plugin_name);
+        processor_instance->Initialize(collective::GetRank() == 0, proc_params);
+      }
Review comment (Member): Do you have a document for the expected parameters?
#else
      LOG(FATAL) << "XGBoost is not compiled with Federated Learning support.";
#endif
break;
}
-    case CommunicatorType::kInMemory:
-    case CommunicatorType::kInMemoryNccl: {
-      communicator_.reset(InMemoryCommunicator::Create(config));
-      break;
-    }
-    case CommunicatorType::kUnknown:
-      LOG(FATAL) << "Unknown communicator type.";
-      break;
-  }
+    case CommunicatorType::kInMemory:
+    case CommunicatorType::kInMemoryNccl: {
+      communicator_.reset(InMemoryCommunicator::Create(config));
+      break;
+    }
+    case CommunicatorType::kUnknown:
+      LOG(FATAL) << "Unknown communicator type.";
+  }
}

#ifndef XGBOOST_USE_CUDA
void Communicator::Finalize() {
communicator_->Shutdown();
communicator_.reset(new NoOpCommunicator());
+  if (processor_instance != nullptr) {
+    processor_instance->Shutdown();
+    processor_instance = nullptr;
+  }
}
#endif
} // namespace xgboost::collective
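To answer the reviewer's question about expected parameters in code form, here is a hypothetical communicator config wired through the parsing above. The keys plugin_name, loader_params, and proc_params come from this diff; the nested entries, server settings, and include paths are illustrative assumptions only:

#include "xgboost/json.h"
#include "xgboost/string_view.h"

// Sketch: a federated config enabling an encryption plugin. json_to_map()
// above accepts string, integer, and number values (and, per a later commit
// in this PR, booleans in proc_params).
void InitSecureFederated() {
  auto config = xgboost::Json::Load(xgboost::StringView{R"({
    "xgboost_communicator": "federated",
    "federated_server_address": "localhost:9091",
    "federated_world_size": 2,
    "federated_rank": 0,
    "plugin_name": "mock",
    "loader_params": {"LIBRARY_PATH": "/tmp"},
    "proc_params": {"debug": true}
  })"});
  xgboost::collective::Communicator::Init(config);
}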
2 changes: 1 addition & 1 deletion src/common/quantile.cu
@@ -179,7 +179,7 @@ common::Span<thrust::tuple<uint64_t, uint64_t>> MergePath(
merge_path.data(), [=] __device__(Tuple const &t) -> Tuple {
auto ind = get_ind(t); // == 0 if element is from x
// x_counter, y_counter
-        return thrust::make_tuple<uint64_t, uint64_t>(!ind, ind);
+        return thrust::make_tuple(static_cast<uint64_t>(!ind), static_cast<uint64_t>(ind));
});

// Compute the index for both x and y (which of the element in a and b are used in each
8 changes: 4 additions & 4 deletions src/data/ellpack_page.cu
@@ -171,11 +171,11 @@ struct WriteCompressedEllpackFunctor {

using Tuple = thrust::tuple<size_t, size_t, size_t>;
__device__ size_t operator()(Tuple out) {
-    auto e = batch.GetElement(out.get<2>());
+    auto e = batch.GetElement(thrust::get<2>(out));
if (is_valid(e)) {
// -1 because the scan is inclusive
size_t output_position =
-          accessor.row_stride * e.row_idx + out.get<1>() - 1;
+          accessor.row_stride * e.row_idx + thrust::get<1>(out) - 1;
uint32_t bin_idx = 0;
if (common::IsCat(feature_types, e.column_idx)) {
bin_idx = accessor.SearchBin<true>(e.value, e.column_idx);
@@ -192,8 +192,8 @@ template <typename Tuple>
struct TupleScanOp {
__device__ Tuple operator()(Tuple a, Tuple b) {
// Key equal
-    if (a.template get<0>() == b.template get<0>()) {
-      b.template get<1>() += a.template get<1>();
+    if (thrust::get<0>(a) == thrust::get<0>(b)) {
+      thrust::get<1>(b) += thrust::get<1>(a);
return b;
}
// Not equal
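Both CUDA hunks above are the same portability fix: with recent Thrust releases (where thrust::tuple aliases cuda::std::tuple), the member .get<I>() and explicit template arguments on make_tuple no longer compile, while the free-function forms work on old and new versions alike. A minimal host-side illustration, standalone and not taken from the PR:

#include <cstdint>
#include <thrust/tuple.h>

int main() {
  // Portable: free-function make_tuple with explicit casts instead of
  // explicit template arguments, and thrust::get<I>(t) instead of t.get<I>().
  auto t = thrust::make_tuple(static_cast<uint64_t>(1), static_cast<uint64_t>(2));
  return static_cast<int>(thrust::get<0>(t) + thrust::get<1>(t));  // returns 3
}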
7 changes: 4 additions & 3 deletions src/learner.cc
@@ -846,7 +846,7 @@ class LearnerConfiguration : public Learner {

void InitEstimation(MetaInfo const& info, linalg::Tensor<float, 1>* base_score) {
base_score->Reshape(1);
-    collective::ApplyWithLabels(this->Ctx(), info, base_score->Data(),
+    collective::ApplyWithLabels<false>(this->Ctx(), info, base_score->Data(),
[&] { UsePtr(obj_)->InitEstimation(info, base_score); });
}
};
@@ -1472,8 +1472,9 @@ class LearnerImpl : public LearnerIO {
void GetGradient(HostDeviceVector<bst_float> const& preds, MetaInfo const& info,
std::int32_t iter, linalg::Matrix<GradientPair>* out_gpair) {
out_gpair->Reshape(info.num_row_, this->learner_model_param_.OutputLength());
-    collective::ApplyWithLabels(&ctx_, info, out_gpair->Data(),
-                                [&] { obj_->GetGradient(preds, info, iter, out_gpair); });
+    // calculate gradient and communicate
+    collective::ApplyWithLabels<true>(&ctx_, info, out_gpair->Data(),
+                                      [&] { obj_->GetGradient(preds, info, iter, out_gpair); });
}

/*! \brief random number transformation seed. */