Implement secure horizontal scheme for federated learning #10231
Changes from 63 commits
@@ -1,6 +1,7 @@
/*!
 * Copyright 2022 XGBoost contributors
 */
#include <map>
#include "communicator.h"

#include "comm.h"
@@ -9,14 +10,39 @@
#include "rabit_communicator.h"

#if defined(XGBOOST_USE_FEDERATED)
#include "../../plugin/federated/federated_communicator.h"
#endif

#include "../processing/processor.h"
processing::Processor *processor_instance;

namespace xgboost::collective {
thread_local std::unique_ptr<Communicator> Communicator::communicator_{new NoOpCommunicator()};
thread_local CommunicatorType Communicator::type_{};
thread_local std::string Communicator::nccl_path_{};

std::map<std::string, std::string> json_to_map(xgboost::Json const& config, std::string key) {
  auto json_map = xgboost::OptionalArg<xgboost::Object>(config, key, xgboost::JsonObject::Map{});
  std::map<std::string, std::string> params{};
  for (auto entry : json_map) {
    std::string text;
    xgboost::Value* value = &(entry.second.GetValue());
    if (value->Type() == xgboost::Value::ValueKind::kString) {
      text = reinterpret_cast<xgboost::String *>(value)->GetString();
    } else if (value->Type() == xgboost::Value::ValueKind::kInteger) {
      auto num = reinterpret_cast<xgboost::Integer *>(value)->GetInteger();
      text = std::to_string(num);
    } else if (value->Type() == xgboost::Value::ValueKind::kNumber) {
      auto num = reinterpret_cast<xgboost::Number *>(value)->GetNumber();
      text = std::to_string(num);
    } else {
      text = "Unsupported type ";
    }
    params[entry.first] = text;
  }
  return params;
}

void Communicator::Init(Json const& config) {
  auto nccl = OptionalArg<String>(config, "dmlc_nccl_path", std::string{DefaultNcclName()});
  nccl_path_ = nccl;
@@ -38,26 +64,46 @@ void Communicator::Init(Json const& config) {
    }
    case CommunicatorType::kFederated: {
#if defined(XGBOOST_USE_FEDERATED)
      communicator_.reset(FederatedCommunicator::Create(config));
      // Get processor configs
      std::string plugin_name{};
      std::string loader_params_key{};
      std::string loader_params_map{};
      std::string proc_params_key{};
      std::string proc_params_map{};
      plugin_name = OptionalArg<String>(config, "plugin_name", plugin_name);
      // Initialize processor if plugin_name is provided
      if (!plugin_name.empty()) {
        std::map<std::string, std::string> loader_params = json_to_map(config, "loader_params");
        std::map<std::string, std::string> proc_params = json_to_map(config, "proc_params");
        processing::ProcessorLoader loader(loader_params);
        processor_instance = loader.load(plugin_name);
        processor_instance->Initialize(collective::GetRank() == 0, proc_params);
      }
Reviewer comment: Do you have a document for the expected parameters?
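As an illustration only, this is the kind of configuration the federated branch above expects. The key names ("plugin_name", "loader_params", "proc_params") come from the code; the plugin name and parameter values are hypothetical, not a documented contract:

// Sketch only: the JSON that Communicator::Init would be parsed from.
// "mock_processor", "LIBRARY_PATH", and "SCHEME" are placeholder values.
const char* kExampleConfig = R"json({
  "plugin_name": "mock_processor",
  "loader_params": { "LIBRARY_PATH": "/tmp" },
  "proc_params":   { "SCHEME": "horizontal" }
})json";

With such a config, json_to_map(config, "loader_params") would flatten the nested object into {{"LIBRARY_PATH", "/tmp"}}, converting any integer or floating-point values to their string form.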
#else
      LOG(FATAL) << "XGBoost is not compiled with Federated Learning support.";
#endif
      break;
    }
    case CommunicatorType::kInMemory:
    case CommunicatorType::kInMemoryNccl: {
      communicator_.reset(InMemoryCommunicator::Create(config));
      break;
    }
    case CommunicatorType::kUnknown:
      LOG(FATAL) << "Unknown communicator type.";
      break;
  }
}

#ifndef XGBOOST_USE_CUDA
void Communicator::Finalize() {
  communicator_->Shutdown();
  communicator_.reset(new NoOpCommunicator());
  if (processor_instance != nullptr) {
    processor_instance->Shutdown();
    processor_instance = nullptr;
  }
}
#endif
}  // namespace xgboost::collective
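For context, here is a minimal sketch of the processor lifecycle that the code above drives, using only the calls that appear in this diff (ProcessorLoader construction, load, Initialize, Shutdown). The plugin name and parameter values are placeholders, not documented defaults:

#include <map>
#include <string>
#include "../processing/processor.h"  // interface introduced by this PR

void ProcessorLifecycleSketch() {
  // Flattened parameter maps, normally produced by json_to_map above.
  std::map<std::string, std::string> loader_params = {{"LIBRARY_PATH", "/tmp"}};  // hypothetical value
  std::map<std::string, std::string> proc_params = {{"SCHEME", "horizontal"}};    // hypothetical value

  // Load the plugin and initialize it; in Communicator::Init the "active"
  // flag is collective::GetRank() == 0.
  processing::ProcessorLoader loader(loader_params);
  processing::Processor *proc = loader.load("mock_processor");  // hypothetical plugin name
  proc->Initialize(/*active=*/true, proc_params);

  // ... ProcessGHPairs / HandleGHPairs / ProcessAggregation / HandleAggregation ...

  // Mirrors the cleanup in Communicator::Finalize() above.
  proc->Shutdown();
}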
@@ -0,0 +1,175 @@
/**
 * Copyright 2014-2024 by XGBoost Contributors
 */
#include <iostream>
#include <cstring>
#include <cstdint>
#include "./mock_processor.h"

const char kSignature[] = "NVDADAM1";  // DAM (Direct Accessible Marshalling) V1
const int64_t kPrefixLen = 24;

bool ValidDam(void *buffer, std::size_t size) {
  return size >= kPrefixLen && memcmp(buffer, kSignature, strlen(kSignature)) == 0;
}
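The 24-byte DAM prefix used throughout this file is an 8-byte signature, an 8-byte total buffer size, and an 8-byte data-type tag, followed by the payload. Here is a self-contained sketch of writing that prefix, independent of the mock processor itself:

#include <cstdint>
#include <cstdlib>
#include <cstring>

// Sketch only: frame a payload of `payload_bytes` bytes with the DAM prefix.
// The caller owns the returned buffer and must free() it.
void *FrameDamBuffer(std::size_t payload_bytes, int64_t data_type, std::size_t *out_size) {
  const char signature[] = "NVDADAM1";
  const int64_t prefix_len = 24;
  *out_size = prefix_len + payload_bytes;
  char *buf = static_cast<char *>(calloc(*out_size, 1));
  int64_t total = static_cast<int64_t>(*out_size);
  memcpy(buf, signature, strlen(signature));  // bytes  0-7 : signature
  memcpy(buf + 8, &total, 8);                 // bytes  8-15: total buffer size
  memcpy(buf + 16, &data_type, 8);            // bytes 16-23: data-type tag
  return buf;                                 // payload starts at byte 24
}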

void* MockProcessor::ProcessGHPairs(std::size_t *size, const std::vector<double>& pairs) {
  *size = kPrefixLen + pairs.size()*10*8;  // Assume encrypted size is 10x

  int64_t buf_size = *size;
  // This memory needs to be freed
  char *buf = static_cast<char *>(calloc(*size, 1));
  memcpy(buf, kSignature, strlen(kSignature));
  memcpy(buf + 8, &buf_size, 8);
  memcpy(buf + 16, &kDataTypeGHPairs, 8);

  // Simulate encryption by duplicating value 10 times
  int index = kPrefixLen;
  for (auto value : pairs) {
    for (std::size_t i = 0; i < 10; i++) {
      memcpy(buf+index, &value, 8);
      index += 8;
    }
  }

  // Save pairs for future operations
  this->gh_pairs_ = new std::vector<double>(pairs);

Reviewer comment: Is this vector freed?

  return buf;
}

void* MockProcessor::HandleGHPairs(std::size_t *size, void *buffer, std::size_t buf_size) {
  *size = buf_size;
  if (!ValidDam(buffer, *size)) {
    return buffer;
  }

  // For mock, this call is used to set gh_pairs for passive sites
  if (!active_) {
    int8_t *ptr = static_cast<int8_t *>(buffer);
    ptr += kPrefixLen;
    double *pairs = reinterpret_cast<double *>(ptr);
    std::size_t num = (buf_size - kPrefixLen) / 8;
    gh_pairs_ = new std::vector<double>();
    for (std::size_t i = 0; i < num; i += 10) {
      gh_pairs_->push_back(pairs[i]);
    }
  }

  return buffer;
}
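Taken together, ProcessGHPairs and HandleGHPairs implement the mock "encryption" round trip: every double is written 10 times, and the receiver recovers the original sequence by reading every 10th double after the prefix. A self-contained sketch of that round trip, with no MockProcessor state involved:

#include <vector>

// Sketch only: mock-encrypt by 10x duplication, then decode by striding.
std::vector<double> MockRoundTrip(const std::vector<double>& pairs) {
  std::vector<double> encoded;
  for (double v : pairs) {
    for (int i = 0; i < 10; ++i) encoded.push_back(v);  // 10x duplication
  }
  std::vector<double> decoded;
  for (std::size_t i = 0; i < encoded.size(); i += 10) {
    decoded.push_back(encoded[i]);  // every 10th value is the original
  }
  return decoded;  // equal to `pairs`
}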

void *MockProcessor::ProcessAggregation(std::size_t *size, std::map<int, std::vector<int>> nodes) {
  int total_bin_size = cuts_.back();
  int histo_size = total_bin_size*2;
  *size = kPrefixLen + 8*histo_size*nodes.size();
  int64_t buf_size = *size;
  int8_t *buf = static_cast<int8_t *>(calloc(buf_size, 1));
  memcpy(buf, kSignature, strlen(kSignature));
  memcpy(buf + 8, &buf_size, 8);
  memcpy(buf + 16, &kDataTypeHisto, 8);

  double *histo = reinterpret_cast<double *>(buf + kPrefixLen);
  for (const auto &node : nodes) {
    auto rows = node.second;
    for (const auto &row_id : rows) {
      auto num = cuts_.size() - 1;
      for (std::size_t f = 0; f < num; f++) {
        int slot = slots_[f + num*row_id];
        if ((slot < 0) || (slot >= total_bin_size)) {
          continue;
        }

        auto g = (*gh_pairs_)[row_id*2];
        auto h = (*gh_pairs_)[row_id*2+1];
        histo[slot*2] += g;
        histo[slot*2+1] += h;
      }
    }
    histo += histo_size;
  }

  return buf;
}
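For orientation, the buffer produced above holds one (g, h) histogram per tree node, laid out back to back after the DAM prefix, with g and h interleaved per bin. A short sketch of the size arithmetic under assumed inputs (the numbers are purely illustrative):

#include <cstddef>
#include <cstdint>

// Sketch only: the ProcessAggregation buffer size for an assumed
// 256 total bins (cuts_.back()) and 4 tree nodes (nodes.size()).
std::size_t AggregationBufferSizeSketch() {
  const int64_t prefix_len = 24;               // DAM prefix
  const int total_bin_size = 256;              // assumed cuts_.back()
  const int histo_size = total_bin_size * 2;   // interleaved g and h per bin
  const std::size_t num_nodes = 4;             // assumed nodes.size()
  return prefix_len + 8 * histo_size * num_nodes;  // 24 + 8*512*4 = 16408 bytes
}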

std::vector<double> MockProcessor::HandleAggregation(void *buffer, std::size_t buf_size) {
  std::vector<double> result = std::vector<double>();

  int8_t* ptr = static_cast<int8_t *>(buffer);
  auto rest_size = buf_size;

  while (rest_size > kPrefixLen) {
    if (!ValidDam(ptr, rest_size)) {
      break;
    }
    int64_t *size_ptr = reinterpret_cast<int64_t *>(ptr + 8);
    double *array_start = reinterpret_cast<double *>(ptr + kPrefixLen);
    auto array_size = (*size_ptr - kPrefixLen)/8;
    result.insert(result.end(), array_start, array_start + array_size);
    rest_size -= *size_ptr;
    ptr = ptr + *size_ptr;
  }

  return result;
}

void* MockProcessor::ProcessHistograms(std::size_t *size, const std::vector<double>& histograms) {

Reviewer comment: If this is for test only, let's move it out into the test module.

Reply: Makes sense, the mock serves a testing purpose. @nvidianz what do you think?

  *size = kPrefixLen + histograms.size()*10*8;  // Assume encrypted size is 10x

  int64_t buf_size = *size;
  // This memory needs to be freed
  char *buf = static_cast<char *>(malloc(buf_size));
  memcpy(buf, kSignature, strlen(kSignature));
  memcpy(buf + 8, &buf_size, 8);
  memcpy(buf + 16, &kDataTypeAggregatedHisto, 8);

  // Simulate encryption by duplicating value 10 times
  int index = kPrefixLen;
  for (auto value : histograms) {
    for (std::size_t i = 0; i < 10; i++) {
      memcpy(buf+index, &value, 8);
      index += 8;
    }
  }

  return buf;
}

std::vector<double> MockProcessor::HandleHistograms(void *buffer, std::size_t buf_size) {
  std::vector<double> result = std::vector<double>();

  int8_t* ptr = static_cast<int8_t *>(buffer);
  auto rest_size = buf_size;

  while (rest_size > kPrefixLen) {
    if (!ValidDam(ptr, rest_size)) {
      break;
    }
    int64_t *size_ptr = reinterpret_cast<int64_t *>(ptr + 8);
    double *array_start = reinterpret_cast<double *>(ptr + kPrefixLen);
    auto array_size = (*size_ptr - kPrefixLen)/8;
    auto empty = result.empty();
    if (!empty) {
      if (result.size() != array_size / 10) {
        std::cout << "Histogram size doesn't match " << result.size()
                  << " != " << array_size << std::endl;
        return result;
      }
    }

    for (std::size_t i = 0; i < array_size/10; i++) {
      auto value = array_start[i*10];
      if (empty) {
        result.push_back(value);
      } else {
        result[i] += value;
      }
    }

    rest_size -= *size_ptr;
    ptr = ptr + *size_ptr;
  }

  return result;
}
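In the horizontal scheme, HandleHistograms walks a buffer that may contain several DAM frames (one per contributing party), decodes each mock-encrypted histogram, and sums them element-wise. A standalone sketch of that reduction, independent of the DAM framing:

#include <vector>

// Sketch only: element-wise sum of per-party histograms,
// mirroring the accumulation loop in HandleHistograms.
std::vector<double> SumHistogramsSketch(const std::vector<std::vector<double>>& per_party) {
  std::vector<double> result;
  for (const auto& histo : per_party) {
    if (result.empty()) {
      result = histo;                             // first party initializes the result
    } else {
      for (std::size_t i = 0; i < histo.size() && i < result.size(); ++i) {
        result[i] += histo[i];                    // later parties are added element-wise
      }
    }
  }
  return result;
}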
Reviewer comment: May I ask why the vertical federated learning section is being modified for horizontal learning?

Reply: This one adds additional horizontal functions to the vertical_P2 PR, so it in fact includes everything that PR has. Since that PR has not been merged, this one shows all the vertical modifications as well: "Modifications added beyond #10124".