Refactor to simplify input/output descriptors and decorators #6124

Draft · wants to merge 1 commit into base: branch-24.12
2 changes: 2 additions & 0 deletions python/cuml/cuml/__init__.py
@@ -17,6 +17,8 @@
from cuml.internals.base import Base, UniversalBase
from cuml.internals.available_devices import is_cuda_available

from cuml.sample.estimator import Estimator

# GPU only packages

if is_cuda_available():
11 changes: 11 additions & 0 deletions python/cuml/cuml/internals/array.py
@@ -454,6 +454,15 @@ def strides(self):
    def shape(self):
        return self._array_interface["shape"]

    @property
    def n_cols(self):
        if len(self.shape) == 1:
            return 1
        elif len(self.shape) == 2:
            return self.shape[1]
        else:
            raise ValueError("n_cols is only defined for 1D and 2D arrays")

    @property
    def ndim(self):
        return len(self._array_interface["shape"])
@@ -943,6 +952,8 @@ def from_input(
        order="F",
        deepcopy=False,
        check_dtype=False,
        convert_dtype=False,
        target_dtype=None,
        convert_to_dtype=False,
        check_mem_type=False,
        convert_to_mem_type=None,
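For context, a quick sketch of what the new n_cols property returns (a hypothetical snippet, assuming CumlArray.from_input accepts a bare NumPy array with default arguments):

import numpy as np
from cuml.internals.array import CumlArray

a = CumlArray.from_input(np.zeros(5))       # 1D input
b = CumlArray.from_input(np.zeros((5, 3)))  # 2D input
print(a.n_cols)  # 1: a 1D array counts as a single column
print(b.n_cols)  # 3: the second dimension of a 2D array
# A 3D input would raise ValueError from n_cols.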
32 changes: 32 additions & 0 deletions python/cuml/cuml/internals/base.pyx
@@ -39,6 +39,7 @@ import cuml.common
import cuml.internals.logger as logger
import cuml.internals
import cuml.internals.input_utils
from cuml.internals.global_settings import GlobalSettings
from cuml.internals.available_devices import is_cuda_available
from cuml.internals.device_type import DeviceType
from cuml.internals.input_utils import (
@@ -318,6 +319,12 @@ class Base(TagsMixin,
    def __setstate__(self, d):
        self.__dict__.update(d)

    def __getattribute__(self, name):
        # If the attribute is backed by a DynamicDescriptor and fit has
        # not been called yet, raise a clear "not fitted" error.
        if (isinstance(getattr(type(self), name, None), DynamicDescriptor)
                and not self._is_fit):
            raise AttributeError(f"'{name}' is not set until fit is called.")
        return object.__getattribute__(self, name)

    def __getattr__(self, attr):
        """
        Redirects to `solver_model` if the attribute exists.
@@ -723,3 +730,28 @@ class UniversalBase(Base):

        # return function result
        return res



class DynamicDescriptor:
    def __init__(self, attribute_name):
        # The raw value is stored on the owning instance, prefixed with
        # an underscore (e.g. coef_ -> _coef_).
        self.attribute_name = f"_{attribute_name}"

    def __get__(self, obj, objtype=None):
        if obj is None:
            return self

        ary = getattr(obj, self.attribute_name, None)
        if ary is None:
            return None

        if GlobalSettings().is_internal:
            # Internal calls operate on the raw CumlArray.
            return ary
        else:
            # TODO: also take GlobalSettings().output_type into account.
            return ary.to_output(obj._input_type)

    def __set__(self, obj, value):
        setattr(obj, self.attribute_name, value)
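A toy sketch of the descriptor protocol used above (hypothetical Toy class; because it does not subclass Base, the _is_fit guard in __getattribute__ does not apply here):

import numpy as np
from cuml.internals.array import CumlArray
from cuml.internals.base import DynamicDescriptor

class Toy:
    # Reads and writes go through the descriptor, which stores the
    # raw value on the instance as `_coef_`.
    coef_ = DynamicDescriptor("coef_")
    _input_type = "numpy"

t = Toy()
t.coef_ = CumlArray.from_input(np.zeros(3))
print(type(t._coef_))  # CumlArray: the raw stored value
print(type(t.coef_))   # numpy.ndarray: converted on external access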
13 changes: 12 additions & 1 deletion python/cuml/cuml/internals/global_settings.py
@@ -1,5 +1,5 @@
#
-# Copyright (c) 2021-2023, NVIDIA CORPORATION.
+# Copyright (c) 2021-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -44,6 +44,7 @@ def __init__(self):
                "_device_type": default_device_type,
                "_memory_type": default_memory_type,
                "root_cm": None,
                "internal_counter": 0,
            }
        else:
            self.shared_state = {"_output_type": None, "root_cm": None}
@@ -126,3 +127,13 @@ def output_type(self, value):

    @property
    def xpy(self):
        return self.memory_type.xpy

    def increase_arc(self):
        self.internal_counter += 1

    def decrease_arc(self):
        self.internal_counter -= 1

    @property
    def is_internal(self):
        return self.internal_counter > 0
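The counter works as a reentrancy guard: every API entry point increments it, nested calls increment it further, and is_internal stays true until the outermost call decrements it back to zero. A hypothetical context-manager wrapper (not part of this PR) shows the intended pairing:

from contextlib import contextmanager
from cuml.internals.global_settings import GlobalSettings

@contextmanager
def internal_api():
    # Hypothetical helper: keeps is_internal True for the duration
    # of the block, even if an exception is raised.
    GlobalSettings().increase_arc()
    try:
        yield
    finally:
        GlobalSettings().decrease_arc()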
18 changes: 18 additions & 0 deletions python/cuml/cuml/sample/__init__.py
@@ -0,0 +1,18 @@
#
# Copyright (c) 2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#


from cuml.sample.estimator import Estimator
137 changes: 137 additions & 0 deletions python/cuml/cuml/sample/estimator.py
@@ -0,0 +1,137 @@
#
# Copyright (c) 2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import numpy as np

from cuml.internals.array import CumlArray
from cuml.internals.global_settings import GlobalSettings
from cuml.internals.mixins import FMajorInputTagMixin
from cuml.internals.base import UniversalBase, DynamicDescriptor


def io_fit(func):
    def wrapper(self, *args, **kwargs):
        # increase global counter to signal that we are in an internal call
        GlobalSettings().increase_arc()

        # check the input type of the first argument and fit the estimator
        self._set_output_type(args[0])
        result = func(self, *args, **kwargs)
        self._is_fit = True

        # decrease the counter after exiting the function
        GlobalSettings().decrease_arc()

        return result

    return wrapper


def io_predict_transform_array(func):
    def wrapper(self, *args, **kwargs):
        # increase global counter to signal that we are in an internal call
        GlobalSettings().increase_arc()

        result = func(self, *args, **kwargs)

        # decrease the counter after exiting the function
        GlobalSettings().decrease_arc()

        if GlobalSettings().is_internal:
            # still inside another API call: hand back the raw CumlArray
            return result
        else:
            # need to add logic to check GlobalSettings and mirror output_type
            return result.to_output(self._input_type)

    return wrapper
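The reason the is_internal check happens after decrease_arc: a user-facing call enters with the counter at zero, so after the paired decrement the result is converted to mirror the input type, while a call made from inside another decorated method still sees a positive counter and receives the raw CumlArray. A sketch of that nesting (hypothetical score_like method on the Estimator defined below, for illustration only):

class ScoringEstimator(Estimator):
    @io_predict_transform_array
    def score_like(self, X):
        # Because the outer wrapper already incremented the counter,
        # this nested predict() returns a raw CumlArray; conversion
        # happens once, when score_like itself returns to user code.
        return self.predict(X)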


class Estimator(UniversalBase,
                FMajorInputTagMixin):
    coef_ = DynamicDescriptor("coef_")
    intercept_ = DynamicDescriptor("intercept_")

    def __init__(self,
                 *,
                 awesome=True,
                 output_type=None,
                 handle=None,
                 verbose=None):

        super().__init__(handle=handle,
                         verbose=verbose,
                         output_type=output_type)

        self.awesome = awesome
        self._is_fit = False  # this should eventually move into Base

    @io_fit
    def fit(self,
            X,
            y,
            convert_dtype=True):

        input_X = CumlArray.from_input(
            X,
            order="C",
            convert_dtype=convert_dtype,
            target_dtype=np.float32,
            check_dtype=[np.float32, np.float64],
        )
        self.n_features_in_ = input_X.n_cols
        self.dtype = input_X.dtype

        input_y = CumlArray.from_input(
            y,
            order="C",
            convert_dtype=convert_dtype,
            target_dtype=self.dtype,
            check_dtype=[np.float32, np.float64],
        )

        self.coef_ = CumlArray.zeros(self.n_features_in_,
                                     dtype=self.dtype)

        self.intercept_ = CumlArray.zeros(self.n_features_in_,
                                          dtype=self.dtype)

        # do awesome C++ fitting here :)
Member: One weird thing while playing with this a bit: when I add a print(f"{self.coef_=}") here I get the following:

Traceback (most recent call last):
  File "/home/coder/cuml/../ff.py", line 9, in <module>
    e.fit(X, y)
  File "/home/coder/.conda/envs/rapids/lib/python3.12/site-packages/cuml/internals/api_decorators.py", line 190, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/home/coder/.conda/envs/rapids/lib/python3.12/site-packages/cuml/sample/estimator.py", line 32, in wrapper
    result = func(self, *args, **kwargs)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/coder/.conda/envs/rapids/lib/python3.12/site-packages/cuml/sample/estimator.py", line 115, in fit
    print(f"{self.coef_=}")
             ^^^^^^^^^^
  File "base.pyx", line 337, in cuml.internals.base.Base.__getattr__
AttributeError: coef_. Did you mean: '_coef_'?

I have stared at this for quite a while but can't work out what is going on??

Member: Realised about 2 min after leaving the office: it is because _is_fit doesn't get set until fit returns. Maybe something to improve, as it makes for a tedious-to-debug thing :D - I'll ponder a suggestion.

Member Author: That is the same behavior as scikit-learn, no?

@betatim (Member, Oct 29, 2024): I don't think so. I was trying to access the coef_ attribute within fit.

In scikit-learn these are normal attributes, so once they are set you can use them. Right now we define __getattribute__, which uses _is_fit. I think it is a bit weird to have code like this fail, mostly because it makes you question your sanity and because the exception doesn't contain a clue (we get to see the AttributeError from __getattr__, not __getattribute__ :( ):

self.foo_ = 42
print(self.foo_) # Error, `foo_` doesn't exist!

Maybe we can get around the need to check _is_fit in __getattribute__ by recording inside the DynamicDescriptor whether it has been set or not:

class DynamicDescriptor:
  def __init__(self, attribute_name):
    self.set = False
    self.attribute_name = f"{attribute_name}"

  def __get__(self, obj, objtype=None):
    if obj is None:
      return self
    if not self.set:
      raise AttributeError(f"{obj.__class__.__name__} object has no attribute {self.attribute_name}")
    else:
      if GlobalSettings().is_internal:
        return self.raw
      else:
        return self.raw.to_output(obj._input_type)

  def __set__(self, obj, value):
    self.set = True
    # we can even store the value inside the descriptor?!
    self.raw = value

This might need a bit of tweaking to make the message in the exception look right ("'Estimator' object has no attribute 'foo_'").


        return self

    @io_predict_transform_array
    def predict(self,
                X,
                convert_dtype=True):
        input_X = CumlArray.from_input(
            X,
            order="C",
            convert_dtype=convert_dtype,
            target_dtype=self.dtype,
            check_dtype=[np.float32, np.float64],
        )
        n_rows = input_X.shape[0]

        preds = CumlArray.zeros(n_rows,
                                dtype=self.dtype,
                                index=input_X.index)

        # more awesome C++

        return preds
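End-to-end, the sample estimator is meant to be used like this (a minimal sketch; the output types assume NumPy input, which the decorators mirror back):

import numpy as np
from cuml.sample.estimator import Estimator

X = np.random.rand(10, 3).astype(np.float32)
y = np.random.rand(10).astype(np.float32)

est = Estimator()
est.fit(X, y)
print(type(est.coef_))       # numpy.ndarray, mirroring the input type
print(type(est.predict(X)))  # numpy.ndarray as well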