diff --git a/python/cuml/cuml/__init__.py b/python/cuml/cuml/__init__.py
index 62ab93c1b4..d0dc88b353 100644
--- a/python/cuml/cuml/__init__.py
+++ b/python/cuml/cuml/__init__.py
@@ -17,6 +17,8 @@
 from cuml.internals.base import Base, UniversalBase
 from cuml.internals.available_devices import is_cuda_available
 
+from cuml.sample.estimator import Estimator
+
 # GPU only packages
 if is_cuda_available():
 
diff --git a/python/cuml/cuml/internals/array.py b/python/cuml/cuml/internals/array.py
index c30d609563..4205698252 100644
--- a/python/cuml/cuml/internals/array.py
+++ b/python/cuml/cuml/internals/array.py
@@ -454,6 +454,15 @@ def strides(self):
     def shape(self):
         return self._array_interface["shape"]
 
+    @property
+    def n_cols(self):
+        # Number of columns: 1-D arrays count as a single column.
+        if len(self.shape) == 1:
+            return 1
+        elif len(self.shape) == 2:
+            return self.shape[1]
+        else:
+            raise ValueError("Multidimensional tensor")
+
     @property
     def ndim(self):
         return len(self._array_interface["shape"])
@@ -943,6 +952,8 @@ def from_input(
         order="F",
         deepcopy=False,
         check_dtype=False,
+        convert_dtype=False,
+        target_dtype=None,
         convert_to_dtype=False,
         check_mem_type=False,
         convert_to_mem_type=None,
diff --git a/python/cuml/cuml/internals/base.pyx b/python/cuml/cuml/internals/base.pyx
index c00ed17f98..dd3cbece99 100644
--- a/python/cuml/cuml/internals/base.pyx
+++ b/python/cuml/cuml/internals/base.pyx
@@ -39,6 +39,7 @@ import cuml.common
 import cuml.internals.logger as logger
 import cuml.internals
 import cuml.internals.input_utils
+from cuml.internals.global_settings import GlobalSettings
 from cuml.internals.available_devices import is_cuda_available
 from cuml.internals.device_type import DeviceType
 from cuml.internals.input_utils import (
@@ -318,6 +319,12 @@ class Base(TagsMixin,
     def __setstate__(self, d):
         self.__dict__.update(d)
 
+    def __getattribute__(self, name):
+        # Check if the attribute has a DynamicDescriptor and fit has not been called
+        if isinstance(getattr(type(self), name, None), DynamicDescriptor) and not self._is_fit:
+            raise AttributeError(f"'{name}' is not set until fit is called.")
+        return object.__getattribute__(self, name)
+
     def __getattr__(self, attr):
         """
         Redirects to `solver_model` if the attribute exists.
@@ -723,3 +730,23 @@ class UniversalBase(Base):
 
         # return function result
         return res
+
+
+class DynamicDescriptor:
+    def __init__(self, attribute_name):
+        self.attribute_name = f"_{attribute_name}"
+
+    def __get__(self, obj, objtype=None):
+        if obj is None:
+            return self
+        ary = getattr(obj, self.attribute_name, None)
+
+        if ary is None:
+            return ary
+        if GlobalSettings().is_internal:
+            return ary
+        # need to add logic to check globalsettings output_type
+        return ary.to_output(obj._input_type)
+
+    def __set__(self, obj, value):
+        setattr(obj, self.attribute_name, value)
diff --git a/python/cuml/cuml/internals/global_settings.py b/python/cuml/cuml/internals/global_settings.py
index ea899d91b1..dba203ff44 100644
--- a/python/cuml/cuml/internals/global_settings.py
+++ b/python/cuml/cuml/internals/global_settings.py
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2021-2023, NVIDIA CORPORATION.
+# Copyright (c) 2021-2024, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -44,6 +44,7 @@ def __init__(self):
                 "_device_type": default_device_type,
                 "_memory_type": default_memory_type,
                 "root_cm": None,
+                "internal_counter": 0,
             }
         else:
             self.shared_state = {"_output_type": None, "root_cm": None}
@@ -126,3 +127,13 @@
     @property
     def xpy(self):
         return self.memory_type.xpy
+
+    def increase_arc(self):
+        self.internal_counter += 1
+
+    def decrease_arc(self):
+        self.internal_counter -= 1
+
+    @property
+    def is_internal(self):
+        return self.internal_counter > 0
diff --git a/python/cuml/cuml/sample/__init__.py b/python/cuml/cuml/sample/__init__.py
new file mode 100644
index 0000000000..0aacb24b76
--- /dev/null
+++ b/python/cuml/cuml/sample/__init__.py
@@ -0,0 +1,18 @@
+#
+# Copyright (c) 2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+from cuml.sample.estimator import Estimator
diff --git a/python/cuml/cuml/sample/estimator.py b/python/cuml/cuml/sample/estimator.py
new file mode 100644
index 0000000000..2d05875f1a
--- /dev/null
+++ b/python/cuml/cuml/sample/estimator.py
@@ -0,0 +1,135 @@
+#
+# Copyright (c) 2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import numpy as np
+
+from cuml.internals.array import CumlArray
+from cuml.internals.global_settings import GlobalSettings
+from cuml.internals.mixins import FMajorInputTagMixin
+from cuml.internals.base import UniversalBase, DynamicDescriptor
+
+
+def io_fit(func):
+    def wrapper(self, *args, **kwargs):
+        # increase global counter to detect we are internal
+        GlobalSettings().increase_arc()
+        try:
+            # check input type of first arg and fit estimator
+            self._set_output_type(args[0])
+            result = func(self, *args, **kwargs)
+            self._is_fit = True
+        finally:
+            # decrease counter even if fit raises
+            GlobalSettings().decrease_arc()
+
+        return result
+
+    return wrapper
+
+
+def io_predict_transform_array(func):
+    def wrapper(self, *args, **kwargs):
+        # increase global counter to detect we are internal
+        GlobalSettings().increase_arc()
+        try:
+            result = func(self, *args, **kwargs)
+        finally:
+            # decrease counter even if predict/transform raises
+            GlobalSettings().decrease_arc()
+
+        if GlobalSettings().is_internal:
+            return result
+
+        else:
+            # need to add logic to check globalsettings and mirror output_type
+            return result.to_output(self._input_type)
+
+    return wrapper
+
+
+class Estimator(UniversalBase,
+                FMajorInputTagMixin):
+    coef_ = DynamicDescriptor("coef_")
+    intercept_ = DynamicDescriptor("intercept_")
+
+    def __init__(self,
+                 *,
+                 awesome=True,
+                 output_type=None,
+                 handle=None,
+                 verbose=None):
+
+        super().__init__(handle=handle,
+                         verbose=verbose,
+                         output_type=output_type)
+
+        self.awesome = awesome
+        self._is_fit = False  # this goes in base
+
+    @io_fit
+    def fit(self,
+            X,
+            y,
+            convert_dtype=True):
+
+        input_X = CumlArray.from_input(
+            X,
+            order="C",
+            convert_dtype=convert_dtype,
+            target_dtype=np.float32,
+            check_dtype=[np.float32, np.float64],
+        )
+        self.n_features_in_ = input_X.n_cols
+        self.dtype = input_X.dtype
+
+        input_y = CumlArray.from_input(
+            y,
+            order="C",
+            convert_dtype=convert_dtype,
+            target_dtype=self.dtype,
+            check_dtype=[np.float32, np.float64],
+        )
+
+        self.coef_ = CumlArray.zeros(self.n_features_in_,
+                                     dtype=self.dtype)
+
+        self.intercept_ = CumlArray.zeros(self.n_features_in_,
+                                          dtype=self.dtype)
+
+        # do awesome C++ fitting here :)
+
+        return self
+
+    @io_predict_transform_array
+    def predict(self,
+                X,
+                convert_dtype=True):
+        input_X = CumlArray.from_input(
+            X,
+            order="C",
+            convert_dtype=convert_dtype,
+            target_dtype=self.dtype,
+            check_dtype=[np.float32, np.float64],
+        )
+        n_rows = input_X.shape[0]
+
+        preds = CumlArray.zeros(n_rows,
+                                dtype=self.dtype,
+                                index=input_X.index)
+
+        # more awesome C++
+
+        return preds