Refactor to simplify input/output descriptors and decorators #6124

Draft · wants to merge 1 commit into base: branch-24.12
2 changes: 2 additions & 0 deletions python/cuml/cuml/__init__.py
@@ -17,6 +17,8 @@
from cuml.internals.base import Base, UniversalBase
from cuml.internals.available_devices import is_cuda_available

from cuml.sample.estimator import Estimator

# GPU only packages

if is_cuda_available():
11 changes: 11 additions & 0 deletions python/cuml/cuml/internals/array.py
@@ -454,6 +454,15 @@ def strides(self):
    def shape(self):
        return self._array_interface["shape"]

    @property
    def n_cols(self):
        if len(self.shape) == 1:
            return 1
        elif len(self.shape) == 2:
            return self.shape[1]
        else:
            raise ValueError("n_cols is only defined for 1D and 2D arrays")

    @property
    def ndim(self):
        return len(self._array_interface["shape"])
@@ -943,6 +952,8 @@ def from_input(
        order="F",
        deepcopy=False,
        check_dtype=False,
        convert_dtype=False,
        target_dtype=None,
        convert_to_dtype=False,
        check_mem_type=False,
        convert_to_mem_type=None,
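For context, a quick sketch of what the new n_cols property returns (a hypothetical snippet, assuming CumlArray.from_input accepts a bare NumPy array with default arguments):

import numpy as np
from cuml.internals.array import CumlArray

a = CumlArray.from_input(np.zeros(5))       # 1D input
b = CumlArray.from_input(np.zeros((5, 3)))  # 2D input
print(a.n_cols)  # 1: a 1D array counts as a single column
print(b.n_cols)  # 3: the second dimension of a 2D array
# A 3D input would raise ValueError from n_cols.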
32 changes: 32 additions & 0 deletions python/cuml/cuml/internals/base.pyx
@@ -39,6 +39,7 @@ import cuml.common
import cuml.internals.logger as logger
import cuml.internals
import cuml.internals.input_utils
from cuml.internals.global_settings import GlobalSettings
from cuml.internals.available_devices import is_cuda_available
from cuml.internals.device_type import DeviceType
from cuml.internals.input_utils import (
@@ -318,6 +319,12 @@ class Base(TagsMixin,
    def __setstate__(self, d):
        self.__dict__.update(d)

    def __getattribute__(self, name):
        # If the attribute is backed by a DynamicDescriptor and fit has
        # not been called yet, raise a clear "not fitted" error.
        if (isinstance(getattr(type(self), name, None), DynamicDescriptor)
                and not self._is_fit):
            raise AttributeError(f"'{name}' is not set until fit is called.")
        return object.__getattribute__(self, name)

    def __getattr__(self, attr):
        """
        Redirects to `solver_model` if the attribute exists.
@@ -723,3 +730,28 @@ class UniversalBase(Base):

        # return function result
        return res



class DynamicDescriptor:
    def __init__(self, attribute_name):
        # The raw value is stored on the owning instance, prefixed with
        # an underscore (e.g. coef_ -> _coef_).
        self.attribute_name = f"_{attribute_name}"

    def __get__(self, obj, objtype=None):
        if obj is None:
            return self

        ary = getattr(obj, self.attribute_name, None)
        if ary is None:
            return None

        if GlobalSettings().is_internal:
            # Internal calls operate on the raw CumlArray.
            return ary
        else:
            # TODO: also take GlobalSettings().output_type into account.
            return ary.to_output(obj._input_type)

    def __set__(self, obj, value):
        setattr(obj, self.attribute_name, value)
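A toy sketch of the descriptor protocol used above (hypothetical Toy class; because it does not subclass Base, the _is_fit guard in __getattribute__ does not apply here):

import numpy as np
from cuml.internals.array import CumlArray
from cuml.internals.base import DynamicDescriptor

class Toy:
    # Reads and writes go through the descriptor, which stores the
    # raw value on the instance as `_coef_`.
    coef_ = DynamicDescriptor("coef_")
    _input_type = "numpy"

t = Toy()
t.coef_ = CumlArray.from_input(np.zeros(3))
print(type(t._coef_))  # CumlArray: the raw stored value
print(type(t.coef_))   # numpy.ndarray: converted on external access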
13 changes: 12 additions & 1 deletion python/cuml/cuml/internals/global_settings.py
@@ -1,5 +1,5 @@
#
-# Copyright (c) 2021-2023, NVIDIA CORPORATION.
+# Copyright (c) 2021-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -44,6 +44,7 @@ def __init__(self):
                "_device_type": default_device_type,
                "_memory_type": default_memory_type,
                "root_cm": None,
                "internal_counter": 0,
            }
        else:
            self.shared_state = {"_output_type": None, "root_cm": None}
@@ -126,3 +127,13 @@ def output_type(self, value):

    @property
    def xpy(self):
        return self.memory_type.xpy

    def increase_arc(self):
        self.internal_counter += 1

    def decrease_arc(self):
        self.internal_counter -= 1

    @property
    def is_internal(self):
        return self.internal_counter > 0
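The counter works as a reentrancy guard: every API entry point increments it, nested calls increment it further, and is_internal stays true until the outermost call decrements it back to zero. A hypothetical context-manager wrapper (not part of this PR) shows the intended pairing:

from contextlib import contextmanager
from cuml.internals.global_settings import GlobalSettings

@contextmanager
def internal_api():
    # Hypothetical helper: keeps is_internal True for the duration
    # of the block, even if an exception is raised.
    GlobalSettings().increase_arc()
    try:
        yield
    finally:
        GlobalSettings().decrease_arc()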
18 changes: 18 additions & 0 deletions python/cuml/cuml/sample/__init__.py
@@ -0,0 +1,18 @@
#
# Copyright (c) 2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#


from cuml.sample.estimator import Estimator
137 changes: 137 additions & 0 deletions python/cuml/cuml/sample/estimator.py
@@ -0,0 +1,137 @@
#
# Copyright (c) 2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import numpy as np

from cuml.internals.array import CumlArray
from cuml.internals.global_settings import GlobalSettings
from cuml.internals.mixins import FMajorInputTagMixin
from cuml.internals.base import UniversalBase, DynamicDescriptor


def io_fit(func):
    def wrapper(self, *args, **kwargs):
        # increase global counter to signal that we are in an internal call
        GlobalSettings().increase_arc()

        # check the input type of the first argument and fit the estimator
        self._set_output_type(args[0])
        result = func(self, *args, **kwargs)
        self._is_fit = True

        # decrease the counter after exiting the function
        GlobalSettings().decrease_arc()

        return result

    return wrapper


def io_predict_transform_array(func):
    def wrapper(self, *args, **kwargs):
        # increase global counter to signal that we are in an internal call
        GlobalSettings().increase_arc()

        result = func(self, *args, **kwargs)

        # decrease the counter after exiting the function
        GlobalSettings().decrease_arc()

        if GlobalSettings().is_internal:
            # still inside another API call: hand back the raw CumlArray
            return result
        else:
            # need to add logic to check GlobalSettings and mirror output_type
            return result.to_output(self._input_type)

    return wrapper
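The reason the is_internal check happens after decrease_arc: a user-facing call enters with the counter at zero, so after the paired decrement the result is converted to mirror the input type, while a call made from inside another decorated method still sees a positive counter and receives the raw CumlArray. A sketch of that nesting (hypothetical score_like method on the Estimator defined below, for illustration only):

class ScoringEstimator(Estimator):
    @io_predict_transform_array
    def score_like(self, X):
        # Because the outer wrapper already incremented the counter,
        # this nested predict() returns a raw CumlArray; conversion
        # happens once, when score_like itself returns to user code.
        return self.predict(X)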


class Estimator(UniversalBase,
                FMajorInputTagMixin):
    coef_ = DynamicDescriptor("coef_")
    intercept_ = DynamicDescriptor("intercept_")

    def __init__(self,
                 *,
                 awesome=True,
                 output_type=None,
                 handle=None,
                 verbose=None):

        super().__init__(handle=handle,
                         verbose=verbose,
                         output_type=output_type)

        self.awesome = awesome
        self._is_fit = False  # this should eventually move into Base

    @io_fit
    def fit(self,
            X,
            y,
            convert_dtype=True):

        input_X = CumlArray.from_input(
            X,
            order="C",
            convert_dtype=convert_dtype,
            target_dtype=np.float32,
            check_dtype=[np.float32, np.float64],
        )
        self.n_features_in_ = input_X.n_cols
        self.dtype = input_X.dtype

        input_y = CumlArray.from_input(
            y,
            order="C",
            convert_dtype=convert_dtype,
            target_dtype=self.dtype,
            check_dtype=[np.float32, np.float64],
        )

        self.coef_ = CumlArray.zeros(self.n_features_in_,
                                     dtype=self.dtype)

        self.intercept_ = CumlArray.zeros(self.n_features_in_,
                                          dtype=self.dtype)

        # do awesome C++ fitting here :)
Member: One weird thing while playing with this a bit: when I add a print(f"{self.coef_=}") here I get the following:

Traceback (most recent call last):
  File "/home/coder/cuml/../ff.py", line 9, in <module>
    e.fit(X, y)
  File "/home/coder/.conda/envs/rapids/lib/python3.12/site-packages/cuml/internals/api_decorators.py", line 190, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/home/coder/.conda/envs/rapids/lib/python3.12/site-packages/cuml/sample/estimator.py", line 32, in wrapper
    result = func(self, *args, **kwargs)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/coder/.conda/envs/rapids/lib/python3.12/site-packages/cuml/sample/estimator.py", line 115, in fit
    print(f"{self.coef_=}")
             ^^^^^^^^^^
  File "base.pyx", line 337, in cuml.internals.base.Base.__getattr__
AttributeError: coef_. Did you mean: '_coef_'?

I have stared at this for quite a while but can't work out what is going on??

Member: Realised about 2 min after leaving the office: it is because _is_fit doesn't get set until fit returns. Maybe something to improve, as it makes for a tedious-to-debug thing :D - I'll ponder a suggestion.

Member Author: That is the same behavior as scikit-learn, no?

@betatim (Member, Oct 29, 2024): I don't think so. I was trying to access the coef_ attribute within fit.

In scikit-learn these are normal attributes, so once they are set you can use them. Right now we define __getattribute__, which uses _is_fit. I think it is a bit weird to have code like this fail, mostly because it makes you question your sanity and because the exception doesn't contain a clue (we get to see the AttributeError from __getattr__, not __getattribute__ :( ):

self.foo_ = 42
print(self.foo_) # Error, `foo_` doesn't exist!

Maybe we can get around the need to check _is_fit in __getattribute__ by recording inside the DynamicDescriptor whether it has been set or not:

class DynamicDescriptor:
  def __init__(self, attribute_name):
    self.set = False
    self.attribute_name = f"{attribute_name}"

  def __get__(self, obj, objtype=None):
    if obj is None:
      return self
    if not self.set:
      raise AttributeError(f"{obj.__class__.__name__} object has no attribute {self.attribute_name}")
    else:
      if GlobalSettings().is_internal:
        return self.raw
      else:
        return self.raw.to_output(obj._input_type)

  def __set__(self, obj, value):
    self.set = True
    # we can even store the value inside the descriptor?!
    self.raw = value

This might need a bit of tweaking to make the message in the exception look right ("'Estimator' object has no attribute 'foo_'").


        return self

    @io_predict_transform_array
    def predict(self,
                X,
                convert_dtype=True):
        input_X = CumlArray.from_input(
            X,
            order="C",
            convert_dtype=convert_dtype,
            target_dtype=self.dtype,
            check_dtype=[np.float32, np.float64],
        )
        n_rows = input_X.shape[0]

        preds = CumlArray.zeros(n_rows,
                                dtype=self.dtype,
                                index=input_X.index)

        # more awesome C++

        return preds
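End-to-end, the sample estimator is meant to be used like this (a minimal sketch; the output types assume NumPy input, which the decorators mirror back):

import numpy as np
from cuml.sample.estimator import Estimator

X = np.random.rand(10, 3).astype(np.float32)
y = np.random.rand(10).astype(np.float32)

est = Estimator()
est.fit(X, y)
print(type(est.coef_))       # numpy.ndarray, mirroring the input type
print(type(est.predict(X)))  # numpy.ndarray as well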